Skip to content
Navigation Menu
Toggle navigation
Sign in
In this repository
All GitHub Enterprise
↵
Jump to
↵
No suggested jump to results
In this repository
All GitHub Enterprise
↵
Jump to
↵
In this user
All GitHub Enterprise
↵
Jump to
↵
In this repository
All GitHub Enterprise
↵
Jump to
↵
Sign in
Resetting focus
You signed in with another tab or window.
Reload
to refresh your session.
You signed out in another tab or window.
Reload
to refresh your session.
You switched accounts on another tab or window.
Reload
to refresh your session.
Dismiss alert
{{ message }}
donald
/
autopubs
Public
Notifications
You must be signed in to change notification settings
Fork
0
Star
0
Code
Issues
0
Pull requests
0
Actions
Projects
0
Security
Insights
Additional navigation options
Code
Issues
Pull requests
Actions
Projects
Security
Insights
Files
main
autopubs.py
Breadcrumbs
autopubs
/
autopubs.py
Blame
Blame
Latest commit
History
History
executable file
·
241 lines (197 loc) · 8.66 KB
Breadcrumbs
autopubs
/
autopubs.py
Top
File metadata and controls
Code
Blame
executable file
·
241 lines (197 loc) · 8.66 KB
Raw
#! /usr/bin/env python3
"""Copy recent publications from the MPG.PuRe REST API into the intranet's Django database.

The script searches https://pure.mpg.de/rest for released items of the
institute, condenses every usable record into a small dict (see
``entry_from_record``) and creates or updates one ``Publication`` row per DOI
(see ``update_django``).
"""

import os
from pprint import pprint  # kept for ad-hoc debugging, e.g. pprint(record)
import re
import sys


# https://pure.mpg.de/rest/swagger-ui.html
# https://colab.mpdl.mpg.de/mediawiki/PubMan_REST_API_Documentation
BASE_URL = 'https://pure.mpg.de/rest'

# Identifiers used in the search query below (values taken from the exported query).
OU_ID = 'ou_1433545'
CONTEXT_ID = 'ctx_1479061'

# Default lower bound for all date filters of the search.
DEFAULT_SINCE = '2025-03-01'

# Seconds to wait for the PuRe server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 60

# A record is selected when any one of these dates is on or after the cut-off.
DATE_FIELDS = (
    'metadata.datePublishedInPrint',
    'metadata.datePublishedOnline',
    'metadata.dateAccepted',
    'metadata.dateSubmitted',
    'metadata.dateModified',
    'metadata.dateCreated',
)

# Matches affiliation strings such as
# "Herwig Lab/Bioinformatics, Dept. of Computational Molecular Biology (Head: Martin "
# "Vingron), Max Planck Institute for Molecular Genetics, Max Planck Society"
# and captures everything before the first comma.
_INSTITUTE_UNIT_RE = re.compile(
    r"([^,]+),.*Max Planck Institute for Molecular Genetics", flags=re.I)


def parse_organization(name):
    """Return the unit name at the start of an institute affiliation string.

    The unit is the text before the first comma, cut at the first "/" if there
    is one ("Herwig Lab/Bioinformatics, ..." gives "Herwig Lab").

    Returns None when the text after the first comma does not mention
    "Max Planck Institute for Molecular Genetics".
    """
    match = _INSTITUTE_UNIT_RE.match(name)
    if not match:
        return None
    unit = match.group(1).strip()
    # Keep only the part before the first slash ("Lab/Subgroup" -> "Lab").
    return unit.split('/', 1)[0].strip()


OU_URL = {
    'Aktas Lab': 'https://www.molgen.mpg.de/aktas-lab',
    'Arndt Lab': 'https://www.molgen.mpg.de/arndt-lab',
    'Bulut-Karslioglu Lab': 'https://www.molgen.mpg.de/bulut-karslioglu-lab',
    'Herrmann Lab': 'https://www.molgen.mpg.de/herrmann-lab',
    'Herwig Lab': 'https://www.molgen.mpg.de/herwig-lab',
    'Ibrahim Lab': 'https://www.molgen.mpg.de/ibrahim-lab',
    'Kalscheuer Lab': 'https://www.molgen.mpg.de/kalscheuer-lab',
    'Kinkley Lab': 'https://www.molgen.mpg.de/kinkley-lab',
    'Kraushar Lab': 'https://www.molgen.mpg.de/kraushar-lab',
    'Kretzmer Lab': 'https://www.molgen.mpg.de/kretzmer-lab',
    'Mayer Lab': 'https://www.molgen.mpg.de/en/mayer-lab',
    'Meissner Lab': 'https://www.molgen.mpg.de/meissner-lab',
    'Metzger Lab': 'https://www.molgen.mpg.de/metzger-lab',
    'Müller Lab': 'https://www.molgen.mpg.de/mueller-lab',
    'Mundlos Lab': 'https://www.molgen.mpg.de/mundlos-lab',
    'Ralser Lab': 'https://www.molgen.mpg.de/ralser-lab',
    'Reinert Lab': 'https://www.molgen.mpg.de/reinert-lab',
    'Smith Lab': 'https://www.molgen.mpg.de/en/smith-lab',
    'Vallier Lab': 'https://www.molgen.mpg.de/vallier-lab',
    'Vingron Lab': 'https://www.molgen.mpg.de/en/vingron-lab',
    'Yaspo Lab': 'https://www.molgen.mpg.de/yaspo-lab',
    'Multi-Level Gene Control (Denes Hnisz)': 'https://www.molgen.mpg.de/hnisz-lab',
    'Mass Spectrometry': 'https://www.molgen.mpg.de/mass-spectrometry',
    'Microscopy and Cryo Electron Microscopy': 'https://www.molgen.mpg.de/microscopy',
    'Sequencing': 'https://www.molgen.mpg.de/sequencing',
}


def add_ou_href(name):
    """Return *name* wrapped in a link to its page, or the plain name if no URL is known.

    A unit without an entry in ``OU_URL`` is reported once on stdout; it is then
    stored in ``OU_URL`` with the value None so the warning is not repeated.
    """
    url = OU_URL.get(name)
    if url is not None:
        # The markup is kept exactly as before (unquoted href): update_django
        # compares this string with the stored value to detect changes.
        return f'<a href={url}>{name}</a>'
    if name not in OU_URL:
        print(f"WARNING: no link for ou '{name}'")
        OU_URL[name] = None
    return name


def _build_query(since):
    """Build the search query: released items of the OU and context, dated from *since* on."""
    # Created with https://pure.mpg.de/pubman/faces/AdvancedSearchPage.jsp
    # For example:
    #   Organization: "Max Planck Institute for Molecular Genetics, Max Planck Society"
    #   AND
    #   Context ("Kontext"): "Publications of the MPI for Molecular Genetics"
    #   AND
    #   Date from ("Datum Von"): 2025-01-01
    #
    # then: "Abfrage in REST-Schnittstelle übernehmen" (transfer the query to the REST interface)
    # then: sort by lastModificationDate descending, max records: 100, export format json
    # and download the curl command.
    date_clauses = [
        {"range": {field: {"gte": f"{since}||/d"}}}
        for field in DATE_FIELDS
    ]
    return {
        "bool": {"must": [
            {"term": {"publicState": {"value": "RELEASED"}}},
            {"term": {"versionState": {"value": "RELEASED"}}},
            {"bool": {"must": [
                {"bool": {"should": [
                    {"term": {"metadata.creators.person.organizations.identifierPath":
                              {"value": OU_ID}}},
                    {"term": {"metadata.creators.organization.identifierPath":
                              {"value": OU_ID}}},
                ]}},
                {"term": {"context.objectId": {"value": CONTEXT_ID}}},
                {"bool": {"should": date_clauses}},
            ]}},
        ]}
    }


def _collect_authors(creators):
    """Return (author markup list, set of unit names) for the person authors in *creators*."""
    authors = []
    ou_names = set()
    for creator in creators:
        # Check role and type before touching 'person': the search also matches
        # "metadata.creators.organization", so creators that are not persons can
        # occur and may not carry a 'person' object at all.
        if creator.get('role') != "AUTHOR" or creator.get('type') != "PERSON":
            continue
        person = creator.get('person')
        if not person:
            continue

        for organization in person.get('organizations') or []:
            ou_name = parse_organization(organization.get('name') or '')
            if ou_name is not None:
                ou_names.add(ou_name)

        name_parts = (person.get('familyName'), person.get('givenName'))
        name = ", ".join(part.strip() for part in name_parts if part and part.strip())
        if not name:
            continue

        orcid = person.get('orcid')  # e.g. 'https://orcid.org/0000-0001-7341-7091'
        if orcid is None:
            authors.append(name)
        else:
            authors.append(f'<b><a href="{orcid}">{name}</a></b>')
    return authors, ou_names


def _find_doi(identifiers):
    """Return the stripped DOI from *identifiers*, or None; the last DOI listed wins."""
    doi = None
    for identifier in identifiers:
        if identifier.get('type') == "DOI" and identifier.get('id') is not None:
            doi = identifier['id'].strip()  # e.g. '10.1093/nargab/lqae135 '
    return doi


def _describe_source(sources):
    """Return "<title>[ volume V][ issue I]" for the last titled source, or None."""
    text = None
    for source in sources:
        src_title = source.get('title')  # e.g. 'NAR: genomics and bioinformatics'
        if not src_title:
            continue
        text = src_title
        volume = source.get('volume')
        if volume is not None:
            text += f" volume {volume}"
        issue = source.get('issue')
        if issue is not None:
            text += f" issue {issue}"
    return text


def entry_from_record(record):
    """Condense one search result record into the dict consumed by ``update_django``.

    Returns None for records that cannot be listed: no print or online
    publication date, no DOI, or no titled source.

    The returned dict has the keys 'datePublished' (the smaller of the print and
    online date strings), 'authors' (markup joined with '; '), 'title', 'doi',
    'source' and 'ou_names' (a set of unit names).
    """
    metadata = record['data']['metadata']

    candidate_dates = (
        metadata.get('datePublishedInPrint'),
        metadata.get('datePublishedOnline'),
    )
    dates = [date for date in candidate_dates if date is not None]
    if not dates:
        return None

    doi = _find_doi(metadata.get('identifiers') or [])
    if not doi:
        # An empty DOI is useless as the lookup key in update_django.
        return None

    source_text = _describe_source(metadata.get('sources') or [])
    if source_text is None:
        return None

    authors, ou_names = _collect_authors(metadata.get('creators') or [])
    return {
        'datePublished': min(dates),
        'authors': '; '.join(authors),
        'title': metadata['title'],
        'doi': doi,
        'source': source_text,
        'ou_names': ou_names,
    }


def get_entries(since=DEFAULT_SINCE):
    """Search PuRe and return the list of usable publication entries.

    since: lower bound (ISO 'YYYY-MM-DD' string or ``datetime.date``) applied to
        every date field in ``DATE_FIELDS``.

    Raises ``requests.HTTPError`` for an unsuccessful response and the usual
    ``requests`` exceptions for connection problems or a timeout.
    """
    # Imported lazily, like django in update_django, so that the parsing helpers
    # in this module can be imported without the HTTP dependency.
    import requests

    payload = {
        'query': _build_query(since),
        "sort": [{"lastModificationDate": {"order": "desc"}}],
        "size": "1000",
        "from": "0",
    }
    headers = {
        'Cache-Control': 'no-cache',
        'Content-Type': 'application/json',
    }
    response = requests.post(
        BASE_URL + '/items/search?format=json',
        json=payload,
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()

    records = response.json().get('records') or []
    parsed = (entry_from_record(record) for record in records)
    return [entry for entry in parsed if entry is not None]


def update_django(entries):
    """Create or update one ``Publication`` row per entry, keyed by DOI.

    Entries are processed in ascending 'datePublished' order. A row is saved
    again only when one of its title, authors, source or groups values differs
    from the freshly built one.
    """
    import django
    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'config.settings.local')
    sys.path.append('/project/intranet/mpicms')
    django.setup()
    # Model imports are only possible after django.setup().
    from mpicms.publications.models import Publication

    for entry in sorted(entries, key=lambda e: e['datePublished']):
        template = {
            'title': "<b>" + entry['title'] + "</b>",
            'authors': entry['authors'],
            'source': entry['source'],
            'groups': " ".join(map(add_ou_href, sorted(entry['ou_names']))),
        }
        publication, created = Publication.objects.get_or_create(
            doi=entry['doi'], defaults=template)
        if created:
            print(f"created: {entry['title']}")
            continue

        changed = False
        for key, value in template.items():
            if getattr(publication, key) != value:
                setattr(publication, key, value)
                changed = True
        if changed:
            publication.save()
            print(f"updated: {entry['title']}")


def main():
    """Fetch the current entries from PuRe and write them to the database."""
    update_django(get_entries())


if __name__ == '__main__':
    main()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
You can’t perform that action at this time.