diff --git a/COSMOS.md b/doc/COSMOS.md similarity index 100% rename from COSMOS.md rename to doc/COSMOS.md diff --git a/doc/bibliography.md b/doc/bibliography.md new file mode 100644 index 0000000..70e1579 --- /dev/null +++ b/doc/bibliography.md @@ -0,0 +1,25 @@ +# How the bibliography is made + +References are stored in a BibTeX file. The XML file contains +citations that are similar to a LaTeX citation command. + +During processing, the BibTeX file and the bibliography and citation +type (anthology or monograph, numeric or author/year) are read out of +the XML source file. + +`pandoc-citeproc` is used to construct a JSON file out of the BibTeX +database. This is helpful during the next parts of processing. + + + +The next step formats the bibliography, depending on the bibliography +type, as a complete list or as a per-chapter list. In the second case, +several Markdown and HTML files are created, one for each chapter. +They are named after the `xml:id` of the chapter. + +The function `format_citations` then creates two outputs: the list of +references, formatted in HTML, and a dictionary with the citekey as +headword and three manifestations of the data (author-year-citation, +year-citation and title). diff --git a/doc/create_tmpbib.md b/doc/create_tmpbib.md new file mode 100644 index 0000000..8c15ae6 --- /dev/null +++ b/doc/create_tmpbib.md @@ -0,0 +1,19 @@ +# Create temporary bibliography + +In cases where authors hand in a formatted version of the bibliography +(rather than a reference database), this tool can help create a +database in BibTeX format. + +The required argument is a text file with a formatted bibliography (one +entry per line). The option `k` is there to supply an entry with a +keyword, for example the name of the chapter author. + +Because we require authors to use shortcuts in their docx manuscript when +citing, including the use of a citekey (`LASTNAME_YEAR`), there should +already be citekeys in the manuscript. 
When running `fix_tei.py`, +these citekeys are gathered together and can be used as an input to +this tool. + +The tool creates temporary citekeys out of the formatted bibliography +and, in an interactive session, the user selects the most likely entry. +With this, rudimentary entries can be created. diff --git a/doc/datapickle.md b/doc/datapickle.md new file mode 100644 index 0000000..04299d3 --- /dev/null +++ b/doc/datapickle.md @@ -0,0 +1,582 @@ +# Documentation for the contents of `data.pickle` + +This file is quite important. + +What does it contain? + +It assigns numbers to elements. For example, the thirteenth figure in the first (numbered) chapter is assigned the number `1.13`. + +It records, for each element ID, the human-readable reference of that element. + + data["chapterdict"] = chapterdict + data["figdict"] = figdict + data["eqdict"] = eqdict + data["fndict"] = fndict + data["listdict"] = listdict + data["pagelabeldict"] = pagelabeldict + data["secdict"] = secdict + data["tabdict"] = tabdict + data["theoremdict"] = theoremdict + + +{ + 'chapterdict': + {'uid1': '1', + 'uid18': '2', + 'uid33': '3', + 'uid67': '4', + 'uid114': '5', + 'uid223': '6', + 'uid257': '7', + 'uid304': '8',}, + + 'eqdict': {}, + + 'figdict': + { + 'uid3': '1.1', + 'uid4': '1.2', + 'uid6': '1.3', + 'uid7': '1.4', + 'uid9': '1.5', + 'uid10': '1.6', + 'uid11': '1.7', + 'uid12': '1.8', + 'uid13': '1.9', + 'uid14': '1.10', + 'uid15': '1.11', + 'uid16': '1.12', + 'uid17': '1.13', +}, + + 'fndict': {}, + + 'listdict': { + 'uid100': '31', + 'uid101': '32', + 'uid102': '33', + 'uid103': '34', + 'uid104': '35', + 'uid105': '36', + 'uid106': '37', + 'uid107': '38', + 'uid116': '1', + 'uid117': '2', + 'uid118': '3', + 'uid119': '4', + 'uid120': '5', + 'uid121': '6', + 'uid122': '1', + 'uid123': '2', + 'uid124': '3', + 'uid125': '4', + 'uid126': '5', + 'uid127': '6', + 'uid128': '7', + 'uid129': '8', + 'uid130': '9', + 'uid131': '10', + 'uid132': '11', + 'uid133': '12', + 'uid134': '13', + 'uid135': '14', + 'uid136': '15', + 'uid137': 
'16', + 'uid138': '17', + 'uid139': '18', + 'uid140': '19', + 'uid141': '20', + 'uid142': '21', + 'uid143': '22', + 'uid144': '23', + 'uid145': '24', + 'uid146': '25', + 'uid147': '26', + 'uid148': '27', + 'uid149': '28', + 'uid150': '29', + 'uid151': '30', + 'uid152': '1', + 'uid153': '2', + 'uid154': '3', + 'uid155': '4', + 'uid156': '5', + 'uid157': '1', + 'uid158': '2', + 'uid159': '3', + 'uid160': '4', + 'uid161': '5', + 'uid162': '6', + 'uid163': '7', + 'uid164': '8', + 'uid165': '9', + 'uid166': '10', + 'uid167': '11', + 'uid168': '12', + 'uid169': '1', + 'uid170': '2', + 'uid171': '3', + 'uid172': '4', + 'uid173': '5', + 'uid174': '6', + 'uid175': '7', + 'uid176': '8', + 'uid177': '9', + 'uid178': '10', + 'uid179': '11', + 'uid180': '12', + 'uid181': '13', + 'uid182': '14', + 'uid183': '15', + 'uid184': '16', + 'uid185': '17', + 'uid186': '18', + 'uid187': '19', + 'uid188': '20', + 'uid189': '21', + 'uid190': '22', + 'uid191': '23', + 'uid192': '24', + 'uid193': '25', + 'uid194': '26', + 'uid195': '27', + 'uid196': '1', + 'uid197': '2', + 'uid198': '1', + 'uid199': '2', + 'uid200': '3', + 'uid201': '4', + 'uid202': '5', + 'uid203': '6', + 'uid204': '7', + 'uid205': '1', + 'uid206': '2', + 'uid207': '3', + 'uid208': '4', + 'uid209': '5', + 'uid210': '6', + 'uid211': '7', + 'uid212': '8', + 'uid213': '9', + 'uid214': '1', + 'uid215': '2', + 'uid216': '3', + 'uid217': '4', + 'uid218': '5', + 'uid219': '6', + 'uid220': '1', + 'uid221': '2', + 'uid224': '1', + 'uid225': '2', + 'uid226': '3', + 'uid227': '4', + 'uid228': '5', + 'uid229': '6', + 'uid230': '7', + 'uid231': '8', + 'uid232': '9', + 'uid233': '10', + 'uid234': '11', + 'uid235': '12', + 'uid236': '13', + 'uid237': '14', + 'uid238': '15', + 'uid239': '16', + 'uid240': '17', + 'uid241': '18', + 'uid242': '1', + 'uid243': '2', + 'uid244': '3', + 'uid245': '4', + 'uid246': '5', + 'uid247': '6', + 'uid248': '7', + 'uid249': '8', + 'uid250': '9', + 'uid251': '10', + 'uid252': '11', + 'uid253': '12', + 
'uid254': '13', + 'uid255': '14', + 'uid256': '15', + 'uid260': '1', + 'uid261': '2', + 'uid262': '3', + 'uid263': '4', + 'uid264': '5', + 'uid265': '6', + 'uid266': '7', + 'uid267': '8', + 'uid268': '9', + 'uid269': '10', + 'uid270': '11', + 'uid271': '12', + 'uid272': '13', + 'uid273': '14', + 'uid274': '15', + 'uid275': '16', + 'uid276': '17', + 'uid277': '18', + 'uid278': '19', + 'uid279': '20', + 'uid280': '21', + 'uid281': '22', + 'uid282': '23', + 'uid283': '24', + 'uid284': '25', + 'uid285': '26', + 'uid286': '27', + 'uid287': '28', + 'uid288': '29', + 'uid289': '30', + 'uid290': '31', + 'uid291': '32', + 'uid292': '33', + 'uid293': '34', + 'uid294': '35', + 'uid295': '36', + 'uid296': '37', + 'uid297': '38', + 'uid306': '1', + 'uid307': '2', + 'uid308': '3', + 'uid309': '4', + 'uid310': '5', + 'uid311': '6', + 'uid312': '1', + 'uid313': '2', + 'uid314': '3', + 'uid315': '4', + 'uid316': '5', + 'uid317': '6', + 'uid318': '7', + 'uid319': '8', + 'uid320': '9', + 'uid321': '10', + 'uid322': '11', + 'uid323': '12', + 'uid324': '13', + 'uid325': '14', + 'uid326': '15', + 'uid327': '16', + 'uid328': '17', + 'uid329': '18', + 'uid330': '19', + 'uid331': '20', + 'uid332': '21', + 'uid333': '22', + 'uid334': '23', + 'uid335': '24', + 'uid336': '25', + 'uid337': '26', + 'uid338': '27', + 'uid339': '28', + 'uid34': '1', + 'uid340': '29', + 'uid341': '30', + 'uid342': '1', + 'uid343': '2', + 'uid344': '3', + 'uid345': '4', + 'uid346': '5', + 'uid347': '1', + 'uid348': '2', + 'uid349': '3', + 'uid35': '2', + 'uid350': '4', + 'uid351': '5', + 'uid352': '6', + 'uid353': '7', + 'uid354': '8', + 'uid355': '9', + 'uid356': '10', + 'uid357': '11', + 'uid358': '12', + 'uid359': '1', + 'uid36': '3', + 'uid360': '2', + 'uid361': '3', + 'uid362': '4', + 'uid363': '5', + 'uid364': '6', + 'uid365': '7', + 'uid366': '8', + 'uid367': '9', + 'uid368': '10', + 'uid369': '11', + 'uid37': '4', + 'uid370': '12', + 'uid371': '13', + 'uid372': '14', + 'uid373': '15', + 'uid374': '16', + 
'uid375': '17', + 'uid376': '18', + 'uid377': '19', + 'uid378': '20', + 'uid379': '21', + 'uid38': '5', + 'uid380': '22', + 'uid381': '23', + 'uid382': '24', + 'uid383': '25', + 'uid384': '26', + 'uid385': '27', + 'uid386': '1', + 'uid387': '2', + 'uid388': '3', + 'uid389': '4', + 'uid39': '6', + 'uid390': '5', + 'uid391': '6', + 'uid392': '7', + 'uid393': '1', + 'uid394': '2', + 'uid395': '3', + 'uid396': '4', + 'uid397': '5', + 'uid398': '6', + 'uid399': '7', + 'uid40': '7', + 'uid400': '8', + 'uid401': '9', + 'uid402': '1', + 'uid403': '2', + 'uid404': '3', + 'uid405': '4', + 'uid406': '5', + 'uid407': '6', + 'uid408': '1', + 'uid409': '2', + 'uid41': '8', + 'uid42': '9', + 'uid43': '10', + 'uid44': '11', + 'uid45': '12', + 'uid46': '13', + 'uid47': '14', + 'uid48': '15', + 'uid49': '16', + 'uid50': '17', + 'uid51': '18', + 'uid52': '1', + 'uid53': '2', + 'uid54': '3', + 'uid55': '4', + 'uid56': '5', + 'uid57': '6', + 'uid58': '7', + 'uid59': '8', + 'uid60': '9', + 'uid61': '10', + 'uid62': '11', + 'uid63': '12', + 'uid64': '13', + 'uid65': '14', + 'uid66': '15', + 'uid70': '1', + 'uid71': '2', + 'uid72': '3', + 'uid73': '4', + 'uid74': '5', + 'uid75': '6', + 'uid76': '7', + 'uid77': '8', + 'uid78': '9', + 'uid79': '10', + 'uid80': '11', + 'uid81': '12', + 'uid82': '13', + 'uid83': '14', + 'uid84': '15', + 'uid85': '16', + 'uid86': '17', + 'uid87': '18', + 'uid88': '19', + 'uid89': '20', + 'uid90': '21', + 'uid91': '22', + 'uid92': '23', + 'uid93': '24', + 'uid94': '25', + 'uid95': '26', + 'uid96': '27', + 'uid97': '28', + 'uid98': '29', + 'uid99': '30'}, + + 'pagelabeldict': { + 'img001': ' on input line 3', + 'img002': ' on input line 4', + 'img003': ' on input line 5', + 'img004': ' on input line 6', + 'img005': ' on input line 7', + 'img006': ' on input line 8', + 'img007': ' on input line 9', + 'img008': ' on input line 10', + 'img009': ' on input line 11', + 'img010': ' on input line 12', + 'img011': ' on input line 13', + 'img012': ' on input line 14', + 
'img013': ' on input line 15', + 'img014': ' on input line 16', + 'img015': ' on input line 17', + 'img016': ' on input line 18', + 'img017': ' on input line 19', + 'img018': ' on input line 20', + 'img019': ' on input line 21', + 'img020': ' on input line 22', + 'img021': ' on input line 23', + 'img022': ' on input line 24', + 'img023': ' on input line 25', + 'img024': ' on input line 26', + 'img025': ' on input line 27', + 'img026': ' on input line 28', + 'img027': ' on input line 29', + 'img028': ' on input line 30', + 'img029': ' on input line 31', + 'img030': ' on input line 32', + 'img031': ' on input line 33', + 'img032': ' on input line 34', + 'img033': ' on input line 35', + 'img034': ' on input line 36', + 'img035': ' on input line 37', + 'img036': ' on input line 38', + 'img037': ' on input line 39', + 'img038': ' on input line 40', + 'img039': ' on input line 41', + 'img040': ' on input line 42', + 'img041': ' on input line 43', + 'img042': ' on input line 44', + 'img043': ' on input line 45', + 'img044': ' on input line 46', + 'img045': ' on input line 47', + 'img046': ' on input line 48', + 'img047': ' on input line 49', + 'img048': ' on input line 50', + 'img049': ' on input line 51', + 'img050': ' on input line 52', + 'img051': ' on input line 53', + 'img052': ' on input line 54', + 'img053': ' on input line 55', + 'img054': ' on input line 56', + 'img055': ' on input line 57', + 'img056': ' on input line 58', + 'img057': ' on input line 59', + 'img058': ' on input line 60', + 'img059': ' on input line 61', + 'img060': ' on input line 62', + 'img061': ' on input line 63', + 'img062': ' on input line 64', + 'img063': ' on input line 65', + 'img064': ' on input line 66', + 'img065': ' on input line 67', + 'img066': ' on input line 68', + 'img067': ' on input line 69', + 'img068': ' on input line 70', + 'img069': ' on input line 71', + 'img070': ' on input line 72', + 'img071': ' on input line 73', + 'img072': ' on input line 74', + 'img073': ' on 
input line 75', + 'img074': ' on input line 76', + 'img075': ' on input line 77', + 'img076': ' on input line 78', + 'img077': ' on input line 79', + 'img078': ' on input line 80', + 'img079': ' on input line 81', + 'img080': ' on input line 82', + 'img081': ' on input line 83', + 'img082': ' on input line 84', + 'img083': ' on input line 85', + 'img084': ' on input line 86', + 'img085': ' on input line 87', + 'img086': ' on input line 88', + 'img087': ' on input line 89', + 'img088': ' on input line 90', + 'img089': ' on input line 91', + 'img090': ' on input line 92', + 'img091': ' on input line 93', + 'img092': ' on input line 94', + 'img093': ' on input line 95', + 'img094': ' on input line 96', + 'img095': ' on input line 97', + 'img096': ' on input line 98', + 'img097': ' on input line 99', + 'img098': ' on input line 100', + 'img099': ' on input line 101', + 'img100': ' on input line 102', + 'img101': ' on input line 103', + 'img102': ' on input line 104', + 'img103': ' on input line 105', + 'img104': ' on input line 106', + 'img105': ' on input line 107', + 'img106': ' on input line 108', + 'img107': ' on input line 109', + 'img108': ' on input line 110', + 'img109': ' on input line 111', + 'img110': ' on input line 112', + 'img111': ' on input line 113', + 'img112': ' on input line 114', + 'img113': ' on input line 115', + 'img114': ' on input line 116', + 'img115': ' on input line 117', + 'img116': ' on input line 118', + 'img117': ' on input line 119', + 'img118': ' on input line 120', + 'img119': ' on input line 121', + 'sec10:Figure10': '41', + 'sec11:Figure11': '43', + 'sec12:Figure12': '45', + 'sec13:Figure13': '47', + 'sec1:Figure1': '11', + 'sec2:Figure2': '13', + 'sec3:Figure3': '17', + 'sec4:Figure4': '26', + 'sec5:Figure5': '31', + 'sec6:Figure6': '32', + 'sec7:Figure7': '33', + 'sec8:Figure8': '35', + 'sec9:Figure9': '37' + }, + 'secdict': { + 'uid2': '1.1', + 'uid5': '1.2', + 'uid8': '1.3', + 'uid19': '2.1', + 'uid20': '2.2', + 'uid21': 
'2.2.1', + 'uid22': '2.2.2', + 'uid23': '2.2.3', + 'uid24': '2.2.4', + 'uid25': '2.2.5', + 'uid26': '2.3', + 'uid27': '2.3.1', + 'uid28': '2.3.2', + 'uid29': '2.3.3', + 'uid30': '2.3.4', + 'uid31': '2.3.5', + 'uid32': '2.3.6', + 'uid68': '4.1', + 'uid69': '4.2', + 'uid108': '4.3', + 'uid109': '4.4', + 'uid110': '4.5', + 'uid111': '4.6', + 'uid112': '4.7', + 'uid113': '4.8', + 'uid115': '5.1', + 'uid222': '5.2', + 'uid258': '7.1', + 'uid259': '7.2', + 'uid298': '7.3', + 'uid299': '7.4', + 'uid300': '7.5', + 'uid301': '7.6', + 'uid302': '7.7', + 'uid303': '7.8', + 'uid305': '8.1', + 'uid410': '8.2', + }, + 'tabdict': {}, + 'theoremdict': {}} diff --git a/doc/eoaclassic-workflow.md b/doc/eoaclassic-workflow.md new file mode 100644 index 0000000..8ebb561 --- /dev/null +++ b/doc/eoaclassic-workflow.md @@ -0,0 +1,35 @@ +# The EOA classic workflow +This document describes the different parts of the *EOA classic* workflow, which is based on EOATeX files. + +## eoatex2imxml.py +### Steps in this program +- Preparation, setup and checks +- Tralics conversion of EOATeX source + - Includes slight output correction +- Processing elements + - Mostly chapter by chapter +- Creation of bibliographical entries +- Cleanup +- Write output files and data +### Available functions +- getchildren +- TeX2PNG +- makebibchecker +- sanitizebibentry +- createBibEntryAuthorYear +- createBibEntryNumeric +- pdf_burst +- progress +- cleanup +## imxml2django.py +## imxml2epub.py +## imxml2tei.py +## Libraries +### libeoabibitem.py +### libeoaconvert.py +## Other files +### config +### debug +### mkimage.py +### data +### tmp_files diff --git a/doc/eoadocx-workflow.md b/doc/eoadocx-workflow.md new file mode 100644 index 0000000..24d7728 --- /dev/null +++ b/doc/eoadocx-workflow.md @@ -0,0 +1,6 @@ +# The EOA DocX workflow +This document describes the different parts of the *EOA TEI* workflow, which is based on DocX files. 
+ + +tei2eoatex.xsl +tei2imxml.py diff --git a/metadatamapping.md b/doc/metadatamapping.md similarity index 100% rename from metadatamapping.md rename to doc/metadatamapping.md