Permalink
Cannot retrieve contributors at this time
Name already in use
A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
EOASkripts/eoatex2imxml.py
Go to fileThis commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
executable file
1699 lines (1526 sloc)
79.4 KB
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# -*- coding: utf-8; mode: python -*- | |
# Time-stamp: <2018-09-12 11:42:33 (kthoden)> | |
""" | |
Converts Latex files into a customized DocBook XML file. | |
The program depends on the external program tralics for the conversion | |
as well as xelatex, pdfcrop (part of latex distributions) and | |
pandoc-citeproc for additional formatting. | |
""" | |
# license? | |
__version__= "1.0" | |
__author__ = "Klaus Thoden" | |
__date__="20171205" | |
# can the job done by BeautifulSoup also be done by lxml.html soupparser? | |
# as described in http://infohost.nmt.edu/tcc/help/pubs/pylxml/web/soupparser.html | |
# from lxml.html import soupparser | |
# imports | |
import argparse | |
from lxml import etree | |
from libeoabibitem import Bibitem | |
from bs4 import BeautifulSoup | |
import libeoaconvert | |
import glob | |
import os | |
import re | |
import string | |
import shlex | |
import json | |
import subprocess | |
import sys | |
import shutil | |
import time | |
import configparser | |
import logging | |
import pickle | |
# current_directory = os.path.dirname(sys.argv[0])
# is this better?
# NOTE(review): despite its name this is the resolved path of the script
# *file*, not of its directory. The value is left untouched because code
# further down may rely on it -- confirm before changing.
current_directory = os.path.realpath(__file__)
print("The script is run from {}".format(current_directory))

###############################################################
# Preparation of certain files and some checks in advance
###############################################################

# Options for the command line: filename / configfile
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", help="Name of config file.")
group = parser.add_mutually_exclusive_group()
group.add_argument("-f", "--filename", help="Name of main EOATeX file.")
group.add_argument("-t", "--trash", help="Remove temporary files.")
args = parser.parse_args()

if args.config is not None:
    CONFIG_FILE = os.path.abspath(args.config)
else:
    # Default: config/eoaconvert.cfg next to the invoked script. The script
    # path is made absolute first, otherwise an empty dirname (script started
    # from its own directory) would yield "/config/eoaconvert.cfg".
    CONFIG_FILE = os.path.join(
        os.path.dirname(os.path.abspath(sys.argv[0])),
        "config",
        "eoaconvert.cfg",
    )

# current biber is not compatible with this code
# switch TeX distribution to TeXLive2016,
# run biber_2.1 -O biber2-1n.bbl $INPUT to obtain this file
BIBERFILE = "biber2-1.bbl"

########################
# Constant directories #
########################
CONVERT_DIR = os.getcwd() + os.path.sep + "CONVERT"

##################################
# Reading the configuration file #
##################################
CONFIG = configparser.ConfigParser()
# ConfigParser.read() silently skips unreadable files and returns the list
# of files it could parse, so an empty result means there is no configuration.
if not CONFIG.read(CONFIG_FILE):
    print("Cannot read the config file %s. Exiting." % CONFIG_FILE)
    sys.exit(1)

######################
# Setting up logging #
######################
LOGFILE = CONFIG['General']['logfile']
LOGLEVEL = CONFIG['General']['loglevel']

# Level names are only recognised in upper case.
logging.basicConfig(filename=LOGFILE, level=LOGLEVEL.upper(), format='%(asctime)s - %(levelname)s - %(message)s')

# This message must not be emitted before basicConfig(): the module-level
# logging functions install a default handler on first use, after which
# basicConfig() does nothing and the log file would never be written.
logging.debug("The configfile is %s." % CONFIG_FILE)

########################
# Paths to executables #
########################
GM_PATH = CONFIG['Executables']['graphicsmagic']
TL_PATH = CONFIG['Executables']['texlive']
TEXBIN_PATH = CONFIG['Executables']['texbin']
TRALICS_PATH_EXEC = CONFIG['Executables']['tralics_path_exec']

############################
# Paths to auxiliary files #
############################
TRALICS_PATH_LIB = CONFIG['Auxiliaries']['TRALICS_PATH_LIB']
TEMPLATE_PATH = CONFIG['Auxiliaries']['template_path']
SUPPORT_PATH = CONFIG['Auxiliaries']['support_path']
# AUX_TeX_FILES_PATH = CONFIG['Auxiliaries']['aux_tex_files_path'] | |
# interimResult = "" | |
######################################## | |
# Certain functions for specific tasks # | |
######################################## | |
def getchildren(xmlElement):
    """Return *xmlElement* unchanged.

    The name suggests that sub-elements are collected, but the function
    only hands its argument back to the caller.
    """
    return xmlElement
# def getchildren ends here
def TeX2PNG(LaTeXCode, Type, Chapter, Number):
    """Render LaTeX code into a PNG file (epub & django).

    The formula is wrapped in the math environment belonging to *Type*,
    inserted into the ``formula.tex`` template, compiled with XeLaTeX,
    cropped with pdfcrop and converted to
    ``items/<Type>_<Chapter>_<Number>.png`` below the current working
    directory.

    Args:
        LaTeXCode: LaTeX source of the formula without math delimiters.
        Type: formula type, one of the keys of ``Types`` below.
        Chapter: chapter number, only used to build file names.
        Number: formula number, only used to build file names.

    Returns:
        The LaTeX code including the surrounding math environment and with
        the rebound commands substituted (not the name of the PNG file).

    Raises:
        KeyError: if *Type* is not a known formula type.
    """
    # Dictionary contains Type:begin/end
    Types = {
        "EOAineq": ["$", "$"],
        "EOAequation": ["\\begin{equation*}", "\\end{equation*}"],
        "EOAequationnonumber": ["\\begin{equation*}", "\\end{equation*}"],
        "EOAequationarray": ["\\begin{align*}", "\\end{align*}"],
        "EOAequationarraynonumber": ["\\begin{align*}", "\\end{align*}"],
    }
    LaTeXCode = Types[Type][0] + LaTeXCode + Types[Type][1]

    # Regular expression -> replacement. Raw strings keep the patterns as
    # they were while avoiding invalid string escape sequences.
    # NOTE(review): in the last pattern "\s" is the regex whitespace class,
    # so it matches "<whitespace>lashed|" rather than a literal
    # "\slashed|" -- confirm which input was intended.
    dictRebindedCommands = {
        r"\|ket\|": r"\\ket",
        r"\|braket\|": r"\\braket",
        r"\|bra\|": r"\\bra",
        r"\|Bra\|": r"\\Bra",
        r"\|Ket\|": r"\\Ket",
        r"\slashed\|": r"\\slashed",
    }
    for strCommand, strReplacement in dictRebindedCommands.items():
        LaTeXCode = re.sub(strCommand, strReplacement, LaTeXCode)

    # Open plain LaTeX-Template
    with open(TEMPLATE_PATH + "formula.tex", "r") as template_file:
        Template = template_file.read()

    # use local tmpdir
    working_dir = os.getcwd()
    formula_tmp_dir = working_dir + "/tmp_files/formulas2png/"
    # Make directory items if it doesn't already exist
    items_dir = working_dir + "/items"
    os.makedirs(items_dir, exist_ok=True)

    base_name = Type + "_" + str(Chapter) + "_" + str(Number)
    tmpFile = formula_tmp_dir + base_name + ".tex"
    with open(tmpFile, "w") as tex_file:
        tex_file.write(string.Template(Template).substitute(DERINHALT=LaTeXCode))

    # Only the configured executable part is split shell-style; the file
    # arguments are passed as separate list items so that working
    # directories containing spaces do not break the command line.
    xelatex_command = shlex.split(TEXBIN_PATH + "xelatex") + ["--halt-on-error", tmpFile]
    pdfcrop_command = shlex.split(TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl") + [
        formula_tmp_dir + base_name + ".pdf",
        formula_tmp_dir + base_name + "a.pdf",
    ]
    convert_command = shlex.split(GM_PATH) + [
        "convert",
        "-density",
        "144",
        formula_tmp_dir + base_name + "a.pdf",
        items_dir + "/" + base_name + ".png",
    ]

    # Redirecting stdout of the external tools into a log file
    with open('tmp_files/xelatex-run.log', 'w') as log_file:
        Ergebnis = subprocess.call(xelatex_command, cwd=formula_tmp_dir, stdout=log_file)
        if Ergebnis == 0:
            print("Successfully converted formula " + Type + str(Chapter) + "_" + str(Number))
        else:
            # every non-zero exit status is a failure, not only 1
            print("[ERROR]: Failed to convert formula " + Type + str(Chapter) + "_" + str(Number))
        subprocess.call(pdfcrop_command, cwd=formula_tmp_dir, stdout=log_file)
        subprocess.call(convert_command, cwd=formula_tmp_dir, stdout=log_file)
    return LaTeXCode
# def TeX2PNG ends here
def make_bibchecker(bib_database, set_citations):
    """Construct separate LaTeX files that contain only the bibliography.

    The HTML bibliography is still not perfectly formatted like the
    LaTeX version. To check both, the templates ``largebib.tex`` and
    ``largebib-xml.tex`` are filled in and written to the ``debug``
    directory below the current working directory, from where they can
    be converted into the various formats.

    Args:
        bib_database: value for the ``INSERT_BIB_DATABASE`` placeholder.
        set_citations: iterable of citation keys; they are joined with
            ", " and inserted for the ``INSERT_CITEKEYS`` placeholder.
    """
    string_citations = ", ".join(set_citations)

    # template file -> output file in the debug directory
    file_pairs = (
        ("largebib.tex", "debug_onlybib.tex"),
        ("largebib-xml.tex", "debug_onlybib-xml.tex"),
    )

    # Read and fill in both templates before anything is written, so a
    # missing template does not leave a half-updated debug directory.
    replacements = []
    for template_name, output_name in file_pairs:
        with open(TEMPLATE_PATH + template_name, "r") as template_file:
            template = string.Template(template_file.read())
        replacements.append((
            output_name,
            template.substitute(INSERT_BIB_DATABASE=bib_database, INSERT_CITEKEYS=string_citations),
        ))

    for output_name, replacement in replacements:
        with open("%s/debug/%s" % (os.getcwd(), output_name), "w") as output_file:
            output_file.write(replacement)
# def make_bibchecker ends here
def sanitize_bibentry(bibEntry):
    """Clean up punctuation artefacts in a formatted bibliography entry.

    Replaces ". , " by ", " and "vols.." by "vols." and strips
    surrounding whitespace.

    Args:
        bibEntry: the formatted entry as a string.

    Returns:
        The cleaned-up string.
    """
    return bibEntry.replace(". , ", ", ").replace("vols..", "vols.").strip()
# def sanitize_bibentry ends here
def createBibEntryAuthorYear(bibEntry, boolSameAuthor):
    """Create the complete entry of a publication (epub & django) for author-year citation.

    Args:
        bibEntry: bibliography item, queried through its accessor methods
            (``entrytype()``, ``title()``, ``labelyear()`` ...).
        boolSameAuthor: if true, the author name is replaced by a dash
            ("–"), followed by the editor postfix if there is one.

    Returns:
        The formatted entry as an HTML fragment, passed through
        ``sanitize_bibentry``. The result is empty for an allowed entry
        type that has no formatting rule below.

    The program is terminated with an error message if the entry type is
    not listed in ``libeoaconvert.allowed_bibentry_types``.
    """
    strBibEntry = ""
    entry_type = bibEntry.entrytype()
    if entry_type not in libeoaconvert.allowed_bibentry_types:
        print("[ERROR]: You cannot use the entry type %s in entry %s. Allowed entry types are: %s.\n" % (entry_type, bibEntry.citekey(), ", ".join(libeoaconvert.allowed_bibentry_types)))
        # non-zero status: this is a failure
        sys.exit(1)

    bool_edited_book = False
    book_without_author = False
    # if there is no author, but a publisher, the localized postfix is
    # returned from the function together with the name in a tuple
    author_name, editor_postfix = bibEntry.fullauthorlastfirst()[:2]

    # Plain truth test: with the former "== True" / "== False" comparisons
    # any other value left strAuthor undefined.
    if not boolSameAuthor:
        if len(editor_postfix) != 0:
            if editor_postfix == "no_author_only_title":
                book_without_author = True
                strAuthor = bibEntry.title()
            else:
                bool_edited_book = True
                strAuthor = author_name + ", " + editor_postfix
        else:
            strAuthor = author_name
    else:
        if len(editor_postfix) != 0:
            bool_edited_book = True
            strAuthor = "– " + editor_postfix
        else:
            strAuthor = "–"

    def year_label():
        """Return the year with its suffix in parentheses, e.g. " (2018a)"."""
        return " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + ")"

    # Next line good for debugging
    # print(bibEntry.citekey(), strAuthor)
    if entry_type == "book":
        book_details = (
            bibEntry.edition() + bibEntry.volumenumeric() + bibEntry.seriesnumber()
            + bibEntry.note() + str(bibEntry.location()) + bibEntry.pages()
            + ". " + bibEntry.url()
        )
        if bool_edited_book:
            strBibEntry = strAuthor + year_label() + ". <i>" + str(bibEntry.title()) + "</i> " + book_details
        elif book_without_author:
            strBibEntry = "<i>" + str(bibEntry.title()) + "</i> " + year_label() + "." + book_details
        else:
            strBibEntry = (
                strAuthor + year_label() + ". <i>" + str(bibEntry.title()) + "</i> "
                + bibEntry.editor() + book_details
            )
    elif entry_type == "booklet":
        strBibEntry = (
            strAuthor + year_label() + ". <i>" + str(bibEntry.title()) + "</i>"
            + str(bibEntry.location()) + bibEntry.howpublished() + "."
        )
    elif entry_type == "report":
        strBibEntry = strAuthor + year_label() + ". <i>" + bibEntry.title() + "</i>"
    elif entry_type == "thesis":
        strBibEntry = (
            strAuthor + year_label() + ". <i>" + bibEntry.title() + "</i>"
            + bibEntry.thesistype() + bibEntry.institution() + "." + bibEntry.url()
        )
    elif entry_type == "misc":
        strBibEntry = (
            strAuthor + year_label() + ". <i>" + str(bibEntry.title()) + "</i> "
            + bibEntry.howpublished() + ". " + bibEntry.note() + bibEntry.location()
            + bibEntry.thesistype() + "." + bibEntry.url()
        )
    elif entry_type == "incollection":
        strBibEntry = (
            strAuthor + year_label() + ". " + bibEntry.title() + bibEntry.booktitle() + ". "
            + bibEntry.editor() + bibEntry.edition() + bibEntry.volumenumeric()
            + bibEntry.seriesnumber() + bibEntry.note() + bibEntry.location()
            + bibEntry.pages() + "." + bibEntry.url()
        )
    elif entry_type == "inproceedings":
        strBibEntry = (
            strAuthor + year_label() + ". " + bibEntry.title() + bibEntry.booktitle()
            + bibEntry.editor() + bibEntry.series() + bibEntry.location() + bibEntry.pages()
        )
    elif entry_type == "article":
        strBibEntry = (
            strAuthor + year_label() + ". " + str(bibEntry.title()) + str(bibEntry.journaltitle())
            + bibEntry.volumenumberpages() + ". " + bibEntry.note() + bibEntry.url()
        )
    elif entry_type == "newspaper":
        strBibEntry = strAuthor + year_label() + ". <i>" + bibEntry.title() + "</i>"
    # print(strBibEntry)
    return sanitize_bibentry(strBibEntry)
# def createBibEntryAuthorYear ends here
def createBibEntryNumeric(bibEntry):
    """Create the complete entry of a publication (epub & django) for numeric citation.

    Args:
        bibEntry: bibliography item, queried through its accessor methods
            (``entrytype()``, ``title()``, ``year()`` ...).

    Returns:
        The formatted entry as an HTML fragment; empty for an allowed entry
        type that has no formatting rule below.

    The program is terminated with an error message if the entry type is
    not listed in ``libeoaconvert.allowed_bibentry_types``.
    """
    strBibEntry = ""
    entry_type = bibEntry.entrytype()
    if entry_type not in libeoaconvert.allowed_bibentry_types:
        print("[ERROR]: You cannot use the entry type %s in entry %s. Allowed entry types are: %s.\n" % (entry_type, bibEntry.citekey(), ", ".join(libeoaconvert.allowed_bibentry_types)))
        # non-zero status: this is a failure
        sys.exit(1)

    strAuthor = bibEntry.fullauthorfirstlast()
    if entry_type == "book":
        strBibEntry = (
            strAuthor + ". <i>" + bibEntry.title() + "</i>."
            + bibEntry.location() + ", " + bibEntry.year()
        )
    elif entry_type == "booklet":
        strBibEntry = (
            strAuthor + ". <i>" + bibEntry.title() + "</i>. " + bibEntry.howpublished()
            + ". " + bibEntry.location() + ", " + bibEntry.year()
        )
    elif entry_type in ("report", "newspaper"):
        # both types share the same layout
        strBibEntry = (
            strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix()
            + ") <i>" + bibEntry.title() + "</i>."
        )
    elif entry_type == "thesis":
        strBibEntry = (
            strAuthor + ". <i>" + bibEntry.title() + "</i>. " + bibEntry.thesistype()
            + bibEntry.institution() + ", " + bibEntry.year()
        )
    elif entry_type == "misc":
        strBibEntry = strAuthor + ". " + bibEntry.title() + ". " + bibEntry.booktitle() + ". "
    elif entry_type == "incollection":
        strBibEntry = (
            strAuthor + ". " + bibEntry.title() + ". " + bibEntry.booktitle()
            + bibEntry.editor() + ". " + bibEntry.location() + ", " + bibEntry.year()
            + ". " + bibEntry.pages() + "."
        )
    elif entry_type == "inproceedings":
        strBibEntry = (
            strAuthor + ". " + bibEntry.title() + ". " + bibEntry.booktitle() + ". "
            + bibEntry.volumenumeric() + bibEntry.year() + ". " + bibEntry.pages() + "."
        )
    elif entry_type == "article":
        strBibEntry = (
            strAuthor + ". " + bibEntry.title() + "<i>" + bibEntry.journaltitle() + "</i> "
            + bibEntry.volumenumberpages() + " (" + bibEntry.year() + "):"
            + bibEntry.pages() + "."
        )
    return strBibEntry
# def createBibEntryNumeric ends here
def pdf_burst(input_file, tmpDir):
    """Split a PDF file into single pages.

    Every page is written to ``EOAformulas_<n>.pdf`` (numbered from 1).

    Args:
        input_file: name of the PDF file, appended to *tmpDir*.
        tmpDir: directory prefix (including the trailing separator) used
            for the input file and for the generated pages.
    """
    from PyPDF2 import PdfFileWriter, PdfFileReader

    # The input has to stay open until the last page has been written.
    with open(tmpDir + input_file, "rb") as input_stream:
        input1 = PdfFileReader(input_stream)
        number_of_pages = input1.getNumPages()
        logging.debug("Input is %s and has %d pages." % (input_file, number_of_pages))
        for pageno in range(number_of_pages):
            output = PdfFileWriter()
            output.addPage(input1.getPage(pageno))
            output_filename = tmpDir + "EOAformulas_%d.pdf" % (pageno + 1)
            with open(output_filename, 'wb') as output_stream:
                output.write(output_stream)
            logging.debug("Wrote %s." % output_filename)
# def pdf_burst ends here
def progress(count, total, status=''):
    """Progress bar for command line. Taken from
    https://gist.github.com/vladignatyev/06860ec2040cb497f0f3

    Writes a 60 character wide bar, the percentage and *status* to
    standard output, ending with a carriage return so that the next call
    overwrites the line.

    Args:
        count: number of finished steps.
        total: overall number of steps; 0 yields an empty bar instead of
            a division by zero.
        status: text shown after the percentage.
    """
    bar_len = 60
    if total:
        filled_len = int(round(bar_len * count / float(total)))
        percents = round(100.0 * count / float(total), 1)
    else:
        # nothing to do at all: show an empty bar
        filled_len = 0
        percents = 0.0
    bar = '#' * filled_len + '-' * (bar_len - filled_len)
    sys.stdout.write('[%s] %s%s ... %s\r' % (bar, percents, '%', status))
    sys.stdout.flush()
# def progress ends here
def cleanup():
    """Remove the support files and directories from the working directory.

    Every item is removed independently, so a missing file no longer
    prevents the removal of the remaining ones. Items that cannot be
    removed for another reason are reported in the log and skipped.
    """
    working_dir = os.getcwd()
    # name below the working directory -> function that removes it
    targets = (
        ("classes.dtd", os.remove),
        ("mathml2-qname-1.mod", os.remove),
        ("mathml2.dtd", os.remove),
        ("html", shutil.rmtree),
        ("iso8879", shutil.rmtree),
        ("iso9573-13", shutil.rmtree),
        ("mathml", shutil.rmtree),
        # ("mathml2", shutil.rmtree),
    )
    removed_any = False
    for name, remove in targets:
        path = working_dir + "/" + name
        try:
            remove(path)
        except FileNotFoundError:
            continue
        except OSError as error:
            logging.warning("Could not remove %s: %s", path, error)
            continue
        removed_any = True

    if removed_any:
        logging.debug("Removed support files.")
    else:
        print("No temporary files were found.")
# def cleanup ends here
# Remove temporary files, necessary for troubleshooting
if args.trash == "temp":
    cleanup()
    sys.exit()

#################################################
# Checking for existence of tools and libraries #
#################################################
if not os.path.exists(TRALICS_PATH_LIB):
    print("Cannot find the Tralics configuration at %s. Exiting." % TRALICS_PATH_LIB)
    # failures below leave with a non-zero status
    sys.exit(1)

##################################
# Setting up various directories #
##################################
# exist_ok avoids the race between checking for and creating a directory
os.makedirs("tmp_files", exist_ok=True)
os.makedirs("tmp_files/formulas2png/", exist_ok=True)
os.makedirs(os.getcwd() + "/debug", exist_ok=True)

# Check for folder and necessary files
if not os.path.exists(CONVERT_DIR):
    print("The directory CONVERT has not been created yet. Creating it for you")
    time.sleep(1)
    os.makedirs(CONVERT_DIR)

if not os.path.exists(CONVERT_DIR + os.path.sep + "publication.cfg"):
    print("The publication.cfg file is missing in CONVERT directory.")
    if os.path.exists(os.getcwd() + os.path.sep + "publication.cfg"):
        shutil.copy("publication.cfg", CONVERT_DIR)
        print("Copied from current directory.")
    else:
        print("Found no publication.cfg. Exiting")
        sys.exit(1)

# The cover is looked up as Cover.jpg in the working directory and stored
# as cover.jpg in the CONVERT directory.
if not os.path.exists(CONVERT_DIR + os.path.sep + "cover.jpg"):
    print("The file Cover.jpg in CONVERT directory is missing.")
    if os.path.exists(os.getcwd() + os.path.sep + "Cover.jpg"):
        shutil.copy("Cover.jpg", CONVERT_DIR + os.path.sep + "cover.jpg")
        print("Copied from current directory.")
    else:
        print("No coverfile found. You can create a temporary one with the mkimage.py script")
        sys.exit(1)
# if os.path.exists(os.getcwd() + "/pre_xml.tex") == False: | |
# print ("pre_xml fehlt") | |
# sys.exit() | |
# Copy Support-Files from /Library/MPIWG to current directory
for support_file in ("classes.dtd", "mathml2-qname-1.mod", "mathml2.dtd"):
    shutil.copy(SUPPORT_PATH + support_file, os.getcwd())

for support_directory in ("html", "iso8879", "iso9573-13", "mathml"):
    support_destination = os.getcwd() + "/" + support_directory
    # copytree() refuses to copy into an existing directory. A copy left
    # over from an earlier run that was not cleaned up is reused instead of
    # aborting the conversion.
    if os.path.exists(support_destination):
        logging.debug("Support directory %s already exists, not copied again." % support_destination)
        continue
    shutil.copytree(SUPPORT_PATH + support_directory, support_destination)
# shutil.copytree(SUPPORT_PATH + "mathml2", (os.getcwd() + "/mathml2"))
##############################################################
#                Preparing the main document                 #
##############################################################
# Everything below needs the name of the main file; without it the string
# concatenations would fail with a TypeError.
if args.filename is None:
    parser.error("Please specify the main EOATeX file with -f/--filename.")

# Convert TeX to XML via Tralics
Kommando = "%s -log_file %s -confdir %s/tralics_conf -config %s/tralics.tcf -utf8 -utf8output %s.tex" % (TRALICS_PATH_EXEC, args.filename + "-tralics.log", TRALICS_PATH_LIB, TRALICS_PATH_LIB, args.filename)
Argumente = shlex.split(Kommando)
Prozess = subprocess.call(Argumente)

# Fix underscore and fix EOAtranscripted
# Tralics is asked for UTF-8 output (-utf8output), so the file is read and
# written with that encoding instead of the locale default.
with open(args.filename + ".xml", "r", encoding="utf-8") as tmpFile:
    tmpText = tmpFile.read()

tmpText = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpText)
tmpText = re.sub(r"<error n='\\par' l='(.*?)' c='Invalid \\par command: paragraph not started'/>", "", tmpText)

with open(args.filename + ".xml", "w", encoding="utf-8") as tmpFile:
    tmpFile.write(tmpText)
# Complete XML-Document in xmlTree
xmlParser = etree.XMLParser(no_network=False, load_dtd=True)  # resolve_entities=False
xmlTree = etree.parse(args.filename + ".xml", xmlParser)
xmlChapters = xmlTree.findall("//div1")

# Cleanup of not needed tags in advance. To be cleaned: <error>
etree.strip_elements(xmlTree, 'error', with_tail=False)

print("-----------------------------------------------------")
print("Move EOAlanguage from <head> into attribute of EOAchapter")
intChapterNumber = 1
for xmlChapter in xmlChapters:
    xmlLanguage = xmlChapter.find(".//EOAlanguage")
    if xmlLanguage is not None:
        strLanguage = xmlLanguage.text or "english"
        xmlChapter.set("language", strLanguage)
        # empty the element so that no text is left behind when the tag is stripped
        xmlLanguage.text = None
        print("The language of Chapter %d is %s." % (intChapterNumber, strLanguage))
    # strip_tags() works in place; its result is not assigned to the loop variable
    etree.strip_tags(xmlChapter, "EOAlanguage")
    intChapterNumber += 1
##############################################################
#         Numbering and Typesetting various Elements         #
##############################################################
# Figure out how to number (like essay or regular)
xmlSeries = xmlTree.find(".//EOAseries")
if xmlSeries is None:
    print("\n\nYou are most probably using the preamble for the PDF output. Exiting.")
    sys.exit(1)
strSerie = xmlSeries.text or "regular"

strNumberingType = "essay" if strSerie == "Essay" else "regular"

# Dictionaries containing UIDs and Numbers
dictChapters = {}
dictFigures = {}
dictEquations = {}
dictSections = {}
dictFootnotes = {}
dictPagelabels = {}
dictTables = {}
dictLists = {}
dictTheorems = {}
set_citations = set()

print("-----------------------------------------------------")
print("Numbering Chapters")
# Only chapters that are not marked rend="nonumber" get a number and an
# entry in dictChapters.
Chapternumber = 1
for xmlChapter in xmlChapters:
    if xmlChapter.get('rend') != "nonumber":
        Chapteruid = xmlChapter.get('id')
        dictChapters[Chapteruid] = str(Chapternumber)
        Chapternumber += 1
# EOAequation, EOAsubequation and EOAequationarray Numbering per Chapter | |
# Goes through the chapters, numbers every EOAequation, EOAequationarray and
# EOAsubequations element, renders its TeX source to a PNG via TeX2PNG() and
# records the label/anchor id -> equation number mapping in dictEquations.
# NOTE(review): the indentation of this section (and a trailing " | |" on
# every line) was damaged in this copy of the file; the nesting has to be
# restored from the original before the section can run.
intChapterNumber = 1 | |
print("-----------------------------------------------------") | |
print("Processing .//EOAequation | .//EOAequationarray | .//EOAsubequations") | |
for xmlChapter in xmlChapters: | |
intEquationnumber = 1 | |
xmlDinge = xmlChapter.xpath(".//EOAequation | .//EOAequationarray | .//EOAsubequations") | |
print("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge))) | |
for xmlDing in xmlDinge: | |
# Case 1: equation array; it is replaced by a new EOAequationarray element
# to which one EOAequation element per typeset part is appended.
if xmlDing.tag == "EOAequationarray": | |
# tmpNumberinArray is only being used for filename | |
tmpNumberinArray = intEquationnumber | |
# tmpDictNumberLabel used to insert the attribute value into <EOAequation> | |
tmpDictNumberLabel = {} | |
# Numbering is being done by <mtr>-Tags | |
xmlMathmlrows = xmlDing.findall(".//{http://www.w3.org/1998/Math/MathML}mtr") | |
for xmlMathmlrow in xmlMathmlrows: | |
if "Label" in xmlMathmlrow.attrib: | |
# Add the label to the dictionary of equations | |
if xmlChapter.get("rend") != "nonumber": | |
dictEquations[xmlMathmlrow.get("Label")] = str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber) | |
tmpDictNumberLabel[str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)] = xmlMathmlrow.get("Label") | |
if xmlChapter.get("rend") == "nonumber": | |
dictEquations[xmlMathmlrow.get("Label")] = str(intEquationnumber) | |
tmpDictNumberLabel[str(intEquationnumber)] = xmlMathmlrow.get("Label") | |
intEquationnumber += 1 | |
xmlRohTeX = xmlDing.find(".//texmath") | |
xmlNew = etree.Element('EOAequationarray') | |
# Blank lines need to be removed otherwise TeX won't work | |
textSourcecode = os.linesep.join([s for s in xmlRohTeX.text.splitlines() if s]) | |
# \rowattributeunknown has to be deleted, its an artefact | |
textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode) | |
# Push Down loop to parse the raw code | |
# The source is read character by character into textFormel; boolBackslash
# remembers whether the previous character was a backslash.
# NOTE(review): presumably a second consecutive backslash (the TeX row
# separator) ends the current row, which is then typeset -- confirm against
# the original indentation.
textFormel = "" | |
boolBackslash = False | |
for Buchstabe in textSourcecode: | |
if Buchstabe == "\n": | |
continue | |
if Buchstabe == "\\": | |
if boolBackslash == False: | |
textFormel += Buchstabe | |
boolBackslash = True | |
continue | |
if boolBackslash == True: | |
textFormel += Buchstabe | |
str_latexcode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray)) | |
if xmlChapter.get("rend") != "nonumber": | |
tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray))) | |
if xmlChapter.get("rend") == "nonumber": | |
tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray))) | |
tmpXML.set("TeX", str_latexcode) | |
# Put Label into EOAequation | |
if xmlChapter.get("rend") != "nonumber": | |
strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray) | |
if xmlChapter.get("rend") == "nonumber": | |
strTempKey = str(tmpNumberinArray) | |
if strTempKey in tmpDictNumberLabel: | |
#tmpXML.set("label", tmpDictNumberLabel[(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray))]) | |
tmpXML.set("label", tmpDictNumberLabel[strTempKey]) | |
xmlNew.append(tmpXML) | |
textFormel = "" | |
boolBackslash = False | |
tmpNumberinArray += 1 | |
continue | |
if Buchstabe != "\\": | |
textFormel += Buchstabe | |
boolBackslash = False | |
# Typeset last equation | |
str_latexcode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray)) | |
if xmlChapter.get("rend") != "nonumber": | |
tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(dictChapters[xmlChapter.get('id')] + "." + str(tmpNumberinArray))) | |
if xmlChapter.get("rend") == "nonumber": | |
tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray))) | |
tmpXML.set("TeX", str_latexcode) | |
# Put Label into EOAequation | |
if xmlChapter.get("rend") != "nonumber": | |
strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray) | |
if xmlChapter.get("rend") == "nonumber": | |
strTempKey = str(tmpNumberinArray) | |
if strTempKey in tmpDictNumberLabel: | |
# NOTE(review): the three print() calls below look like leftover debug output.
print(strTempKey) | |
print(tmpDictNumberLabel) | |
print(dictChapters) | |
tmpXML.set("label", tmpDictNumberLabel[strTempKey]) | |
xmlNew.append(tmpXML) | |
xmlDing.getparent().replace(xmlDing, xmlNew) | |
# enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on | |
#xmlNew.getparent().tag = "temp" | |
continue | |
# Case 2: group of subequations; they share one equation number and are
# told apart by a letter from listCharacters (16a, 16b, ...).
if xmlDing.tag == "EOAsubequations": | |
# Enclosing <p>-Tag of the EOAsubequations needs to be removed | |
xmlDing.getparent().tag = "temp" | |
xmlSubequations = xmlDing.findall('.//EOAequation') | |
# NOTE(review): only 26 letters are available; a group with more
# subequations would raise an IndexError at listCharacters[tmpI].
listCharacters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z'] | |
tmpI = 0 | |
# Insert Number of this Subequation into dictEquations | |
xmlAnchor = xmlDing.find(".//anchor") | |
# NOTE(review): debug output; xmlAnchor is used right below without a check
# for None, so a group without <anchor> would raise an AttributeError.
print(xmlAnchor) | |
if xmlChapter.get("rend") != "nonumber": | |
dictEquations[xmlAnchor.get('id')] = dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber) | |
if xmlChapter.get("rend") == "nonumber": | |
dictEquations[xmlAnchor.get('id')] = str(intEquationnumber) | |
# Delete anchor | |
xmlAnchor.getparent().remove(xmlAnchor) | |
for xmlSubequation in xmlSubequations: | |
# Enclosing <p>-Tag of the EOAsubequation needs to be removed | |
#xmlSubequation.getparent().tag = "temp" | |
# Numbering Subequations with characters | |
strSubequationNumber = str(intEquationnumber) + listCharacters[tmpI] | |
tmpI += 1 | |
textSourcecode = xmlSubequation.find('.//texmath').text | |
# Blank lines need to be removed otherwise TeX won't work | |
textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s]) | |
str_latexcode = TeX2PNG(textSourcecode, "EOAequation", str(intChapterNumber), strSubequationNumber) | |
# The anchor has to be fetched before clear() removes the children.
xmlAnchor = xmlSubequation.find(".//anchor") | |
# Clear Equation | |
xmlSubequation.clear() | |
if xmlChapter.get("rend") != "nonumber": | |
xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png") | |
xmlSubequation.set("number", dictChapters[xmlChapter.get('id')] + "." + strSubequationNumber) | |
xmlSubequation.set("uid", xmlAnchor.get('id')) | |
if xmlChapter.get("rend") == "nonumber": | |
xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png") | |
xmlSubequation.set("number", strSubequationNumber) | |
xmlSubequation.set("uid", xmlAnchor.get('id')) | |
xmlSubequation.set("id", xmlAnchor.get('id')) | |
xmlSubequation.set("TeX", str_latexcode) | |
# Insert Number of this Equation into dictEquations | |
# NOTE(review): this branches on strNumberingType while the code above
# branches on rend="nonumber". The chapter-numbering loop appears to add only
# numbered chapters to dictChapters, so an unnumbered chapter in a "regular"
# publication would presumably raise a KeyError here -- confirm.
if strNumberingType == "regular": | |
dictEquations[xmlAnchor.get('id')] = str(dictChapters[xmlChapter.get('id')]) + "." + strSubequationNumber | |
if strNumberingType == "essay": | |
dictEquations[xmlAnchor.get('id')] = strSubequationNumber | |
# TODO: Keep the anchor directly below the subequations and assign it to the first equation, so that 8.16 can be linked to from 8.16a and 8.16b | |
xmlDing.tag = "temp" | |
# enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on | |
#xmlDing.getparent().tag = "temp" | |
intEquationnumber += 1 | |
continue | |
# Case 3: single equation.
if xmlDing.tag == "EOAequation": | |
# Check, if Equation has already been found in a Subeqation | |
xmlAnchor = xmlDing.find("anchor") | |
# NOTE(review): "is None" would be the idiomatic identity test here.
if xmlAnchor == None: | |
continue | |
if xmlAnchor.get('id') in dictEquations: | |
continue | |
if xmlDing.find('.//texmath') is not None: | |
textSourcecode = xmlDing.find('.//texmath').text | |
else: | |
textSourcecode = xmlDing.text | |
# Blank lines need to be removed otherwise TeX won't work | |
textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s]) | |
str_latexcode = TeX2PNG(textSourcecode, "EOAequation", intChapterNumber, intEquationnumber) | |
#print ("Got:") | |
#print (str_latexcode) | |
if xmlChapter.get("rend") != "nonumber": | |
xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png") | |
xmlDing.set("number", dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber)) | |
xmlDing.set("uid", xmlAnchor.get('id')) | |
if xmlChapter.get("rend") == "nonumber": | |
xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png") | |
xmlDing.set("number", str(intEquationnumber)) | |
xmlDing.set("uid", xmlAnchor.get('id')) | |
xmlDing.set("id", xmlAnchor.get('id')) | |
xmlDing.set("TeX", str_latexcode) | |
#xmlDing.getparent().replace(xmlDing, xmlNew) | |
# Insert Number of this Equation into dictEquations | |
# NOTE(review): same mismatch between strNumberingType and rend="nonumber"
# as for the subequations; presumably a KeyError for unnumbered chapters in
# a "regular" publication -- confirm.
if strNumberingType == "regular": | |
dictEquations[xmlAnchor.get('id')] = \ | |
str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber) | |
if strNumberingType == "essay": | |
dictEquations[xmlAnchor.get('id')] = str(intEquationnumber) | |
intEquationnumber += 1 | |
continue | |
intChapterNumber += 1 | |
# Typeset all unnumbered display equations, chapter by chapter.
#
# Every <EOAequationnonumber> and <EOAequationarraynonumber> element is
# handed to TeX2PNG and then replaced in the tree by a fresh
# <EOAequationnonumber> element carrying
#   - "filename": name of the PNG (<original tag>_<chapter>_<image>.png)
#   - "TeX":      the value returned by TeX2PNG
#
# An earlier variant that split an equation array at every "\\" and rendered
# one image per row sat behind an unconditional `continue` and could never
# run; that unreachable code has been removed.
intChapterNumber = 1
print("-----------------------------------------------------")
print("Processing .//EOAequationnonumber | .//EOAequationarraynonumber")
for xmlChapter in xmlChapters:
    # Both element types share one image counter, restarted in every chapter.
    tempImagenumber = 1
    xmlDinge = xmlChapter.xpath(".//EOAequationnonumber | .//EOAequationarraynonumber")
    print("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge)))
    for xmlDing in xmlDinge:
        if xmlDing.tag not in ("EOAequationarraynonumber", "EOAequationnonumber"):
            continue
        # Prefer the <texmath> descendant; fall back to the element's own
        # text when there is none (same rule for both element types).
        xmlTexmath = xmlDing.find(".//texmath")
        if xmlTexmath is not None:
            textSourcecode = xmlTexmath.text
        else:
            textSourcecode = xmlDing.text
        # Blank lines need to be removed otherwise TeX won't work
        textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
        if xmlDing.tag == "EOAequationarraynonumber":
            # \rowattributeunknown has to be deleted, it is an artefact
            textSourcecode = re.sub(r"\\rowattributeunknown", "", textSourcecode)
            # TODO: check whether and to what extent it is sufficient to
            # render an EOAequationarraynonumber as one single image
            str_latexcode = TeX2PNG(textSourcecode, "EOAequationarraynonumber", str(intChapterNumber), str(tempImagenumber))
            strImageFile = "EOAequationarraynonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png"
        else:
            # NOTE(review): the image number is passed as int here but as str
            # in the branch above; kept as found, confirm against TeX2PNG.
            str_latexcode = TeX2PNG(textSourcecode, "EOAequationnonumber", str(intChapterNumber), tempImagenumber)
            # TODO: insert HTML code for the finished image (is this TODO
            # still current?)
            strImageFile = "EOAequationnonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png"
        xmlNew = etree.Element("EOAequationnonumber", filename=strImageFile)
        xmlNew.set("TeX", str_latexcode)
        xmlDing.getparent().replace(xmlDing, xmlNew)
        tempImagenumber += 1
    intChapterNumber += 1
# Render all inline equations (<EOAineq>) to PNG images.
#
# Steps:
#   1. Collect the TeX source of every <EOAineq>, one "$...$\newpage" page
#      per equation, and turn the element into an empty one that only keeps
#      its tail plus the attributes "src" (PNG name) and "TeX" (source).
#   2. Write the collected pages into the formula.tex template and run
#      xelatex on the result.
#   3. Split the PDF with pdf_burst, then crop (pdfcrop) and convert
#      (GM_PATH ... convert) each page into items/<name>.png.
print("-----------------------------------------------------")
print("Converting EOAineq")
intChapterNumber = 1
intEOAineqRunningOrder = 1
# running order (1-based, across all chapters) -> file name without suffix
dictEOAineqs = {}
strTeXEquations = ""
all_ineq = xmlTree.findall(".//EOAineq")
if len(all_ineq) > 0:
    print("Found " + str(len(all_ineq)) + " formulas")
    for xmlChapter in xmlChapters:
        print("Chapter " + str(intChapterNumber))
        xmlEOAineqs = xmlChapter.findall(".//EOAineq")
        intEOAineqnumber = 1
        for xmlEOAineq in xmlEOAineqs:
            xmlTexmath = xmlEOAineq.find('.//texmath')
            if xmlTexmath is not None:
                strSourceCode = xmlTexmath.text
            else:
                strSourceCode = xmlEOAineq.text
            progress(intEOAineqnumber, len(xmlEOAineqs),"Processing EOAineq %s of %s." % (intEOAineqnumber, len(xmlEOAineqs)))
            # drop blank lines
            strSourceCode = os.linesep.join([s for s in strSourceCode.splitlines() if s])
            # this occurred once in sources 11
            strSourceCode = strSourceCode.replace(r"\@root", r"\root")
            strTeXEquations = strTeXEquations + "$" + strSourceCode + "$\n\\newpage\n"
            # Add intEOAineqRunningOrder : Filename to dictionary
            strFilename = "EOAineq_" + str(intChapterNumber) + "_" + str(intEOAineqnumber)
            dictEOAineqs[intEOAineqRunningOrder] = strFilename
            # Prepare XML: clear() also removes the tail, so save and restore it
            tmpTail = xmlEOAineq.tail
            xmlEOAineq.clear()
            xmlEOAineq.tail = tmpTail
            xmlEOAineq.set("src", strFilename + ".png")
            xmlEOAineq.set("TeX", strSourceCode)
            # increment integers
            intEOAineqRunningOrder += 1
            intEOAineqnumber += 1
        intChapterNumber += 1
    # Regex pattern -> replacement, applied to the collected TeX only (the
    # "TeX" attributes set above keep the unsubstituted source).
    # Raw strings hold exactly the same patterns as before but no longer
    # rely on invalid escape sequences in ordinary string literals.
    # NOTE(review): in the last pattern "\s" is the regex whitespace class,
    # so it matches "<whitespace>lashed|"; r"\|slashed\|" may have been
    # intended -- confirm before changing.
    dictRebindedCommands = {
        r"\|ket\|" : r"\\ket",
        r"\|braket\|" : r"\\braket",
        r"\|bra\|" : r"\\bra",
        r"\|Bra\|" : r"\\Bra",
        r"\|Ket\|" : r"\\Ket",
        r"\slashed\|" : r"\\slashed"
    }
    for strCommand, strReplacement in dictRebindedCommands.items():
        strTeXEquations = re.sub(strCommand, strReplacement, strTeXEquations)
    with open(TEMPLATE_PATH + "formula.tex", "r") as tmp:
        Template = tmp.read()
    # use local tmpdir
    formula_tmp_dir = os.getcwd() + "/tmp_files/formulas2png/"
    # Make directory items if it doesn't already exist
    if not os.path.exists(os.getcwd() + "/items"):
        os.mkdir(os.getcwd() + "/items")
    s = string.Template(Template)
    e = s.substitute(DERINHALT=strTeXEquations)
    tmpFile = formula_tmp_dir + "EOAinline.tex"
    with open(tmpFile, "w") as tmp:
        tmp.write(e)
    print("Typesetting all Inline Equations")
    Kommando = TEXBIN_PATH + "xelatex --halt-on-error " + tmpFile
    Argumente = shlex.split(Kommando)
    # The log receives stdout of xelatex, pdfcrop and the image conversion;
    # the context manager closes it even if one of the steps raises.
    with open('tmp_files/xelatex-run.log', 'w') as Datei:
        Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
        print("Splitting all Inline Equations")
        pdf_burst("EOAinline.pdf", formula_tmp_dir)
        print("Converting %s split pages into PNG-Images" % len(dictEOAineqs.keys()))
        counter_dictEOAineqs = 1
        for intRunningOrder, strImageName in dictEOAineqs.items():
            # provide more status information here in output!
            progress(counter_dictEOAineqs, len(dictEOAineqs.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAineqs, len(dictEOAineqs.keys())))
            # single pages are expected as EOAformulas_<n>.pdf
            # (presumably written by pdf_burst -- confirm)
            Kommando = TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + strImageName + ".pdf"
            Argumente = shlex.split(Kommando)
            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
            # os.getcwd() instead of $PWD: the items directory was created
            # relative to os.getcwd() above, and $PWD may be unset.
            Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + strImageName + ".pdf " + os.getcwd() + "/items/" + strImageName + ".png"
            Argumente = shlex.split(Kommando)
            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
            counter_dictEOAineqs += 1
else:
    print("Found no EOAineq. Continuing")
###########
# Formula #
###########
# Render all inline chemical formulas (<EOAchem>) to PNG images.
#
# Works like the inline-equation conversion: every formula becomes one
# "\ce{...}\newpage" page in a document built from the formula.tex template,
# the document is typeset with xelatex, split with pdf_burst, and each page
# is cropped (pdfcrop) and converted into items/<name>.png. The <EOAchem>
# element itself is emptied and only keeps its tail plus the attributes
# "src" (PNG name) and "TeX" (formula source).
print("-----------------------------------------------------")
print("Converting EOAchem")
intChapterNumber = 1
int_EOAchem_running_order = 1
# running order (1-based, across all chapters) -> file name without suffix
dictEOAchems = {}
str_tex_chem = ""
all_chem = xmlTree.findall(".//EOAchem")
if len(all_chem) > 0:
    print("Found " + str(len(all_chem)) + " chemical formulas")
    for xmlChapter in xmlChapters:
        print("Chapter " + str(intChapterNumber))
        xmlEOAchems = xmlChapter.findall(".//EOAchem")
        int_EOAchem_number = 1
        for xml_EOAchem in xmlEOAchems:
            str_chem_text = xml_EOAchem.text
            progress(int_EOAchem_number, len(xmlEOAchems),"Processing EOAchem %s of %s." % (int_EOAchem_number, len(xmlEOAchems)))
            # drop blank lines
            str_chem_text = os.linesep.join([s for s in str_chem_text.splitlines() if s])
            # "\\ce{" is the same text as the former "\ce{", written without
            # the invalid escape sequence "\c"
            str_tex_chem = str_tex_chem + "\\ce{" + str_chem_text + "}\n\\newpage\n"
            # Add int_EOAchem_running_order : Filename to dictionary
            strFilename = "EOAchem_" + str(intChapterNumber) + "_" + str(int_EOAchem_number)
            dictEOAchems[int_EOAchem_running_order] = strFilename
            # Prepare XML: clear() also removes the tail, so save and restore it
            tmpTail = xml_EOAchem.tail
            xml_EOAchem.clear()
            xml_EOAchem.tail = tmpTail
            xml_EOAchem.set("src", strFilename + ".png")
            xml_EOAchem.set("TeX", str_chem_text)
            # increment integers
            int_EOAchem_running_order += 1
            int_EOAchem_number += 1
        intChapterNumber += 1
    with open(TEMPLATE_PATH + "formula.tex", "r") as tmp:
        Template = tmp.read()
    # use local tmpdir
    formula_tmp_dir = os.getcwd() + "/tmp_files/formulas2png/"
    # Make directory items if it doesn't already exist
    if not os.path.exists(os.getcwd() + "/items"):
        os.mkdir(os.getcwd() + "/items")
    s = string.Template(Template)
    e = s.substitute(DERINHALT=str_tex_chem)
    tmpFile = formula_tmp_dir + "EOAchem.tex"
    with open(tmpFile, "w") as tmp:
        tmp.write(e)
    print("Typesetting all inline Chemical formulas")
    Kommando = TEXBIN_PATH + "xelatex --halt-on-error " + tmpFile
    Argumente = shlex.split(Kommando)
    # The log receives stdout of xelatex, pdfcrop and the image conversion.
    # Mode 'w' truncates whatever an earlier step wrote to the same file.
    # The context manager closes the log even if one of the steps raises.
    with open('tmp_files/xelatex-run.log', 'w') as Datei:
        Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
        print("Splitting all Inline Chemical formulas")
        pdf_burst("EOAchem.pdf", formula_tmp_dir)
        print("Converting %s split pages into PNG-Images" % len(dictEOAchems.keys()))
        counter_dictEOAchems = 1
        for intRunningOrder, strImageName in dictEOAchems.items():
            # provide more status information here in output!
            progress(counter_dictEOAchems, len(dictEOAchems.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAchems, len(dictEOAchems.keys())))
            # single pages are expected as EOAformulas_<n>.pdf
            # (presumably written by pdf_burst -- confirm)
            Kommando = TL_PATH + "texmf-dist/scripts/pdfcrop/pdfcrop.pl " + formula_tmp_dir + "EOAformulas_" + str(intRunningOrder) + ".pdf " + formula_tmp_dir + strImageName + ".pdf"
            Argumente = shlex.split(Kommando)
            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
            # os.getcwd() instead of $PWD: the items directory was created
            # relative to os.getcwd() above, and $PWD may be unset.
            Kommando = GM_PATH + " convert -density 144 " + formula_tmp_dir + strImageName + ".pdf " + os.getcwd() + "/items/" + strImageName + ".png"
            Argumente = shlex.split(Kommando)
            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
            counter_dictEOAchems += 1
else:
    print("Found no EOAchem. Continuing")
###############
# Formula end #
###############
# Number the figures (<EOAfigure>, <EOAlsfigure>) of each chapter, starting
# at 1 per chapter. The number is stored in dictFigures under the id of the
# figure's <anchor> child, and that id is copied onto the figure element.
print("-----------------------------------------------------")
print("EOAFigure Numbering per Chapter")
for xmlChapter in xmlChapters:
    Figurenumber = 1
    xmlFigures = xmlChapter.xpath(".//EOAfigure | .//EOAlsfigure")
    for xmlFigure in xmlFigures:
        xmlAnchor = xmlFigure.find("anchor")
        strAnchorID = xmlAnchor.get('id')
        strChapterID = xmlChapter.get('id')
        if not strChapterID:
            # chapter without id: plain running number
            dictFigures[strAnchorID] = str(Figurenumber)
        else:
            # chapter with id: "<chapter number>.<figure number>"
            dictFigures[strAnchorID] = "{}.{}".format(dictChapters[strChapterID], Figurenumber)
        xmlFigure.set("id", strAnchorID)
        Figurenumber += 1
# Record the number of every <theorem>: dictTheorems maps the element's
# "id" attribute to its "id-text" attribute.
print("-----------------------------------------------------")
print("Numbering Theorems")
for xmlChapter in xmlChapters:
    xmlTheorems = xmlChapter.findall(".//theorem")
    for xmlTheorem in xmlTheorems:
        strUID, strNumber = xmlTheorem.get("id"), xmlTheorem.get("id-text")
        dictTheorems[strUID] = strNumber
# Number sections (<div2>) and subsections (<div3>) of every chapter and
# store the labels in dictSections, keyed by the element's "id" attribute.
# Elements with rend="nonumber" get no label and do not advance the counters.
# In a chapter with rend="nonumber" the labels omit the chapter number and
# the chapter counter is not advanced.
print("-----------------------------------------------------")
print("Section, Subsection,... Numbering per Chapter")
intChapterNumber = 1
for xmlChapter in xmlChapters:
    strUID = xmlChapter.get("id")
    boolChapterIsNumbered = xmlChapter.get("rend") != "nonumber"
    xmlSections = xmlChapter.findall("div2")
    intSectionNumber = 1
    for xmlSection in xmlSections:
        if xmlSection.get("rend") == "nonumber":
            continue
        strUID = xmlSection.get("id")
        if boolChapterIsNumbered:
            strSectionLabel = str(intChapterNumber) + "." + str(intSectionNumber)
        else:
            strSectionLabel = str(intSectionNumber)
        dictSections[strUID] = strSectionLabel
        xmlSubsections = xmlSection.findall("div3")
        intSubsectionNumber = 1
        for xmlSubsection in xmlSubsections:
            if xmlSubsection.get("rend") == "nonumber":
                continue
            strUID = xmlSubsection.get("id")
            # subsection label = section label + "." + running number
            dictSections[strUID] = strSectionLabel + "." + str(intSubsectionNumber)
            intSubsectionNumber += 1
        intSectionNumber += 1
    if boolChapterIsNumbered:
        intChapterNumber += 1
# Number the footnotes (<note>) of each chapter, starting at 1 per chapter;
# dictFootnotes maps the note's "id" attribute to its number as a string.
print("-----------------------------------------------------")
print("Numbering of Footnotes per Chapter")
intChapterNumber = 1
for xmlChapter in xmlChapters:
    xmlFootnotes = xmlChapter.findall(".//note")
    intNoteNumber = 1
    for xmlFootnote in xmlFootnotes:
        strUID = xmlFootnote.get("id")
        dictFootnotes[strUID] = "{}".format(intNoteNumber)
        intNoteNumber += 1
# The footnote code written by OU used to live here; it is now in
# libeoaconvert (get_bigfoot_data(chapter)). bigfoot still needs to be
# integrated into a structure like
# 'fndict': {'uid11': '2', 'uid12': '3', 'uid9': '1'},
# The new-style footnotes that use LaTeX bigfoot show up in this order:
footnote_groups = ["decimal", "lower-latin"]
# Record the number of every list <item>: dictLists maps the element's "id"
# attribute to its "id-text" attribute.
print("-----------------------------------------------------")
print("Numbering of Lists per Chapter")
for xmlChapter in xmlChapters:
    xmlListitems = xmlChapter.findall(".//item")
    for xmlListitem in xmlListitems:
        strUID, strItemNumber = xmlListitem.get("id"), xmlListitem.get("id-text")
        dictLists[strUID] = strItemNumber
# Read every *.aux file in the current working directory and extract
#   - page labels: \newlabel{<label>}{{<number>}{<page>}...
#                  -> dictPagelabels[<label>] = <page>
#   - cited works: \abx@aux@cite{<key>} -> added to set_citations
# Exits with status 1 when no aux file is found.
print("-----------------------------------------------------")
print("Working on Page Numbers for References")
# hyperref makes the lines much much longer, both of these have to match:
# \newlabel{BL}{{1.1}{4}{Forschungsüberblick zur Literatur über Alvarus Thomas}{section.1.1}{}}
# \newlabel{BL}{{1.1}{4}}
regex_newlabel = re.compile(r'\\newlabel\{(.*?)\}\{\{(.*?)\}\{(.*?)\}')
regex_citation = re.compile(r'\\abx@aux@cite{(.*?)}')
listAuxFiles = glob.glob(os.getcwd() + "/*.aux")
if not listAuxFiles:
    print("No aux file found. Exiting")
    sys.exit(1)
for strFile in listAuxFiles:
    # the context manager closes the file even when reading fails
    with open(strFile, "r") as tmpFile:
        lines = tmpFile.readlines()
    for line in lines:
        matched_label = regex_newlabel.match(line)
        if matched_label:
            # group 1 is the label, group 3 the page
            dictPagelabels[matched_label.group(1)] = matched_label.group(3)
        # parsing out information on cited works
        matched_citation = regex_citation.match(line)
        if matched_citation is not None:
            set_citations.add(matched_citation.group(1))
print(dictPagelabels)
print(set_citations)
# Number the tables (<EOAtable>) of each chapter, starting at 1 per chapter.
# A table whose <EOAtablecaption> text is "nonumber" is skipped. A table
# with an empty <EOAtablelabel> gets the generated label
# "table<chapter><table>". dictTables maps the label text to the number.
print("-----------------------------------------------------")
print("Numbering of Tables per Chapter")
intChapterNumber = 1
for xmlChapter in xmlChapters:
    intTableNumber = 1
    xmlTables = xmlChapter.findall(".//EOAtable")
    for xmlTable in xmlTables:
        xmlTableLabel = xmlTable.find(".//EOAtablelabel")
        strTableCaption = xmlTable.find(".//EOAtablecaption").text
        if strTableCaption == "nonumber":
            continue
        if not xmlTableLabel.text:
            # None and "" are both falsy: generate a label
            xmlTableLabel.text = "table" + str(intChapterNumber) + str(intTableNumber)
        strUID = xmlTableLabel.text
        print(strUID)
        if xmlChapter.get("rend") == "nonumber":
            dictTables[strUID] = str(intTableNumber)
        else:
            dictTables[strUID] = dictChapters[xmlChapter.get('id')] + "." + str(intTableNumber)
        intTableNumber += 1
        print(dictTables)
    intChapterNumber += 1
##############################################################
# Preparing the Bibliography                                 #
##############################################################
# Reads the name of the bibliography database and the bibliography type
# from the XML tree, validates the type, and converts the .bib database to
# JSON with pandoc-citeproc. The parsed records end up in citations_json.
xmlBibDatabase = xmlTree.find(".//EOAbibliographydatabase")
if xmlBibDatabase is not None:
    bib_database = xmlBibDatabase.text
    HAS_BIBLIOGRAPHY = True
else:
    print("No database found.")
    HAS_BIBLIOGRAPHY = False
    # wait for the user to press return before going on
    # NOTE(review): bib_database stays undefined on this path, so the
    # pandoc-citeproc call below raises NameError -- decide whether the
    # script should stop here instead.
    input()
bib_type = xmlTree.find(".//EOAbibliographytype").text
allowed_bib_types = ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]
if bib_type not in allowed_bib_types:
    # The message used to be built with ",".join[bib_type], which raised
    # TypeError instead of printing; it now lists the accepted values.
    print("The bibtype must be one of " + ", ".join(allowed_bib_types) + ". Exiting")
    # non-zero status: this is an error exit
    sys.exit(1)
# the new solution: pandoc-citeproc
interim_bib_json_file = (args.filename) + "-bib.json"
citeproc_command = "pandoc-citeproc --bib2json %s" % bib_database + ".bib"
logging.debug(citeproc_command)
citeproc_arguments = shlex.split(citeproc_command)
citeproc_process = subprocess.Popen(citeproc_arguments, stdout=subprocess.PIPE)
# communicate() reads stdout to the end and waits for the child, so the
# process is reaped instead of being left behind
citeproc_json = citeproc_process.communicate()[0]
citations_json = json.loads(citeproc_json)
# for x in citations_json: | |
# print(x["title"]) | |
# with open(interim_bib_json_file, 'w') as ibjf: | |
# json.dump(citeproc_json.decode('utf-8'), ibjf) | |
#################### | |
# the old solution # | |
#################### | |
# # Copy interim .bbl-File to interim bib.tex file | |
# interim_bibtex_file = (args.filename) + "bib.tex" | |
# try: | |
# shutil.copy(BIBERFILE, interim_bibtex_file) | |
# except FileNotFoundError: | |
# print("%s has not been created yet. Switch TeX distribution to TeXLive2016, run biber_2.1 -O biber2-1.bbl %s to obtain this file" % (BIBERFILE, args.filename)) | |
# # Read all lines of Bibliographic TeX | |
# tmpFile = open(interim_bibtex_file, "r") | |
# tmpLines = tmpFile.readlines() | |
# tmpFile.close() | |
# # First line should link to Bibliographic Praeambel | |
# tmpLines[0] = "\\include{%spre_bib}\n" % TEMPLATE_PATH | |
# # Remove unwanted lines | |
# for i in range(18,0,-1): | |
# del tmpLines[i] | |
# # Save changes | |
# tmpFile = open(interim_bibtex_file, "w") | |
# tmpFile.writelines(tmpLines) | |
# tmpFile.close() | |
# # TeX has been sanitized, now tralics to make it intermediate XML | |
# print("TeX has been sanitized, now tralics to make it intermediate XML") | |
# Kommando = "%s -log_file %s -confdir %s/tralics_conf -config %s/tralics.tcf -utf8 -utf8output -entnames=false %sbib.tex" % (TRALICS_PATH_EXEC, args.filename + "-bib-tralics.log", TRALICS_PATH_LIB, TRALICS_PATH_LIB, args.filename) | |
# Argumente = shlex.split(Kommando) | |
# Prozess = subprocess.call(Argumente) | |
# # Sanitize XML to make it useable | |
# tmpFile = open((args.filename) + "bib.xml", "r") | |
# tmpContent = tmpFile.read() | |
# tmpFile.close() | |
# listReplace = [ r"<math mode='display' xmlns='http://www.w3.org/1998/Math/MathML'>", | |
# r"<formula textype='displaymath' type='display'>", | |
# r"<mi>", | |
# r"</mi>", | |
# r"<mn>", | |
# r"<mn>", | |
# r"<mo>", | |
# r"</mo>", | |
# r"<mn>", | |
# r"</mn>", | |
# r"<mrow/>", | |
# r"<msup>", | |
# r"</msup>", | |
# r"</math>", | |
# r"</formula>", | |
# r"<formula type='inline'>", | |
# r"<math xmlns='http://www.w3.org/1998/Math/MathML'>", | |
# r"<formula textype='math' type='inline'>", | |
# r"<mrow>uniquename=(.*?),hash=(.*?)</mrow>", | |
# r"<mrow>hash=(.*?)</mrow>", | |
# ] | |
# for strReplace in listReplace: | |
# tmpContent = re.sub(strReplace, "", tmpContent) | |
# # Put Back Underscore _ | |
# tmpContent = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpContent) | |
# # Remove empty Lines | |
# tmpContent = re.sub(r"\n\n", "\n", tmpContent) | |
# # Put back Ampersand | |
# tmpContent = re.sub(r"&", "&", tmpContent) | |
# tmpFile = open((args.filename) + "bib.xml", "w") | |
# tmpFile.write(tmpContent) | |
# tmpFile.close() | |
# # TeXML has been sanitized, now load xml-Tree | |
# xmlParser2 = etree.XMLParser(no_network=False,load_dtd=False) | |
# xmlBibTree = etree.parse((args.filename + "bib.xml"), xmlParser2) | |
# xml_bib_entries = xmlBibTree.findall(".//entry") | |
########################### | |
# end of the old solution # | |
########################### | |
make_bibchecker(bib_database, set_citations)
# Bibliography type "monograph": one bibliography for the whole publication.
# The <EOAprintbibliography> element is emptied and renamed to <div>, its
# parent is renamed to <div> as well, and the formatted references are
# placed in a new <div> inserted right after it.
if bib_type == "monograph":
    xmlPrintBibliography = xmlTree.find(".//EOAprintbibliography")
    if xmlPrintBibliography is not None:
        xmlBibliography = xmlPrintBibliography
        xmlBibliography.clear()
        xmlBibliography.tag = "div"
        bib_parent = xmlBibliography.getparent()
        bib_parent.tag = "div"
        xmlBibliographyDiv = etree.Element("div")
        xmlBibliography.addnext(xmlBibliographyDiv)
        # format all cited works in one go; only the first element of the
        # returned value is used here
        tmp_citation_filename = "used_citations-monograph"
        formatted_references = libeoaconvert.format_citations(
            set_citations,
            bib_database + ".bib",
            strLanguage,
            tmp_citation_filename,
        )[0]
        fixed_entries = libeoaconvert.fix_bib_entries(formatted_references)
        for entry in fixed_entries:
            xmlBibliographyDiv.append(entry)
############### | |
# old version # | |
############### | |
# xml_bib_entries = xmlBibTree.findall(".//entry") | |
# intNumberOfEntry = 0 | |
# for xmlEntry in xml_bib_entries: | |
# if intNumberOfEntry == 0: | |
# # Don't check for previous author if first entry of the Bibliography | |
# bibEntry = Bibitem(xmlEntry) | |
# strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "</p>" | |
# else: | |
# bibEntry = Bibitem(xmlEntry) | |
# # Check if author of previous Entry is the same | |
# bibEntryPrevious = Bibitem(xml_bib_entries[intNumberOfEntry - 1]) | |
# if bibEntry.fullauthorlastfirst()[0] == bibEntryPrevious.fullauthorlastfirst()[0]: | |
# strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "</p>" | |
# elif bibEntryPrevious.fullauthorlastfirst()[0] == bibEntry.fullauthorlastfirst()[0]: | |
# strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "</p>" | |
# elif bibEntry.fullauthorlastfirst()[0] == bibEntryPrevious.fullauthorlastfirst()[0]: | |
# strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=True) + "</p>" | |
# else: | |
# strNewentry = "<p class=\"bibliography\">" + createBibEntryAuthorYear(bibEntry, boolSameAuthor=False) + "</p>" | |
# xmlNew = etree.fromstring(strNewentry) | |
# next one writes the bibliography into the document | |
# xmlBibliography.append(xmlNew) | |
# intNumberOfEntry += 1 | |
###################### | |
# end of old version # | |
###################### | |
# Bibliography type "anthology": one bibliography per chapter.
#
# For a chapter containing <EOAprintbibliography>, the parent of that
# element is renamed to <div> and the references for the cite keys found in
# the chapter are placed in a new <div> inserted right after it, giving
#
#   <div2 rend="nonumber">
#     <head>References</head>
#     <div>
#       <EOAprintbibliography/>
#       <div>
#         <p class="bibliography">Abril Castelló, Vidal (1987). ...</p>
#         <p class="bibliography">Agrawal, Arun (1995). ...</p>
#       </div>
#     </div>
#   </div2>
#
# For a chapter without <EOAprintbibliography>, an empty marker file
# tmp_files/<citation file name>_nocites is created instead.
# (The former refsection-based implementation, which was kept here as
# commented-out code, has been dropped from this block.)
if bib_type == "anthology":
    print("bib type is anthology")
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber)
        if xmlChapter.find(".//EOAprintbibliography") is None:
            # create an empty file
            strMarkerPath = os.path.join("tmp_files", tmp_citation_filename + "_nocites")
            open(strMarkerPath, 'a').close()
        else:
            xmlBibliography = xmlChapter.find(".//EOAprintbibliography")
            xmlBibliography.getparent().tag = "div"
            xmlBibliographyDiv = etree.Element("div")
            xmlBibliography.addnext(xmlBibliographyDiv)
            # only the works cited in this chapter
            citekeys = xmlChapter.xpath(".//citekey/text()")
            set_citations = set(citekeys)
            formatted_references = libeoaconvert.format_citations(
                set_citations,
                bib_database + ".bib",
                strLanguage,
                tmp_citation_filename,
            )[0]
            fixed_entries = libeoaconvert.fix_bib_entries(formatted_references)
            for entry in fixed_entries:
                xmlBibliographyDiv.append(entry)
        intChapterNumber += 1
# for the time being | |
strCitation = "" | |
# Bibliographies are done, now for the citations | |
if bib_type == "anthology" or bib_type == "monograph": | |
intChapterNumber = 1 | |
if bib_type == "monograph": | |
tmp_citation_filename = "used_citations-monograph" | |
tmp_path_html = "tmp_files" + os.path.sep + tmp_citation_filename + ".html" | |
with open(tmp_path_html, "r") as formatted_citations: | |
form_cit = BeautifulSoup(formatted_citations, "html.parser") | |
for xmlChapter in xmlChapters: | |
print("-----------------------------------------------------") | |
print("Processing References for Chapter " + str(intChapterNumber)) | |
xmlCitations = xmlChapter.xpath(".//EOAciteauthoryear | .//EOAciteyear | .//EOAcitemanual") | |
if bib_type == "anthology": | |
tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber) | |
tmp_path_html = "tmp_files" + os.path.sep + tmp_citation_filename + ".html" | |
no_cite_path = "tmp_files" + os.path.sep + tmp_citation_filename + "_nocites" | |
if os.path.exists(tmp_path_html): | |
with open(tmp_path_html, "r") as formatted_citations: | |
form_cit = BeautifulSoup(formatted_citations, "html.parser") | |
elif os.path.exists(no_cite_path): | |
print("no cites in this chapter") | |
intChapterNumber += 1 | |
continue | |
counter_citations = 1 | |
for xmlCitation in xmlCitations: | |
string_citekey = xmlCitation.find("./citekey").text | |
progress(counter_citations, len(xmlCitations),"Processing reference %s of %s: %s" % (counter_citations, len(xmlCitations), string_citekey)) | |
# If Bibliography-Type is anthology find Refsection for this Chapter | |
############### | |
# old version # | |
############### | |
# if bib_type == "anthology": | |
# print("Yes, it's anthology time!") | |
# xmlRefsections = xmlBibTree.findall(".//refsection") | |
# for xmlRefsection in xmlRefsections: | |
# if xmlRefsection.find(".//number").text == str(intChapterNumber): | |
# break | |
# xml_bib_entries = xmlRefsection.findall(".//entry") | |
################### | |
# end old version # | |
################### | |
# If Bibliography-Type is monograph find all entries, forget about refsection | |
############### | |
# old version # | |
############### | |
""" | |
if bib_type == "monograph": | |
xml_bib_entries = xmlBibTree.findall(".//entry") | |
for xmlEntry in xml_bib_entries: | |
bibEntry = Bibitem(xmlEntry) | |
if bibEntry.citekey() == xmlCitation.find("./citekey").text: | |
if xmlCitation.tag == "EOAciteauthoryear": | |
strCitation = bibEntry.shortauthor() + " " + bibEntry.labelyear() | |
if bibEntry.labelyearsuffix() is not None: | |
strCitation = strCitation + bibEntry.labelyearsuffix() | |
strTitle = bibEntry.title() | |
if xmlCitation.tag == "EOAciteyear": | |
strCitation = bibEntry.labelyear() | |
if bibEntry.labelyearsuffix() is not None: | |
strCitation = strCitation + bibEntry.labelyearsuffix() | |
strTitle = bibEntry.title() | |
if xmlCitation.tag == "EOAcitemanual": | |
cite_text = xmlCitation.find("citetext") | |
if cite_text.getchildren(): | |
tmp_string = xmlCitation.find("citetext") | |
tmp_string = cite_text.getchildren()[0] | |
strCitation = etree.tostring(tmp_string) | |
# BAUSTELLE!!!!! | |
# tmp_string2 = etree.tostring(tmp_string) | |
# tmp_string3 = tmp_string2.decode() | |
# strCitation = tmp_string3.replace("<", "<") | |
else: | |
strCitation = xmlCitation.find("citetext").text | |
strTitle = bibEntry.title() | |
if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None: | |
strCitation = strCitation + ", " + xmlCitation.find("./page").text | |
""" | |
###################### | |
# end of old version # | |
###################### | |
############### | |
# new version # | |
############### | |
# string_citekey = xmlCitation.find("./citekey").text | |
for entry in citations_json: | |
if entry["id"] == string_citekey: | |
current_citation = entry | |
strTitle = current_citation["title"] | |
# [1:-1] to remove parentheses around citations | |
try: | |
citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] | |
except IndexError: | |
print("Could not find {}. Exiting.".format(string_citekey)) | |
sys.exit() | |
data_title_value = citeauthoryear_value | |
if xmlCitation.tag == "EOAciteauthoryear": | |
strCitation = citeauthoryear_value | |
elif xmlCitation.tag == "EOAciteyear": | |
strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] | |
elif xmlCitation.tag == "EOAcitemanual": | |
cite_text = xmlCitation.find("citetext") | |
if cite_text.getchildren(): | |
tmp_string = xmlCitation.find("citetext") | |
tmp_string = cite_text.getchildren()[0] | |
strCitation = etree.tostring(tmp_string) | |
# BAUSTELLE!!!!! | |
# tmp_string2 = etree.tostring(tmp_string) | |
# tmp_string3 = tmp_string2.decode() | |
# strCitation = tmp_string3.replace("<", "<") | |
else: | |
strCitation = xmlCitation.find("citetext").text | |
if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None: | |
pages_text = libeoaconvert.gettext(xmlCitation.find("./page")) | |
strCitation = strCitation + ", " + pages_text | |
data_title_value = data_title_value + ", " + pages_text | |
# strCitation = strCitation + ", " + xmlCitation.find("./page").text | |
###################### | |
# end of new version # | |
###################### | |
# Replace the XML tag with the formatted citation here
tmpTail = xmlCitation.tail | |
xmlCitation.clear() | |
xmlCitation.tag = "span" | |
xmlCitation.set("rel", "popover") | |
xmlCitation.set("class", "citation") | |
xmlCitation.set("citekey", string_citekey) | |
xmlCitation.text = strCitation | |
xmlCitation.tail = tmpTail | |
# Create Link to be used for website in a popover | |
xmlCitation.set("data-toggle", "popover") | |
xmlCitation.set("html", "true") | |
xmlCitation.set("data-placement", "bottom") | |
xmlCitation.set("data-title", data_title_value) | |
try: | |
xmlCitation.set("data-content", strTitle) | |
except: | |
xmlCitation.set("data-content", "missing") | |
counter_citations += 1 | |
intChapterNumber += 1 | |
# Numeric bibliographies ("monograph-numeric" and "anthology-numeric").
#
# Both variants number the bibliography entries, append them as
# <p class="bibliography"> elements to the EOAprintbibliography element and
# replace the text of every citation element by a bracketed list of numbers.
# The helpers below hold the logic that both variants share.


def _split_citekeys(raw_citekeys):
    """Split a comma-separated citekey string after removing blanks and newlines."""
    return raw_citekeys.replace(" ", "").replace("\n", "").split(",")


def _collapse_cite_numbers(cite_numbers):
    """Format citation numbers as a sorted, bracketed list.

    Runs of three or more consecutive numbers are collapsed into a range,
    shorter runs are listed individually, e.g. ``["3", "1", "2", "7", "8"]``
    becomes ``"[1-3,7,8]"``.

    cite_numbers -- iterable of numbers (as strings or ints)
    """
    runs = []
    for number in sorted(int(cite_number) for cite_number in cite_numbers):
        if runs and number == runs[-1][1] + 1:
            # extends the current run of consecutive numbers
            runs[-1][1] = number
        else:
            runs.append([number, number])
    parts = []
    for first, last in runs:
        if last - first >= 2:
            parts.append("{}-{}".format(first, last))
        elif last != first:
            parts.append("{},{}".format(first, last))
        else:
            parts.append(str(first))
    return "[" + ",".join(parts) + "]"


def _append_numbered_entries(bibliography_element, bib_entries):
    """Append one numbered paragraph per bibliography entry.

    Returns two dictionaries keyed by citekey: the assigned number (as
    string) and the title of the entry.
    """
    numbers_by_citekey = {}
    titles_by_citekey = {}
    for entry_number, xml_entry in enumerate(bib_entries, start=1):
        # Go through all entries and assign a number to the citekey
        bib_entry = Bibitem(xml_entry)
        citekey = bib_entry.citekey()
        numbers_by_citekey[citekey] = str(entry_number)
        titles_by_citekey[citekey] = str(bib_entry.title())
        new_entry = "<p class=\"bibliography\">[" + str(entry_number) + "] " + createBibEntryNumeric(bib_entry) + "</p>"
        bibliography_element.append(etree.fromstring(new_entry))
    return numbers_by_citekey, titles_by_citekey


def _popover_text(citekeys, numbers_by_citekey, titles_by_citekey):
    """Return the text shown in the website popover: "[number] title " per citekey."""
    return "".join(
        "[" + numbers_by_citekey[citekey] + "] " + titles_by_citekey[citekey] + " "
        for citekey in citekeys
    )


# If Bibliography-Type is monograph-numeric search for EOAbibliography and make it all
if bib_type == "monograph-numeric":
    found_bibliography = xmlTree.find(".//EOAprintbibliography")
    if found_bibliography is None:
        # NOTE(review): the nesting of the original code could not be
        # recovered here; without a bibliography there are no numbers to
        # assign, so citations are reported and left untouched -- confirm.
        if xmlTree.find(".//EOAcitenumeric") is not None:
            print("No EOAprintbibliography found, numeric citations are left unchanged.")
    else:
        xmlBibliography = found_bibliography
        xmlBibliography.clear()
        xmlBibliography.tag = "div"
        xmlBibliography.getparent().tag = "div"
        xml_bib_entries = xmlBibTree.findall(".//entry")
        dictCitekeysNumbers, dictCitekeysTitles = _append_numbered_entries(xmlBibliography, xml_bib_entries)
        # Now for the references via EOAcitenumeric
        xmlCitenumerics = xmlTree.findall(".//EOAcitenumeric")
        for xmlCitenumeric in xmlCitenumerics:
            print(etree.tostring(xmlCitenumeric))
            listCitekeys = _split_citekeys(xmlCitenumeric.find(".//citekey").text)
            strResult = _collapse_cite_numbers(dictCitekeysNumbers[strCitekey] for strCitekey in listCitekeys)
            # Create Text to be used on the website in a popover
            strPopover = _popover_text(listCitekeys, dictCitekeysNumbers, dictCitekeysTitles)
            xmlCitenumeric.text = strResult
            # Create Link to be used for website
            xmlCitenumeric.set("data-toggle", "popover")
            xmlCitenumeric.set("html", "true")
            xmlCitenumeric.set("data-content", strPopover)
            xmlCitenumeric.set("class", "citation")
            xmlCitenumeric.set("data-placement", "bottom")
            xmlCitenumeric.set("data-title", strResult)
        # TODO (carried over from the original): author is missing!

# Numeric citations for the individual chapters
if bib_type == "anthology-numeric":
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        print("Processing Bibliography")
        found_bibliography = xmlChapter.find(".//EOAprintbibliography")
        if found_bibliography is not None:
            xmlBibliography = found_bibliography
            # Unlike the monograph variant, the element is not cleared here.
            xmlBibliography.tag = "div"
            xmlBibliography.getparent().tag = "div"
            # Pick the refsection whose number equals the chapter number.
            xmlRefsections = xmlBibTree.findall(".//refsection")
            for xmlRefsection in xmlRefsections:
                if xmlRefsection.find(".//number").text == str(intChapterNumber):
                    break
            else:
                # Previously a silent fall-through: the loop variable keeps
                # its last value, which is used as before.
                print("No refsection with number {} found, using the last refsection seen.".format(intChapterNumber))
            xml_bib_entries = xmlRefsection.findall(".//entry")
            dictCitekeysNumbers, dictCitekeysTitles = _append_numbered_entries(xmlBibliography, xml_bib_entries)
            # Now for the references via EOAcitenumeric
            xmlCitenumerics = xmlChapter.xpath(".//EOAcitenumeric | .//EOAciteauthoryear | .//EOAciteyear")
            print("Found numeric citation in chapter " + str(intChapterNumber))
            for xmlCitenumeric in xmlCitenumerics:
                listCitekeys = _split_citekeys(xmlCitenumeric.find(".//citekey").text)
                print(",".join(listCitekeys))
                for strCitekey in listCitekeys:
                    print(strCitekey)
                strResult = _collapse_cite_numbers(dictCitekeysNumbers[strCitekey] for strCitekey in listCitekeys)
                # Create Text to be used on the website in a popover
                strPopover = _popover_text(listCitekeys, dictCitekeysNumbers, dictCitekeysTitles)
                xmlCitenumeric.text = strResult
                # Create Link to be used for website in a popover
                xmlCitenumeric.set("data-toggle", "popover")
                xmlCitenumeric.set("data-placement", "bottom")
                xmlCitenumeric.set("data-title", " " + strResult)
                xmlCitenumeric.set("data-content", strPopover)
                xmlCitenumeric.set("class", "citation")
        intChapterNumber += 1
# This is somewhat luzzini-specific: table-of-contents and index elements
# found below the div1 that encloses the bibliography are appended to the
# parent of that div1.
bib_parent_element = xmlBibliography.getparent()
upper_div = bib_parent_element.xpath("./ancestor::div1")[0]
previous_div0 = upper_div.getparent()
# possible culprit for not finding the index: the search used to start at
# bib_parent_element instead of upper_div
other_content = upper_div.xpath(
    ".//EOAtocentry | .//EOAprintpersonindex | .//EOAprintlocationindex | .//EOAprintindex"
)
for element in other_content:
    previous_div0.append(element)
# here followed the conversion to epub and the conversion to django.xml
# both parts were removed and put into separate files.

# Write the XML tree twice: once exactly as serialized ("_pre") and once with
# an XML declaration and the <msparbreak/> milestones resolved.
intermediate_file_pre = "tmp_files/IntermediateXMLFile_pre.xml"
intermediate_file = "tmp_files/IntermediateXMLFile.xml"

ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode")
# The declaration added below announces UTF-8, so the encoding is set
# explicitly instead of relying on the platform default.
with open(intermediate_file_pre, "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)

# The serialized string is still in memory, no need to read the file back.
filedata = ergebnis
# add XML declaration
filedata_declaration = "<?xml version='1.0' encoding='UTF-8'?>\n" + filedata
# replacing a milestone element by a closing and opening combination
filedata_clean = filedata_declaration.replace('<msparbreak/>', '</p><p>')
# Write the file out again
with open(intermediate_file, "w", encoding="utf-8") as outfile:
    outfile.write(filedata_clean)
# Store the lookup dictionaries in a pickle file.
data_to_pickle = dict(
    chapterdict=dictChapters,
    eqdict=dictEquations,
    listdict=dictLists,
    theoremdict=dictTheorems,
    figdict=dictFigures,
    secdict=dictSections,
    fndict=dictFootnotes,
    tabdict=dictTables,
    pagelabeldict=dictPagelabels,
)

# Use the highest pickle protocol available.
with open('tmp_files/data.pickle', 'wb') as pickle_file:
    pickle.dump(data_to_pickle, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
# Search the tralics log for footnotes that contain paragraphs, report them,
# then remove the temporary files and exit.
#
# The arguments are passed as a list, so a file name containing blanks is not
# split into several arguments. The raw string hands grep the pattern
# 'argument of \\EOAfn', in which the doubled backslash matches one literal
# backslash.
grep_command_arguments = [
    "grep",
    "-A1",
    "-B2",
    r"argument of \\EOAfn",
    "%s-tralics.log" % args.filename,
]
# run() waits for grep to finish; its exit status is deliberately not
# checked, because "no match" is reported as a non-zero status.
grep_result = subprocess.run(grep_command_arguments, stdout=subprocess.PIPE)
grep_output = grep_result.stdout

if grep_output:
    print("\n===\nFootnotes with paragraphs were found. They have to be replaced by the \\EOAfnpar command.\n")
    # Undecodable bytes in the log must not abort the run right before cleanup.
    print(grep_output.decode("utf-8", errors="replace"))
    print("===\n")

print("Removing temporary files.")
cleanup()
print("Done!")
sys.exit()