Skip to content
Permalink
1793c57b61
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
executable file 1730 lines (1561 sloc) 79.6 KB
#!/usr/bin/env python3
# -*- coding: utf-8; mode: python -*-
# Time-stamp: <2019-03-20 17:50:39 (kthoden)>
"""
Converts Latex files into a customized DocBook XML file.
The program depends on the external program tralics for the conversion
as well as xelatex, pdfcrop (part of latex distributions) and
pandoc-citeproc for additional formatting.
"""
# license?
__version__= "1.0"
__author__ = "Klaus Thoden"
__date__="20171205"
# can the job done by BeautifulSoup also be done by lxml.html soupparser?
# as described in http://infohost.nmt.edu/tcc/help/pubs/pylxml/web/soupparser.html
# from lxml.html import soupparser
from utils.libeoabibitem import Bibitem
import utils.libeoaconvert as libeoaconvert
from utils.load_config import load_config, exec_command, check_executable, copy_dir_overwrite
import utils.bib2html as bib2html
# imports
import argparse
from lxml import etree
from bs4 import BeautifulSoup
import glob
import os
import re
import string
import shlex
import json
import subprocess
import sys
import shutil
import logging
import pickle
from pathlib import Path
import time
BASE_DIR = Path( __file__ ).resolve().parent
SCRIPT_PATH = Path( __file__ )
SCRIPT_NAME = SCRIPT_PATH.stem
#####################
# Parsing arguments #
#####################
# Command-line interface of the script. ArgumentDefaultsHelpFormatter makes
# --help show the default value of every option.
parser = argparse.ArgumentParser(
    formatter_class=argparse.ArgumentDefaultsHelpFormatter
)
# Configuration file; defaults to config/eoaconvert.cfg next to this script.
parser.add_argument(
    "-c", "--config",
    default = BASE_DIR / "config" / "eoaconvert.cfg",
    help="Name of config file"
)
# Log file; defaults to logs/<script name>.log (a relative path).
parser.add_argument(
    "-l", "--log-file",
    default = Path("logs", SCRIPT_NAME).with_suffix(".log"),
    help="logfile"
)
# Log level name, handed to load_config() together with the log file.
parser.add_argument(
    "--log-level",
    default = "INFO",
    help="log level: choose between DEBUG, INFO, WARNING, ERROR, CRITICAL"
)
# Locations of local checkouts of the two TEI repositories.
parser.add_argument(
    "--tei-guidelines",
    default = "tei/TEI",
    help="path to the https://github.com/TEIC/TEI"
)
parser.add_argument(
    "--tei-stylesheets",
    default = "tei/Stylesheets",
    help="path to the https://github.com/TEIC/Stylesheets"
)
# The only mandatory option: the main TeX file, given without its suffix
# (".tex" and ".xml" are appended by the script where needed).
parser.add_argument(
    "-f", "--filename",
    required = True,
    help="Name of main EOATeX file (without suffix!)."
)
parser.add_argument(
    "--latex-dir",
    default = "./latex-out",
    help="directory where to find the output generated by eoatex2pdf.py"
)
# Directory that receives the XML file, the support files and the
# temporary and debug sub-directories.
parser.add_argument(
    "-o", "--output-dir",
    default = "./imxml",
    help="where to dump all output files"
)
# NOTE: this option takes a value; the script compares it with the string
# "temp" before the conversion starts.
parser.add_argument(
    "-t", "--trash",
    help="Remove temporary files."
)
# Parsed once at import time; the resulting namespace is read by the
# module-level code that follows.
args = parser.parse_args()
# Path of the configuration file as given on the command line (or its default).
CONFIG_FILE = args.config
print("The configfile is %s." % CONFIG_FILE)
# current biber is not compatible with this code
# switch TeX distribution to TeXLive2016,
# run biber_2.1 -O biber2-1n.bbl $INPUT to obtain this file
BIBERFILE = "biber2-1.bbl"
##################################
# Reading the configuration file #
##################################
# load_config also receives the log level and log file - presumably it sets
# up logging as well; confirm in utils.load_config.
CONFIG = load_config(
    CONFIG_FILE,
    args.log_level,
    args.log_file,
)
########################
# Paths to executables #
########################
# Bare command names; their availability is verified with check_executable()
# further down.
GM_PATH = "gm"
TRALICS_PATH_EXEC = "tralics"
PDFCROP_EXEC = "pdfcrop" # (part of texlive distribution):
# TL_PATH = CONFIG['Executables']['texlive']
# TEXBIN_PATH = CONFIG['Executables']['texbin']
############################
# Paths to auxiliary files #
############################
# Entries of the [Auxiliaries] config section, resolved relative to the
# directory that contains this script.
TRALICS_PATH_LIB = BASE_DIR / CONFIG['Auxiliaries']['TRALICS_PATH_LIB']
TEMPLATE_PATH = BASE_DIR / CONFIG['Auxiliaries']['template_path']
SUPPORT_PATH = BASE_DIR / CONFIG['Auxiliaries']['support_path']
############################
# Paths:
############################
# Directory of the input file; args.filename itself carries no suffix.
INPUT_DIR = Path( args.filename ).resolve().parent
INPUT_PATH_NO_EXT = args.filename
OUTPUT_DIR = Path( args.output_dir )
LATEX_DIR = Path ( args.latex_dir )
# Working directories below the output directory.
CONVERT_DIR = OUTPUT_DIR / "CONVERT"
# CONVERT_DIR = os.getcwd() + os.path.sep + "CONVERT"
TEMP_DIR = OUTPUT_DIR / "tmp_files"
DEBUG_DIR = OUTPUT_DIR / "debug"
# where to output the xml file:
XML_FILE = OUTPUT_DIR / (INPUT_PATH_NO_EXT + ".xml")
#################################################
# Checking for existence of tools and libraries #
#################################################
# sanity check: record the search path, then make sure the external tools
# this script shells out to are available.
logging.debug("PATH: {}".format( os.environ['PATH'] ))
check_executable( GM_PATH )
check_executable( TRALICS_PATH_EXEC )
check_executable( PDFCROP_EXEC )
if not os.path.exists(TRALICS_PATH_LIB):
    logging.error(f"Cannot find the Tralics configuration at {TRALICS_PATH_LIB}. Exiting.")
    # Leave with a non-zero status so that calling scripts notice the failure.
    sys.exit(1)
##################################
# Setting up various directories #
##################################
# exist_ok avoids the check-then-create race; parents=True also creates a
# nested output directory given on the command line.
for _work_dir in (OUTPUT_DIR, TEMP_DIR, TEMP_DIR / "formulas2png", DEBUG_DIR):
    _work_dir.mkdir(parents=True, exist_ok=True)
# Copy Support-Files from /Library/MPIWG to current directory
shutil.copy(SUPPORT_PATH / "classes.dtd", OUTPUT_DIR)
shutil.copy(SUPPORT_PATH / "mathml2-qname-1.mod", OUTPUT_DIR)
shutil.copy(SUPPORT_PATH / "mathml2.dtd", OUTPUT_DIR)
copy_dir_overwrite(SUPPORT_PATH / "html", (OUTPUT_DIR / "html"))
copy_dir_overwrite(SUPPORT_PATH / "iso8879", (OUTPUT_DIR / "iso8879"))
copy_dir_overwrite(SUPPORT_PATH / "iso9573-13", (OUTPUT_DIR / "iso9573-13"))
copy_dir_overwrite(SUPPORT_PATH / "mathml", (OUTPUT_DIR / "mathml"))
########################################
# Certain functions for specific tasks #
########################################
def getchildren(xmlElement):
    """Return *xmlElement* unchanged.

    The element itself is handed back; no child elements are collected
    or copied.
    """
    return xmlElement
# def getchildren ends here
def TeX2PNG(LaTeXCode, Type, Chapter, Number):
    """Render LaTeX code into a PNG file (epub & django).

    The formula is wrapped in the delimiters that belong to *Type*, written
    into the formula template and run through xelatex, pdfcrop and
    GraphicsMagick. The picture ends up in ``OUTPUT_DIR/items`` as
    ``<Type>_<Chapter>_<Number>.png``.

    Args:
        LaTeXCode: Raw formula source without math delimiters.
        Type: One of the keys of ``Types`` below; any other value raises
            ``KeyError``.
        Chapter: Chapter number, used in file names and log messages.
        Number: Formula number, used in file names and log messages.

    Returns:
        The LaTeX code including its delimiters and with the rebound
        commands restored.
    """
    # Dictionary contains Type:begin/end
    Types = {
        "EOAineq" : ["$", "$"],
        "EOAequation" : ["\\begin{equation*}", "\\end{equation*}"],
        "EOAequationnonumber" : ["\\begin{equation*}", "\\end{equation*}"],
        "EOAequationarray" : ["\\begin{align*}", "\\end{align*}"],
        "EOAequationarraynonumber" : ["\\begin{align*}", "\\end{align*}"]
    }
    LaTeXCode = Types[Type][0] + LaTeXCode + Types[Type][1]
    # Regular expressions (raw strings, so the backslashes reach ``re``
    # unchanged) mapped to the LaTeX command each one is replaced with.
    dictRebindedCommands = {
        r"\|ket\|" : r"\\ket",
        r"\|braket\|" : r"\\braket",
        r"\|bra\|" : r"\\bra",
        r"\|Bra\|" : r"\\Bra",
        r"\|Ket\|" : r"\\Ket",
        # NOTE(review): unlike the other keys this pattern starts with \s,
        # i.e. it matches a whitespace character followed by "lashed|".
        # It was probably meant to be r"\|slashed\|"; left unchanged until
        # the placeholder actually emitted for \slashed is confirmed.
        r"\slashed\|" : r"\\slashed"
    }
    for strCommand, strReplacement in dictRebindedCommands.items():
        LaTeXCode = re.sub(strCommand, strReplacement, LaTeXCode)
    # Open plain LaTeX-Template
    with open(TEMPLATE_PATH / "formula.tex", "r", encoding="utf-8") as template_file:
        template_text = template_file.read()
    # use local tmpdir
    formula_tmp_dir = TEMP_DIR / "formulas2png"
    # Make directory items if it doesn't already exist
    items_dir = OUTPUT_DIR / "items"
    items_dir.mkdir(exist_ok=True)
    # All intermediate files share one base name
    basename = Type + "_" + str(Chapter) + "_" + str(Number)
    tex_path = formula_tmp_dir / (basename + ".tex")
    pdf_path = formula_tmp_dir / (basename + ".pdf")
    cropped_pdf_path = formula_tmp_dir / (basename + "a.pdf")
    png_path = items_dir / (basename + ".png")
    # xelatex reads its input as UTF-8, so write the file that way
    # regardless of the locale.
    with open(tex_path, "w", encoding="utf-8") as tex_file:
        tex_file.write(string.Template(template_text).substitute(DERINHALT=LaTeXCode))
    # The standard output of all three tools is collected in one log file,
    # which is rewritten for every formula. Arguments are passed as lists
    # so that paths containing spaces stay intact.
    with open(TEMP_DIR / 'xelatex-run.log', 'w') as Datei:
        Ergebnis = subprocess.call(
            ["xelatex", "--halt-on-error", str(tex_path.absolute())],
            cwd=formula_tmp_dir,
            stdout=Datei,
        )
        if Ergebnis == 0:
            logging.info("Successfully converted formula " + Type + str(Chapter) + "_" + str(Number))
        else:
            # Any non-zero status is a failure, not only 1
            logging.error("Failed to convert formula " + Type + str(Chapter) + "_" + str(Number))
        # Cropping and conversion are still attempted after a failure,
        # as before.
        subprocess.call(
            shlex.split(PDFCROP_EXEC) + [str(pdf_path.absolute()), str(cropped_pdf_path.absolute())],
            cwd=formula_tmp_dir,
            stdout=Datei,
        )
        subprocess.call(
            shlex.split(GM_PATH) + [
                "convert", "-density", "144",
                str(cropped_pdf_path.absolute()),
                str(png_path.absolute()),
            ],
            cwd=formula_tmp_dir,
            stdout=Datei,
        )
    return LaTeXCode
# def TeX2PNG ends here
def make_latex_bibl_file(
        bib_database,
        set_citations,
        files
):
    """Write separate LaTeX files that contain only the bibliography.

    The HTML bibliography is not yet formatted exactly like the LaTeX
    version. To compare the two, each template is filled in here so that
    it can be converted into the various formats as well.

    Every ``(input_path, output_path)`` pair in *files* names a template
    and its destination. The placeholders ``INSERT_BIB_DATABASE`` and
    ``INSERT_CITEKEYS`` are replaced by *bib_database* and by the cite
    keys of *set_citations*, joined with ``", "``.
    """
    replacements = {
        "INSERT_BIB_DATABASE": bib_database,
        "INSERT_CITEKEYS": ", ".join(set_citations),
    }
    for template_path, target_path in files:
        with open(template_path, "r") as source:
            template = string.Template(source.read())
        # Fill in the template before the target is opened, so a failing
        # substitution leaves the target untouched.
        filled = template.substitute(replacements)
        with open(target_path, "w") as target:
            target.write(filled)
# def make_latex_bibl_file ends here
def sanitize_bibentry(bibEntry):
    """Tidy up a formatted bibliography entry.

    Replaces ". , " by ", " and "vols.." by "vols.", then strips leading
    and trailing whitespace.
    """
    for broken, repaired in ((". , ", ", "), ("vols..", "vols.")):
        bibEntry = bibEntry.replace(broken, repaired)
    return bibEntry.strip()
# def sanitize_bibentry ends here
'''
def createBibEntryAuthorYear(bibEntry, boolSameAuthor):
"""Function to create a complete Entry of a publication (epub & django) for author-year citation"""
strBibEntry = ""
if bibEntry.entrytype() not in libeoaconvert.allowed_bibentry_types:
logging.error("You cannot use the entry type %s in entry %s. Allowed entry types are: %s.\n" % (bibEntry.entrytype(), bibEntry.citekey() , ", ".join(libeoaconvert.allowed_bibentry_types)))
sys.exit()
bool_edited_book = False
book_without_author = False
editor_postfix = bibEntry.fullauthorlastfirst()[1]
if boolSameAuthor == False:
if len(editor_postfix) != 0:
if editor_postfix == "no_author_only_title":
book_without_author = True
strAuthor = bibEntry.title()
else:
bool_edited_book = True
strAuthor = bibEntry.fullauthorlastfirst()[0] + ", " + editor_postfix
else:
strAuthor = bibEntry.fullauthorlastfirst()[0]
if boolSameAuthor == True:
# if there is no author, but a publisher, the localized
# postfix is returned from the function together with the name in a tuple
if len(editor_postfix) != 0:
bool_edited_book = True
strAuthor = "– " + editor_postfix
else:
strAuthor = "–"
# Next line good for debugging
# print(bibEntry.citekey(), strAuthor)
if bibEntry.entrytype() == "book":
if bool_edited_book == True:
strBibEntry = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + "). <i>" + str(bibEntry.title()) + "</i> " + bibEntry.edition() + bibEntry.volumenumeric() + bibEntry.seriesnumber() + bibEntry.note() + str(bibEntry.location()) + bibEntry.pages() + ". " + bibEntry.url()
elif book_without_author == True:
strBibEntry = "<i>" + str(bibEntry.title()) + "</i> " + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + ")." + bibEntry.edition() + bibEntry.volumenumeric() + bibEntry.seriesnumber() + bibEntry.note() + str(bibEntry.location()) + bibEntry.pages() + ". " + bibEntry.url()
else:
strBibEntry = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + "). <i>" + str(bibEntry.title()) + "</i> " + bibEntry.editor() + bibEntry.edition() + bibEntry.volumenumeric() + bibEntry.seriesnumber() + bibEntry.note() + str(bibEntry.location()) + bibEntry.pages() + ". " + bibEntry.url()
if bibEntry.entrytype() == "booklet":
strBibEntry = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + "). <i>" + str(bibEntry.title()) + "</i>" + str(bibEntry.location()) + bibEntry.howpublished() + "."
if bibEntry.entrytype() == "report":
strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). <i>" + bibEntry.title() + "</i>"
if bibEntry.entrytype() == "thesis":
strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). <i>" + bibEntry.title() + "</i>" + bibEntry.thesistype() + bibEntry.institution() + "." + bibEntry.url()
if bibEntry.entrytype() == "misc":
strBibEntry = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + "). <i>" + str(bibEntry.title()) + "</i> " + bibEntry.howpublished() + ". " + bibEntry.note() + bibEntry.location() + bibEntry.thesistype() + "." + bibEntry.url()
if bibEntry.entrytype() == "incollection":
strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). " + bibEntry.title() + bibEntry.booktitle() + ". " + bibEntry.editor() + bibEntry.edition() + bibEntry.volumenumeric() + bibEntry.seriesnumber() + bibEntry.note() + bibEntry.location() + bibEntry.pages() + "." + bibEntry.url()
if bibEntry.entrytype() == "inproceedings":
strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). " + bibEntry.title() + bibEntry.booktitle() + bibEntry.editor() + bibEntry.series() + bibEntry.location() + bibEntry.pages()
if bibEntry.entrytype() == "article":
strBibEntry = strAuthor + " (" + str(bibEntry.labelyear()) + str(bibEntry.labelyearsuffix()) + "). " + str(bibEntry.title()) + str(bibEntry.journaltitle()) + bibEntry.volumenumberpages() + ". " + bibEntry.note() + bibEntry.url()
if bibEntry.entrytype() == "newspaper":
strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + "). <i>" + bibEntry.title() + "</i>"
# print(strBibEntry)
return sanitize_bibentry(strBibEntry)
# def createBibEntryAuthorYear ends here
'''
def createBibEntryNumeric(bibEntry):
    """Create the complete entry of a publication (epub & django) for numeric citation.

    Args:
        bibEntry: Bibliography item; its accessor methods (``entrytype()``,
            ``title()``, ``year()`` ...) are expected to return strings.

    Returns:
        The formatted entry as an HTML fragment, or an empty string for an
        allowed entry type that has no layout below.

    Raises:
        SystemExit: With status 1 if the entry type is not listed in
            ``libeoaconvert.allowed_bibentry_types``.
    """
    strBibEntry = ""
    # Ask for the type once; exactly one layout applies to an entry.
    strEntryType = bibEntry.entrytype()
    if strEntryType not in libeoaconvert.allowed_bibentry_types:
        logging.error("You cannot use the entry type %s in entry %s. Allowed entry types are: %s.\n" % (strEntryType, bibEntry.citekey() , ", ".join(libeoaconvert.allowed_bibentry_types)))
        # Signal the failure to the caller of the script
        sys.exit(1)
    strAuthor = bibEntry.fullauthorfirstlast()
    if strEntryType == "book":
        strBibEntry = strAuthor + ". <i>" + bibEntry.title() + "</i>." + bibEntry.location() + ", " + bibEntry.year()
    elif strEntryType == "booklet":
        strBibEntry = strAuthor + ". <i>" + bibEntry.title() + "</i>. " + bibEntry.howpublished() + ". " + bibEntry.location() + ", " + bibEntry.year()
    elif strEntryType == "report":
        strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + ") <i>" + bibEntry.title() + "</i>."
    elif strEntryType == "thesis":
        strBibEntry = strAuthor + ". <i>" + bibEntry.title() + "</i>. " + bibEntry.thesistype() + bibEntry.institution() + ", " + bibEntry.year()
    elif strEntryType == "misc":
        strBibEntry = strAuthor + ". " + bibEntry.title() + ". " + bibEntry.booktitle() + ". "
    elif strEntryType == "incollection":
        strBibEntry = strAuthor + ". " + bibEntry.title() + ". " + bibEntry.booktitle() + bibEntry.editor() + ". " + bibEntry.location() + ", " + bibEntry.year() + ". " + bibEntry.pages() + "."
    elif strEntryType == "inproceedings":
        strBibEntry = strAuthor + ". " + bibEntry.title() + ". " + bibEntry.booktitle() + ". " + bibEntry.volumenumeric() + bibEntry.year() + ". " + bibEntry.pages() + "."
    elif strEntryType == "article":
        strBibEntry = strAuthor + ". " + bibEntry.title() + "<i>" + bibEntry.journaltitle() + "</i> " + bibEntry.volumenumberpages() + " (" + bibEntry.year() + "):" + bibEntry.pages() + "."
    elif strEntryType == "newspaper":
        strBibEntry = strAuthor + " (" + bibEntry.labelyear() + bibEntry.labelyearsuffix() + ") <i>" + bibEntry.title() + "</i>."
    return strBibEntry
# def createBibEntryNumeric ends here
def pdf_burst(input_file, tmpDir):
    """Split a PDF file into single pages.

    Args:
        input_file: Name of the PDF file inside *tmpDir*.
        tmpDir: Directory (a path object supporting ``/``) that holds the
            input file and receives the pages.

    Page N of the input (counted from 1) is written to
    ``tmpDir/EOAformulas_N.pdf``.
    """
    from PyPDF2 import PdfFileWriter, PdfFileReader
    # The reader works on the open stream, so the input stays open until
    # every page has been written and is closed afterwards.
    with open(tmpDir / input_file, "rb") as input_stream:
        input1 = PdfFileReader(input_stream)
        page_count = input1.getNumPages()
        logging.debug("Input is %s and has %d pages." % (input_file, page_count))
        for pageno in range(page_count):
            output = PdfFileWriter()
            output.addPage(input1.getPage(pageno))
            output_filename = tmpDir / ("EOAformulas_%d.pdf" % (pageno + 1))
            with open(output_filename, 'wb') as output_stream:
                output.write(output_stream)
            logging.debug("Wrote %s." % output_filename)
# def pdf_burst ends here
def progress(count, total, status=''):
    """Draw a progress bar on the command line.

    Writes a 60 character wide bar, the percentage of *count* relative to
    *total* and the *status* text to standard output. The line ends with a
    carriage return, so the next call overwrites it.

    Taken from https://gist.github.com/vladignatyev/06860ec2040cb497f0f3
    """
    width = 60
    done = int(round(width * count / float(total)))
    percentage = round(100.0 * count / float(total), 1)
    bar = ''.join(('#' * done, '-' * (width - done)))
    line = '[%s] %s%s ... %s\r' % (bar, percentage, '%', status)
    sys.stdout.write(line)
    sys.stdout.flush()
# def progress ends here
def cleanup():
    """Remove the support files and directories copied into OUTPUT_DIR.

    Every item is removed independently, so one missing file no longer
    keeps the remaining ones from being deleted. Items that cannot be
    removed for another reason are reported with a warning and skipped.
    """
    support_files = ("classes.dtd", "mathml2-qname-1.mod", "mathml2.dtd")
    support_dirs = ("html", "iso8879", "iso9573-13", "mathml")
    removals = [(os.remove, OUTPUT_DIR / name) for name in support_files]
    removals += [(shutil.rmtree, OUTPUT_DIR / name) for name in support_dirs]
    removed_any = False
    for remove, target in removals:
        try:
            remove(target)
        except FileNotFoundError:
            # Already gone - nothing to do for this item
            continue
        except OSError as err:
            # Best effort: report the problem and carry on
            logging.warning("Could not remove %s: %s", target, err)
            continue
        removed_any = True
    if removed_any:
        logging.debug("Removed support files.")
    else:
        logging.info("No temporary files were found.")
# def cleanup ends here
# Remove temporary files, necessary for troubleshooting
# With "--trash temp" the script only deletes the copied support files and
# stops here, before any conversion is started.
if args.trash == "temp":
    cleanup()
    sys.exit()
##############################################################
# Preparing the main document #
##############################################################
# .tex -> .xml
def run_tralics(
        input_file,
        TRALICS_PATH_LIB,
        TRALICS_LOG_PATH,
        output_dir = OUTPUT_DIR,
):
    """Convert the TeX source to XML via Tralics.

    Args:
        input_file: The TeX file to convert.
        TRALICS_PATH_LIB: Directory holding ``tralics_conf`` and
            ``tralics.tcf``.
        TRALICS_LOG_PATH: Log file handed to Tralics.
        output_dir: Serves as both output and input directory of the
            Tralics run.

    ``libeoaconvert.enable_preamble`` is called first with the input file,
    a path of the same name inside *output_dir* and ``"xml"`` - presumably
    it writes a copy with the XML preamble switched on; confirm in
    libeoaconvert. A failing Tralics run is ignored.
    """
    tex_copy_path = output_dir / Path(input_file).name
    libeoaconvert.enable_preamble(input_file, tex_copy_path, "xml")
    # Convert TeX to XML via Tralics
    logging.info( f"executing {TRALICS_PATH_EXEC}. log file: {TRALICS_LOG_PATH}" )
    command_template = "{cmd} -log_file {log_file} -confdir {conf_dir}/tralics_conf -config {conf_dir}/tralics.tcf -utf8 -utf8output -output_dir={output_dir} -input_dir={input_dir} -input_file={input_file}"
    tralics_command = command_template.format(
        cmd = TRALICS_PATH_EXEC,
        log_file = TRALICS_LOG_PATH,
        conf_dir = TRALICS_PATH_LIB,
        output_dir = output_dir,
        input_dir = output_dir,
        input_file = input_file,
    )
    exec_command(
        tralics_command,
        ignore_fail = True # :-D
    )
# .tex -> .xml
# Run the conversion for the main file; the ".tex" suffix is appended here
# because the file name is given without it on the command line.
run_tralics(
    input_file = INPUT_PATH_NO_EXT + '.tex',
    TRALICS_PATH_LIB = TRALICS_PATH_LIB,
    TRALICS_LOG_PATH = (INPUT_PATH_NO_EXT + "-tralics.log"),
    output_dir = OUTPUT_DIR
)
def fix_underscore_and_eoatranscripted(
        xml_file
):
    """Replace two kinds of Tralics ``<error/>`` markers in *xml_file*.

    The file is rewritten in place:

    * ``<error n='_' ... c='Missing dollar'/>`` becomes a plain underscore,
    * ``<error n='\\par' ... c='Invalid \\par command: paragraph not
      started'/>`` is removed.

    Args:
        xml_file: Path of the XML file written by Tralics.
    """
    # Tralics is run with -utf8output, so read and write UTF-8 instead of
    # whatever the locale happens to be.
    with open(xml_file, "r", encoding="utf-8") as xml_in:
        tmpText = xml_in.read()
    # Fix underscore und fix EOAtranscripted
    tmpText = re.sub(r"<error n='_' l='(.*?)' c='Missing dollar'/>", "_", tmpText)
    tmpText = re.sub(r"<error n='\\par' l='(.*?)' c='Invalid \\par command: paragraph not started'/>", "", tmpText)
    with open(xml_file, "w", encoding="utf-8") as xml_out:
        xml_out.write(tmpText)
# Clean the Tralics output before it is parsed.
fix_underscore_and_eoatranscripted(
    xml_file = XML_FILE
)
# Complete XML-Document in xmlTree
# load_dtd=True lets the parser read the DTD; no_network=False permits
# network access while doing so.
xmlParser = etree.XMLParser(no_network=False,load_dtd=True) #resolve_entities=False
xmlTree = etree.parse(str(XML_FILE), xmlParser)
# Every <div1> element is treated as one chapter.
xmlChapters = xmlTree.findall("//div1")
# Cleanup of not needed tags in advance. To be cleaned: <error>
etree.strip_elements(xmlTree, with_tail=False, *['error'])
logging.info("-----------------------------------------------------")
logging.info("Move EOAlanguage from <head> into attribute of EOAchapter")
for intChapterNumber, xmlChapter in enumerate(xmlChapters, start=1):
    xmlLanguage = xmlChapter.find(".//EOAlanguage")
    if xmlLanguage is not None:
        # An empty element counts as "english"
        strLanguage = xmlLanguage.text or "english"
        xmlChapter.set("language", strLanguage)
        # Empty the element so that no text is left behind when its tag
        # is stripped below
        xmlLanguage.text = None
        logging.info("The language of Chapter %d is %s." % (intChapterNumber, strLanguage))
    # NOTE(review): strip_tags modifies the tree in place; its return value
    # (None according to the lxml documentation) is bound to xmlChapter.
    xmlChapter = etree.strip_tags(xmlChapter, "EOAlanguage")
##############################################################
#     Numbering and Typesetting various Elements             #
##############################################################
# Figure out how to number (like essay or regular)
try:
    # find() returns None if there is no <EOAseries> element, which makes
    # the attribute access fail; an empty element counts as "regular".
    strSerie = xmlTree.find(".//EOAseries").text or "regular"
except AttributeError:
    logging.error("\n\nYou are most probably using the preamble for the PDF output. Exiting.")
    sys.exit()
# Only the series "Essay" selects essay numbering
if strSerie == "Essay":
    strNumberingType = "essay"
else:
    strNumberingType = "regular"
# Dictionaries containing UIDs and Numbers
dictChapters = {}
dictFigures = {}
dictEquations = {}
dictSections = {}
dictFootnotes = {}
dictPagelabels = {}
dictTables = {}
dictLists = {}
dictTheorems = {}
set_citations = set()
logging.info("-----------------------------------------------------")
logging.info("Numbering Chapters")
# Chapters marked rend="nonumber" get no entry in dictChapters and do not
# advance the counter.
Chapternumber = 1
for xmlChapter in xmlChapters:
    if xmlChapter.get('rend') != "nonumber":
        Chapteruid = xmlChapter.get('id')
        dictChapters[Chapteruid] = str(Chapternumber)
        Chapternumber += 1
# EOAequation, EOAsubequation and EOAequationarray Numbering per Chapter
# Every formula is rendered to a PNG file by TeX2PNG, receives its number
# and is registered in dictEquations under its label or anchor id.
# NOTE(review): the nesting of this loop was reconstructed from its control
# flow because the indentation was lost; please compare with the repository.
intChapterNumber = 1
logging.info("-----------------------------------------------------")
logging.info("Processing .//EOAequation | .//EOAequationarray | .//EOAsubequations")
for xmlChapter in xmlChapters:
    # Equation numbers start again at 1 in every chapter
    intEquationnumber = 1
    xmlDinge = xmlChapter.xpath(".//EOAequation | .//EOAequationarray | .//EOAsubequations")
    logging.info("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge)))
    for xmlDing in xmlDinge:
        if xmlDing.tag == "EOAequationarray":
            # tmpNumberinArray is only being used for filename
            tmpNumberinArray = intEquationnumber
            # tmpDictNumberLabel used to insert the attribute value into <EOAequation>
            tmpDictNumberLabel = {}
            # Numbering is being done by <mtr>-Tags
            xmlMathmlrows = xmlDing.findall(".//{http://www.w3.org/1998/Math/MathML}mtr")
            for xmlMathmlrow in xmlMathmlrows:
                if "Label" in xmlMathmlrow.attrib:
                    # Add the label to the dictionary of equations
                    if xmlChapter.get("rend") != "nonumber":
                        dictEquations[xmlMathmlrow.get("Label")] = str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)
                        tmpDictNumberLabel[str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)] = xmlMathmlrow.get("Label")
                    if xmlChapter.get("rend") == "nonumber":
                        dictEquations[xmlMathmlrow.get("Label")] = str(intEquationnumber)
                        tmpDictNumberLabel[str(intEquationnumber)] = xmlMathmlrow.get("Label")
                intEquationnumber += 1
            xmlRohTeX = xmlDing.find(".//texmath")
            xmlNew = etree.Element('EOAequationarray')
            # Blank lines need to be removed otherwise TeX won't work
            textSourcecode = os.linesep.join([s for s in xmlRohTeX.text.splitlines() if s])
            # \rowattributeunknown has to be deleted, its an artefact
            textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode)
            # Push Down loop to parse the raw code
            # The source is read character by character; two backslashes in
            # a row end one row of the array, which is then rendered as a
            # formula of its own.
            textFormel = ""
            boolBackslash = False
            for Buchstabe in textSourcecode:
                if Buchstabe == "\n":
                    continue
                if Buchstabe == "\\":
                    if boolBackslash == False:
                        # First backslash: remember it and look at the next character
                        textFormel += Buchstabe
                        boolBackslash = True
                        continue
                    if boolBackslash == True:
                        # Second backslash in a row: the row is complete
                        textFormel += Buchstabe
                        str_latexcode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray))
                        if xmlChapter.get("rend") != "nonumber":
                            tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)))
                        if xmlChapter.get("rend") == "nonumber":
                            tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray)))
                        tmpXML.set("TeX", str_latexcode)
                        # Put Label into EOAequation
                        if xmlChapter.get("rend") != "nonumber":
                            strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)
                        if xmlChapter.get("rend") == "nonumber":
                            strTempKey = str(tmpNumberinArray)
                        if strTempKey in tmpDictNumberLabel:
                            #tmpXML.set("label", tmpDictNumberLabel[(str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray))])
                            tmpXML.set("label", tmpDictNumberLabel[strTempKey])
                        xmlNew.append(tmpXML)
                        # Start collecting the next row
                        textFormel = ""
                        boolBackslash = False
                        tmpNumberinArray += 1
                        continue
                if Buchstabe != "\\":
                    textFormel += Buchstabe
                    boolBackslash = False
            # Typeset last equation
            str_latexcode = TeX2PNG(textFormel, "EOAequationarray", str(intChapterNumber), str(tmpNumberinArray))
            if xmlChapter.get("rend") != "nonumber":
                tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(dictChapters[xmlChapter.get('id')] + "." + str(tmpNumberinArray)))
            if xmlChapter.get("rend") == "nonumber":
                tmpXML = etree.Element("EOAequation", filename=("EOAequationarray" + "_" + str(intChapterNumber) + "_" + str(tmpNumberinArray) + ".png"), number=(str(tmpNumberinArray)))
            tmpXML.set("TeX", str_latexcode)
            # Put Label into EOAequation
            if xmlChapter.get("rend") != "nonumber":
                strTempKey = str(dictChapters[xmlChapter.get('id')]) + "." + str(tmpNumberinArray)
            if xmlChapter.get("rend") == "nonumber":
                strTempKey = str(tmpNumberinArray)
            if strTempKey in tmpDictNumberLabel:
                logging.info(strTempKey)
                logging.info(tmpDictNumberLabel)
                logging.info(dictChapters)
                tmpXML.set("label", tmpDictNumberLabel[strTempKey])
            xmlNew.append(tmpXML)
            # The original array is replaced by the list of single equations
            xmlDing.getparent().replace(xmlDing, xmlNew)
            # enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on
            #xmlNew.getparent().tag = "temp"
            continue
        if xmlDing.tag == "EOAsubequations":
            # Enclosing <p>-Tag of the EOAsubequations needs to be removed
            xmlDing.getparent().tag = "temp"
            xmlSubequations = xmlDing.findall('.//EOAequation')
            # Letters appended to the equation number (16a, 16b, ...)
            listCharacters = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z']
            tmpI = 0
            # Insert Number of this Subequation into dictEquations
            xmlAnchor = xmlDing.find(".//anchor")
            logging.info(xmlAnchor)
            if xmlChapter.get("rend") != "nonumber":
                dictEquations[xmlAnchor.get('id')] = dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber)
            if xmlChapter.get("rend") == "nonumber":
                dictEquations[xmlAnchor.get('id')] = str(intEquationnumber)
            # Delete anchor
            xmlAnchor.getparent().remove(xmlAnchor)
            for xmlSubequation in xmlSubequations:
                # Enclosing <p>-Tag of the EOAsubequation needs to be removed
                #xmlSubequation.getparent().tag = "temp"
                # Numbering Subequations with characters
                strSubequationNumber = str(intEquationnumber) + listCharacters[tmpI]
                tmpI += 1
                textSourcecode = xmlSubequation.find('.//texmath').text
                # Blank lines need to be removed otherwise TeX won't work
                textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
                str_latexcode = TeX2PNG(textSourcecode, "EOAequation", str(intChapterNumber), strSubequationNumber)
                # Fetch the anchor before clear() removes the children
                xmlAnchor = xmlSubequation.find(".//anchor")
                # Clear Equation
                xmlSubequation.clear()
                if xmlChapter.get("rend") != "nonumber":
                    xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png")
                    xmlSubequation.set("number", dictChapters[xmlChapter.get('id')] + "." + strSubequationNumber)
                    xmlSubequation.set("uid", xmlAnchor.get('id'))
                if xmlChapter.get("rend") == "nonumber":
                    xmlSubequation.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + strSubequationNumber + ".png")
                    xmlSubequation.set("number", strSubequationNumber)
                    xmlSubequation.set("uid", xmlAnchor.get('id'))
                xmlSubequation.set("id", xmlAnchor.get('id'))
                xmlSubequation.set("TeX", str_latexcode)
                # Insert Number of this Equation into dictEquations
                if strNumberingType == "regular":
                    dictEquations[xmlAnchor.get('id')] = str(dictChapters[xmlChapter.get('id')]) + "." + strSubequationNumber
                if strNumberingType == "essay":
                    dictEquations[xmlAnchor.get('id')] = strSubequationNumber
            # TODO: keep the anchor directly below the subequations and assign it to the first equation, so that 8.16 can be linked to at 8.16a and 8.16b
            xmlDing.tag = "temp"
            # enclosing <p>-Tag of the Subequations is not wanted, transformed to <temp> to be deleted later on
            #xmlDing.getparent().tag = "temp"
            # The whole group uses up one equation number
            intEquationnumber += 1
            continue
        if xmlDing.tag == "EOAequation":
            # Check, if Equation has already been found in a Subeqation
            xmlAnchor = xmlDing.find("anchor")
            if xmlAnchor == None:
                continue
            if xmlAnchor.get('id') in dictEquations:
                continue
            # Use the <texmath> child if there is one, else the element's own text
            if xmlDing.find('.//texmath') is not None:
                textSourcecode = xmlDing.find('.//texmath').text
            else:
                textSourcecode = xmlDing.text
            # Blank lines need to be removed otherwise TeX won't work
            textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
            str_latexcode = TeX2PNG(textSourcecode, "EOAequation", intChapterNumber, intEquationnumber)
            #print ("Got:")
            #print (str_latexcode)
            if xmlChapter.get("rend") != "nonumber":
                xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png")
                xmlDing.set("number", dictChapters[xmlChapter.get('id')] + "." + str(intEquationnumber))
                xmlDing.set("uid", xmlAnchor.get('id'))
            if xmlChapter.get("rend") == "nonumber":
                xmlDing.set("filename", "EOAequation" + "_" + str(intChapterNumber) + "_" + str(intEquationnumber) + ".png")
                xmlDing.set("number", str(intEquationnumber))
                xmlDing.set("uid", xmlAnchor.get('id'))
            xmlDing.set("id", xmlAnchor.get('id'))
            xmlDing.set("TeX", str_latexcode)
            #xmlDing.getparent().replace(xmlDing, xmlNew)
            # Insert Number of this Equation into dictEquations
            if strNumberingType == "regular":
                dictEquations[xmlAnchor.get('id')] = \
                    str(dictChapters[xmlChapter.get('id')]) + "." + str(intEquationnumber)
            if strNumberingType == "essay":
                dictEquations[xmlAnchor.get('id')] = str(intEquationnumber)
            intEquationnumber += 1
            continue
    # Counts every chapter, numbered or not; used for the PNG file names
    intChapterNumber += 1
intChapterNumber = 1
logging.info("-----------------------------------------------------")
logging.info("Processing .//EOAequationnonumber | .//EOAequationarraynonumber")
# Typeset every unnumbered equation (single equations and equation arrays) as
# a PNG via TeX2PNG and replace the source element with an
# <EOAequationnonumber> element whose "filename" attribute names the image
# and whose "TeX" attribute carries the value returned by TeX2PNG.
for xmlChapter in xmlChapters:
    # Image counter: restarted for every chapter, incremented for both tags.
    tempImagenumber = 1
    xmlDinge = xmlChapter.xpath(".//EOAequationnonumber | .//EOAequationarraynonumber")
    logging.info("Working on Chapter %d which contains %d formulæ." % (intChapterNumber, len(xmlDinge)))
    for xmlDing in xmlDinge:
        # Prefer the <texmath> descendant; fall back to the element's own
        # text.  Both tags use the same fallback so a missing <texmath> no
        # longer crashes the single-equation case.
        xmlTexmath = xmlDing.find(".//texmath")
        if xmlTexmath is not None:
            textSourcecode = xmlTexmath.text
        else:
            textSourcecode = xmlDing.text
        # Blank lines need to be removed otherwise TeX won't work
        textSourcecode = os.linesep.join([s for s in textSourcecode.splitlines() if s])
        if xmlDing.tag == "EOAequationarraynonumber":
            # \rowattributeunknown has to be deleted, it's an artefact
            textSourcecode = re.sub("\\\\rowattributeunknown", "", textSourcecode)
            # TODO: check whether (and to what extent) it is sufficient to
            # put a whole EOAequationarraynonumber into one single image.
            # (The former character-by-character splitting of the array at
            # "\\" was unreachable code behind a `continue` and was removed.)
            str_latexcode = TeX2PNG(textSourcecode, "EOAequationarraynonumber", str(intChapterNumber), str(tempImagenumber))
            xmlNew = etree.Element("EOAequationnonumber", filename=("EOAequationarraynonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png"))
            xmlNew.set("TeX", str_latexcode)
            xmlDing.getparent().replace(xmlDing, xmlNew)
            tempImagenumber += 1
            continue
        if xmlDing.tag == "EOAequationnonumber":
            # The image number is passed as int here (and as str above),
            # exactly as before.
            str_latexcode = TeX2PNG(textSourcecode, "EOAequationnonumber", str(intChapterNumber), tempImagenumber)
            # TODO: insert HTML code for the finished image (is this TODO still relevant?)
            xmlNew = etree.Element("EOAequationnonumber", filename=("EOAequationnonumber" + "_" + str(intChapterNumber) + "_" + str(tempImagenumber) + ".png"))
            xmlNew.set("TeX", str_latexcode)
            xmlDing.getparent().replace(xmlDing, xmlNew)
            tempImagenumber += 1
            continue
    intChapterNumber += 1
logging.info("-----------------------------------------------------")
logging.info("Converting EOAineq")
# Inline equations are collected into ONE LaTeX document (one formula per
# page, separated by \newpage), typeset with a single xelatex run, split into
# single pages by pdf_burst, cropped with pdfcrop and finally converted to
# PNG files in OUTPUT_DIR/items.  Each <EOAineq> element is emptied and gets
# a "src" (PNG name) and a "TeX" (source code) attribute.
intChapterNumber = 1
intEOAineqRunningOrder = 1
# Maps the running number of a formula (= page in the PDF) to its file stem.
dictEOAineqs = {}
strTeXEquations = ""
all_ineq = xmlTree.findall(".//EOAineq")
if len(all_ineq) > 0:
    logging.info("Found " + str(len(all_ineq)) + " formulas")
    listTeXEquations = []
    for xmlChapter in xmlChapters:
        logging.info("Chapter " + str(intChapterNumber))
        xmlEOAineqs = xmlChapter.findall(".//EOAineq")
        intEOAineqnumber = 1
        for xmlEOAineq in xmlEOAineqs:
            xmlTexmath = xmlEOAineq.find('.//texmath')
            if xmlTexmath is not None:
                strSourceCode = xmlTexmath.text
            else:
                strSourceCode = xmlEOAineq.text
            progress(intEOAineqnumber, len(xmlEOAineqs),"Processing EOAineq %s of %s." % (intEOAineqnumber, len(xmlEOAineqs)))
            # Blank lines are dropped before the code is handed to TeX.
            strSourceCode = os.linesep.join([s for s in strSourceCode.splitlines() if s])
            # this occurred once in sources 11
            strSourceCode = strSourceCode.replace(r"\@root", r"\root")
            listTeXEquations.append("$" + strSourceCode + "$\n\\newpage\n")
            # Add intEOAineqRunningOrder : Filename to dictionary
            strFilename = "EOAineq_" + str(intChapterNumber) + "_" + str(intEOAineqnumber)
            dictEOAineqs[intEOAineqRunningOrder] = strFilename
            # Prepare XML: clear() also drops the tail, so save and restore it
            tmpTail = xmlEOAineq.tail
            xmlEOAineq.clear()
            xmlEOAineq.tail = tmpTail
            xmlEOAineq.set("src", strFilename + ".png")
            xmlEOAineq.set("TeX", strSourceCode)
            # increment integers
            intEOAineqRunningOrder += 1
            intEOAineqnumber += 1
        intChapterNumber += 1
    strTeXEquations = "".join(listTeXEquations)
    # Regex pattern -> replacement.  Raw strings keep the exact same pattern
    # text as before without relying on invalid escape sequences.
    # NOTE(review): in "\slashed\|" the leading "\s" is the regex whitespace
    # class, so this pattern matches "<whitespace>lashed|" -- probably not
    # what was intended; confirm the placeholder that is really emitted.
    dictRebindedCommands = {
        r"\|ket\|" : r"\\ket",
        r"\|braket\|" : r"\\braket",
        r"\|bra\|" : r"\\bra",
        r"\|Bra\|" : r"\\Bra",
        r"\|Ket\|" : r"\\Ket",
        r"\slashed\|" : r"\\slashed"
    }
    for strCommand, strReplacement in dictRebindedCommands.items():
        strTeXEquations = re.sub(strCommand, strReplacement, strTeXEquations)
    with open(TEMPLATE_PATH / "formula.tex", "r") as tmp:
        Template = tmp.read()
    # use local tmpdir; make sure it exists before writing into it
    formula_tmp_dir = TEMP_DIR / "formulas2png"
    formula_tmp_dir.mkdir(parents=True, exist_ok=True)
    # Make directory items if it doesn't already exist
    items_dir = OUTPUT_DIR / "items"
    if not os.path.exists( items_dir):
        os.mkdir( items_dir )
    s = string.Template(Template)
    e = s.substitute(DERINHALT=strTeXEquations)
    tmpFile = formula_tmp_dir / "EOAinline.tex"
    with open(tmpFile, "w") as tmp:
        tmp.write(e)
    logging.info("Typesetting all Inline Equations")
    # Argument lists (instead of a re-split command string) keep paths that
    # contain spaces intact.
    Argumente = ["xelatex", "--halt-on-error", str(tmpFile.absolute())]
    # The log file receives stdout of xelatex, pdfcrop and gm; the context
    # manager closes it when the conversion is finished.
    with open(TEMP_DIR / 'xelatex-run.log', 'w') as Datei:
        Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
        if Ergebnis != 0:
            logging.error("xelatex exited with status %s while typesetting %s", Ergebnis, tmpFile)
        logging.info("Splitting all Inline Equations")
        pdf_burst("EOAinline.pdf", formula_tmp_dir)
        logging.info("Converting %s split pages into PNG-Images" % len(dictEOAineqs.keys()))
        counter_dictEOAineqs = 1
        for intRunningOrder in dictEOAineqs.keys():
            # provide more status information here in output!
            progress(counter_dictEOAineqs, len(dictEOAineqs.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAineqs, len(dictEOAineqs.keys())))
            pathSplitPage = (formula_tmp_dir / ("EOAformulas_" + str(intRunningOrder) + ".pdf")).absolute()
            pathCroppedPdf = (formula_tmp_dir / (dictEOAineqs[intRunningOrder] + ".pdf")).absolute()
            pathPng = (items_dir / (dictEOAineqs[intRunningOrder] + ".png")).absolute()
            # Crop the single page to the formula ...
            Argumente = shlex.split(str(PDFCROP_EXEC)) + [str(pathSplitPage), str(pathCroppedPdf)]
            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
            # ... and convert the cropped PDF into a PNG.
            Argumente = shlex.split(str(GM_PATH)) + ["convert", "-density", "144", str(pathCroppedPdf), str(pathPng)]
            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
            counter_dictEOAineqs += 1
else:
    logging.info("Found no EOAineq. Continuing")
###########
# Formula #
###########
logging.info("-----------------------------------------------------")
logging.info("Converting EOAchem")
# Chemical formulas are handled like inline equations: every <EOAchem> is
# wrapped in \ce{...}, all of them are typeset in one xelatex run (one
# formula per page), the PDF is split by pdf_burst, each page is cropped with
# pdfcrop and converted to a PNG in OUTPUT_DIR/items.
intChapterNumber = 1
int_EOAchem_running_order = 1
# Maps the running number of a formula (= page in the PDF) to its file stem.
dictEOAchems = {}
str_tex_chem = ""
all_chem = xmlTree.findall(".//EOAchem")
if len(all_chem) > 0:
    logging.info("Found " + str(len(all_chem)) + " chemical formulas")
    list_tex_chem = []
    for xmlChapter in xmlChapters:
        logging.info("Chapter " + str(intChapterNumber))
        xmlEOAchems = xmlChapter.findall(".//EOAchem")
        int_EOAchem_number = 1
        for xml_EOAchem in xmlEOAchems:
            str_chem_text = xml_EOAchem.text
            progress(int_EOAchem_number, len(xmlEOAchems),"Processing EOAchem %s of %s." % (int_EOAchem_number, len(xmlEOAchems)))
            # Blank lines are dropped before the code is handed to TeX.
            str_chem_text = os.linesep.join([s for s in str_chem_text.splitlines() if s])
            # Raw string: "\c" is not a valid escape sequence.
            list_tex_chem.append(r"\ce{" + str_chem_text + "}\n\\newpage\n")
            # Add int_EOAchem_running_order : Filename to dictionary
            strFilename = "EOAchem_" + str(intChapterNumber) + "_" + str(int_EOAchem_number)
            dictEOAchems[int_EOAchem_running_order] = strFilename
            # Prepare XML: clear() also drops the tail, so save and restore it
            tmpTail = xml_EOAchem.tail
            xml_EOAchem.clear()
            xml_EOAchem.tail = tmpTail
            xml_EOAchem.set("src", strFilename + ".png")
            xml_EOAchem.set("TeX", str_chem_text)
            # increment integers
            int_EOAchem_running_order += 1
            int_EOAchem_number += 1
        intChapterNumber += 1
    str_tex_chem = "".join(list_tex_chem)
    with open(TEMPLATE_PATH / "formula.tex", "r") as tmp:
        Template = tmp.read()
    # use local tmpdir; make sure it exists before writing into it
    formula_tmp_dir = TEMP_DIR / "formulas2png/"
    formula_tmp_dir.mkdir(parents=True, exist_ok=True)
    # Make directory items if it doesn't already exist
    items_dir = OUTPUT_DIR / "items"
    if not os.path.exists( items_dir ):
        os.mkdir( items_dir )
    s = string.Template(Template)
    e = s.substitute(DERINHALT=str_tex_chem)
    tmpFile = formula_tmp_dir / "EOAchem.tex"
    with open(tmpFile, "w") as tmp:
        tmp.write(e)
    logging.info("Typesetting all inline Chemical formulas")
    # Argument lists (instead of a re-split command string) keep paths that
    # contain spaces intact.
    Argumente = ["xelatex", "--halt-on-error", str(tmpFile.absolute())]
    # The log file receives stdout of xelatex, pdfcrop and gm; the context
    # manager closes it when the conversion is finished.
    # NOTE(review): mode 'w' truncates the file, so output previously written
    # to the same path is overwritten here.
    with open(TEMP_DIR / 'xelatex-run.log', 'w') as Datei:
        Ergebnis = subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
        if Ergebnis != 0:
            logging.error("xelatex exited with status %s while typesetting %s", Ergebnis, tmpFile)
        logging.info("Splitting all Inline Chemical formulas")
        pdf_burst("EOAchem.pdf", formula_tmp_dir)
        logging.info("Converting %s split pages into PNG-Images" % len(dictEOAchems.keys()))
        counter_dictEOAchems = 1
        for intRunningOrder in dictEOAchems.keys():
            # provide more status information here in output!
            progress(counter_dictEOAchems, len(dictEOAchems.keys()),"Splitting all inline equations, image %s of %s" % (counter_dictEOAchems, len(dictEOAchems.keys())))
            pathSplitPage = (formula_tmp_dir / ("EOAformulas_" + str(intRunningOrder) + ".pdf")).absolute()
            pathCroppedPdf = (formula_tmp_dir / (dictEOAchems[intRunningOrder] + ".pdf")).absolute()
            pathPng = (items_dir / (dictEOAchems[intRunningOrder] + ".png")).absolute()
            # Crop the single page to the formula ...
            Argumente = shlex.split(str(PDFCROP_EXEC)) + [str(pathSplitPage), str(pathCroppedPdf)]
            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
            # ... and convert the cropped PDF into a PNG.
            Argumente = shlex.split(str(GM_PATH)) + ["convert", "-density", "144", str(pathCroppedPdf), str(pathPng)]
            subprocess.call(Argumente,cwd=formula_tmp_dir,stdout=Datei)
            counter_dictEOAchems += 1
else:
    logging.info("Found no EOAchem. Continuing")
###############
# Formula end #
###############
logging.info("-----------------------------------------------------")
logging.info("EOAFigure Numbering per Chapter")
# Figures are counted per chapter; the label is stored in dictFigures under
# the id of the figure's <anchor>, which also becomes the figure's own id.
for xmlChapter in xmlChapters:
    Figurenumber = 1
    for xmlFigure in xmlChapter.xpath(".//EOAfigure | .//EOAlsfigure"):
        xmlAnchor = xmlFigure.find("anchor")
        strFigureUID = xmlAnchor.get('id')
        strChapterUID = xmlChapter.get('id')
        # Chapters carrying an id get "<chapter>.<figure>", all other
        # chapters just the running figure number.
        if strChapterUID:
            strFigureLabel = str(dictChapters[strChapterUID]) + "." + str(Figurenumber)
        else:
            strFigureLabel = str(Figurenumber)
        dictFigures[strFigureUID] = strFigureLabel
        xmlFigure.set("id", strFigureUID)
        Figurenumber += 1
logging.info("-----------------------------------------------------")
logging.info("Numbering Theorems")
# The theorem number is taken over from the "id-text" attribute and stored
# in dictTheorems under the theorem's id.
for xmlChapter in xmlChapters:
    dictTheorems.update(
        (xmlTheorem.get("id"), xmlTheorem.get("id-text"))
        for xmlTheorem in xmlChapter.findall(".//theorem")
    )
logging.info("-----------------------------------------------------")
logging.info("Section, Subsection,... Numbering per Chapter")
# Builds dictSections: id of a <div2>/<div3> -> number string.  Only chapters
# without rend="nonumber" advance the chapter counter and contribute a
# chapter prefix; sections/subsections with rend="nonumber" are skipped and
# do not advance their counters.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    boolChapterNumbered = xmlChapter.get("rend") != "nonumber"
    if boolChapterNumbered:
        strChapterPrefix = str(intChapterNumber) + "."
    else:
        strChapterPrefix = ""
    intSectionNumber = 1
    for xmlSection in xmlChapter.findall("div2"):
        if xmlSection.get("rend") == "nonumber":
            continue
        strSectionNumber = strChapterPrefix + str(intSectionNumber)
        dictSections[xmlSection.get("id")] = strSectionNumber
        intSubsectionNumber = 1
        for xmlSubsection in xmlSection.findall("div3"):
            if xmlSubsection.get("rend") == "nonumber":
                continue
            dictSections[xmlSubsection.get("id")] = strSectionNumber + "." + str(intSubsectionNumber)
            intSubsectionNumber += 1
        intSectionNumber += 1
    if boolChapterNumbered:
        intChapterNumber += 1
logging.info("-----------------------------------------------------")
logging.info("Numbering of Footnotes per Chapter")
intChapterNumber = 1
# Footnote numbers restart at 1 in every chapter; dictFootnotes maps the id
# of each <note> to its number (as a string).
for xmlChapter in xmlChapters:
    for intNoteNumber, xmlFootnote in enumerate(xmlChapter.findall(".//note"), start=1):
        dictFootnotes[xmlFootnote.get("id")] = str(intNoteNumber)
# here was OU's footnote code, now in libeoaconvert
# def get_bigfoot_data(chapter)
# bigfoot needs to be integrated into
# 'fndict': {'uid11': '2', 'uid12': '3', 'uid9': '1'},
logging.info("-----------------------------------------------------")
logging.info("Numbering of Lists per Chapter")
# The item number is taken over from the "id-text" attribute and stored in
# dictLists under the item's id.
for xmlChapter in xmlChapters:
    for xmlListitem in xmlChapter.findall(".//item"):
        dictLists[xmlListitem.get("id")] = xmlListitem.get("id-text")
logging.info("-----------------------------------------------------")
logging.info("Working on Page Numbers for References")
# Scan all .aux files in LATEX_DIR: \newlabel lines fill dictPagelabels
# (label -> third brace group of the entry), \abx@aux@cite lines fill
# set_citations with the cited keys.
listAuxFiles = glob.glob( str(LATEX_DIR /"*.aux") )
if not listAuxFiles:
    logging.error("No aux file found. Exiting")
    sys.exit(1)
# hyperref makes the lines much much longer
# \newlabel{BL}{{1.1}{4}{Forschungsüberblick zur Literatur über Alvarus Thomas}{section.1.1}{}}
# \newlabel{BL}{{1.1}{4}}
regexNewlabel = re.compile(r'\\newlabel\{(.*?)\}\{\{(.*?)\}\{(.*?)\}')
regexCitation = re.compile(r'\\abx@aux@cite{(.*?)}')
for strFile in listAuxFiles:
    with open(strFile, "r") as tmpFile:
        for line in tmpFile:
            matched_label = regexNewlabel.match(line)
            if matched_label:
                dictPagelabels[matched_label.group(1)] = matched_label.group(3)
            # parsing out information on cite works
            matched_citation = regexCitation.match(line)
            if matched_citation is not None:
                set_citations.add(matched_citation.group(1))
logging.info("page labels:")
logging.info(dictPagelabels)
logging.info("citations:")
logging.info(set_citations)
logging.info("-----------------------------------------------------")
logging.info("Numbering of Tables per Chapter")
# Builds dictTables: text of <EOAtablelabel> -> table number.  Tables whose
# caption text is exactly "nonumber" are skipped; tables without a label
# text get a generated one.
intChapterNumber = 1
for xmlChapter in xmlChapters:
    intTableNumber = 1
    for xmlTable in xmlChapter.findall(".//EOAtable"):
        xmlTableLabel = xmlTable.find(".//EOAtablelabel")
        if xmlTable.find(".//EOAtablecaption").text == "nonumber":
            continue
        if not xmlTableLabel.text:
            xmlTableLabel.text = "table" + str(intChapterNumber) + str(intTableNumber)
        strUID = xmlTableLabel.text
        logging.debug(f"XML table label: {strUID}")
        # Chapters with rend="nonumber" contribute no chapter prefix.
        if xmlChapter.get("rend") == "nonumber":
            dictTables[strUID] = str(intTableNumber)
        else:
            dictTables[strUID] = dictChapters[xmlChapter.get('id')] + "." + str(intTableNumber)
        intTableNumber += 1
    logging.debug(f"Tables in this chapter: {dictTables}.")
    intChapterNumber += 1
##############################################################
# Preparing the Bibliography #
##############################################################
def bibl_info_from_xml(
        xmlTree
):
    """Read the bibliography settings from the XML tree.

    Looks up the first ``EOAbibliographydatabase`` and
    ``EOAbibliographytype`` elements below ``xmlTree``.

    Returns:
        ``None`` if there is no ``EOAbibliographydatabase`` element,
        otherwise the tuple ``(bib_type, bib_database)`` built from the text
        of the two elements.

    Raises:
        ValueError: if the bibliography type is missing or not one of the
            supported values.
    """
    allowed_bib_types = ["monograph", "anthology", "monograph-numeric", "anthology-numeric"]
    database_element = xmlTree.find(".//EOAbibliographydatabase")
    if database_element is None:
        return None
    bib_database = database_element.text
    type_element = xmlTree.find(".//EOAbibliographytype")
    # A missing type element is reported like any other invalid type.
    bib_type = type_element.text if type_element is not None else None
    if bib_type not in allowed_bib_types:
        raise ValueError(
            f"The bibtype must be one of {', '.join(allowed_bib_types)}, got {bib_type!r}. Exiting"
        )
    return (bib_type, bib_database)
# .bib -> .json
# (return json data as python dict)
def write_json_bibl(
        bibl_info,
        output_file,
):
    """Convert the .bib database to CSL JSON with pandoc-citeproc.

    Args:
        bibl_info: tuple ``(bib_type, bib_database)``; ``bib_database`` is
            the path of the database without the ``.bib`` extension.
        output_file: path of the JSON file that is written.

    Returns:
        The data parsed from the JSON output of ``pandoc-citeproc``.

    Raises:
        json.JSONDecodeError: if pandoc-citeproc did not print valid JSON
            (for example because it failed).
    """
    (bib_type, bib_database) = bibl_info
    # the new solution: pandoc-citeproc
    # An argument list keeps a database path containing spaces or quotes in
    # one piece (the former shlex-split command string did not).
    citeproc_arguments = ["pandoc-citeproc", "--bib2json", bib_database + ".bib"]
    citeproc_command = " ".join(citeproc_arguments)
    logging.debug(f"Running citeproc with the following command: {citeproc_command}")
    # run() waits for the process and closes the pipe; the former Popen
    # object was never waited on.
    citeproc_process = subprocess.run(citeproc_arguments, stdout=subprocess.PIPE)
    if citeproc_process.returncode != 0:
        logging.error("pandoc-citeproc exited with status %s", citeproc_process.returncode)
    citeproc_json = citeproc_process.stdout
    citations_json = json.loads(citeproc_json)
    logging.debug(f"Dumping bib json file: {output_file}.")
    with open(output_file, 'w') as ibjf:
        # NOTE(review): this dumps the JSON *text* as one JSON string, i.e.
        # the file content is double-encoded.  Kept unchanged because readers
        # of this file are not visible here -- confirm whether
        # json.dump(citations_json, ibjf) is what is really wanted.
        json.dump(citeproc_json.decode('utf-8'), ibjf)
    return citations_json
def add_bibliography_to_xml(
        print_bibl_element,
        chapter_element,
        bib_database,
        citations_json,
        tmp_citation_filename
):
    """Insert the formatted bibliography after an EOAprintbibliography element.

    Args:
        print_bibl_element: the ``EOAprintbibliography`` element; its parent
            is renamed to ``div`` and a new ``div`` holding the entries is
            added as its next sibling.
        chapter_element: element whose ``citekey`` and ``nocite``
            descendants determine the keys to format (the whole tree for a
            monograph, a single chapter for an anthology).
        bib_database: path of the bibliography database without ``.bib``.
        citations_json: list of entries (dicts with at least ``id``) as
            returned by ``write_json_bibl``.
        tmp_citation_filename: passed to ``bib2html.main`` as ``temp_dir``.
    """
    bibliography_keyword = print_bibl_element.get("keyword")
    if bibliography_keyword:
        logging.debug(f"Found bibliography keyword {bibliography_keyword}")
    else:
        logging.debug("No bibliography keyword found")
    print_bibl_element.getparent().tag = "div"
    xmlBibliographyDiv = etree.Element("div")
    # Use the element handed in by the caller; the function used to rely on
    # a module-level variable named xmlBibliography instead.
    print_bibl_element.addnext(xmlBibliographyDiv)
    citekeys = chapter_element.xpath(".//citekey/text()")
    nocite_elements = chapter_element.xpath(".//nocite")
    if nocite_elements:
        logging.debug(f"Found {libeoaconvert.plural(len(nocite_elements), 'nocite command')}.")
        nocitekeys = []
        for nocite in nocite_elements:
            # An empty <nocite/> has text None; empty keys (e.g. from a
            # trailing comma) are dropped.
            keys = [x.strip() for x in (nocite.text or "").split(",") if x.strip()]
            nocitekeys += keys
            # marked so that the element can be stripped later on
            nocite.tag = "elementtobestripped"
        logging.debug(f"Found {libeoaconvert.plural(len(set(nocitekeys)), 'nocite key')}.")
        logging.debug("Adding nocite keys to the other cite keys.")
        citekeys += nocitekeys
    if bibliography_keyword:
        logging.info("We want to collect the entries matching the keywords from the database.")
        # .get(): database entries without a "keyword" field simply do not match.
        citations_to_format = [x["id"] for x in citations_json if x.get("keyword") == bibliography_keyword]
        logging.debug(f"Found {libeoaconvert.plural(len(citations_to_format), 'nocite citation')} in database.")
    else:
        citations_to_format = set(citekeys)
    logging.info( "citekeys: ")
    logging.info( len( citekeys ) )
    # NOTE(review): bib2html.main receives `citekeys`, not
    # `citations_to_format`, so the keyword selection above currently only
    # affects the log output.  (The disabled alternative,
    # libeoaconvert.format_citations with a CSL file, did use it.)
    formatted_references = bib2html.main(
        bib_file = Path(bib_database).with_suffix( ".bib" ),
        citekeys = citekeys,
        tex_template = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
        language = strLanguage,
        temp_dir = tmp_citation_filename
    )
    fixed_entries = libeoaconvert.fix_bib_entries(formatted_references)
    for entry in fixed_entries:
        xmlBibliographyDiv.append(entry)
# Read the bibliography settings, convert the database to JSON and insert
# the formatted bibliography: once for a monograph, once per chapter for an
# anthology.
bibl_info = bibl_info_from_xml(
    xmlTree
)
if bibl_info is None:
    logging.warning("No bibliography database found.")
else:
    (bib_type, bib_database) = bibl_info
    logging.debug(f"bib type is {bib_type}")
    logging.info( ".bib -> .json")
    citations_json = write_json_bibl(
        bibl_info,
        output_file = TEMP_DIR / (INPUT_PATH_NO_EXT + "-bib.json")
    )
    ## only for debugging (?)
    make_latex_bibl_file(
        bib_database = bib_database,
        set_citations = set_citations,
        files = [
            (TEMPLATE_PATH / "largebib.tex", DEBUG_DIR / "debug_onlybib.tex"),
            (TEMPLATE_PATH / "largebib-xml.tex", DEBUG_DIR / "debug_onlybib-xml.tex"),
        ]
    )
    # If Bibliography-Type is monograph search for EOAbibliography and make it all
    if bib_type == "monograph":
        tmp_citation_filename = TEMP_DIR / "bib2html" / "used_citations-monograph"
        xmlPrintBibliography = xmlTree.find(".//EOAprintbibliography")
        if xmlPrintBibliography is not None:
            # to insert here: with keywords we can have multiple bibliographies
            xmlBibliography = xmlPrintBibliography
            add_bibliography_to_xml(
                xmlBibliography,
                xmlTree,
                bib_database = bibl_info[1],
                citations_json = citations_json,
                tmp_citation_filename = tmp_citation_filename
            )
    # If Bibliography-Type is anthology search for EOAbibliography and make one per chapter
    elif bib_type == "anthology":
        for intChapterNumber, xmlChapter in enumerate(xmlChapters, start = 1):
            logging.debug(f"Looking at chapter {intChapterNumber}.")
            tmp_citation_filename = TEMP_DIR / "bib2html" / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber))
            xmlPrintBibliography = xmlChapter.find(".//EOAprintbibliography")
            if xmlPrintBibliography is not None:
                xmlBibliography = xmlPrintBibliography
                add_bibliography_to_xml(
                    xmlBibliography,
                    xmlChapter,
                    bib_database = bibl_info[1],
                    citations_json = citations_json,
                    tmp_citation_filename = tmp_citation_filename
                )
            else:
                # create an empty marker file
                logging.debug("No bibliography found.")
                # tmp_citation_filename is a Path, so `+ "_nocitations"`
                # raised a TypeError.  The marker is created as
                # TEMP_DIR/<name>_nocitations, which is the location the
                # citation processing later checks for.
                (TEMP_DIR / (tmp_citation_filename.name + "_nocitations")).touch()
# Example of the bibliography markup produced by the step above:
#
# <div2 rend="nonumber">
#   <head>References</head>
#   <div>
#     <EOAprintbibliography/>
#     <div>
#       <p class="bibliography">Abril Castelló, Vidal (1987). Las Casas contra Vitoria, 1550–1552: La revolución de la duodécima réplica. Causas y consecuencias. <i>Revista de Indias</i> 47(179):83–101.</p>
#       <p class="bibliography">Agrawal, Arun (1995). Dismantling the Divide Between Indigenous and Scientific Knowledge. <i>Development and Change</i> 26:413–439.</p>
#     </div>
#   </div>
# </div2>

# for the time being
strCitation = ""
# Bibliographies are done, now for the citations: every EOAciteauthoryear /
# EOAciteyear / EOAcitemanual element is turned into a
# <span class="citation"> whose text is looked up in the HTML file with the
# formatted citations and whose data-* attributes feed a popover.
if bib_type == "anthology" or bib_type == "monograph":
    intChapterNumber = 1
    if bib_type == "monograph":
        # One file with formatted citations for the whole book.
        tmp_citation_filename = "used_citations-monograph"
        tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html")
        with open(tmp_path_html, "r") as formatted_citations:
            form_cit = BeautifulSoup(formatted_citations, "html.parser")
    for xmlChapter in xmlChapters:
        logging.info("-----------------------------------------------------")
        logging.info("Processing References for Chapter " + str(intChapterNumber))
        xmlCitations = xmlChapter.xpath(".//EOAciteauthoryear | .//EOAciteyear | .//EOAcitemanual")
        if bib_type == "anthology":
            # One file with formatted citations per chapter, or a marker
            # file if the chapter has no bibliography.
            tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber)
            tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html")
            no_cite_path = TEMP_DIR / (tmp_citation_filename + "_nocitations")
            if os.path.exists(tmp_path_html):
                with open(tmp_path_html, "r") as formatted_citations:
                    form_cit = BeautifulSoup(formatted_citations, "html.parser")
            elif os.path.exists(no_cite_path):
                logging.debug("no citations in this chapter")
                intChapterNumber += 1
                continue
            # NOTE(review): if neither file exists, form_cit keeps the value
            # of the previous chapter (or is undefined for the first one).
        counter_citations = 1
        for xmlCitation in xmlCitations:
            string_citekey = xmlCitation.find("./citekey").text
            progress(counter_citations, len(xmlCitations),"Processing reference %s of %s: %s" % (counter_citations, len(xmlCitations), string_citekey))
            # (The former Bibitem/xmlBibTree based lookup was only kept as
            # disabled code and has been removed here.)
            # Title for the popover; reset per citation so that an unknown
            # key no longer inherits the title of the previous citation.
            strTitle = None
            for entry in citations_json:
                if entry["id"] == string_citekey:
                    current_citation = entry
                    strTitle = current_citation.get("title")
            # [1:-1] to remove parentheses around citations
            try:
                citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1]
            except IndexError:
                logging.error("Could not find {}. Exiting.".format(string_citekey))
                # exit with a non-zero status: this is an error
                sys.exit(1)
            data_title_value = citeauthoryear_value
            if xmlCitation.tag == "EOAciteauthoryear":
                strCitation = citeauthoryear_value
            elif xmlCitation.tag == "EOAciteyear":
                strCitation = form_cit.select("#citeyear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1]
            elif xmlCitation.tag == "EOAcitemanual":
                cite_text = xmlCitation.find("citetext")
                if cite_text.getchildren():
                    tmp_string = cite_text.getchildren()[0]
                    # Work in progress in the original code.
                    # NOTE(review): lxml's tostring() returns bytes unless
                    # encoding="unicode" is requested, so appending the page
                    # string below would fail for such citations -- confirm
                    # the intended serialisation.
                    strCitation = etree.tostring(tmp_string)
                else:
                    strCitation = cite_text.text
            xmlPage = xmlCitation.find("./page")
            if xmlPage is not None and xmlPage.text is not None:
                pages_text = libeoaconvert.gettext(xmlPage)
                strCitation = strCitation + ", " + pages_text
                data_title_value = data_title_value + ", " + pages_text
            # Replace the XML tag by the citation text; clear() also drops
            # the tail, so save and restore it.
            tmpTail = xmlCitation.tail
            xmlCitation.clear()
            xmlCitation.tag = "span"
            xmlCitation.set("rel", "popover")
            xmlCitation.set("class", "citation")
            xmlCitation.set("citekey", string_citekey)
            xmlCitation.text = strCitation
            xmlCitation.tail = tmpTail
            # Create Link to be used for website in a popover
            xmlCitation.set("data-toggle", "popover")
            xmlCitation.set("html", "true")
            xmlCitation.set("data-placement", "bottom")
            xmlCitation.set("data-title", data_title_value)
            # Explicit check instead of a bare `except:`: anything that is
            # not a string (no entry found, no title) becomes "missing".
            if isinstance(strTitle, str):
                xmlCitation.set("data-content", strTitle)
            else:
                xmlCitation.set("data-content", "missing")
            counter_citations += 1
        intChapterNumber += 1
# If Bibliography-Type is monograph-numeric search for EOAbibliography and make it all
if bib_type == "monograph-numeric":
    if xmlTree.find(".//EOAprintbibliography") is not None:
        # citekey -> running number / title of the entry
        dictCitekeysNumbers = {}
        dictCitekeysTitles = {}
        xmlBibliography = xmlTree.find(".//EOAprintbibliography")
        xmlBibliography.clear()
        xmlBibliography.tag = "div"
        xmlBibliography.getparent().tag = "div"
        xml_bib_entries = xmlBibTree.findall(".//entry")
        intNumberOfEntry = 1
        for xmlEntry in xml_bib_entries:
            # Go through all entries and assign a number to the citekey
            bibEntry = Bibitem(xmlEntry)
            strCitekey = bibEntry.citekey()
            dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry)
            dictCitekeysTitles[strCitekey] = str(bibEntry.title())
            strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>"
            xmlNew = etree.fromstring(strNewentry)
            xmlBibliography.append(xmlNew)
            intNumberOfEntry += 1
    # Now for the references via EOAcitenumeric
    xmlCitenumerics = xmlTree.findall(".//EOAcitenumeric")
    for xmlCitenumeric in xmlCitenumerics:
        logging.info(etree.tostring(xmlCitenumeric))
        strPopover = ""
        # The citekey element holds a comma separated list of keys; blanks
        # and newlines are removed before splitting.
        tmpCitekeys = xmlCitenumeric.find(".//citekey").text
        tmpCitekeys = re.sub(" ", "", tmpCitekeys)
        tmpCitekeys = re.sub("\n", "", tmpCitekeys)
        listCitekeys = tmpCitekeys.split(",")
        listCitenumbers = []
        for strCitekey in listCitekeys:
            listCitenumbers.append(dictCitekeysNumbers[strCitekey])
            # Create Text to be used on the website in a popover
            strPopover = strPopover + "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " "
        listCitenumbers = sorted(listCitenumbers, key=int)
        # Build the citation text "[...]": runs of three or more consecutive
        # numbers are written as a range ("1-3"), everything else is
        # separated by commas.  intNumberOfSequentialCite counts how many
        # numbers of the current run have been swallowed so far.
        strResult = "[" + listCitenumbers[0]
        intNumberOfSequentialCite = 0
        for i in range(1,len(listCitenumbers)):
            intPreviousCitenumber = int(listCitenumbers[i-1])
            intCurrentCitenumber = int(listCitenumbers[i])
            if i == (len(listCitenumbers)-1):
                # Last number: close a pending range or append with a comma.
                if (intPreviousCitenumber + 1) == intCurrentCitenumber:
                    if intNumberOfSequentialCite == 0:
                        strResult = strResult + "," + str(listCitenumbers[i])
                    else:
                        strResult = strResult + "-" + str(listCitenumbers[i])
                        # was `== 0`, a comparison without any effect
                        intNumberOfSequentialCite = 0
                else:
                    strResult = strResult + "," + str(listCitenumbers[i])
                break
            intNextCitenumber = int(listCitenumbers[i+1])
            if (intCurrentCitenumber + 1) != intNextCitenumber:
                # The run (if any) ends with the current number.
                if intNumberOfSequentialCite != 0:
                    strResult = strResult + "-" + str(intCurrentCitenumber)
                    intNumberOfSequentialCite = 0
                else:
                    strResult = strResult + "," + str(intCurrentCitenumber)
                continue
            if (intPreviousCitenumber + 1) == intCurrentCitenumber:
                # In the middle of a run: swallow the number.
                intNumberOfSequentialCite += 1
                continue
            else:
                strResult = strResult + "," + str(intCurrentCitenumber)
                intNumberOfSequentialCite = 0
        strResult = strResult + "]"
        xmlCitenumeric.text = strResult
        # Create Link to be used for website
        xmlCitenumeric.set("data-toggle", "popover")
        xmlCitenumeric.set("html", "true")
        xmlCitenumeric.set("data-content", strPopover)
        xmlCitenumeric.set("class","citation")
        xmlCitenumeric.set("data-placement", "bottom")
        xmlCitenumeric.set("data-title", strResult)
# author is missing!
# print("xmlBibliography")
# print(etree.tostring(xmlBibliography))
# input()
def _format_citation_numbers(citation_numbers):
    """Render citation numbers as a bracketed, range-compressed label.

    The numbers are sorted numerically. Runs of three or more consecutive
    numbers collapse to ``first-last``; single numbers and pairs stay
    comma-separated.  ``["3", "1", "2", "5"]`` gives ``"[1-3,5]"`` and
    ``["1", "2"]`` gives ``"[1,2]"``.

    :param citation_numbers: iterable of ints or numeric strings
    :returns: the label, including the surrounding square brackets
    """
    runs = []
    for number in sorted(int(item) for item in citation_numbers):
        if runs and number == runs[-1][-1] + 1:
            runs[-1].append(number)
        else:
            runs.append([number])
    parts = []
    for run in runs:
        if len(run) >= 3:
            parts.append("{}-{}".format(run[0], run[-1]))
        else:
            parts.extend(str(number) for number in run)
    return "[" + ",".join(parts) + "]"


# Numeric citations for the individual chapters
if bib_type == "anthology-numeric":
    # Refsections in the bibliography data are matched to chapters through
    # this running chapter counter.
    intChapterNumber = 1
    for xmlChapter in xmlChapters:
        logging.info("Processing Bibliography")
        xmlChapterBibliography = xmlChapter.find(".//EOAprintbibliography")
        if xmlChapterBibliography is not None:
            # Only rebind xmlBibliography when the chapter really has a
            # bibliography; code after this block reads the last one found.
            xmlBibliography = xmlChapterBibliography
            dictCitekeysNumbers = {}
            dictCitekeysTitles = {}
            #xmlBibliography.clear()
            xmlBibliography.tag = "div"
            xmlBibliography.getparent().tag = "div"
            xmlRefsections = xmlBibTree.findall(".//refsection")
            for xmlRefsection in xmlRefsections:
                if xmlRefsection.find(".//number").text == str(intChapterNumber):
                    break
            else:
                # Previously this fell through silently, leaving
                # xmlRefsection bound to whatever was inspected last.
                logging.warning(
                    "No refsection numbered %s found in the bibliography data.",
                    intChapterNumber
                )
            xml_bib_entries = xmlRefsection.findall(".//entry")
            for intNumberOfEntry, xmlEntry in enumerate(xml_bib_entries, start=1):
                # Go through all entries and assign a number to the citekey
                bibEntry = Bibitem(xmlEntry)
                strCitekey = bibEntry.citekey()
                dictCitekeysNumbers[strCitekey] = str(intNumberOfEntry)
                dictCitekeysTitles[strCitekey] = str(bibEntry.title())
                strNewentry = "<p class=\"bibliography\">[" + str(intNumberOfEntry) + "] " + createBibEntryNumeric(bibEntry) + "</p>"
                xmlBibliography.append(etree.fromstring(strNewentry))
            # Now for the references via EOAcitenumeric.  They are resolved
            # against the number/title maps built just above, so this only
            # runs for chapters that print a bibliography.
            xmlCitenumerics = xmlChapter.xpath(".//EOAcitenumeric | .//EOAciteauthoryear | .//EOAciteyear")
            logging.info("Found numeric citation in chapter " + str(intChapterNumber))
            for xmlCitenumeric in xmlCitenumerics:
                # The citekey element holds a comma-separated key list that
                # may contain spaces and line breaks.
                tmpCitekeys = xmlCitenumeric.find(".//citekey").text
                tmpCitekeys = tmpCitekeys.replace(" ", "").replace("\n", "")
                logging.info(tmpCitekeys)
                listCitekeys = tmpCitekeys.split(",")
                listCitenumbers = []
                listPopoverParts = []
                for strCitekey in listCitekeys:
                    logging.info(strCitekey)
                    listCitenumbers.append(dictCitekeysNumbers[strCitekey])
                    # Create Text to be used on the website in a popover
                    listPopoverParts.append(
                        "[" + dictCitekeysNumbers[strCitekey] + "] " + dictCitekeysTitles[strCitekey] + " "
                    )
                strPopover = "".join(listPopoverParts)
                strResult = _format_citation_numbers(listCitenumbers)
                xmlCitenumeric.text = strResult
                # Create Link to be used for website in a popover
                xmlCitenumeric.set("data-toggle", "popover")
                xmlCitenumeric.set("data-placement", "bottom")
                xmlCitenumeric.set("data-title", " " + strResult)
                xmlCitenumeric.set("data-content", strPopover)
                xmlCitenumeric.set("class","citation")
        intChapterNumber += 1
# this is somewhat luzzini-specific
# Starting from the element that encloses the bibliography, take the first
# div1 ancestor the XPath query returns and that ancestor's parent.
bib_parent_element = xmlBibliography.getparent()
upper_div = bib_parent_element.xpath("./ancestor::div1")[0]
previous_div0 = upper_div.getparent()
# Collect the toc-entry and index placeholders found anywhere below upper_div.
# NOTE: the original author flagged this lookup as a possible culprit for the
# index not being found; an earlier variant searched below bib_parent_element
# instead of upper_div.
other_content = upper_div.xpath(
    " | ".join((
        ".//EOAtocentry",
        ".//EOAprintpersonindex",
        ".//EOAprintlocationindex",
        ".//EOAprintindex",
    ))
)
# Appending re-parents each placeholder as the last child of previous_div0;
# an empty result list simply leaves the tree untouched.
for element in other_content:
    previous_div0.append(element)
# Remove the placeholder markup: "tagtobestripped" elements are unwrapped
# (their text and children are merged into the parent), while
# "elementtobestripped" elements are deleted together with their content.
# with_tail=False keeps the text that follows a deleted element.
etree.strip_tags(xmlTree, "tagtobestripped")
etree.strip_elements(xmlTree, "elementtobestripped", with_tail=False)
# here followed the conversion to epub and the conversion to django.xml
# both parts were removed and put into separate files.
intermediate_file_pre = TEMP_DIR / "IntermediateXMLFile_pre.xml"
intermediate_file = TEMP_DIR / "IntermediateXMLFile.xml"
ergebnis = etree.tostring(xmlTree, pretty_print=True, encoding="unicode")
# Write with an explicit UTF-8 encoding so that the bytes on disk agree with
# the XML declaration added below, whatever the locale's default encoding is.
with open(intermediate_file_pre, "w", encoding="utf-8") as ergebnisdatei:
    ergebnisdatei.write(ergebnis)
# The final file is derived from the serialized string still in memory:
# add XML declaration
filedata_declaration = "<?xml version='1.0' encoding='UTF-8'?>\n" + ergebnis
# replacing a milestone element by a closing and opening combination
filedata_clean = filedata_declaration.replace('<msparbreak/>', '</p><p>')
with open(intermediate_file, "w", encoding="utf-8") as outfile:
    outfile.write(filedata_clean)
# saving some data
# Bundle the lookup dictionaries under short keys and serialize the bundle
# to data.pickle inside TEMP_DIR.
data_to_pickle = dict(
    chapterdict=dictChapters,
    eqdict=dictEquations,
    listdict=dictLists,
    theoremdict=dictTheorems,
    figdict=dictFigures,
    secdict=dictSections,
    fndict=dictFootnotes,
    tabdict=dictTables,
    pagelabeldict=dictPagelabels,
)
pickle_path = TEMP_DIR / 'data.pickle'
with pickle_path.open('wb') as pickle_file:
    # Use the highest pickle protocol this interpreter offers.
    pickle.dump(data_to_pickle, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
# Search the tralics log for lines containing "argument of \EOAfn" (the doubled
# backslash is grep's escape for one literal backslash), with two lines of
# context before and one after each hit.  The script treats a hit as a footnote
# that contains paragraphs.
# The argument list is built directly rather than by formatting a command
# string and splitting it, so a log path containing spaces or quotes reaches
# grep as a single argument.
tralics_log = OUTPUT_DIR / (INPUT_PATH_NO_EXT + "-tralics.log")
grep_command_arguments = [
    "grep",
    "-A1",
    "-B2",
    r"argument of \\EOAfn",
    str(tralics_log),
]
# subprocess.run waits for grep and closes the pipe.  grep exits non-zero when
# nothing matches, so the return code is deliberately not checked.
grep_result = subprocess.run(grep_command_arguments, stdout=subprocess.PIPE)
grep_output = grep_result.stdout
if len(grep_output) > 0:
    logging.info("\n===\nFootnotes with paragraphs were found. They have to be replaced by the \\EOAfnpar command.\n")
    logging.info(grep_output.decode("utf-8"))
    logging.info("===\n")
logging.info("Removing temporary files.")
cleanup()
logging.info("Done!")
sys.exit()