diff --git a/eoatex2imxml.py b/eoatex2imxml.py index ce56645..449dfe8 100755 --- a/eoatex2imxml.py +++ b/eoatex2imxml.py @@ -21,7 +21,7 @@ from utils.libeoabibitem import Bibitem import utils.libeoaconvert as libeoaconvert -from utils.load_config import load_config, exec_command, check_executable, copy_dir_overwrite +from utils.load_config import load_config, exec_command, check_executable, copy_dir_overwrite, ToLog, ToFile import utils.bib2html as bib2html # imports @@ -59,8 +59,9 @@ help="Name of config file" ) parser.add_argument( - "-l", "--log-file", - default = Path("logs", SCRIPT_NAME).with_suffix(".log"), + "-l", "--log-dir", + default = Path("logs"), + # default = Path("logs", SCRIPT_NAME).with_suffix(".log"), help="logfile" ) parser.add_argument( @@ -117,7 +118,8 @@ CONFIG = load_config( CONFIG_FILE, args.log_level, - args.log_file, + (Path(args.log_dir) / SCRIPT_NAME) . with_suffix( ".log" ), + # args.log_file, ) ######################## @@ -141,9 +143,14 @@ # Paths: ############################ INPUT_DIR = Path( args.filename ).resolve().parent -INPUT_PATH_NO_EXT = args.filename +INPUT_PATH = Path( args.filename ) +if INPUT_PATH.suffix == '': + INPUT_PATH = INPUT_PATH.with_suffix( ".tex" ) +elif INPUT_PATH.suffix != ".tex": + raise( Exception( "input file matching '*.tex' expected" ) ) OUTPUT_DIR = Path( args.output_dir ) LATEX_DIR = Path ( args.latex_dir ) +LOG_DIR = Path( args.log_dir ) CONVERT_DIR = OUTPUT_DIR / "CONVERT" # CONVERT_DIR = os.getcwd() + os.path.sep + "CONVERT" @@ -151,7 +158,9 @@ DEBUG_DIR = OUTPUT_DIR / "debug" # where to output the xml file: -XML_FILE = OUTPUT_DIR / (INPUT_PATH_NO_EXT + ".xml") +XML_FILE = (OUTPUT_DIR / INPUT_PATH.name) .with_suffix( ".xml" ) + +BIB2HTML_FILENAME = "temp" ################################################# @@ -461,34 +470,35 @@ def cleanup(): def run_tralics( input_file, TRALICS_PATH_LIB, - TRALICS_LOG_PATH, + log_path, output_dir = OUTPUT_DIR, ): - fixed_tex_file_path = output_dir / 
Path(input_file).name + fixed_tex_file_path = output_dir / input_file.name libeoaconvert.enable_preamble( input_file, fixed_tex_file_path, "xml" ) # Convert TeX to XML via Tralics - logging.info( f"executing {TRALICS_PATH_EXEC}. log file: {TRALICS_LOG_PATH}" ) + logging.info( f"executing {TRALICS_PATH_EXEC}. log file: {log_path}" ) exec_command( - "{cmd} -log_file {log_file} -confdir {conf_dir}/tralics_conf -config {conf_dir}/tralics.tcf -utf8 -utf8output -output_dir={output_dir} -input_dir={input_dir} -input_file={input_file}".format( + "{cmd} -confdir {conf_dir}/tralics_conf -config {conf_dir}/tralics.tcf -utf8 -utf8output -output_dir={output_dir} -input_dir={input_dir} -input_file={input_file}".format( cmd = TRALICS_PATH_EXEC, - log_file = TRALICS_LOG_PATH, + # log_file = log_filename, conf_dir = TRALICS_PATH_LIB, output_dir = output_dir, - input_dir = output_dir, - input_file = input_file, + input_dir = input_file.parent, + input_file = fixed_tex_file_path, ), + output_to = ToFile( log_path ), ignore_fail = True # :-D ) # .tex -> .xml run_tralics( - input_file = INPUT_PATH_NO_EXT + '.tex', + input_file = INPUT_PATH, TRALICS_PATH_LIB = TRALICS_PATH_LIB, - TRALICS_LOG_PATH = (INPUT_PATH_NO_EXT + "-tralics.log"), + log_path = LOG_DIR / SCRIPT_NAME / (INPUT_PATH.stem + "-tralics.log"), output_dir = OUTPUT_DIR ) @@ -1177,10 +1187,9 @@ def bibl_info_from_xml( # .bib -> .json # (return json data as python dict) def write_json_bibl( - bibl_info, + bib_database, output_file, ): - (bib_type, bib_database) = bibl_info # the new solution: pandoc-citeproc # interim_bib_json_file = INPUT_PATH_NO_EXT + "-bib.json" citeproc_command = "pandoc-citeproc --bib2json %s" % bib_database + ".bib" @@ -1200,9 +1209,8 @@ def write_json_bibl( def add_bibliography_to_xml( print_bibl_element, chapter_element, - bib_database, citations_json, - tmp_citation_filename + formatted_references ): bibliography_keyword = print_bibl_element.get("keyword") if bibliography_keyword: @@ -1239,24 +1247,6 
@@ def add_bibliography_to_xml( logging.info( len( citekeys ) ) csl_file = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE'] - formatted_references = bib2html.main( - bib_file = Path(bib_database).with_suffix( ".bib" ), - citekeys = citekeys, - tex_template = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex", - language = strLanguage, - temp_dir = tmp_citation_filename - ) - - ''' - formatted_references = libeoaconvert.format_citations( - citations_to_format, - bib_database + ".bib", - strLanguage, - tmp_citation_filename, - csl_file - )[0] - ''' - fixed_entries = libeoaconvert.fix_bib_entries(formatted_references) for entry in fixed_entries: xmlBibliographyDiv.append(entry) @@ -1274,8 +1264,8 @@ def add_bibliography_to_xml( logging.info( ".bib -> .json") citations_json = write_json_bibl( - bibl_info, - output_file = TEMP_DIR / (INPUT_PATH_NO_EXT + "-bib.json") + INPUT_DIR / bib_database, + output_file = TEMP_DIR / (INPUT_PATH.stem + "-bib.json") ) ## only for debugging (?) @@ -1290,33 +1280,58 @@ def add_bibliography_to_xml( # If Bibliography-Type is monograph search for EOAbibliography and make it all if bib_type == "monograph": - tmp_citation_filename = TEMP_DIR / "bib2html" / "used_citations-monograph" + # tmp_citation_filename = TEMP_DIR / "bib2html" / tmp if xmlTree.find(".//EOAprintbibliography") is not None: # to insert here: with keywords we can have multiple bibliographies xmlBibliography = xmlTree.find(".//EOAprintbibliography") + citekeys = xmlTree.xpath(".//citekey/text()") + formatted_bibl_info = bib2html.main( + bib_file = (INPUT_DIR / bib_database).with_suffix( ".bib" ), + citekeys = citekeys, + tex_template = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex", + language = strLanguage, + temp_dir = TEMP_DIR / "bib2html" / "monograph-tmp", + output_file = TEMP_DIR / "bib2html" / "used_citations-monograph.html", + log_dir = LOG_DIR / SCRIPT_NAME / "bib2html" + ) + formatted_references = formatted_bibl_info['references'] + + logging.debug( 
"formatted bibliography:" ) + logging.debug( etree.tostring(formatted_references) ) add_bibliography_to_xml( - xmlBibliography, - xmlTree, - bib_database = bibl_info[1], - citations_json = citations_json, - tmp_citation_filename = tmp_citation_filename + xmlBibliography, + xmlTree, + citations_json, + formatted_references ) # If Bibliography-Type is anthology search for EOAbibliography and make one per chapter elif bib_type == "anthology": for intChapterNumber, xmlChapter in enumerate(xmlChapters, start = 1): logging.debug(f"Looking at chapter {intChapterNumber}.") - tmp_citation_filename = TEMP_DIR / "bib2html" / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber)) + # tmp_citation_filename = TEMP_DIR / "bib2html" / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber)) if xmlChapter.find(".//EOAprintbibliography") is not None: xmlBibliography = xmlChapter.find(".//EOAprintbibliography") + citekeys = xmlChapter.xpath(".//citekey/text()") + formatted_bibl_info = bib2html.main( + bib_file = (INPUT_DIR / bib_database).with_suffix( ".bib" ), + citekeys = citekeys, + tex_template = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex", + language = strLanguage, + temp_dir = TEMP_DIR / "bib2html" / "chapter_{:02d}-tmp".format( intChapterNumber ), + output_file = TEMP_DIR / "bib2html" / "used_citations-anthology-chapter_{:02d}.html".format( intChapterNumber ), + log_dir = LOG_DIR / SCRIPT_NAME / "bib2html" + ) + formatted_references = formatted_bibl_info['references'] + logging.debug( "formatted bibliography:" ) + logging.debug( etree.tostring(formatted_references) ) add_bibliography_to_xml( - xmlBibliography, - xmlChapter, - bib_database = bibl_info[1], - citations_json = citations_json, - tmp_citation_filename = tmp_citation_filename + xmlBibliography, + xmlChapter, + citations_json, + formatted_references ) else: @@ -1346,7 +1361,7 @@ def add_bibliography_to_xml( if bib_type == "monograph": tmp_citation_filename = 
"used_citations-monograph" - tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html") + tmp_path_html = (TEMP_DIR / "bib2html" / tmp_citation_filename) .with_suffix( ".html" ) with open(tmp_path_html, "r") as formatted_citations: form_cit = BeautifulSoup(formatted_citations, "html.parser") @@ -1357,15 +1372,21 @@ def add_bibliography_to_xml( if bib_type == "anthology": tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber) - tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html") - no_cite_path = TEMP_DIR / (tmp_citation_filename + "_nocitations") + tmp_path_html = (TEMP_DIR / "bib2html" / tmp_citation_filename / BIB2HTML_FILENAME) .with_suffix( ".html" ) + # no_cite_path = TEMP_DIR / "bib2html" / (tmp_citation_filename + "_nocitations") if os.path.exists(tmp_path_html): with open(tmp_path_html, "r") as formatted_citations: form_cit = BeautifulSoup(formatted_citations, "html.parser") + else: + logging.debug("no citations in this chapter") + intChapterNumber += 1 + continue + ''' elif os.path.exists(no_cite_path): logging.debug("no citations in this chapter") intChapterNumber += 1 continue + ''' counter_citations = 1 @@ -1440,7 +1461,8 @@ def add_bibliography_to_xml( # [1:-1] to remove parentheses around citations try: - citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] + citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text + # citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1] except IndexError: logging.error("Could not find {}. 
Exiting.".format(string_citekey)) sys.exit() @@ -1461,6 +1483,7 @@ def add_bibliography_to_xml( # strCitation = tmp_string3.replace("<", "<") else: strCitation = xmlCitation.find("citetext").text + logging.info( "formatted citation: {}".format( strCitation ) ) if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None: pages_text = libeoaconvert.gettext(xmlCitation.find("./page")) @@ -1713,8 +1736,7 @@ def add_bibliography_to_xml( pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL) grep_command = "grep -A1 -B2 'argument of \\\EOAfn' {}".format( - # INPUT_PATH_NO_EXT - OUTPUT_DIR / (INPUT_PATH_NO_EXT + "-tralics.log") + LOG_DIR / SCRIPT_NAME / (INPUT_PATH.stem + "-tralics.log") ) grep_command_arguments = shlex.split(grep_command) grep_result = subprocess.Popen(grep_command_arguments, stdout=subprocess.PIPE) diff --git a/utils/bib2html.py b/utils/bib2html.py index 7fadf0b..6865d5e 100755 --- a/utils/bib2html.py +++ b/utils/bib2html.py @@ -9,12 +9,15 @@ __date__ = "20190313" __author__ = "kthoden@mpiwg-berlin.mpg.de" +from utils.load_config import exec_command, ToFile, ToLog + import argparse import os import subprocess import shlex import logging import string +import shutil from lxml import etree from pathlib import Path @@ -102,9 +105,25 @@ def write_dummy_latex( # def write_dummy_latex ends here -def run_htlatex(tmp_filename): +def run_htlatex( + tmp_filename, + log_dir +): """Create HTML file from temporary LaTeX file""" + exec_command( + f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8'", + output_to = ToFile( Path(log_dir) / "htlatex1.log" ) + ) + exec_command( + f"biber {tmp_filename}", + output_to = ToFile( Path(log_dir) / "biber.log" ) + ) + exec_command( + f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8'", + output_to = ToFile( Path(log_dir) / "htlatex2.log" ) + ) +''' command = f"htlatex {tmp_filename}.tex 'xhtml,charset=utf-8' ' -cunihtf -utf8'" arguments = shlex.split(command) 
logging.info("Using external command htlatex with command %s" % command) @@ -119,8 +138,71 @@ def run_htlatex(tmp_filename): arguments = shlex.split(command) logging.info("Using external command htlatex with command %s" % command) subprocess.call(arguments) + ''' # def run_htlatex ends here +def create_citations(citekeys, xml_tree, style): + """Create citations""" + + if style in ["authoryear", "year"]: + pass + else: + logging.error("Unrecognized citation format, choose 'authoryear' or 'year'. Exiting") + sys.exit() + + surrounding_div = etree.fromstring(f"