Skip to content

Commit

Permalink
eoatex2imxml: fixed/completed using htlatex for bibl. and references f…
Browse files Browse the repository at this point in the history
…ormatting
  • Loading branch information
EsGeh authored and EsGeh committed Apr 10, 2019
1 parent 62b04b5 commit d83aa99
Show file tree
Hide file tree
Showing 4 changed files with 212 additions and 79 deletions.
136 changes: 79 additions & 57 deletions eoatex2imxml.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from utils.libeoabibitem import Bibitem
import utils.libeoaconvert as libeoaconvert
from utils.load_config import load_config, exec_command, check_executable, copy_dir_overwrite
from utils.load_config import load_config, exec_command, check_executable, copy_dir_overwrite, ToLog, ToFile
import utils.bib2html as bib2html

# imports
Expand Down Expand Up @@ -59,8 +59,9 @@
help="Name of config file"
)
parser.add_argument(
"-l", "--log-file",
default = Path("logs", SCRIPT_NAME).with_suffix(".log"),
"-l", "--log-dir",
default = Path("logs"),
# default = Path("logs", SCRIPT_NAME).with_suffix(".log"),
help="logfile"
)
parser.add_argument(
Expand Down Expand Up @@ -117,7 +118,8 @@
CONFIG = load_config(
CONFIG_FILE,
args.log_level,
args.log_file,
(Path(args.log_dir) / SCRIPT_NAME) . with_suffix( ".log" ),
# args.log_file,
)

########################
Expand All @@ -141,17 +143,24 @@
# Paths:
############################
INPUT_DIR = Path( args.filename ).resolve().parent
INPUT_PATH_NO_EXT = args.filename
INPUT_PATH = Path( args.filename )
if INPUT_PATH.suffix == '':
INPUT_PATH = INPUT_PATH.with_suffix( ".tex" )
elif INPUT_PATH.suffix != ".tex":
raise( Exception( "input file matching '*.tex' expected" ) )
OUTPUT_DIR = Path( args.output_dir )
LATEX_DIR = Path ( args.latex_dir )
LOG_DIR = Path( args.log_dir )

CONVERT_DIR = OUTPUT_DIR / "CONVERT"
# CONVERT_DIR = os.getcwd() + os.path.sep + "CONVERT"
TEMP_DIR = OUTPUT_DIR / "tmp_files"
DEBUG_DIR = OUTPUT_DIR / "debug"

# where to output the xml file:
XML_FILE = OUTPUT_DIR / (INPUT_PATH_NO_EXT + ".xml")
XML_FILE = (OUTPUT_DIR / INPUT_PATH.name) .with_suffix( ".xml" )

BIB2HTML_FILENAME = "temp"


#################################################
Expand Down Expand Up @@ -461,34 +470,35 @@ def cleanup():
def run_tralics(
input_file,
TRALICS_PATH_LIB,
TRALICS_LOG_PATH,
log_path,
output_dir = OUTPUT_DIR,
):
fixed_tex_file_path = output_dir / Path(input_file).name
fixed_tex_file_path = output_dir / input_file.name
libeoaconvert.enable_preamble(
input_file,
fixed_tex_file_path,
"xml"
)
# Convert TeX to XML via Tralics
logging.info( f"executing {TRALICS_PATH_EXEC}. log file: {TRALICS_LOG_PATH}" )
logging.info( f"executing {TRALICS_PATH_EXEC}. log file: {log_path}" )
exec_command(
"{cmd} -log_file {log_file} -confdir {conf_dir}/tralics_conf -config {conf_dir}/tralics.tcf -utf8 -utf8output -output_dir={output_dir} -input_dir={input_dir} -input_file={input_file}".format(
"{cmd} -confdir {conf_dir}/tralics_conf -config {conf_dir}/tralics.tcf -utf8 -utf8output -output_dir={output_dir} -input_dir={input_dir} -input_file={input_file}".format(
cmd = TRALICS_PATH_EXEC,
log_file = TRALICS_LOG_PATH,
# log_file = log_filename,
conf_dir = TRALICS_PATH_LIB,
output_dir = output_dir,
input_dir = output_dir,
input_file = input_file,
input_dir = input_file.parent,
input_file = fixed_tex_file_path,
),
output_to = ToFile( log_path ),
ignore_fail = True # :-D
)

# .tex -> .xml
run_tralics(
input_file = INPUT_PATH_NO_EXT + '.tex',
input_file = INPUT_PATH,
TRALICS_PATH_LIB = TRALICS_PATH_LIB,
TRALICS_LOG_PATH = (INPUT_PATH_NO_EXT + "-tralics.log"),
log_path = LOG_DIR / SCRIPT_NAME / (INPUT_PATH.stem + "-tralics.log"),
output_dir = OUTPUT_DIR
)

Expand Down Expand Up @@ -1177,10 +1187,9 @@ def bibl_info_from_xml(
# .bib -> .json
# (return json data as python dict)
def write_json_bibl(
bibl_info,
bib_database,
output_file,
):
(bib_type, bib_database) = bibl_info
# the new solution: pandoc-citeproc
# interim_bib_json_file = INPUT_PATH_NO_EXT + "-bib.json"
citeproc_command = "pandoc-citeproc --bib2json %s" % bib_database + ".bib"
Expand All @@ -1200,9 +1209,8 @@ def write_json_bibl(
def add_bibliography_to_xml(
print_bibl_element,
chapter_element,
bib_database,
citations_json,
tmp_citation_filename
formatted_references
):
bibliography_keyword = print_bibl_element.get("keyword")
if bibliography_keyword:
Expand Down Expand Up @@ -1239,24 +1247,6 @@ def add_bibliography_to_xml(
logging.info( len( citekeys ) )
csl_file = BASE_DIR / CONFIG['Auxiliaries']['CSL_FILE']

formatted_references = bib2html.main(
bib_file = Path(bib_database).with_suffix( ".bib" ),
citekeys = citekeys,
tex_template = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
language = strLanguage,
temp_dir = tmp_citation_filename
)

'''
formatted_references = libeoaconvert.format_citations(
citations_to_format,
bib_database + ".bib",
strLanguage,
tmp_citation_filename,
csl_file
)[0]
'''

fixed_entries = libeoaconvert.fix_bib_entries(formatted_references)
for entry in fixed_entries:
xmlBibliographyDiv.append(entry)
Expand All @@ -1274,8 +1264,8 @@ def add_bibliography_to_xml(

logging.info( ".bib -> .json")
citations_json = write_json_bibl(
bibl_info,
output_file = TEMP_DIR / (INPUT_PATH_NO_EXT + "-bib.json")
INPUT_DIR / bib_database,
output_file = TEMP_DIR / (INPUT_PATH.stem + "-bib.json")
)

## only for debugging (?)
Expand All @@ -1290,33 +1280,58 @@ def add_bibliography_to_xml(

# If Bibliography-Type is monograph search for EOAbibliography and make it all
if bib_type == "monograph":
tmp_citation_filename = TEMP_DIR / "bib2html" / "used_citations-monograph"
# tmp_citation_filename = TEMP_DIR / "bib2html" / tmp
if xmlTree.find(".//EOAprintbibliography") is not None:
# to insert here: with keywords we can have multiple bibliographies
xmlBibliography = xmlTree.find(".//EOAprintbibliography")

citekeys = xmlTree.xpath(".//citekey/text()")
formatted_bibl_info = bib2html.main(
bib_file = (INPUT_DIR / bib_database).with_suffix( ".bib" ),
citekeys = citekeys,
tex_template = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
language = strLanguage,
temp_dir = TEMP_DIR / "bib2html" / "monograph-tmp",
output_file = TEMP_DIR / "bib2html" / "used_citations-monograph.html",
log_dir = LOG_DIR / SCRIPT_NAME / "bib2html"
)
formatted_references = formatted_bibl_info['references']

logging.debug( "formatted bibliography:" )
logging.debug( etree.tostring(formatted_references) )
add_bibliography_to_xml(
xmlBibliography,
xmlTree,
bib_database = bibl_info[1],
citations_json = citations_json,
tmp_citation_filename = tmp_citation_filename
xmlBibliography,
xmlTree,
citations_json,
formatted_references
)

# If Bibliography-Type is anthology search for EOAbibliography and make one per chapter
elif bib_type == "anthology":
for intChapterNumber, xmlChapter in enumerate(xmlChapters, start = 1):
logging.debug(f"Looking at chapter {intChapterNumber}.")
tmp_citation_filename = TEMP_DIR / "bib2html" / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber))
# tmp_citation_filename = TEMP_DIR / "bib2html" / ("used_citations-anthology-chapter_{:02d}".format(intChapterNumber))
if xmlChapter.find(".//EOAprintbibliography") is not None:
xmlBibliography = xmlChapter.find(".//EOAprintbibliography")

citekeys = xmlChapter.xpath(".//citekey/text()")
formatted_bibl_info = bib2html.main(
bib_file = (INPUT_DIR / bib_database).with_suffix( ".bib" ),
citekeys = citekeys,
tex_template = BASE_DIR / "bibformat" / "4ht" / "bibliography4ht.tex",
language = strLanguage,
temp_dir = TEMP_DIR / "bib2html" / "chapter_{:02d}-tmp".format( intChapterNumber ),
output_file = TEMP_DIR / "bib2html" / "used_citations-anthology-chapter_{:02d}.html".format( intChapterNumber ),
log_dir = LOG_DIR / SCRIPT_NAME / "bib2html"
)
formatted_citations = formatted_bibl_info['references']
logging.debug( "formatted bibliography:" )
logging.debug( etree.tostring(formatted_references) )
add_bibliography_to_xml(
xmlBibliography,
xmlChapter,
bib_database = bibl_info[1],
citations_json = citations_json,
tmp_citation_filename = tmp_citation_filename
xmlBibliography,
xmlChapter,
citations_json,
formatted_references
)

else:
Expand Down Expand Up @@ -1346,7 +1361,7 @@ def add_bibliography_to_xml(

if bib_type == "monograph":
tmp_citation_filename = "used_citations-monograph"
tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html")
tmp_path_html = (TEMP_DIR / "bib2html" / tmp_citation_filename) .with_suffix( ".html" )
with open(tmp_path_html, "r") as formatted_citations:
form_cit = BeautifulSoup(formatted_citations, "html.parser")

Expand All @@ -1357,15 +1372,21 @@ def add_bibliography_to_xml(

if bib_type == "anthology":
tmp_citation_filename = "used_citations-anthology-chapter_{:02d}".format(intChapterNumber)
tmp_path_html = TEMP_DIR / (tmp_citation_filename + ".html")
no_cite_path = TEMP_DIR / (tmp_citation_filename + "_nocitations")
tmp_path_html = (TEMP_DIR / "bib2html" / tmp_citation_filename / BIB2HTML_FILENAME) .with_suffix( ".html" )
# no_cite_path = TEMP_DIR / "bib2html" / (tmp_citation_filename + "_nocitations")
if os.path.exists(tmp_path_html):
with open(tmp_path_html, "r") as formatted_citations:
form_cit = BeautifulSoup(formatted_citations, "html.parser")
else:
logging.debug("no citations in this chapter")
intChapterNumber += 1
continue
'''
elif os.path.exists(no_cite_path):
logging.debug("no citations in this chapter")
intChapterNumber += 1
continue
'''

counter_citations = 1

Expand Down Expand Up @@ -1440,7 +1461,8 @@ def add_bibliography_to_xml(

# [1:-1] to remove parentheses around citations
try:
citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1]
citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text
# citeauthoryear_value = form_cit.select("#citeauthoryear ~ p > span[data-cites='%s']" % string_citekey)[0].text[1:-1]
except IndexError:
logging.error("Could not find {}. Exiting.".format(string_citekey))
sys.exit()
Expand All @@ -1461,6 +1483,7 @@ def add_bibliography_to_xml(
# strCitation = tmp_string3.replace("&lt;", "<")
else:
strCitation = xmlCitation.find("citetext").text
logging.info( "formatted citation: {}".format( strCitation ) )

if xmlCitation.find("./page") is not None and xmlCitation.find("./page").text is not None:
pages_text = libeoaconvert.gettext(xmlCitation.find("./page"))
Expand Down Expand Up @@ -1713,8 +1736,7 @@ def add_bibliography_to_xml(
pickle.dump(data_to_pickle, f, pickle.HIGHEST_PROTOCOL)

grep_command = "grep -A1 -B2 'argument of \\\EOAfn' {}".format(
# INPUT_PATH_NO_EXT
OUTPUT_DIR / (INPUT_PATH_NO_EXT + "-tralics.log")
LOG_DIR / SCRIPT_NAME / (INPUT_PATH.stem + "-tralics.log")
)
grep_command_arguments = shlex.split(grep_command)
grep_result = subprocess.Popen(grep_command_arguments, stdout=subprocess.PIPE)
Expand Down
Loading

0 comments on commit d83aa99

Please sign in to comment.