diff --git a/find_chapters.py b/find_chapters.py index 71353c2..a647b78 100644 --- a/find_chapters.py +++ b/find_chapters.py @@ -2,7 +2,7 @@ # -*- coding: utf-8; mode: python -*- """ -Find pages of a chapter in an LaTeX aux file +Find pages of a chapter in a LaTeX toc file This is from Hack #92 from the O'Reilly Book "LaTeX Hacks" (9783897214774) where it was implemented in Perl. It depends on a @@ -16,7 +16,7 @@ uploading chapter files and prefixing them with chapter info could be further automatted. However, output needs to be checked! -Maybe as a preprocessing step (and not to schlepp the aux file +Maybe as a preprocessing step (and not to schlepp the toc file around), include the chapter page information in a config file before uploading (or use a PDF metadata field for that?) """ @@ -40,6 +40,10 @@ # \newlabel{chapter01_caraffa}{{1}{5}{Objects of Value: Challenging Conventional Hierarchies in the Photo Archive\EOAauthor {Costanza Caraffa}}{section*.4}{}} NEWLABEL_REGEX = r"\\newlabel\{" + CHAPTER_LABEL_REGEX + "\}\{\{\d+\}\{(\d+)\}" +# toc +# \contentsline {chapter}{\numberline {6}Der Kontext 2: Die Renaissance-Kommentare zu Pseudo-Proklos’ \emph {Sphaera}}{117}{chapter.6}% +TOC_REGEX = r"\\contentsline \{chapter\}\{.*?}\{(\d+)\}\{.*?\}" + # declare rules how to name chapter files def get_labels_from_xml(xmlfile): @@ -69,7 +73,7 @@ def main(): """The main bit""" parser = argparse.ArgumentParser() - parser.add_argument("auxfile", help="The auxfile that is used to read the page numbers from.") + parser.add_argument("tocfile", help="The tocfile that is used to read the page numbers from.") parser.add_argument("pdffile", help="The PDF file that is going to be split in chapters.") parser.add_argument("-t", "--teifile", help="The TEI file for getting the labels of chapters.") parser.add_argument("-o", "--offset", help="An offset to be added to the pages. You have to manually find out the number. 
Should be around 7.", default=0) @@ -77,20 +81,18 @@ def main(): offset = int(args.offset) - with open(args.auxfile) as aux_file: - aux_lines = aux_file.read() + with open(args.tocfile) as toc_file: + toc_lines = toc_file.read() if args.teifile: references = [] chapter_ids = get_labels_from_xml(args.teifile) for chapter_id in chapter_ids: newlabel_regex = r"\\newlabel\{" + chapter_id + "\}\{\{\d+\}\{(\d+)\}" - ding = re.findall(newlabel_regex, aux_lines)[0] + ding = re.findall(newlabel_regex, toc_lines)[0] references.append(ding) else: - references = re.findall(NEWLABEL_REGEX, aux_lines) - - print(references) + references = re.findall(TOC_REGEX, toc_lines) if not references: logging.error("No chapter marks found. Exiting")