Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Using toc file instead of aux
  • Loading branch information
kthoden committed Mar 29, 2019
1 parent ee0e15c commit 8dbf505
Showing 1 changed file with 11 additions and 9 deletions.
20 changes: 11 additions & 9 deletions find_chapters.py
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8; mode: python -*-

"""
Find pages of a chapter in a LaTeX aux file
Find pages of a chapter in a LaTeX toc file
This is from Hack #92 from the O'Reilly Book "LaTeX Hacks"
(9783897214774) where it was implemented in Perl. It depends on a
Expand All @@ -16,7 +16,7 @@
uploading chapter files and prefixing them with chapter info could be
further automated. However, output needs to be checked!
Maybe as a preprocessing step (and not to schlepp the aux file
Maybe as a preprocessing step (and not to schlepp the toc file
around), include the chapter page information in a config file before
uploading (or use a PDF metadata field for that?)
"""
Expand All @@ -40,6 +40,10 @@
# \newlabel{chapter01_caraffa}{{1}{5}{Objects of Value: Challenging Conventional Hierarchies in the Photo Archive\EOAauthor {Costanza Caraffa}}{section*.4}{}}
NEWLABEL_REGEX = r"\\newlabel\{" + CHAPTER_LABEL_REGEX + "\}\{\{\d+\}\{(\d+)\}"

# toc
# \contentsline {chapter}{\numberline {6}Der Kontext 2: Die Renaissance-Kommentare zu Pseudo-Proklos’ \emph {Sphaera}}{117}{chapter.6}%
TOC_REGEX = r"\\contentsline \{chapter\}\{.*?}\{(\d+)\}\{.*?\}"

# declare rules how to name chapter files

def get_labels_from_xml(xmlfile):
Expand Down Expand Up @@ -69,28 +73,26 @@ def main():
"""The main bit"""

parser = argparse.ArgumentParser()
parser.add_argument("auxfile", help="The auxfile that is used to read the page numbers from.")
parser.add_argument("tocfile", help="The tocfile that is used to read the page numbers from.")
parser.add_argument("pdffile", help="The PDF file that is going to be split in chapters.")
parser.add_argument("-t", "--teifile", help="The TEI file for getting the labels of chapters.")
parser.add_argument("-o", "--offset", help="An offset to be added to the pages. You have to manually find out the number. Should be around 7.", default=0)
args = parser.parse_args()

offset = int(args.offset)

with open(args.auxfile) as aux_file:
aux_lines = aux_file.read()
with open(args.tocfile) as toc_file:
toc_lines = toc_file.read()

if args.teifile:
references = []
chapter_ids = get_labels_from_xml(args.teifile)
for chapter_id in chapter_ids:
newlabel_regex = r"\\newlabel\{" + chapter_id + "\}\{\{\d+\}\{(\d+)\}"
ding = re.findall(newlabel_regex, aux_lines)[0]
ding = re.findall(newlabel_regex, toc_lines)[0]
references.append(ding)
else:
references = re.findall(NEWLABEL_REGEX, aux_lines)

print(references)
references = re.findall(TOC_REGEX, toc_lines)

if not references:
logging.error("No chapter marks found. Exiting")
Expand Down

0 comments on commit 8dbf505

Please sign in to comment.