Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Exit strategy
  • Loading branch information
Klaus Thoden committed Nov 20, 2018
1 parent 5504189 commit c53cfd4
Showing 1 changed file with 7 additions and 2 deletions.
9 changes: 7 additions & 2 deletions find_chapters.py
Expand Up @@ -26,13 +26,14 @@
__author__ = "kthoden@mpiwg-berlin.mpg.de" __author__ = "kthoden@mpiwg-berlin.mpg.de"


import re import re
import sys
import argparse import argparse
import logging import logging
from PyPDF2 import PdfFileWriter, PdfFileReader from PyPDF2 import PdfFileWriter, PdfFileReader


logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s') logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')


CHAPTER_LABEL_REGEX = r"chapter\d+_.*?" CHAPTER_LABEL_REGEX = r"chap\d+_.*?"
# \newlabel{chapter01_caraffa}{{1}{5}{Objects of Value: Challenging Conventional Hierarchies in the Photo Archive\EOAauthor {Costanza Caraffa}}{section*.4}{}} # \newlabel{chapter01_caraffa}{{1}{5}{Objects of Value: Challenging Conventional Hierarchies in the Photo Archive\EOAauthor {Costanza Caraffa}}{section*.4}{}}
NEWLABEL_REGEX = r"\\newlabel\{" + CHAPTER_LABEL_REGEX + "\}\{\{\d+\}\{(\d+)\}" NEWLABEL_REGEX = r"\\newlabel\{" + CHAPTER_LABEL_REGEX + "\}\{\{\d+\}\{(\d+)\}"


Expand All @@ -55,14 +56,18 @@ def main():


parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("auxfile", help="The auxfile that is used to read the page numbers from.") parser.add_argument("auxfile", help="The auxfile that is used to read the page numbers from.")
parser.add_argument("pdffile", help="The PDF file that is going to be split in chapters..") parser.add_argument("pdffile", help="The PDF file that is going to be split in chapters.")
args = parser.parse_args() args = parser.parse_args()


with open(args.auxfile) as aux_file: with open(args.auxfile) as aux_file:
aux_lines = aux_file.read() aux_lines = aux_file.read()


references = re.findall(NEWLABEL_REGEX, aux_lines) references = re.findall(NEWLABEL_REGEX, aux_lines)


if not references:
logging.error("No chapter marks found. Exiting")
sys.exit()

input1 = PdfFileReader(open(args.pdffile, "rb")) input1 = PdfFileReader(open(args.pdffile, "rb"))
logging.debug("Input has %d pages.", input1.getNumPages()) logging.debug("Input has %d pages.", input1.getNumPages())


Expand Down

0 comments on commit c53cfd4

Please sign in to comment.