Skip to content
Permalink
Browse files

Exit strategy

  • Loading branch information...
kthoden committed Nov 20, 2018
1 parent 5504189 commit c53cfd4cea644d52cea40bddf6bcabf0d1c4f312
Showing with 7 additions and 2 deletions.
  1. +7 −2 find_chapters.py
@@ -26,13 +26,14 @@
__author__ = "kthoden@mpiwg-berlin.mpg.de"

import re
import sys
import argparse
import logging
from PyPDF2 import PdfFileWriter, PdfFileReader

# Configure the root logger once at import time: emit every record from DEBUG
# upwards, formatted as "<timestamp> - <level name> - <message>".
logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s')

# Pattern for the name of a chapter label inside \newlabel{...}.
# NOTE(review): the first assignment is immediately overridden by the second,
# so only the "chap<digits>_" form is in effect; labels spelled
# "chapter<digits>_" are NOT matched.
CHAPTER_LABEL_REGEX = r"chapter\d+_.*?"
CHAPTER_LABEL_REGEX = r"chap\d+_.*?"
# Shape of the aux-file line being matched (this sample uses the older
# "chapter" prefix; a line matched by the active pattern starts with
# \newlabel{chap01_...} instead):
# \newlabel{chapter01_caraffa}{{1}{5}{Objects of Value: Challenging Conventional Hierarchies in the Photo Archive\EOAauthor {Costanza Caraffa}}{section*.4}{}}
# The single capture group is the second braced number after the label
# ("5" in the sample) -- presumably the page number; confirm against the caller.
# Every fragment is a raw string so that the regex escapes (\{, \}, \d) are not
# treated as invalid string escape sequences by the Python parser.
NEWLABEL_REGEX = r"\\newlabel\{" + CHAPTER_LABEL_REGEX + r"\}\{\{\d+\}\{(\d+)\}"

@@ -55,14 +56,18 @@ def main():

parser = argparse.ArgumentParser()
parser.add_argument("auxfile", help="The auxfile that is used to read the page numbers from.")
parser.add_argument("pdffile", help="The PDF file that is going to be split in chapters..")
parser.add_argument("pdffile", help="The PDF file that is going to be split in chapters.")
args = parser.parse_args()

with open(args.auxfile) as aux_file:
aux_lines = aux_file.read()

references = re.findall(NEWLABEL_REGEX, aux_lines)

if not references:
logging.error("No chapter marks found. Exiting")
sys.exit()

input1 = PdfFileReader(open(args.pdffile, "rb"))
logging.debug("Input has %d pages.", input1.getNumPages())

0 comments on commit c53cfd4

Please sign in to comment.
You can’t perform that action at this time.