diff --git a/find_chapters.py b/find_chapters.py index 108690d..5e97167 100644 --- a/find_chapters.py +++ b/find_chapters.py @@ -26,13 +26,14 @@ __author__ = "kthoden@mpiwg-berlin.mpg.de" import re +import sys import argparse import logging from PyPDF2 import PdfFileWriter, PdfFileReader logging.basicConfig(level=logging.DEBUG, format=' %(asctime)s - %(levelname)s - %(message)s') -CHAPTER_LABEL_REGEX = r"chapter\d+_.*?" +CHAPTER_LABEL_REGEX = r"chap\d+_.*?" # \newlabel{chapter01_caraffa}{{1}{5}{Objects of Value: Challenging Conventional Hierarchies in the Photo Archive\EOAauthor {Costanza Caraffa}}{section*.4}{}} NEWLABEL_REGEX = r"\\newlabel\{" + CHAPTER_LABEL_REGEX + "\}\{\{\d+\}\{(\d+)\}" @@ -55,7 +56,7 @@ def main(): parser = argparse.ArgumentParser() parser.add_argument("auxfile", help="The auxfile that is used to read the page numbers from.") - parser.add_argument("pdffile", help="The PDF file that is going to be split in chapters..") + parser.add_argument("pdffile", help="The PDF file that is going to be split in chapters.") args = parser.parse_args() with open(args.auxfile) as aux_file: @@ -63,6 +64,10 @@ def main(): references = re.findall(NEWLABEL_REGEX, aux_lines) + if not references: + logging.error("No chapter marks found. Exiting") + sys.exit() + input1 = PdfFileReader(open(args.pdffile, "rb")) logging.debug("Input has %d pages.", input1.getNumPages())