diff --git a/.gitignore b/.gitignore index 3196ae3..eb96dc9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ /ffmpeg-* /jbig2enc-* /jbig2dec-* +/liberation-fonts-* /leptonica-* /logs /nginx-* diff --git a/README.todo b/README.todo index 485b9f7..62ab79c 100644 --- a/README.todo +++ b/README.todo @@ -8,13 +8,16 @@ bug: bug: - ☐ tag löschen "löschen|Yes?" + ☐ https://github.com/jonaswinkler/paperless-ng/issues/1490 + ☐ https://github.com/paperless-ngx/paperless-ngx/discussions/3090 + Calling OCRmyPDF with args: {'input_file': PosixPath('/scratch/local/plprj/2024-02-19/paperless-ngx34bhqotl/c913a207C78B05A-B130-45F8-9EC4-281AD914ABFA}.PDF'), 'output_file': PosixPath('/scratch/local/plprj/2024-02-19/paperless-0ee3414u/archive.pdf'), 'use_threads': True, 'jobs': 32, 'language': 'deu+eng', 'output_type': 'pdfa', 'progress_bar': False, 'color_conversion_strategy': 'RGB', 'force_ocr': True, 'clean_final': True, 'deskew': True, 'rotate_pages': True, 'rotate_pages_threshold': 12.0, 'sidecar': PosixPath('/scratch/local/plprj/2024-02-19/paperless-0ee3414u/sidecar.txt')} -test: - ☐ zbar ohne imagemagick +upgrade: + ssl-certs in conf umziehen + +bug: + ☐ tag löschen "löschen|Yes?" - ☐ conf.build-scripte - sed '/kmous=/d;/XM=/d;$s/$/XM=,/' encrypted document: [2024-02-19 14:07:32,454] [DEBUG] [paperless.parsing.tesseract] Calling OCRmyPDF with args: {'input_file': PosixPath('/home/wwwutz/paperless/2.5.3/media/documents/originals/0000754.pdf'), 'output_file': PosixPath('/scratch/local/paperless/wwwutz/2.5.3/paperless-huijc19r/archive.pdf'), 'use_threads': True, 'jobs': 32, 'language': 'deu+eng', 'output_type': 'pdfa', 'progress_bar': False, 'color_conversion_strategy': 'RGB', 'force_ocr': True, 'clean_final': True, 'deskew': True, 'rotate_pages': True, 'rotate_pages_threshold': 12.0, 'sidecar': PosixPath('/scratch/local/paperless/wwwutz/2.5.3/paperless-huijc19r/sidecar.txt')} @@ -26,10 +29,11 @@ encrypted document: ___________________ Archive: + ✔ conf.build-scripte @done (24-02-21 13:48) @project(bug) + sed '/kmous=/d;/XM=/d;$s/$/XM=,/' + ✔ zbar ohne imagemagick @done (24-02-21 09:01) @project(test) ✔ ocrmypdf @done (24-02-15 17:01) ( . profile; ocrmypdf --force-ocr ../PDFofDeath/2024-01-29\ 5650025416_B.pdf xxx.pdf ) - --force-ocr was issued, causing transcoding. - The optional dependency 'jbig2' was not found, so some image optimizations could not be attempted. ✔ pngquant @done (24-02-15 13:48) git clone --recursive https://github.com/kornelski/pngquant.git The optional dependency 'pngquant' was not found, so some image optimizations could not be attempted. @@ -51,4 +55,6 @@ Archive: https://www.python.org/downloads/release/python-3118/ ✔ sqlite => 3.45.1 @done (24-02-12 13:18) https://sqlite.org/download.html + --force-ocr was issued, causing transcoding. + The optional dependency 'jbig2' was not found, so some image optimizations could not be attempted. diff --git a/build.profile b/build.profile index 7233b69..e8ff908 100644 --- a/build.profile +++ b/build.profile @@ -23,5 +23,6 @@ BUILD_tesseract=tesseract-5.3.4 BUILD_nginx=nginx-1.25.3 BUILD_jbig2enc=jbig2enc-0.28-17-gea05019 BUILD_jbig2dec=jbig2dec-0.20 +BUILD_libfontttf=liberation-fonts-ttf-2.1.5 BUILD_paperless=paperless-ngx-2.5.3 diff --git a/buildall.sh b/buildall.sh index 38f09a7..709199b 100755 --- a/buildall.sh +++ b/buildall.sh @@ -70,6 +70,7 @@ ln -fs "${logfile}" "${LOGS}/build.log" [ -d "${BUILD_jbig2enc}" ] || ./jbig2enc.build.sh [ -d "${BUILD_jbig2dec}" ] || ./jbig2dec.build.sh [ -d "${BUILD_tesseract}" ] || ./tesseract.build.sh +[ -d "${BUILD_libfontttf}" ] || ./liberation-fonts-ttf.build.sh [ -d "${BUILD_nginx}" ] || ./nginx.build.sh ./nginx.conf.build.sh [ -d "$PROJECT/${BUILD_paperless}" ] || ./paperless-ngx.build.sh diff --git a/liberation-fonts-ttf.build.sh b/liberation-fonts-ttf.build.sh new file mode 100755 index 0000000..1286097 --- /dev/null +++ b/liberation-fonts-ttf.build.sh @@ -0,0 +1,34 @@ +#!/bin/bash +{ +set -x +set -e + +. build.profile + +function B_LIBFONTTTF { + +[ -d "${PREFIX}" ] || { echo "PREFIX unset or not a directory";exit; } + +# SRCURL="https://github.com/liberationfonts/liberation-fonts/files/7261482/liberation-fonts-ttf-2.1.5.tar.gz" +SRCURL="https://beehive.molgen.mpg.de/31b453e0b77bacde410a34a725b34f8a/liberation-fonts-ttf-2.1.5.tar.gz" + +PREFIX="${PREFIX}/${BUILD_libfontttf}" + +BUILD_PKG="${BUILD_libfontttf}" + +mkdir -p "${PREFIX}" + +test -e "${BUILD_PKG}.tar.gz" || wget -nv "${SRCURL}" -O "${BUILD_PKG}.tar.gz" +test -d "${BUILD_PKG}" || mkdir -pv "${BUILD_PKG}" && tar -xf "${BUILD_PKG}.tar.gz" --strip-components=1 -C "${BUILD_PKG}" + +cd "${BUILD_PKG}" + +for font in *.ttf; do + install -v -m 644 "${font}" "${PREFIX}/${font}" +done + +} + +B_LIBFONTTTF + +} diff --git a/paperless.conf.build.sh b/paperless.conf.build.sh index 87b4750..b58e232 100755 --- a/paperless.conf.build.sh +++ b/paperless.conf.build.sh @@ -17,30 +17,42 @@ ln -vfs ${PROJECT}/conf/paperless.conf ${PREFIX} PAPERLESS_SECRET_KEY=${PAPERLESS_SECRET_KEY:-$(cat /dev/urandom | head -c 50 | openssl base64 |head -1)} # paperless.conf aus paperles.conf.example hart generieren -_sed=( - -e "/#PAPERLESS_URL=/ a PAPERLESS_URL=${PAPERLESS_URL}" - -e "/#PAPERLESS_CSRF_TRUSTED_ORIGINS=/ a PAPERLESS_CSRF_TRUSTED_ORIGINS=${PAPERLESS_CSRF_TRUSTED_ORIGINS:-${PAPERLESS_URL}}" - -e "/#PAPERLESS_REDIS=/ a PAPERLESS_REDIS=unix://${DEVSHM}/redis.sock" - -e "/#PAPERLESS_SECRET_KEY=change-me/ a PAPERLESS_SECRET_KEY=${PAPERLESS_SECRET_KEY}" -# -e '/#PAPERLESS_AUTO_LOGIN_USERNAME=/ a PAPERLESS_AUTO_LOGIN_USERNAME=paperless' - -e '/#PAPERLESS_OCR_LANGUAGE=/ a PAPERLESS_OCR_LANGUAGE=deu+eng' - -e '/#PAPERLESS_OCR_MODE=skip/ a PAPERLESS_OCR_MODE=force' - -e "/#PAPERLESS_OCR_LANGUAGE=/ a PAPERLESS_NLTK_DIR=${PROJECT}/data/nltk" - -e "/#PAPERLESS_OCR_CLEAN=/ a PAPERLESS_OCR_CLEAN=clean" - -e "/#PAPERLESS_CONSUMPTION_DIR=/ a PAPERLESS_CONSUMPTION_DIR=${PROJECT}/consume" - -e "/#PAPERLESS_CONSUMPTION_DIR=/ a PAPERLESS_LOGGING_DIR=${PROJECT}/log" - -e '/#PAPERLESS_CONSUMER_RECURSIVE=/ a PAPERLESS_CONSUMER_RECURSIVE=true' - -e '/#PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=/ a PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=true' - -e "/#PAPERLESS_DATA_DIR=/ a PAPERLESS_SCRATCH_DIR=${TMPDIR}" - -e "/#PAPERLESS_DATA_DIR=/ a PAPERLESS_DATA_DIR=${PROJECT}/data" - -e "/#PAPERLESS_STATICDIR=/ a PAPERLESS_STATICDIR=${PREFIX}/static" - -e "/#PAPERLESS_MEDIA_ROOT=/ a PAPERLESS_MEDIA_ROOT=${PROJECT}/media" - -e '/#PAPERLESS_TIME_ZONE=/ a PAPERLESS_TIME_ZONE=Europe/Berlin' - -e '/#PAPERLESS_CONSUMER_ENABLE_BARCODES=/ a PAPERLESS_CONSUMER_ENABLE_BARCODES=true' - -e '/#PAPERLESS_CONSUMER_ENABLE_BARCODES=/ a PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE=true' +_conf=( +# PAPERLESS_AUTO_LOGIN_USERNAME=paperless +PAPERLESS_CONSUMER_ENABLE_ASN_BARCODE=true +PAPERLESS_CONSUMER_ENABLE_BARCODES=true +PAPERLESS_CONSUMER_RECURSIVE=true +PAPERLESS_CONSUMER_SUBDIRS_AS_TAGS=true + +PAPERLESS_CONSUMPTION_DIR=${PROJECT}/consume +PAPERLESS_DATA_DIR=${PROJECT}/data +PAPERLESS_LOGGING_DIR=${PROJECT}/log +PAPERLESS_MEDIA_ROOT=${PROJECT}/media +PAPERLESS_NLTK_DIR=${PROJECT}/data/nltk +PAPERLESS_SCRATCH_DIR=${TMPDIR} +PAPERLESS_STATICDIR=${PREFIX}/static + +PAPERLESS_URL=${PAPERLESS_URL} +PAPERLESS_CSRF_TRUSTED_ORIGINS=${PAPERLESS_CSRF_TRUSTED_ORIGINS:-${PAPERLESS_URL}} + +PAPERLESS_OCR_CLEAN=clean +PAPERLESS_OCR_LANGUAGE=deu+eng +PAPERLESS_OCR_MODE=force + +PAPERLESS_REDIS=unix://${DEVSHM}/redis.sock +PAPERLESS_SECRET_KEY=${PAPERLESS_SECRET_KEY} +PAPERLESS_THUMBNAIL_FONT_NAME=${PROJECT}/${BUILD_libfontttf}/LiberationMono-Regular.ttf +PAPERLESS_TIME_ZONE=Europe/Berlin ) ( + set -x echo "# generated by $0 in $PWD. do not edit." - sed "${_sed[@]}" ${BUILD_paperless}/paperless.conf.example + echo "# paperless.conf.example" + cat ${BUILD_paperless}/paperless.conf.example + echo "# generated by $0 in $PWD" + + for c in "${_conf[@]}"; do + echo "$c" + done ) | tee ${TO}