diff --git a/README.todo b/README.todo index 42eff0c..eb8730e 100644 --- a/README.todo +++ b/README.todo @@ -2,6 +2,30 @@ dependency tree: dot REAME.dot + +2.16.1: + + + ☐ gs ? + GS_VERSION=10.03.1 + + ☐ SSL für granian + + ☐ paperless-ngx + + ☐ # Set Python environment variables + ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 \ + # Ignore warning from Whitenoise about async iterators + PYTHONWARNINGS="ignore:::django.http.response:517" \ + PNGX_CONTAINERIZED=1 \ + # https://docs.astral.sh/uv/reference/settings/#link-mode + UV_LINK_MODE=copy \ + UV_CACHE_DIR=/cache/uv/ + + ☐ install management commands + docker/install_management_commands.sh + bug: libtiff sollte den LD_LIBRARY_PATH setzen, siehe /home/paperless/2.13.5.1/zbar-0.23.93/profile, sonst findet er zwar die header files zum compilieren aber er lädt /usr/lib/libtiff ... und die ist alt. @@ -11,7 +35,6 @@ bug: ☐ https://github.com/jonaswinkler/paperless-ng/issues/1490 ☐ https://github.com/paperless-ngx/paperless-ngx/discussions/3090 Calling OCRmyPDF with args: {'input_file': PosixPath('/scratch/local/plprj/2024-02-19/paperless-ngx34bhqotl/c913a207C78B05A-B130-45F8-9EC4-281AD914ABFA}.PDF'), 'output_file': PosixPath('/scratch/local/plprj/2024-02-19/paperless-0ee3414u/archive.pdf'), 'use_threads': True, 'jobs': 32, 'language': 'deu+eng', 'output_type': 'pdfa', 'progress_bar': False, 'color_conversion_strategy': 'RGB', 'force_ocr': True, 'clean_final': True, 'deskew': True, 'rotate_pages': True, 'rotate_pages_threshold': 12.0, 'sidecar': PosixPath('/scratch/local/plprj/2024-02-19/paperless-0ee3414u/sidecar.txt')} - ☐ /node_modules/.bin/ng build --configuration production Locale data for 'ar-AR' cannot be found. Using locale data for 'ar'. WARNING [src/locale/messages.ar_AR.xlf]: File target locale ('ar') does not match configured locale ('ar-AR') @@ -38,6 +61,19 @@ encrypted document: ___________________ Archive: + ✔ corepack fragt nach Y für pnpm install @done (25-05-20 11:25) @project(2.16.1) + ✔ checkversions.sh @done (25-05-20 11:25) @project(2.16.1) + ✔ laeuft granian über socket ? Nein. @done (25-05-20 09:37) @project(2.16.1) + ✔ QPDF ? @done (25-05-16 11:59) @project(2.16.1) + QPDF_VERSION=11.9.0 + https://github.com/qpdf/qpdf/releases 12.2.0 + ✔ version checken @done (25-05-16 11:44) @project(2.16.1) + /src/src-ui/src/environments/environment.prod.ts + ✔ Dockerfile @done (25-05-16 11:41) @project(2.16.1) + docker.io/node:20-bookworm-slim + liegt jetzt in t/ + ✔ im docker ist schon ein npm drin ! @done (25-05-16 11:01) @project(2.16.1) + ✔ version 10.8.2, ich habe 10.9.2 @done (25-05-16 10:59) @project(2.16.1) ✔ conf.build-scripte @done (24-02-21 13:48) @project(bug) sed '/kmous=/d;/XM=/d;$s/$/XM=,/' ✔ zbar ohne imagemagick @done (24-02-21 09:01) @project(test) diff --git a/build.profile b/build.profile index 9cb4bb6..09d93f2 100644 --- a/build.profile +++ b/build.profile @@ -1,6 +1,6 @@ -BUILD_TAG=2.15.3 +BUILD_TAG=2.16.1 -BUILD_paperless=paperless-ngx-2.15.3 +BUILD_paperless=paperless-ngx-2.16.1 BUILD_DIR=build BUILD_SRC="${PWD}" @@ -25,10 +25,9 @@ BUILD_sqlite_SRCURL="https://beehive.molgen.mpg.de/46ef8fec4c97ec77ab27659ad27b2 BUILD_redis=redis-7.4.1 BUILD_redis_SRCURL="https://beehive.molgen.mpg.de/360809807c15da120affb356e55c6388/${BUILD_redis}.tar.gz" - -# wget "https://www.python.org/ftp/python/3.12.9/Python-3.12.9.tgz" -BUILD_python=Python-3.12.9 -BUILD_python_SRCURL="https://beehive.molgen.mpg.de/ce613c72fa9b32fb4f109762d61b249b/${BUILD_python}.tgz" +# wget "https://www.python.org/ftp/python/3.12.10/Python-3.12.10.tgz" +BUILD_python=Python-3.12.10 +BUILD_python_SRCURL="https://beehive.molgen.mpg.de/35c03f014408e26e2b06d576c19cac54/${BUILD_python}.tgz" # wget "https://nodejs.org/download/release/v22.15.1/node-v22.15.1.tar.gz" BUILD_node=node-22.15.1 @@ -47,8 +46,8 @@ BUILD_leptonica=leptonica-1.85.0 BUILD_leptonica_SRCURL="https://beehive.molgen.mpg.de/1732e999e2fef8721348edc2b7283224/${BUILD_leptonica}.tar.gz" # SRCURL="https://nginx.org/download/nginx-1.27.3.tar.gz" -BUILD_nginx=nginx-1.27.3 -BUILD_nginx_SRCURL="https://beehive.molgen.mpg.de/89773c781ff0c2cd876b03bbc094c258/${BUILD_nginx}.tar.gz" +#BUILD_nginx=nginx-1.27.3 +#BUILD_nginx_SRCURL="https://beehive.molgen.mpg.de/89773c781ff0c2cd876b03bbc094c258/${BUILD_nginx}.tar.gz" # SRCURL="https://github.com/liberationfonts/liberation-fonts/files/7261482/liberation-fonts-ttf-2.1.5.tar.gz" BUILD_libfontttf=liberation-fonts-ttf-2.1.5 @@ -89,3 +88,5 @@ BUILD_jbig2enc_SRCURL="https://github.com/agl/jbig2enc.git" BUILD_jbig2dec=jbig2dec-0.20 BUILD_jbig2dec_SRCURL="https://github.com/ArtifexSoftware/jbig2dec.git" +BUILD_qpdf=qpdf-12.2.0 +BUILD_qpdf_SRCURL="https://github.com/qpdf/qpdf.git" diff --git a/buildall.sh b/buildall.sh index fdf6670..b755dfc 100755 --- a/buildall.sh +++ b/buildall.sh @@ -15,15 +15,15 @@ PAPERLESS_ADMIN_USER=admin PAPERLESS_ADMIN_PASSWORD=$(head -c 12 < /dev/urandom | openssl base64 |head -1) PAPERLESS_ADMIN_MAIL=admin@localhost. -PAPERLESS_BIND_ADDR=localhost +PAPERLESS_BIND_ADDR=127.0.0.1 PAPERLESS_PORT=8880 +PAPERLESS_WEBSERVER_WORKERS=5 # PAPERLESS_HTTPS=8443 PAPERLESS_URL=https://localhost PAPERLESS_CSRF_TRUSTED_ORIGINS=http://localhost:\${PAPERLESS_PORT} -NGINX_SSL_DHPARAM=\${PROJECT}/certs/dhparam.pem -NGINX_SSL_CERTIFICATE=\${PROJECT}/certs/fullchain.pem -NGINX_SSL_CERTIFICATE_KEY=\${PROJECT}/certs/privkey.pem +# SSL_CERTIFICATE=\${PROJECT}/certs/fullchain.pem +# SSL_CERTIFICATE_KEY=\${PROJECT}/certs/privkey.pem _EOP_ ) ${BUILD_LOCAL} @@ -83,8 +83,9 @@ popd [ -d "${BUILD_DIR}/${BUILD_jbig2dec}" ] || ./jbig2dec.build.sh [ -d "${BUILD_DIR}/${BUILD_tesseract}" ] || ./tesseract.build.sh [ -d "${BUILD_DIR}/${BUILD_libfontttf}" ] || ./liberation-fonts-ttf.build.sh -[ -d "${BUILD_DIR}/${BUILD_nginx}" ] || ./nginx.build.sh -./nginx.conf.build.sh +#[ -d "${BUILD_DIR}/${BUILD_nginx}" ] || ./nginx.build.sh +[ -d "${BUILD_DIR}/${BUILD_qpdf}" ] || ./qpdf.build.sh +# ./nginx.conf.build.sh ./profile.build.sh ./startstop.build.sh [ -d "$PROJECT/${BUILD_paperless}" ] || ./paperless-ngx.build.sh diff --git a/checkversions.sh b/checkversions.sh index 0d12781..9d323e6 100755 --- a/checkversions.sh +++ b/checkversions.sh @@ -174,7 +174,7 @@ version_BUILD_node() { parse 'https://github.com/nodejs/node/releases' '(?<=Version ).*?(?= ).*LTS\)'|cut -d' ' -f1 } version_BUILD_python() { - parselast 'https://raw.githubusercontent.com/python/peps/main/peps/pep-0664.rst' '(?<=^- )[0-9].*(?=: )' + parselast 'https://raw.githubusercontent.com/python/peps/main/peps/pep-0693.rst' '(?<=^- )[0-9].*(?=: )' } version_BUILD_redis() { parse 'https://redis.io/downloads/' '(?<=>).*(?=\s+release notes)' @@ -224,6 +224,9 @@ version_BUILD_libjpegturbo() { version_BUILD_libtiff() { bparse 'https://download.osgeo.org/libtiff/?C=M&O=D' 'tiff-(\S+?).tar.gz' } +version_BUILD_qpdf() { + gdrem qpdf/qpdf 'refs/tags/v[0-9]*'|cut -d' ' -f2 +} function version() { Bi=$1 @@ -269,10 +272,12 @@ BUILD_pngquant BUILD_zbar BUILD_unpaper BUILD_libfontttf +BUILD_qpdf ) _xparts=( -BUILD_leptonica +BUILD_python +BUILD_qpdf ) # list of versions diff --git a/cleanall.sh b/cleanall.sh index 880a613..5588e7a 100644 --- a/cleanall.sh +++ b/cleanall.sh @@ -13,7 +13,7 @@ RMRF=( "${BUILD_jbig2enc}" "${BUILD_leptonica}" "${BUILD_libfontttf}" - "${BUILD_nginx}" +# "${BUILD_nginx}" "${BUILD_node}" "${BUILD_paperless}" "${BUILD_pngquant}" diff --git a/paperless-ngx.asgi.patch b/paperless-ngx.asgi.patch new file mode 100644 index 0000000..23fec7c --- /dev/null +++ b/paperless-ngx.asgi.patch @@ -0,0 +1,141 @@ +diff --git a/src/paperless/asgi.py b/src/paperless/asgi.py +index 8d63c347a..45d8daa3a 100644 +--- a/src/paperless/asgi.py ++++ b/src/paperless/asgi.py +@@ -1,13 +1,134 @@ + import os + +-from django.core.asgi import get_asgi_application ++# from django.core.asgi import get_asgi_application ++ ++import django ++from django.core.handlers.asgi import ASGIHandler ++ ++# https://code.djangoproject.com/ticket/36399 ++assert django.VERSION < (6,0), "Remove ASGIRequest backport." ++ ++class XXXASGIRequest(HttpRequest): ++ """ ++ Custom request subclass that decodes from an ASGI-standard request dict ++ and wraps request body handling. ++ """ ++ ++ # Number of seconds until a Request gives up on trying to read a request ++ # body and aborts. ++ body_receive_timeout = 60 ++ ++ def __init__(self, scope, body_file): ++ self.scope = scope ++ self._post_parse_error = False ++ self._read_started = False ++ self.resolver_match = None ++ self.path = scope["path"] ++ self.script_name = get_script_prefix(scope) ++ if self.script_name: ++ # TODO: Better is-prefix checking, slash handling? ++ self.path_info = scope["path"].removeprefix(self.script_name) ++ else: ++ self.path_info = scope["path"] ++ # HTTP basics. ++ self.method = self.scope["method"].upper() ++ # Ensure query string is encoded correctly. ++ query_string = self.scope.get("query_string", "") ++ if isinstance(query_string, bytes): ++ query_string = query_string.decode() ++ self.META = { ++ "REQUEST_METHOD": self.method, ++ "QUERY_STRING": query_string, ++ "SCRIPT_NAME": self.script_name, ++ "PATH_INFO": self.path_info, ++ # WSGI-expecting code will need these for a while ++ "wsgi.multithread": True, ++ "wsgi.multiprocess": True, ++ } ++ if self.scope.get("client"): ++ self.META["REMOTE_ADDR"] = self.scope["client"][0] ++ self.META["REMOTE_HOST"] = self.META["REMOTE_ADDR"] ++ self.META["REMOTE_PORT"] = self.scope["client"][1] ++ if self.scope.get("server"): ++ self.META["SERVER_NAME"] = self.scope["server"][0] ++ self.META["SERVER_PORT"] = str(self.scope["server"][1]) ++ else: ++ self.META["SERVER_NAME"] = "unknown" ++ self.META["SERVER_PORT"] = "0" ++ # Headers go into META. ++ for name, value in self.scope.get("headers", []): ++ name = name.decode("latin1") ++ if name == "content-length": ++ corrected_name = "CONTENT_LENGTH" ++ elif name == "content-type": ++ corrected_name = "CONTENT_TYPE" ++ else: ++ corrected_name = "HTTP_%s" % name.upper().replace("-", "_") ++ # HTTP/2 say only ASCII chars are allowed in headers, but decode ++ # latin1 just in case. ++ value = value.decode("latin1") ++ if corrected_name == "HTTP_COOKIE": ++ existing = self.META.get("HTTP_COOKIE") ++ if existing is not None: ++ value = existing + value ++ self.META["HTTP_COOKIE"] = value ++ else: ++ if corrected_name in self.META: ++ value = self.META[corrected_name] + "," + value ++ self.META[corrected_name] = value ++ # Pull out request encoding, if provided. ++ self._set_content_type_params(self.META) ++ # Directly assign the body file to be our stream. ++ self._stream = body_file ++ # Other bits. ++ self.resolver_match = None ++ ++ @cached_property ++ def GET(self): ++ return QueryDict(self.META["QUERY_STRING"]) ++ ++ def _get_scheme(self): ++ return self.scope.get("scheme") or super()._get_scheme() ++ ++ def _get_post(self): ++ if not hasattr(self, "_post"): ++ self._load_post_and_files() ++ return self._post ++ ++ def _set_post(self, post): ++ self._post = post ++ ++ def _get_files(self): ++ if not hasattr(self, "_files"): ++ self._load_post_and_files() ++ return self._files ++ ++ POST = property(_get_post, _set_post) ++ FILES = property(_get_files) ++ ++ @cached_property ++ def COOKIES(self): ++ return parse_cookie(self.META.get("HTTP_COOKIE", "")) ++ ++ def close(self): ++ super().close() ++ self._stream.close() ++ ++ ++class BackportASGIHandler(ASGIHandler): ++ request_class = XXXASGIHandler ++ + + # Fetch Django ASGI application early to ensure AppRegistry is populated + # before importing consumers and AuthMiddlewareStack that may import ORM + # models. + + os.environ.setdefault("DJANGO_SETTINGS_MODULE", "paperless.settings") +-django_asgi_app = get_asgi_application() ++# django_asgi_app = get_asgi_application() ++ ++django.setup(set_prefix=False) ++ ++application = BackportASGIHandler() + + from channels.auth import AuthMiddlewareStack # noqa: E402 + from channels.routing import ProtocolTypeRouter # noqa: E402 diff --git a/paperless-ngx.build.sh b/paperless-ngx.build.sh index 0b9c6a1..803d138 100755 --- a/paperless-ngx.build.sh +++ b/paperless-ngx.build.sh @@ -6,12 +6,12 @@ set -e . build.profile . ${PREFIX}/${BUILD_pngquant}/profile +. ${PREFIX}/${BUILD_qpdf}/profile . ${PREFIX}/${BUILD_ffmpeg}/profile . ${PREFIX}/${BUILD_unpaper}/profile . ${PREFIX}/${BUILD_sqlite}/profile . ${PREFIX}/${BUILD_imagemagick}/profile . ${PREFIX}/${BUILD_zbar}/profile -. ${PREFIX}/${BUILD_nginx}/profile . ${PREFIX}/${BUILD_python}/profile . ${PREFIX}/${BUILD_node}/profile . ${PREFIX}/${BUILD_leptonica}/profile @@ -46,6 +46,10 @@ else popd fi +#pushd ${BUILD_PKG} +#patch -p1 --silent --backup --forward --input=${BUILD_SRC}/paperless-ngx.asgi.patch +#popd + # we need paperless-ngx to be extracted since it reads paperless.conf.example # what a mess 8-( pushd ${BUILD_SRC} @@ -76,7 +80,8 @@ pushd src-ui npm update -g pnpm npm install -g corepack@latest corepack enable -pnpm install +# https://github.com/nodejs/corepack/issues/550 +COREPACK_ENABLE_DOWNLOAD_PROMPT=0 pnpm install --no-color #npm update npm -g --no-fund #ulimit -n 512 diff --git a/profile.build.sh b/profile.build.sh index 7bc30c9..d6397a8 100755 --- a/profile.build.sh +++ b/profile.build.sh @@ -19,7 +19,6 @@ PATH=$HOME/bin:/bin:/sbin:/usr/bin:/usr/sbin . ${PROJECT}/${BUILD_python}/profile . ${PROJECT}/${BUILD_node}/profile . ${PROJECT}/${BUILD_redis}/profile -. ${PROJECT}/${BUILD_nginx}/profile . ${PROJECT}/${BUILD_imagemagick}/profile . ${PROJECT}/${BUILD_unpaper}/profile . ${PROJECT}/${BUILD_ffmpeg}/profile @@ -29,6 +28,7 @@ PATH=$HOME/bin:/bin:/sbin:/usr/bin:/usr/sbin . ${PROJECT}/${BUILD_libtiff}/profile . ${PROJECT}/${BUILD_libjpegturbo}/profile . ${PROJECT}/${BUILD_libwebp}/profile +. ${PROJECT}/${BUILD_qpdf}/profile XDG_CACHE_HOME=${XDG_CACHE_HOME} TMPDIR=${TMPDIR} diff --git a/qpdf.build.sh b/qpdf.build.sh new file mode 100755 index 0000000..e66508b --- /dev/null +++ b/qpdf.build.sh @@ -0,0 +1,60 @@ +#!/bin/bash +{ +set -x +set -e +set -u + +. build.profile + +function B_QPDF { + +SRCURL="${BUILD_qpdf_SRCURL}" + +BUILD_PKG=${BUILD_qpdf} + +PREFIX="${PREFIX}/${BUILD_PKG}" + +mkdir -p "${PREFIX}" + +cat >"${PREFIX}"/profile <<-EOF +PATH=${PREFIX}/bin:\$PATH +PKG_CONFIG_PATH=${PROJECT}/${BUILD_PKG}/lib/pkgconfig\${PKG_CONFIG_PATH:+:}\${PKG_CONFIG_PATH:-} +LD_LIBRARY_PATH=${PROJECT}/${BUILD_PKG}/lib\${LD_LIBRARY_PATH:+:}\${LD_LIBRARY_PATH:-} +EOF + +if [ ! -d "${BUILD_PKG}" ]; then + if [ ! -e "${BUILD_PKG}.tar" ]; then + git clone ${SRCURL} --branch "v${BUILD_PKG##*-}" --single-branch "${BUILD_PKG}" + tar cf "${BUILD_PKG}.tar" "${BUILD_PKG}" + else + tar -xf "${BUILD_PKG}.tar" + fi +fi + +pushd "${BUILD_PKG}" + +local _conf=( + -DCMAKE_INSTALL_PREFIX=${PREFIX} + -DCMAKE_INSTALL_LIBDIR=lib + -DCMAKE_COLOR_MAKEFILE=OFF + -DCMAKE_VERBOSE_MAKEFILE=ON +) + +rm -rf b +mkdir b +pushd b +cmake .. "${_conf[@]}" +make -j "${NPROC}" install +popd + +popd + +} + +pushd "${BUILD_DIR}" + +B_QPDF + +popd + +} diff --git a/startstop.build.sh b/startstop.build.sh index 5e94bcc..787d2ff 100755 --- a/startstop.build.sh +++ b/startstop.build.sh @@ -16,7 +16,7 @@ export XDG_RUNTIME_DIR=/run/user/\$(id -u \$USER) . profile function rm_pidfiles() { - rm -fv "\${PIDFILE}" "\${PGIDFILE}" + rm -fv "\${PIDFILE}" "\${PGIDFILE}" "\${@}" } function pwait() { @@ -75,6 +75,64 @@ srv_gunicorn_start() { gunicorn "\${_opts[@]}" } +#### granian #### + +srv_granian_start() { +set -x + cd "${PROJECT}/paperless-ngx/src" + rm -fv "${DEVSHM}/granian.pid" + _opts=( +# no unix domain socket support in granian +# https://github.com/emmett-framework/granian/issues/97 +# https://github.com/paperless-ngx/paperless-ngx/discussions/9941 +# --host "unix:${DEVSHM}/granian.sock" + --host ${PAPERLESS_BIND_ADDR} + --port ${PAPERLESS_PORT} + ${SSL_CERTIFICATE:+--ssl-certificate ${SSL_CERTIFICATE}} + ${SSL_CERTIFICATE_KEY:+--ssl-keyfile ${SSL_CERTIFICATE_KEY}} + --workers ${PAPERLESS_WEBSERVER_WORKERS:-1} + --interface asginl + --pid-file "${DEVSHM}/granian.pid" + --ws + --loop uvloop + --access-log + paperless.asgi:application + ) + trap rm_pidfiles EXIT + granian "\${_opts[@]}" +} + +srv_granian_stop() { + if [[ -s "${DEVSHM}/granian.pid" ]]; then + local pid + read -a pid < "${DEVSHM}/granian.pid" + kill -- \${pid} + echo "waiting for process \${pid} to die" + tail -f --pid=\${pid} /dev/null + else + echo "# no pid file found: ${DEVSHM}/granian.pid" + fi + if [[ -s "\${PGIDFILE}" ]]; then + local pgid + read -a pgid < "\${PGIDFILE}" + echo "killing process group \${pgid}" + kill -- -\${pgid} + echo "waiting for process group \${pgid} to die" + pwait --echo --pgroup \${pgid} + else + if [[ -s "\${PIDFILE}" ]]; then + local pid + read -a pid < "\${PIDFILE}" + kill -- \${pid} + echo "waiting for process \${pid} to die" + tail -f --pid=\${pid} /dev/null + else + echo "# no pid file found: \${PGIDFILE}" + fi + fi + rm -fv "${DEVSHM}/granian.pid" +} + #### consumer #### srv_consumer_start() { @@ -217,7 +275,8 @@ cmd="\${1:-help}" srv="\${2:-all}" if [[ \${srv} = 'all' ]]; then - _srv=(redis nginx gunicorn consumer scheduler worker) + # _srv=(redis nginx granian consumer scheduler worker) + _srv=(redis granian consumer scheduler worker) if [[ \${cmd} = 'start' ]]; then srv_migrate