modify scripts

This commit is contained in:
2025-07-12 13:59:28 +08:00
parent 96790a8365
commit 83d0745695
5 changed files with 436 additions and 0 deletions

View File

@ -0,0 +1,64 @@
-------------------------------------------------------
------------------- paperless 无纸化pdf管理 ------------
-------------------------------------------------------
## Prefer creating the service from a docker-compose.yml rather than this bare command:
## the backend database and Redis that paperless uses still have to be specified.
# What the flags below do: start a detached container named "paperless" on the
# "devops" network, pin the image platform to linux/x86_64, set TZ to Asia/Shanghai
# and mount the host's /etc/localtime read-only, bind-mount dockers/paperless/pdfs
# and dockers/paperless/db (relative to the current directory) into the container,
# pass USERMAP_UID / USERMAP_GID = 1000, and publish port 8000 on the host.
docker run -itd \
--name paperless \
--network devops \
--platform linux/x86_64 \
-e TZ="Asia/Shanghai" \
-v /etc/localtime:/etc/localtime:ro \
-v "$(pwd)/dockers/paperless/pdfs:/usr/src/paperless/data" \
-v "$(pwd)/dockers/paperless/db:/usr/src/paperless/db" \
-e USERMAP_UID=1000 -e USERMAP_GID=1000 \
-p 8000:8000 \
ghcr.io/paperless-ngx/paperless-ngx
# After the container has been created, set the password manually
# (run ONE of the two commands below; currently set to admin / admin).
docker compose run --rm webserver createsuperuser
python3 manage.py createsuperuser
# Existing documents: put them in the designated directory and wait for the
# system to load them automatically (or start the consumer by hand, as below).
cd /path/to/paperless/src/
python3 manage.py document_consumer
# 自动解析文件名
https://docs.paperless-ngx.com/advanced_usage/#file-name-handling
https://docs.paperless-ngx.com/configuration/#PAPERLESS_POST_CONSUME_SCRIPT
environment:
PAPERLESS_POST_CONSUME_SCRIPT: "/usr/src/paperless/scripts/parse_filename.py"
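paperless 默认不会删除重复的文件;如果重复添加同一个文件,它会不停地扫描、加载、报错。当时没找到对应的配置,所以直接修改源码解决(注:下面的代码读取了 settings.CONSUMER_DELETE_DUPLICATES,官方配置项 PAPERLESS_CONSUMER_DELETE_DUPLICATES=true 应该可以达到同样效果,无需改源码,待确认):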
/usr/src/paperless/src/documents/consumer.py
def pre_check_duplicate(self):
    """
    Using the MD5 of the file, check this exact file doesn't already exist.

    The digest of ``self.input_doc.original_file`` is matched against both the
    ``checksum`` and the ``archive_checksum`` of the documents returned by
    ``Document.global_objects``.  When a match is found the input file is
    removed from disk and ``self._fail`` is called with a short status
    message and a detailed log message.  When nothing matches, the method
    returns without doing anything.
    """
    with open(self.input_doc.original_file, "rb") as f:
        checksum = hashlib.md5(f.read()).hexdigest()

    # Fetch the matching document once.  The previous exists()/get()/get()/
    # first() sequence issued four queries, and get() raises
    # MultipleObjectsReturned as soon as two documents share the checksum.
    existing_doc = Document.global_objects.filter(
        Q(checksum=checksum) | Q(archive_checksum=checksum),
    ).first()
    if existing_doc is None:
        return

    msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS
    log_msg = f"Not consuming {self.filename}: It is a duplicate of {existing_doc.title} (#{existing_doc.pk})."

    # A set deleted_at is reported as "in the trash" with its own status code.
    if existing_doc.deleted_at is not None:
        msg = ConsumerStatusShortMessage.DOCUMENT_ALREADY_EXISTS_IN_TRASH
        log_msg += " Note: existing document is in the trash."

    # HACK (local patch): `or True` forces the duplicate input file to be
    # deleted regardless of the CONSUMER_DELETE_DUPLICATES setting, so the
    # same file is not rescanned and re-reported over and over.
    # NOTE(review): enabling that setting in the configuration would
    # presumably make this source modification unnecessary - confirm.
    if settings.CONSUMER_DELETE_DUPLICATES or True:
        os.unlink(self.input_doc.original_file)

    self._fail(
        msg,
        log_msg,
    )