Files
devops/docker/paperless/plugins/batch_del.py
2025-07-12 13:59:28 +08:00

77 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
import os
import sqlite3
import requests
import time
from requests.auth import HTTPBasicAuth
from requests.exceptions import RequestException
import logging
# Paperless server information: base URL of the REST API.
PAPERLESS_URL = "http://localhost:8000/api"
# NOTE(review): credentials are hard-coded in source — consider loading them
# from the environment or a secrets store before using this outside local dev.
AUTH = HTTPBasicAuth("admin", "admin")  # HTTP Basic Auth credentials
# Logging configuration: INFO level, timestamped "[LEVEL] message" lines.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
# Connect to the SQLite database.
# NOTE(review): the connection and cursor are created at module level, i.e.
# as soon as this file is executed or imported.
DB_PATH = "/usr/src/paperless/data/db.sqlite3"
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
# API request wrapper with retries; works for any HTTP method (GET, POST, ...).
def api_request(method, url, data=None, retries=5, *, timeout=5, retry_delay=2):
    """Send an authenticated request to the API, retrying on failure.

    Args:
        method: HTTP method name handed to ``requests.request``.
        url: Full URL of the endpoint.
        data: Optional payload sent as the JSON request body.
        retries: Maximum number of attempts.
        timeout: Per-request timeout in seconds.
        retry_delay: Seconds to sleep between two attempts.

    Returns:
        The decoded JSON body of a 200/201/204 response, ``True`` when such a
        response has an empty or non-JSON body, or ``None`` when the server
        answers 404 or every attempt fails.
    """
    for attempt in range(retries):
        try:
            # Keep the try body minimal: only the network call can raise here.
            response = requests.request(
                method, url, json=data, auth=AUTH, timeout=timeout
            )
        except RequestException as e:
            logging.error(f"API 请求异常: {method} {url}, 错误: {e}")
        else:
            if response.status_code in (200, 201, 204):
                if not response.text:
                    return True
                try:
                    return response.json()
                except ValueError:
                    # The request itself succeeded; an undecodable body must
                    # not trigger a retry (which would re-send the request).
                    logging.warning(f"API 响应不是有效的 JSON: {method} {url}")
                    return True
            if response.status_code == 404:
                logging.warning(f"API 资源未找到: {method} {url}")
                return None
            logging.error(
                f"API 请求失败: {method} {url}, 状态码: {response.status_code}, 响应: {response.text}"
            )
        # Only wait when another attempt is still going to happen.
        if attempt < retries - 1:
            logging.warning(
                f"请求失败,等待 {retry_delay} 秒后重试 ({attempt+1}/{retries})..."
            )
            time.sleep(retry_delay)
    logging.error(f"API 请求最终失败: {method} {url}")
    return None
# Fetch from the database the ids of documents with page_count <= max_pages.
def get_documents_to_delete(max_pages=6, db_cursor=None):
    """Return the ids of not-yet-deleted documents with few pages.

    Args:
        max_pages: Upper bound (inclusive) on ``page_count``; defaults to 6.
        db_cursor: Cursor to run the query on. When ``None`` the module-level
            ``cursor`` is used.

    Returns:
        A list of document ids whose ``page_count`` is at most *max_pages* and
        whose ``deleted_at`` is NULL or empty. An empty list is returned when
        the query raises ``sqlite3.Error`` (the error is logged).
    """
    active_cursor = cursor if db_cursor is None else db_cursor
    query = (
        "SELECT id FROM documents_document "
        "WHERE page_count <= ? AND (deleted_at IS NULL OR deleted_at = '')"
    )
    try:
        # Bind the threshold as a parameter instead of embedding it in the SQL.
        active_cursor.execute(query, (max_pages,))
        return [row[0] for row in active_cursor.fetchall()]
    except sqlite3.Error as e:
        logging.error(f"Error querying data: {e}")
        return []
# Delete documents through the API.
def delete_documents(doc_ids):
    """Issue a DELETE request for every id in *doc_ids* and log the outcome.

    Args:
        doc_ids: Sized collection of document ids to delete.

    Returns:
        The number of documents whose DELETE request succeeded.
    """
    succ_count = 0
    for doc_id in doc_ids:
        url = f"{PAPERLESS_URL}/documents/{doc_id}/"
        # api_request signals failure with None only; a successful response
        # may carry a falsy JSON body, so compare against None explicitly.
        if api_request("DELETE", url) is not None:
            logging.info(f"✅ 文档 {doc_id} 删除成功")
            succ_count += 1
        else:
            logging.error(f"❌ 文档 {doc_id} 删除失败")
    logging.info(f"\ntotal count: {len(doc_ids)}, deleted: {succ_count}")
    return succ_count
if __name__ == "__main__":
    try:
        doc_ids = get_documents_to_delete()
        if doc_ids:
            delete_documents(doc_ids)
        else:
            logging.info("没有需要删除的文档")
    finally:
        # Close the database connection even when querying or deleting raises.
        conn.close()