#!/usr/bin/env python3
"""Bulk-delete short documents from a Paperless server.

The ids of the documents to remove are read directly from the Paperless
SQLite database; each document is then deleted through the REST API.
"""
import logging
import os
import sqlite3
import time

import requests
from requests.auth import HTTPBasicAuth
from requests.exceptions import RequestException

# Paperless server settings.
PAPERLESS_URL = "http://localhost:8000/api"
# Basic Auth credentials. The pair below is the one used on the NAS; on the
# Mac use HTTPBasicAuth("admin", "admin") instead.
AUTH = HTTPBasicAuth("admin", "paperless")

# Logging configuration.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

# Connect to the SQLite database.
DB_PATH = "/usr/src/paperless/data/db.sqlite3"
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()


def api_request(method, url, data=None, retries=5):
    """Send an authenticated API request, retrying when it fails.

    Args:
        method: HTTP verb, e.g. "GET", "POST" or "DELETE".
        url: Full URL of the endpoint.
        data: Optional payload, sent as the JSON request body.
        retries: Maximum number of attempts.

    Returns:
        The decoded JSON body of a 200/201/204 response, or True when that
        response has an empty body or a body that is not valid JSON.
        None when the server answers 404 or when every attempt fails.
    """
    for attempt in range(retries):
        try:
            response = requests.request(method, url, json=data, auth=AUTH, timeout=5)
        except RequestException as e:
            logging.error(f"API 请求异常: {method} {url}, 错误: {e}")
        else:
            if response.status_code in (200, 201, 204):
                if not response.text:
                    return True
                try:
                    return response.json()
                except ValueError:
                    # The request itself succeeded; a body that is not JSON
                    # must not trigger a retry (repeating a DELETE that
                    # already went through would come back as 404).
                    return True
            if response.status_code == 404:
                # Not retried: the resource does not exist.
                logging.warning(f"API 资源未找到: {method} {url}")
                return None
            logging.error(
                f"API 请求失败: {method} {url}, 状态码: {response.status_code}, 响应: {response.text}"
            )

        # Pause before the next attempt, but not after the last one.
        if attempt < retries - 1:
            logging.warning(f"请求失败,等待 2 秒后重试 ({attempt+1}/{retries})...")
            time.sleep(2)

    logging.error(f"API 请求最终失败: {method} {url}")
    return None


def get_documents_to_delete(max_pages=6):
    """Return the ids of documents that have at most ``max_pages`` pages.

    Rows whose ``deleted_at`` column is neither NULL nor an empty string are
    skipped.

    Args:
        max_pages: Inclusive upper bound on ``page_count``.

    Returns:
        A list of document ids; an empty list when the query fails.
    """
    try:
        cursor.execute(
            "SELECT id FROM documents_document "
            "WHERE page_count <= ? AND (deleted_at IS NULL OR deleted_at = '')",
            (max_pages,),
        )
        return [row[0] for row in cursor.fetchall()]
    except sqlite3.Error as e:
        logging.error(f"Error querying data: {e}")
        return []


def delete_documents(doc_ids):
    """Delete each document in ``doc_ids`` through the API and log the outcome.

    Args:
        doc_ids: Iterable collection of document ids (its length is logged).

    Returns:
        The number of documents that were deleted successfully.
    """
    succ_count = 0
    for doc_id in doc_ids:
        url = f"{PAPERLESS_URL}/documents/{doc_id}/"
        result = api_request("DELETE", url)
        # api_request signals failure with None only; a successful response
        # may legitimately decode to a falsy value such as {}.
        if result is not None:
            logging.info(f"✅ 文档 {doc_id} 删除成功")
            succ_count += 1
        else:
            logging.error(f"❌ 文档 {doc_id} 删除失败")
    logging.info(f"\ntotal count: {len(doc_ids)}, deleted: {succ_count}")
    return succ_count


def main():
    """Look up the documents to delete and remove them via the API."""
    try:
        doc_ids = get_documents_to_delete()
        if doc_ids:
            delete_documents(doc_ids)
        else:
            logging.info("没有需要删除的文档")
    finally:
        # Close the database connection even if something above raised.
        conn.close()


if __name__ == "__main__":
    main()