modify scripts

This commit is contained in:
2025-07-12 13:59:28 +08:00
parent 96790a8365
commit 83d0745695
5 changed files with 436 additions and 0 deletions

View File

@@ -0,0 +1,77 @@
#!/usr/bin/env python3
import os
import sqlite3
import requests
import time
from requests.auth import HTTPBasicAuth
from requests.exceptions import RequestException
import logging
# Paperless server settings. The defaults are the values this script has
# always used; each one can be overridden through an environment variable so
# that real credentials do not have to be committed to the source file.
# The variable names are specific to this script on purpose, so they do not
# clash with settings that the surrounding environment may already define.
PAPERLESS_URL = os.environ.get("PAPERLESS_API_URL", "http://localhost:8000/api")
AUTH = HTTPBasicAuth(
    os.environ.get("PAPERLESS_API_USER", "admin"),
    os.environ.get("PAPERLESS_API_PASSWORD", "admin"),
)  # HTTP Basic Auth credentials sent with every API request

# Logging configuration: INFO level, timestamped messages.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

# Connect to the SQLite database; `conn` and `cursor` are shared module-wide.
DB_PATH = os.environ.get("PAPERLESS_DB_PATH", "/usr/src/paperless/data/db.sqlite3")
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
# API request wrapper with retries; works for GET, POST, DELETE, etc.
def api_request(method, url, data=None, retries=5):
    """Send an authenticated request to the Paperless API, retrying on failure.

    Args:
        method: HTTP method name, e.g. "GET", "POST" or "DELETE".
        url: Full URL of the API endpoint.
        data: Optional payload, sent as the JSON request body.
        retries: Maximum number of attempts.

    Returns:
        The decoded JSON body of a 200/201/204 response that has one, True
        for a successful response whose body is empty or is not valid JSON,
        or None when the resource was not found (404) or every attempt
        failed. Note that a decoded body may itself be falsy (e.g. ``[]``),
        so callers that only test truthiness will treat that as a failure.
    """
    for attempt in range(retries):
        try:
            response = requests.request(method, url, json=data, auth=AUTH, timeout=5)
            if response.status_code in (200, 201, 204):
                if not response.text:
                    return True
                try:
                    return response.json()
                except ValueError:
                    # The server already reported success. Retrying (or
                    # crashing) because the body is not JSON would re-send a
                    # request that has gone through, e.g. a second DELETE.
                    logging.warning(f"API 响应不是有效的 JSON: {method} {url}")
                    return True
            if response.status_code == 404:
                # A missing resource will not appear on retry; give up now.
                logging.warning(f"API 资源未找到: {method} {url}")
                return None
            logging.error(f"API 请求失败: {method} {url}, 状态码: {response.status_code}, 响应: {response.text}")
        except RequestException as e:
            logging.error(f"API 请求异常: {method} {url}, 错误: {e}")
        # Pause between attempts, but not after the last one.
        if attempt < retries - 1:
            logging.warning(f"请求失败,等待 2 秒后重试 ({attempt+1}/{retries})...")
            time.sleep(2)
    logging.error(f"API 请求最终失败: {method} {url}")
    return None
# Fetch the ids of not-yet-deleted documents with at most `max_page_count` pages.
def get_documents_to_delete(max_page_count=6, db_cursor=None):
    """Return the ids of documents that are short enough to be deleted.

    A document qualifies when its ``page_count`` is less than or equal to
    ``max_page_count`` and its ``deleted_at`` column is NULL or an empty
    string.

    Args:
        max_page_count: Inclusive upper bound on ``page_count``. Defaults to
            6, the threshold this script has always used.
        db_cursor: Cursor to run the query on. Defaults to the module-level
            ``cursor``.

    Returns:
        A list of document ids; an empty list when nothing matches or when
        the query fails (the SQLite error is logged, not raised).
    """
    active_cursor = cursor if db_cursor is None else db_cursor
    try:
        # The threshold is bound as a parameter rather than formatted into
        # the SQL text.
        active_cursor.execute(
            "SELECT id FROM documents_document "
            "WHERE page_count <= ? AND (deleted_at IS NULL OR deleted_at = '')",
            (max_page_count,),
        )
        return [row[0] for row in active_cursor.fetchall()]
    except sqlite3.Error as e:
        logging.error(f"Error querying data: {e}")
        return []
# Delete documents through the API, one request per id.
def delete_documents(doc_ids):
    """Issue a DELETE request for every id in ``doc_ids`` and log the outcome.

    Each document is reported individually as deleted or failed, and a final
    summary line gives the total number of ids and how many were deleted.

    Args:
        doc_ids: Sized iterable of document ids to delete.
    """
    deleted_total = 0
    for doc_id in doc_ids:
        outcome = api_request("DELETE", f"{PAPERLESS_URL}/documents/{doc_id}/")
        if not outcome:
            logging.error(f"❌ 文档 {doc_id} 删除失败")
            continue
        logging.info(f"✅ 文档 {doc_id} 删除成功")
        deleted_total += 1
    logging.info(f"\ntotal count: {len(doc_ids)}, deleted: {deleted_total}")
if __name__ == "__main__":
    try:
        doc_ids = get_documents_to_delete()
        if doc_ids:
            delete_documents(doc_ids)
        else:
            logging.info("没有需要删除的文档")
    finally:
        # Close the database connection even if the run above raised.
        conn.close()