#!/usr/bin/env python3
import logging
import os
import sqlite3
import time

import requests
from requests.auth import HTTPBasicAuth
from requests.exceptions import RequestException

# Paperless server settings.
# Every value can be overridden through an environment variable so the same
# script runs on different hosts without editing the source. The defaults are
# the values that used to be hard-coded here (the NAS setup); the macOS setup
# noted previously used the password "admin", i.e. CLEANUP_API_PASSWORD=admin.
# The CLEANUP_ prefix is deliberately distinct from Paperless's own
# PAPERLESS_* settings so the two cannot collide.
PAPERLESS_URL = os.environ.get("CLEANUP_API_URL", "http://localhost:8000/api")
AUTH = HTTPBasicAuth(
    os.environ.get("CLEANUP_API_USER", "admin"),
    os.environ.get("CLEANUP_API_PASSWORD", "paperless"),
)  # HTTP Basic Auth credentials attached to every API request

# Logging configuration: INFO and above, formatted as "time [LEVEL] message".
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

# Connect to the SQLite database; `conn` and `cursor` are shared by the
# functions below and closed at the end of the script.
DB_PATH = os.environ.get("CLEANUP_DB_PATH", "/usr/src/paperless/data/db.sqlite3")
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()

# API request wrapper with retries; supports GET, POST, DELETE, etc.
def api_request(method, url, data=None, retries=5, *, timeout=5, retry_delay=2):
    """Send an authenticated HTTP request, retrying when it fails.

    Args:
        method: HTTP verb passed to ``requests.request`` (e.g. "GET", "DELETE").
        url: Full URL of the endpoint.
        data: Optional object sent as the JSON request body.
        retries: Maximum number of attempts.
        timeout: Per-request timeout in seconds.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        For a 200/201/204 response: the decoded JSON body, or ``True`` when
        the body is empty, is not valid JSON, or decodes to JSON ``null``.
        ``None`` when the server answers 404 or when every attempt failed,
        so ``None`` always means "no success".
    """
    for attempt in range(retries):
        try:
            # Only the network call is guarded: a body that fails to decode
            # must not be mistaken for a transport error and trigger a
            # re-send of a request that already succeeded.
            response = requests.request(method, url, json=data, auth=AUTH, timeout=timeout)
        except RequestException as e:
            logging.error(f"API 请求异常: {method} {url}, 错误: {e}")
        else:
            if response.status_code in (200, 201, 204):
                if not response.text:
                    return True
                try:
                    payload = response.json()
                except ValueError:
                    # Successful response whose body is not JSON: report
                    # success instead of raising or retrying.
                    return True
                # Keep None reserved for failure even if the body is `null`.
                return True if payload is None else payload
            if response.status_code == 404:
                # The resource does not exist; retrying cannot help.
                logging.warning(f"API 资源未找到: {method} {url}")
                return None
            logging.error(f"API 请求失败: {method} {url}, 状态码: {response.status_code}, 响应: {response.text}")

        # Pause before the next attempt, but not after the last one.
        if attempt < retries - 1:
            logging.warning(f"请求失败,等待 {retry_delay} 秒后重试 ({attempt+1}/{retries})...")
            time.sleep(retry_delay)

    logging.error(f"API 请求最终失败: {method} {url}")
    return None

# Fetch the ids of documents whose page_count is at most `max_pages`
def get_documents_to_delete(max_pages=6):
    """Return the ids of non-deleted documents with few pages.

    Queries the ``documents_document`` table through the module-level
    ``cursor`` for rows whose ``page_count`` is less than or equal to
    ``max_pages`` and whose ``deleted_at`` is NULL or an empty string.

    Args:
        max_pages: Inclusive upper bound on ``page_count``; defaults to 6.

    Returns:
        A list of document ids. An empty list is returned when nothing
        matches or when the query raises ``sqlite3.Error`` (which is logged).
    """
    query = (
        "SELECT id FROM documents_document "
        "WHERE page_count <= ? AND (deleted_at IS NULL OR deleted_at = '')"
    )
    try:
        # Bind the threshold as a parameter rather than formatting it into SQL.
        cursor.execute(query, (max_pages,))
        return [row[0] for row in cursor.fetchall()]
    except sqlite3.Error as e:
        logging.error(f"Error querying data: {e}")
        return []

# Delete documents
def delete_documents(doc_ids):
    """Delete each document through the API and log the outcome.

    Issues ``DELETE {PAPERLESS_URL}/documents/<id>/`` for every id via
    ``api_request`` and logs a per-document result plus a final summary.

    Args:
        doc_ids: Sequence of document ids to delete.

    Returns:
        The number of documents that were deleted successfully.
    """
    succ_count = 0
    for doc_id in doc_ids:
        url = f"{PAPERLESS_URL}/documents/{doc_id}/"
        # api_request returns None on failure; any other value is a success,
        # including an empty JSON body, which a truthiness test would have
        # misreported as a failed deletion.
        if api_request("DELETE", url) is not None:
            logging.info(f"✅ 文档 {doc_id} 删除成功")
            succ_count += 1
        else:
            logging.error(f"❌ 文档 {doc_id} 删除失败")
    logging.info(f"\ntotal count: {len(doc_ids)}, deleted: {succ_count}")
    return succ_count

if __name__ == "__main__":
    # Script entry point: look up the candidate documents and delete them.
    try:
        doc_ids = get_documents_to_delete()
        if doc_ids:
            delete_documents(doc_ids)
        else:
            logging.info("没有需要删除的文档")
    finally:
        # Close the database connection even if the run above raised.
        conn.close()