modify scripts
This commit is contained in:
77
docker/paperless/plugins/batch_del.py
Normal file
77
docker/paperless/plugins/batch_del.py
Normal file
@ -0,0 +1,77 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import sqlite3
|
||||
import requests
|
||||
import time
|
||||
from requests.auth import HTTPBasicAuth
|
||||
from requests.exceptions import RequestException
|
||||
import logging
|
||||
|
||||
# Paperless server settings.
# Each value can be overridden with a script-specific environment variable;
# the fallbacks are the values that used to be hard-coded here, so running
# the script without any of these variables set behaves exactly as before.
PAPERLESS_URL = os.environ.get("BATCH_DEL_API_URL", "http://localhost:8000/api")
AUTH = HTTPBasicAuth(
    os.environ.get("BATCH_DEL_USER", "admin"),
    os.environ.get("BATCH_DEL_PASSWORD", "admin"),
)  # HTTP Basic Auth credentials sent with every API request

# Logging configuration
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")

# Connect to the SQLite database
DB_PATH = os.environ.get("BATCH_DEL_DB_PATH", "/usr/src/paperless/data/db.sqlite3")
conn = sqlite3.connect(DB_PATH)
cursor = conn.cursor()
|
||||
|
||||
# API request wrapper with retries; works for any HTTP method (GET, POST, DELETE, ...).
def api_request(method, url, data=None, retries=5):
    """Send an authenticated request to the API, retrying on failure.

    Args:
        method: HTTP method name handed to ``requests.request``.
        url: Full URL of the endpoint to call.
        data: Optional payload sent as the JSON request body.
        retries: Maximum number of attempts before giving up.

    Returns:
        The decoded JSON body when a 200/201/204 response carries one,
        ``True`` when such a response has an empty or non-JSON body, and
        ``None`` when the server answers 404 or every attempt fails.
    """
    for attempt in range(retries):
        try:
            response = requests.request(method, url, json=data, auth=AUTH, timeout=5)
        except RequestException as e:
            logging.error(f"API 请求异常: {method} {url}, 错误: {e}")
        else:
            if response.status_code in (200, 201, 204):
                if not response.text:
                    return True
                try:
                    return response.json()
                except ValueError:
                    # The request itself succeeded. A body that is not JSON
                    # must not trigger a retry (that would re-send e.g. a
                    # DELETE) nor escape as an unhandled exception.
                    logging.warning(f"API 响应不是有效的 JSON: {method} {url}")
                    return True
            if response.status_code == 404:
                # A missing resource will not appear on retry; give up now.
                logging.warning(f"API 资源未找到: {method} {url}")
                return None
            logging.error(f"API 请求失败: {method} {url}, 状态码: {response.status_code}, 响应: {response.text}")

        # Pause between attempts, but not after the final one.
        if attempt < retries - 1:
            logging.warning(f"请求失败,等待 2 秒后重试 ({attempt+1}/{retries})...")
            time.sleep(2)

    logging.error(f"API 请求最终失败: {method} {url}")
    return None
|
||||
|
||||
# Fetch from the database the ids of documents whose page_count is <= 6.
def get_documents_to_delete():
    """Return the ids of documents selected for deletion.

    Runs a query on the module-level ``cursor`` that selects ``id`` from
    ``documents_document`` where ``page_count <= 6`` and ``deleted_at`` is
    NULL or an empty string.

    Returns:
        A list holding the first column of every matching row, or an empty
        list if the query raises ``sqlite3.Error`` (the error is logged).
    """
    query = "SELECT id FROM documents_document WHERE page_count <= 6 and (deleted_at IS NULL OR deleted_at = '') "
    try:
        cursor.execute(query)
        return [doc_id for (doc_id,) in cursor.fetchall()]
    except sqlite3.Error as e:
        logging.error(f"Error querying data: {e}")
    return []
|
||||
|
||||
# Delete documents
def delete_documents(doc_ids):
    """Issue one DELETE request per document id and log the outcome.

    Args:
        doc_ids: Sized iterable of document ids; each id is interpolated
            into ``{PAPERLESS_URL}/documents/{id}/``.

    A truthy result from ``api_request`` counts as a successful deletion.
    After all ids are processed, a summary line with the total and the
    number of successful deletions is logged.
    """
    deleted = 0
    for doc_id in doc_ids:
        if not api_request("DELETE", f"{PAPERLESS_URL}/documents/{doc_id}/"):
            logging.error(f"❌ 文档 {doc_id} 删除失败")
            continue
        logging.info(f"✅ 文档 {doc_id} 删除成功")
        deleted += 1
    logging.info(f"\ntotal count: {len(doc_ids)}, deleted: {deleted}")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: look up the candidate documents and delete them.
    try:
        doc_ids = get_documents_to_delete()
        if doc_ids:
            delete_documents(doc_ids)
        else:
            logging.info("没有需要删除的文档")
    finally:
        # Close the database connection even if querying or deleting raised.
        conn.close()
|
||||
Reference in New Issue
Block a user