# This repository has been archived on 2026-01-07. You can view files and clone it, but cannot push or open issues or pull requests.
# Files
# resources/aabook/src/utils.py
# 2025-03-20 09:53:00 +08:00
#
# 67 lines
# 1.9 KiB
# Python

import requests
import re
import os
import json
import time
import csv
import logging
from datetime import datetime
import config
def extract_create_time(input_str):
    """Extract a ``YYYY-MM-DD HH:MM:SS`` timestamp from a text fragment.

    Example: ``"创建时间 2025-03-08 13:57:00"`` yields
    ``"2025-03-08 13:57:00"``.

    Args:
        input_str: Text expected to contain a timestamp.

    Returns:
        The first timestamp-shaped substring found in ``input_str``. When no
        such substring exists, or when ``input_str`` is ``None``, the input
        is returned unchanged.
    """
    # A missing field (None) would make re.search raise TypeError; treat it
    # like any other input without a timestamp and hand it back as-is.
    if input_str is None:
        return input_str
    match = re.search(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', input_str)
    return match.group(0) if match else input_str
def extract_page_num(page_str, default_num=0):
    """Extract the numeric id from a page name such as ``read-374864.html``.

    Args:
        page_str: Link or file name to search; the pattern may appear
            anywhere inside it.
        default_num: Value returned when no id can be extracted.

    Returns:
        The id digits as a ``str`` (they are not converted to ``int``) when
        ``read-<digits>.html`` is found; otherwise ``default_num`` exactly as
        passed in. ``None`` input also yields ``default_num``.
    """
    # A missing link (None) would make re.search raise TypeError; fall back
    # to the caller's default instead.
    if page_str is None:
        return default_num
    match = re.search(r'read-(\d+)\.html', page_str)
    return match.group(1) if match else default_num
def extract_book_num(page_str, default_num=0):
    """Extract the numeric id from a page name such as ``book-5549.html``.

    Args:
        page_str: Link or file name to search; the pattern may appear
            anywhere inside it.
        default_num: Value returned when no id can be extracted.

    Returns:
        The id digits as a ``str`` (they are not converted to ``int``) when
        ``book-<digits>.html`` is found; otherwise ``default_num`` exactly as
        passed in. ``None`` input also yields ``default_num``.
    """
    # A missing link (None) would make re.search raise TypeError; fall back
    # to the caller's default instead.
    if page_str is None:
        return default_num
    match = re.search(r'book-(\d+)\.html', page_str)
    return match.group(1) if match else default_num
def extract_chapter_uptime_words(input_str):
    """Parse the word count and update time from table-of-contents text.

    Looks for ``字数:<digits>`` and ``更新时间:<YYYY-MM-DD HH:MM:SS>``
    anywhere in the input. The separator after each label may be either the
    ASCII colon or the full-width colon.

    Args:
        input_str: Text to search. ``None`` is treated as empty text.

    Returns:
        A ``(words, update_time)`` tuple.
        ``words`` is the matched digit string, or the integer ``0`` when no
        word count is present.
        ``update_time`` is the matched timestamp string, or the current local
        time formatted as ``%Y-%m-%d %H:%M:%S`` when none is present.
    """
    # None would make re.search raise TypeError; searching an empty string
    # simply takes both fallback branches below.
    text = '' if input_str is None else input_str

    words_match = re.search(r'字数[:：](\d+)', text)
    words = words_match.group(1) if words_match else 0

    update_time_match = re.search(
        r'更新时间[:：](\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})', text
    )
    if update_time_match:
        update_time = update_time_match.group(1)
    else:
        # No timestamp in the text: fall back to "now" in the same format.
        update_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    return words, update_time
def remove_brackets_regex(input_str):
    """Return the text inside a leading ``[...]`` pair, e.g. ``[都市]``.

    The match is anchored at the start of the string and is non-greedy, so
    only the first bracket pair is considered and anything after its closing
    bracket is discarded.

    Args:
        input_str: Text that may start with a bracketed label.

    Returns:
        The content between the first ``[`` and the next ``]`` when the
        input starts with ``[``; otherwise the input unchanged (including
        when it is ``None``).
    """
    # None would make re.match raise TypeError; hand it back unchanged, the
    # same way unbracketed text is handed back.
    if input_str is None:
        return input_str
    match = re.match(r'\[(.*?)\]', input_str)
    return match.group(1) if match else input_str