modify scripts

This commit is contained in:
oscarz
2025-06-03 15:13:55 +08:00
parent e97f49bfb9
commit a4c4fa39d0
10 changed files with 808 additions and 6 deletions

View File

@ -0,0 +1,48 @@
import re
import os
import json
import time
import csv
from datetime import datetime
from urllib.parse import urlparse
import logging
import config
from urllib.parse import urlparse, urlunparse, parse_qs, urlencode
# Parse birth date and birthplace.
def parse_birth_info(text):
    """Split a "<Month> <day>, <year> in <place>" string into its parts.

    Args:
        text: Raw birth description, e.g. "January 5, 1990 in Paris, France".
            Leading/trailing whitespace is ignored. A date without the
            " in <place>" suffix is accepted as well.

    Returns:
        A dict with the keys "birth_date", "birth_year" and "birth_place"
        (all strings). If the text matches neither layout, "birth_date"
        holds the text unchanged and the other two values are "". If
        ``text`` is not a string (e.g. ``None``), all three values are "".
    """
    if not isinstance(text, str):
        return {"birth_date": "", "birth_year": "", "birth_place": ""}

    # re.match anchors at position 0, so stray whitespace around scraped
    # text would otherwise force the fallback branch.
    cleaned = text.strip()

    with_place = re.match(r"(.+?) (\d{1,2}), (\d{4}) in (.+)", cleaned)
    if with_place:
        month, day, year, place = with_place.groups()
    else:
        # Date only: still recover the year instead of discarding it.
        date_only = re.fullmatch(r"(.+?) (\d{1,2}), (\d{4})", cleaned)
        if not date_only:
            return {"birth_date": text, "birth_year": "", "birth_place": ""}
        month, day, year = date_only.groups()
        place = ""

    return {
        "birth_date": f"{month} {day}, {year}",
        "birth_year": year,
        "birth_place": place,
    }
# Parse height.
def parse_height(text):
    """Split a "<F> ft <I> in (<C> cm)" string into imperial and metric parts.

    Args:
        text: Raw height description, e.g. "5 ft 7 in (170 cm)".
            Leading/trailing whitespace is ignored.

    Returns:
        A dict with the keys "height_ft" (formatted as ``F'I"``) and
        "height_cm" (digits as a string). If the text does not match,
        "height_ft" holds the text unchanged and "height_cm" is "". If
        ``text`` is not a string (e.g. ``None``), both values are "".
    """
    if not isinstance(text, str):
        return {"height_ft": "", "height_cm": ""}

    # re.match anchors at position 0; strip so padded scraped text still
    # parses.
    match = re.match(
        r"(\d+)\s*ft\s*(\d*)\s*in\s*\((\d+)\s*cm\)", text.strip()
    )
    if not match:
        return {"height_ft": text, "height_cm": ""}

    feet, inches, centimeters = match.groups()
    return {"height_ft": f"{feet}'{inches}\"", "height_cm": centimeters}
# Parse weight.
def parse_weight(text):
    """Split a "<P> lbs (<K> kg)" string into pounds and kilograms.

    Args:
        text: Raw weight description, e.g. "125 lbs (57 kg)".
            Leading/trailing whitespace is ignored.

    Returns:
        A dict with the keys "weight_lbs" and "weight_kg" (digits as
        strings). If the text does not match, "weight_lbs" holds the text
        unchanged and "weight_kg" is "". If ``text`` is not a string
        (e.g. ``None``), both values are "".
    """
    if not isinstance(text, str):
        return {"weight_lbs": "", "weight_kg": ""}

    # re.match anchors at position 0; strip so padded scraped text still
    # parses.
    match = re.match(r"(\d+)\s*lbs\s*\((\d+)\s*kg\)", text.strip())
    if not match:
        return {"weight_lbs": text, "weight_kg": ""}

    pounds, kilograms = match.groups()
    return {"weight_lbs": pounds, "weight_kg": kilograms}
def clean_alias(alias):
    """Turn a comma-separated alias string into a list of clean names.

    Any "(Age NN)" marker is removed before splitting; each name is
    whitespace-trimmed and empty entries are dropped.

    Args:
        alias: Comma-separated alias text, e.g. "Jane Doe, JD (Age 25)".

    Returns:
        A list of non-empty alias strings; ``[]`` when ``alias`` is empty or
        not a string (e.g. ``None`` for a missing field).
    """
    if not isinstance(alias, str):
        return []

    without_age = re.sub(r'\(Age \d+\)', '', alias)  # drop "(Age XX)"
    trimmed = (part.strip() for part in without_age.split(','))
    return [name for name in trimmed if name]
def parse_numeric(value):
    """Convert ``value`` to a float, falling back to 0 when that fails.

    Args:
        value: Anything ``float()`` accepts (number or numeric string).

    Returns:
        ``float(value)`` on success; ``0`` when the value is not numeric,
        is of an unsupported type (e.g. ``None``), or is an integer too
        large to be represented as a float.
    """
    try:
        return float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: float() raises it for oversized ints, which would
        # otherwise escape this best-effort conversion.
        return 0  # default value is 0