import csv
import json
import logging
import os
import re
import time
from datetime import datetime
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse

import config

# Patterns are compiled once at import time; every parser below anchors its
# match at the start of the (whitespace-stripped) input.
# "<month> <day>, <year> in <place>"
_BIRTH_INFO_RE = re.compile(r"(.+?) (\d{1,2}), (\d{4}) in (.+)")
# "<feet> ft <inches> in (<cm> cm)"; the inches digits may be absent
_HEIGHT_RE = re.compile(r"(\d+)\s*ft\s*(\d*)\s*in\s*\((\d+)\s*cm\)")
# "<lbs> lbs (<kg> kg)"
_WEIGHT_RE = re.compile(r"(\d+)\s*lbs\s*\((\d+)\s*kg\)")
# "(Age NN)" marker embedded in alias strings
_AGE_MARKER_RE = re.compile(r'\(Age \d+\)')


def _as_text(value):
    """Return *value* as a string, mapping ``None`` to the empty string."""
    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)


def parse_birth_info(text):
    """Parse a birth date and birth place out of *text*.

    Expected shape: ``"<month> <day>, <year> in <place>"``.

    Args:
        text: Raw field text. ``None`` is treated as an empty string and
            surrounding whitespace is ignored when matching.

    Returns:
        A dict with the keys ``birth_date`` (``"<month> <day>, <year>"``),
        ``birth_year`` and ``birth_place``. When the text does not have the
        expected shape, ``birth_date`` carries the input text unchanged and
        the other two values are empty strings.
    """
    text = _as_text(text)
    match = _BIRTH_INFO_RE.match(text.strip())
    if match is None:
        return {"birth_date": text, "birth_year": "", "birth_place": ""}

    month, day, year, place = match.groups()
    return {
        "birth_date": f"{month} {day}, {year}",
        "birth_year": year,
        "birth_place": place,
    }


def parse_height(text):
    """Parse a height written as ``"<ft> ft <in> in (<cm> cm)"``.

    Args:
        text: Raw field text. ``None`` is treated as an empty string and
            surrounding whitespace is ignored when matching.

    Returns:
        A dict with ``height_ft`` formatted as ``<ft>'<in>"`` and
        ``height_cm`` as the centimetre digits. When the text does not have
        the expected shape, ``height_ft`` carries the input text unchanged
        and ``height_cm`` is an empty string.
    """
    text = _as_text(text)
    match = _HEIGHT_RE.match(text.strip())
    if match is None:
        return {"height_ft": text, "height_cm": ""}

    feet, inches, centimeters = match.groups()
    # The pattern allows the inches digits to be missing; render that as 0
    # inches instead of emitting a dangling quote such as 5'".
    inches = inches or "0"
    return {"height_ft": f"{feet}'{inches}\"", "height_cm": centimeters}


def parse_weight(text):
    """Parse a weight written as ``"<lbs> lbs (<kg> kg)"``.

    Args:
        text: Raw field text. ``None`` is treated as an empty string and
            surrounding whitespace is ignored when matching.

    Returns:
        A dict with ``weight_lbs`` and ``weight_kg`` as digit strings. When
        the text does not have the expected shape, ``weight_lbs`` carries the
        input text unchanged and ``weight_kg`` is an empty string.
    """
    text = _as_text(text)
    match = _WEIGHT_RE.match(text.strip())
    if match is None:
        return {"weight_lbs": text, "weight_kg": ""}

    pounds, kilograms = match.groups()
    return {"weight_lbs": pounds, "weight_kg": kilograms}


def clean_alias(alias):
    """Split a comma-separated alias string into a list of clean names.

    Every ``(Age NN)`` marker is removed before splitting; names are stripped
    of surrounding whitespace and empty entries are dropped.

    Args:
        alias: Raw alias text. ``None`` yields an empty list.

    Returns:
        A list of non-empty, stripped alias names in their original order.
    """
    without_age = _AGE_MARKER_RE.sub('', _as_text(alias))
    stripped_names = (name.strip() for name in without_age.split(','))
    return [name for name in stripped_names if name]


def parse_numeric(value):
    """Convert *value* to ``float``, falling back to ``0``.

    Args:
        value: Anything ``float()`` accepts; other values are tolerated.

    Returns:
        ``float(value)`` when the conversion succeeds, otherwise ``0``.
    """
    try:
        return float(value)
    except (ValueError, TypeError, OverflowError):
        # OverflowError: an int too large for a float (e.g. 10**400) is
        # treated like any other unparseable value.
        return 0  # default value is 0