49 lines
1.4 KiB
Python
49 lines
1.4 KiB
Python
import re
|
|
import os
|
|
import json
|
|
import time
|
|
import csv
|
|
from datetime import datetime
|
|
from urllib.parse import urlparse
|
|
import logging
|
|
import config
|
|
from urllib.parse import urlparse, urlunparse, parse_qs, urlencode
|
|
|
|
|
|
# Parse birth date and birthplace
|
|
def parse_birth_info(text):
    """Split a birth description into date, year and place.

    Recognises text of the form ``<Month> <day>, <year> in <place>`` as well
    as the same date without the trailing `` in <place>`` part.

    Args:
        text: The raw birth description. ``None`` is treated as an empty
            string; any other non-string value is converted with ``str``.

    Returns:
        A dict with the string values ``birth_date``, ``birth_year`` and
        ``birth_place``. When the text is not recognised, ``birth_date``
        holds the whitespace-stripped input and the other two values are
        empty strings.
    """
    if text is None:
        text = ""
    elif not isinstance(text, str):
        text = str(text)
    text = text.strip()

    # The date must be followed either by " in <place>" or by the end of the
    # text, so inputs with other trailing content still take the fallback.
    match = re.match(r"(.+?) (\d{1,2}), (\d{4})(?: in (.+)|\s*$)", text)
    if match is None:
        return {"birth_date": text, "birth_year": "", "birth_place": ""}

    month, day, year, place = match.groups()
    return {
        "birth_date": f"{month} {day}, {year}",
        "birth_year": year,
        # ``place`` is None when the text ended right after the year.
        "birth_place": (place or "").strip(),
    }
|
|
|
|
# Parse height
|
|
def parse_height(text):
    """Split a height description into imperial and metric parts.

    Recognises text such as ``5 ft 7 in (170 cm)``. The inches part may be
    absent (``6 ft (183 cm)``); it is then reported as 0 inches.

    Args:
        text: The raw height description. ``None`` is treated as an empty
            string; any other non-string value is converted with ``str``.

    Returns:
        A dict with ``height_ft`` formatted as ``<feet>'<inches>"`` and
        ``height_cm`` holding the centimetre digits. When the text is not
        recognised, ``height_ft`` holds the whitespace-stripped input and
        ``height_cm`` is an empty string.
    """
    if text is None:
        text = ""
    elif not isinstance(text, str):
        text = str(text)
    text = text.strip()

    match = re.match(r"(\d+)\s*ft\s*(\d*)\s*(?:in)?\s*\((\d+)\s*cm\)", text)
    if match is None:
        return {"height_ft": text, "height_cm": ""}

    feet, inches, centimetres = match.groups()
    # Without this default an absent inch value renders as a bare quote pair.
    inches = inches or "0"
    return {"height_ft": f"{feet}'{inches}\"", "height_cm": centimetres}
|
|
|
|
# Parse weight
|
|
def parse_weight(text):
    """Split a weight description into pounds and kilograms.

    Recognises text such as ``150 lbs (68 kg)``. The singular unit ``lb``
    and decimal values (``1 lb (0.45 kg)``) are accepted as well.

    Args:
        text: The raw weight description. ``None`` is treated as an empty
            string; any other non-string value is converted with ``str``.

    Returns:
        A dict with the string values ``weight_lbs`` and ``weight_kg``.
        When the text is not recognised, ``weight_lbs`` holds the
        whitespace-stripped input and ``weight_kg`` is an empty string.
    """
    if text is None:
        text = ""
    elif not isinstance(text, str):
        text = str(text)
    # re.match anchors at the start, so leading whitespace must go first.
    text = text.strip()

    match = re.match(
        r"(\d+(?:\.\d+)?)\s*lbs?\s*\((\d+(?:\.\d+)?)\s*kg\)", text
    )
    if match is None:
        return {"weight_lbs": text, "weight_kg": ""}

    pounds, kilograms = match.groups()
    return {"weight_lbs": pounds, "weight_kg": kilograms}
|
|
|
|
def clean_alias(alias):
    """Turn a comma-separated alias string into a list of names.

    Every ``(Age NN)`` marker is removed first. The remainder is split on
    commas, each piece is stripped of surrounding whitespace, and pieces
    that end up empty are discarded.

    Args:
        alias: The raw alias string.

    Returns:
        A list of the non-empty, stripped names in their original order.
    """
    without_age = re.sub(r'\(Age \d+\)', '', alias)  # drop "(Age XX)"
    names = []
    for piece in without_age.split(','):
        trimmed = piece.strip()
        if trimmed:
            names.append(trimmed)
    return names
|
|
|
|
|
|
def parse_numeric(value):
    """Convert ``value`` to a float, falling back to 0.

    Args:
        value: Anything accepted by the built-in ``float``.

    Returns:
        ``float(value)`` when the conversion succeeds. If ``float`` raises
        ``ValueError`` or ``TypeError`` (for example for non-numeric text or
        ``None``), the integer ``0`` is returned instead.
    """
    try:
        number = float(value)
    except (ValueError, TypeError):
        number = 0  # default value is 0
    return number
|