modify scripts
This commit is contained in:
@ -128,7 +128,7 @@ def json_to_csv(data, output_file):
|
||||
writer.writerow(row)
|
||||
|
||||
|
||||
|
||||
# javbus 使用,处理多语言url归一化
|
||||
def normalize_url(url: str) -> str:
|
||||
"""
|
||||
标准化URL,移除语言前缀,使不同语言版本的URL保持一致
|
||||
@ -166,7 +166,48 @@ def normalize_url(url: str) -> str:
|
||||
print(f"URL标准化失败: {url}, 错误: {e}")
|
||||
return url # 出错时返回原始URL
|
||||
|
||||
import json
|
||||
# Used by javbus: expand a normalized URL into its per-language variants.
def generate_multilang_urls(url, languages=('en', 'ja')):
    """
    Build language-prefixed variants of a URL.

    Each language code is inserted as the first path segment; the scheme,
    host, params, query string and fragment are carried over unchanged.

    Args:
        url (str): Source URL without a language prefix.
        languages (iterable of str): Language codes to generate URLs for.

    Returns:
        dict: Maps each language code to its URL, e.g.
            {'en': 'https://host/en/path', 'ja': 'https://host/ja/path'}.
            An empty dict is returned if anything fails while building.
    """
    try:
        parsed = urlparse(url)
        path = parsed.path

        # Drop a single leading slash so the prefix join below does not
        # produce '/<lang>//<path>'.
        if path.startswith('/'):
            path = path[1:]

        # Only the path component changes; every other component is reused.
        return {
            lang: urlunparse(parsed._replace(path=f'/{lang}/{path}'))
            for lang in languages
        }

    except Exception as e:
        # Best-effort helper: report the problem and hand back an empty
        # mapping instead of propagating the error to the caller.
        print(f"生成多语言URL时出错: {e}")
        return {}
|
||||
|
||||
def pretty_print_json(data, n=10, indent=4, sort_keys=False):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user