add some scripts.

This commit is contained in:
2024-11-22 14:18:08 +08:00
parent a261a9e7ed
commit ba31911477
5 changed files with 387 additions and 0 deletions

View File

@ -0,0 +1,51 @@
from yt_dlp.extractor.pornhub import PornHubIE
import re
# NOTE: this subclass approach did not take effect; the yt-dlp source was modified directly instead.
class CustomPornHubIE(PornHubIE):
    """PornHub extractor that adds a ``favorites_count`` field to the result.

    The parent extractor does the real work; this subclass fetches the page
    once more and scrapes the ``favoritesCounter`` span from it.
    """

    def _real_extract(self, url):
        """Run the parent extraction and attach the favorites count.

        Args:
            url: URL of the page being extracted.

        Returns:
            The info dict returned by the parent ``_real_extract`` with an
            additional ``favorites_count`` key. The value is the parsed
            count, ``0`` when the counter could not be obtained from the
            page, or ``None`` when a counter was found but is unparseable.
        """
        # Debug: show which URL is being processed.
        self.to_screen(f"调试: 处理的 URL 是: {url}")
        # Delegate the actual extraction to the parent class.
        original_data = super()._real_extract(url)

        # The favorites counter is only a supplement to the parent's result,
        # so a failed second download must not abort the whole extraction
        # (fatal=False). Use the extracted id for log messages, falling back
        # to the URL if the parent did not provide one.
        webpage = self._download_webpage(
            url, original_data.get('id') or url, fatal=False)

        favorites_raw = None
        if webpage:
            # Grab the text inside <span class="favoritesCounter">...</span>.
            favorites_raw = self._search_regex(
                r'<span class="favoritesCounter">\s*([\dKkMm,. ]+)\s*</span>',
                webpage, 'favorites count', fatal=False)

        # Debug: show the scraped counter text and the parent's info dict.
        # (The full page HTML is deliberately not dumped to the console.)
        self.to_screen(f"调试: 收藏原始内容: {favorites_raw}")
        self.to_screen(f"调试: 收藏原始内容: {original_data}")

        if favorites_raw:
            # Separator/whitespace clean-up and unit handling live in the
            # helper so that every caller gets the same normalisation.
            original_data['favorites_count'] = self._convert_to_number(
                favorites_raw)
        else:
            original_data['favorites_count'] = 0
        return original_data

    def _convert_to_number(self, value):
        """Parse a human-readable count such as ``"1,234"`` or ``"1.2 K"``.

        Whitespace and thousands separators (commas) are ignored. An
        optional ``K`` (thousand) or ``M`` (million) suffix, in either case,
        scales the number.

        Args:
            value: The textual count.

        Returns:
            The count as an ``int``, or ``None`` if ``value`` is not a
            number optionally followed by a ``K``/``M`` suffix.
        """
        # The scraping regex allows spaces and commas anywhere in the text,
        # so strip them all rather than only at the ends.
        compact = re.sub(r'[\s,]+', '', value)
        # Accept "12", "12.", "12.5" and ".5"; reject "." or "1.2.3", which
        # would otherwise make float() raise.
        match = re.fullmatch(r'(\d+(?:\.\d*)?|\.\d+)([KkMm]?)', compact)
        if not match:
            return None
        number = float(match.group(1))
        multiplier = {'K': 1000, 'M': 1000000}.get(match.group(2).upper())
        if multiplier is None:
            # No unit: return the number as-is (truncated to an integer).
            return int(number)
        # round() rather than int(): binary float error can leave the product
        # just below the intended integer (e.g. 1.001 * 1000).
        return round(number * multiplier)