add some scripts.
This commit is contained in:
51
scripts/pornhub/custom_pornhub.py
Normal file
51
scripts/pornhub/custom_pornhub.py
Normal file
@ -0,0 +1,51 @@
|
||||
from yt_dlp.extractor.pornhub import PornHubIE
|
||||
import re
|
||||
|
||||
# NOTE: this subclass approach did not work; the yt-dlp source was modified directly instead.
|
||||
class CustomPornHubIE(PornHubIE):
    """PornHub extractor that adds a ``favorites_count`` field to the result.

    The parent extractor's result is returned with one extra key,
    ``favorites_count``, which is always an ``int``: ``0`` when the counter
    is missing from the page or its text cannot be parsed.
    """

    # Multipliers for the abbreviated suffixes understood by
    # _convert_to_number ('' means the number carries no suffix).
    _UNIT_MULTIPLIERS = {'': 1, 'K': 1000, 'M': 1000000}

    def _real_extract(self, url):
        """Run the parent extraction and attach the favorites count.

        Args:
            url: URL of the video page being extracted.

        Returns:
            The value returned by the parent ``_real_extract`` with the
            ``favorites_count`` key set.
        """
        # Debug output goes through write_debug so it only shows up in
        # verbose mode instead of cluttering normal output.
        self.write_debug(f"调试: 处理的 URL 是: {url}")

        # Let the parent class do the regular extraction first.
        original_data = super()._real_extract(url)

        # Fetch the page again to read the favorites counter. The second
        # argument is the video id (used in progress messages), not the URL.
        # The counter is optional metadata, so a failed download must not
        # discard the parent's result: fatal=False yields a falsy value.
        # The page itself is deliberately not printed here; yt-dlp's
        # --dump-pages option exists for inspecting downloaded pages.
        webpage = self._download_webpage(
            url, self._match_id(url),
            note='Downloading webpage for favorites count', fatal=False)

        # Text inside <span class="favoritesCounter">, e.g. "1.2K".
        favorites_raw = None
        if webpage:
            favorites_raw = self._search_regex(
                r'<span class="favoritesCounter">\s*([\dKkMm,. ]+)\s*</span>',
                webpage, 'favorites count', fatal=False)

        self.write_debug(f"调试: 收藏原始内容: {favorites_raw}")
        self.write_debug(f"调试: 原始提取结果: {original_data}")

        # _convert_to_number returns None for a missing or unparsable value;
        # fall back to 0 in both cases so the field is consistently an int.
        favorites_count = self._convert_to_number(favorites_raw)
        original_data['favorites_count'] = (
            favorites_count if favorites_count is not None else 0)

        return original_data

    def _convert_to_number(self, value):
        """Parse a possibly abbreviated count string into an integer.

        Whitespace and comma separators are ignored, and an optional
        ``K`` (thousand) or ``M`` (million) suffix is applied
        case-insensitively, e.g. ``"1,234"`` -> 1234, ``"1.2K"`` -> 1200,
        ``"3 M"`` -> 3000000.

        Args:
            value: The count text to parse.

        Returns:
            The count as an ``int`` (fractions are truncated), or ``None``
            when ``value`` is not a string or is not a valid number.
        """
        # Local import: Decimal keeps "1.005K" exact (1005), whereas binary
        # float arithmetic gives 1004.999... and truncates to 1004.
        from decimal import Decimal

        if not isinstance(value, str):
            return None

        # The capturing pattern in _real_extract allows inner spaces and
        # commas ("1.2 K", "1,234"), so remove them before matching.
        normalized = re.sub(r'[\s,]', '', value)

        # Exactly one number with at most one decimal point; input such as
        # "1.2.3" or "." is rejected here instead of raising later.
        match = re.fullmatch(r'(\d+(?:\.\d*)?|\.\d+)([KkMm]?)', normalized)
        if not match:
            return None

        multiplier = self._UNIT_MULTIPLIERS[match.group(2).upper()]
        return int(Decimal(match.group(1)) * multiplier)
|
||||
Reference in New Issue
Block a user