76 lines
2.8 KiB
Python
76 lines
2.8 KiB
Python
'''
Notes: for reasons not yet understood, the approach in this file does not
take effect, so the installed yt-dlp source was modified directly instead.

# Find the yt_dlp installation path
python3 -c "import yt_dlp; print(yt_dlp.__file__)"

Open extractor/pornhub.py and find the _real_extract function.
Add the following code before its return statement:

    # Extract the favorites count (contents of favoritesCounter)
    favorites_raw = self._search_regex(
        r'<span class="favoritesCounter">\s*([\dKkMm,. ]+)\s*</span>',
        webpage, 'favorites count', fatal=False)

    # If a favorites count was found, parse it and convert the unit
    favorites_count = '0'
    if favorites_raw:
        # Strip spaces/newlines, then parse the number and unit
        favorites_count = favorites_raw.strip().replace(',', '')

Then add this entry to the returned dict:

    'favorite_count': favorites_count,
'''
|
||
|
||
from yt_dlp.extractor.pornhub import PornHubIE
|
||
import re
|
||
|
||
# NOTE: this subclass did not take effect in practice; the installed yt-dlp source was modified instead.
|
||
class CustomPornHubIE(PornHubIE):
    """PornHub extractor that also reports the video's favorites count.

    The parent's info dict is returned with one extra key,
    ``favorites_count``, scraped from the ``favoritesCounter`` span of the
    video page.
    """

    def _real_extract(self, url):
        """Run the parent extraction and add ``favorites_count`` to the result.

        Args:
            url: URL of the video page being extracted.

        Returns:
            The info dict produced by the parent extractor with an additional
            ``favorites_count`` key. The value is always an int; it is 0 when
            the page cannot be fetched, the counter is missing, or its text
            cannot be parsed.
        """
        # Debug output: the URL currently being processed.
        self.to_screen(f"调试: 处理的 URL 是: {url}")

        # Let the parent extractor do the regular work first.
        original_data = super()._real_extract(url)

        # Fetch the page again to scrape the counter. The favorites count is
        # optional metadata, so a failed download must not discard the result
        # the parent already produced (fatal=False yields a falsy value).
        # The whole page is deliberately NOT echoed to the console.
        webpage = self._download_webpage(
            url, original_data.get('id') or url, fatal=False)

        # Extract the favorites count (contents of favoritesCounter).
        favorites_raw = None
        if webpage:
            favorites_raw = self._search_regex(
                r'<span class="favoritesCounter">\s*([\dKkMm,. ]+)\s*</span>',
                webpage, 'favorites count', fatal=False)

        # Debug output: the raw counter text and the parent's info dict.
        self.to_screen(f"调试: 收藏原始内容: {favorites_raw}")
        self.to_screen(f"调试: 提取结果: {original_data}")

        # Parse the text and expand any K/M unit. Unparsable text is reported
        # as 0, the same value used when the counter is absent, so the field
        # always holds an int.
        favorites_count = None
        if favorites_raw:
            favorites_count = self._convert_to_number(favorites_raw)
        original_data['favorites_count'] = favorites_count or 0

        return original_data

    def _convert_to_number(self, value):
        """Parse an abbreviated count such as ``"1,234"``, ``"1.2K"`` or ``"3M"``.

        Whitespace and comma thousands separators anywhere in ``value`` are
        ignored. ``K`` multiplies by one thousand and ``M`` by one million
        (case-insensitive). Fractional digits beyond the unit's precision are
        truncated, e.g. ``"1.2345K"`` -> 1234 and ``"12.7"`` -> 12.

        Args:
            value: The count text to parse.

        Returns:
            The count as an int, or None when ``value`` is None, empty, or
            not a recognisable count (e.g. ``"."`` or ``"1.2.3"``).
        """
        if value is None:
            return None

        # Drop whitespace and thousands separators so "1.2 K" and "1,234" parse.
        compact = re.sub(r'[\s,]', '', str(value))

        # Exactly one optional decimal point; avoids handing float()-style
        # garbage such as "." or "1.2.3" to the numeric conversion below.
        match = re.fullmatch(r'(\d*)(?:\.(\d*))?([KkMm]?)', compact)
        if not match:
            return None
        whole, fraction, unit = match.group(1), match.group(2) or '', match.group(3)
        if not whole and not fraction:
            # Nothing but a dot and/or a unit letter: not a number.
            return None

        # Number of decimal places the unit shifts the value by.
        shift = {'': 0, 'K': 3, 'M': 6}[unit.upper()]

        # Shift the decimal point on the digit string instead of multiplying
        # floats: a float product can land just below the intended integer
        # and int() would then truncate it to one less.
        shifted_fraction = (fraction + '0' * shift)[:shift]
        return int((whole or '0') + shifted_fraction)