This repository has been archived on 2026-01-07. You can view files and clone it, but cannot push or open issues or pull requests.
Files
resources/pornhub/custom_pornhub.py
2025-03-17 11:30:35 +08:00

76 lines
2.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

'''
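For reasons not yet understood, the subclass defined in this file does not
take effect, so the installed yt-dlp source was patched directly instead.

# Find where yt_dlp is installed
python3 -c "import yt_dlp; print(yt_dlp.__file__)"

Open extractor/pornhub.py and locate the _real_extract function.
Add the following code before its return statement:

    # Extract the favorites count (the content of favoritesCounter)
    favorites_raw = self._search_regex(
        r'<span class="favoritesCounter">\s*([\dKkMm,. ]+)\s*</span>',
        webpage, 'favorites count', fatal=False)
    # Default to '0'; if a favorites count was found, clean it up
    favorites_count = '0'
    if favorites_raw:
        # Strip surrounding whitespace/newlines and thousands separators
        # (note: no K/M unit conversion is performed in this patch)
        favorites_count = favorites_raw.strip().replace(',', '')

Then add this entry to the returned dict:

    'favorite_count': favorites_count,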
'''
from yt_dlp.extractor.pornhub import PornHubIE
import re
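# NOTE: this subclass did not take effect in practice; the yt-dlp source was
# patched instead (see the module docstring above).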
class CustomPornHubIE(PornHubIE):
    """PornHub extractor that also reports how often a video was favorited.

    The dict produced by the parent ``_real_extract`` is returned with one
    extra key, ``favorites_count``, parsed from the ``favoritesCounter`` span
    of the video page. The key is always an ``int``; it is ``0`` when the
    counter is missing, unparseable, or the page could not be fetched.
    """

    # Markup carrying the favorites counter, e.g. ``1.2K`` or ``3,456``.
    _FAVORITES_COUNTER_RE = (
        r'<span class="favoritesCounter">\s*([\dKkMm,. ]+)\s*</span>')
    # One well-formed decimal number followed by an optional K/M suffix.
    # Stricter than ``[\d.]+`` so inputs such as ``1.2.3`` are rejected
    # instead of blowing up during numeric conversion.
    _COUNT_RE = re.compile(r'(\d+(?:\.\d*)?|\.\d+)([KkMm]?)')
    _UNIT_MULTIPLIERS = {'': 1, 'K': 1000, 'M': 1000000}

    def _real_extract(self, url):
        """Run the parent extraction and add ``favorites_count`` to its result.

        Args:
            url: The video page URL being extracted.

        Returns:
            The parent's info dict with an additional integer
            ``favorites_count`` entry.
        """
        self.to_screen(f"调试: 处理的 URL 是: {url}")
        original_data = super()._real_extract(url)
        # Verbose-only: the full info dict is far too large for normal output.
        self.write_debug(f'Parent extractor result: {original_data}')

        # The page is fetched again here because only the parent's result
        # dict is available at this point. The favorites counter is an
        # optional extra, so a failed download must not discard the
        # extraction that already succeeded.
        video_id = original_data.get('id') or url
        webpage = self._download_webpage(
            url, video_id, note='Downloading webpage for favorites count',
            fatal=False)

        favorites_raw = None
        if webpage:
            favorites_raw = self._search_regex(
                self._FAVORITES_COUNTER_RE, webpage, 'favorites count',
                fatal=False)
        self.to_screen(f"调试: 收藏原始内容: {favorites_raw}")

        # Missing and unparseable counters both fall back to 0 so the field
        # has a single, consistent type.
        original_data['favorites_count'] = (
            self._convert_to_number(favorites_raw) or 0)
        return original_data

    def _convert_to_number(self, value):
        """Convert a counter string such as ``"1.2K"`` into an integer.

        Whitespace and thousands-separator commas are ignored, and the
        suffixes ``K`` (thousand) and ``M`` (million) are accepted in either
        case. Fractions left over after scaling are truncated.

        Args:
            value: The raw counter text, e.g. ``"3,456"``, ``"1.2 K"``, ``"2M"``.

        Returns:
            The parsed count as an ``int``, or ``None`` when ``value`` is not
            a string or does not look like a number.
        """
        from decimal import Decimal

        if not isinstance(value, str):
            return None
        compact = ''.join(value.split()).replace(',', '')
        match = self._COUNT_RE.fullmatch(compact)
        if not match:
            return None
        number_text, unit = match.groups()
        # Decimal keeps the scaling exact; with binary floats
        # int(32.3 * 1000) evaluates to 32299 rather than 32300.
        scaled = Decimal(number_text) * self._UNIT_MULTIPLIERS[unit.upper()]
        return int(scaled)