import subprocess
import json
from datetime import datetime, timedelta


# Extract a single video's metadata with yt-dlp
def fetch_video_metadata(url):
    try:
        # Dump the metadata as JSON without downloading the video
        result = subprocess.run(
            ["yt-dlp", "--skip-download", "--dump-json", url],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        # Parse the JSON output
        video_info = json.loads(result.stdout)
        return video_info
    except Exception as e:
        print(f"Failed to extract metadata: {e}")
        return None
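
# Note: if yt-dlp fails (e.g. an unreachable URL), stdout is typically empty,
# so json.loads raises and the except clause above returns None with a message.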


# Keep only videos uploaded within the last year
def is_recent_video(upload_date):
    try:
        # yt-dlp reports upload_date in YYYYMMDD format
        video_date = datetime.strptime(upload_date, "%Y%m%d")
        one_year_ago = datetime.now() - timedelta(days=365)
        return video_date >= one_year_ago
    except ValueError:
        return False
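
# Note: timedelta(days=365) approximates "one year"; around leap years the cutoff
# shifts by a day, which is acceptable for this kind of filter.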


# Main routine: crawl video info from a listing page
def fetch_recent_videos_from_pornhub(url):
    try:
        # Get the flat list of videos on the listing page (one JSON object per line)
        result = subprocess.run(
            ["yt-dlp", "--flat-playlist", "--dump-json", url],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
        # Parse the JSON output line by line
        videos = [json.loads(line) for line in result.stdout.strip().split("\n")]

        # Collect the videos that pass the date filter
        recent_videos = []
        for video in videos:
            video_url = video.get("url")
            print(f"Extracting video: {video_url}")
            metadata = fetch_video_metadata(video_url)

            if metadata:
                upload_date = metadata.get("upload_date")
                if upload_date and is_recent_video(upload_date):
                    # Pull out the engagement metrics
                    title = metadata.get("title", "Unknown title")
                    like_count = metadata.get("like_count", 0)
                    dislike_count = metadata.get("dislike_count", 0)
                    view_count = metadata.get("view_count", 0)
                    comment_count = metadata.get("comment_count", 0)

                    video_data = {
                        "title": title,
                        "url": video_url,
                        "upload_date": upload_date,
                        "likes": like_count,
                        "dislikes": dislike_count,
                        "views": view_count,
                        "comments": comment_count,
                    }
                    recent_videos.append(video_data)

                    # Print a short summary for each matching video
                    print(f"Title: {title}")
                    print(f"Upload date: {upload_date}")
                    print(f"Likes: {like_count}, dislikes: {dislike_count}, views: {view_count}, comments: {comment_count}")
                    print("-" * 50)

        return recent_videos
    except Exception as e:
        print(f"Crawl failed: {e}")
        return []


# Example Pornhub listing page (sorted by popularity or upload time)
playlist_url = "https://www.pornhub.com/video?o=mr"  # sorted by most recent
videos = fetch_recent_videos_from_pornhub(playlist_url)

# Save the results to a file
with open("recent_videos.json", "w", encoding="utf-8") as f:
    json.dump(videos, f, ensure_ascii=False, indent=4)

print("Crawl finished; results saved to recent_videos.json!")