From bd6e1b6ed8dcb504880d9a3c9b6cac11306daaf0 Mon Sep 17 00:00:00 2001
From: sophon <oscar@easyprompt8.com>
Date: Mon, 10 Nov 2025 15:22:45 +0800
Subject: [PATCH] modify scripts

---
 requirements.txt                              | 20 ++++
 ...0b2c66f54410_auto_update_from_resources.py | 70 +++++++++++++
 scrapy_proj/my_sqlalchemy/models/resources.py | 97 ++++++++++++++++++-
 .../my_sqlalchemy/scripts/sync_resources.sh   |  7 ++
 4 files changed, 192 insertions(+), 2 deletions(-)
 create mode 100644 requirements.txt
 create mode 100644 scrapy_proj/my_sqlalchemy/migrations/resources/versions/0b2c66f54410_auto_update_from_resources.py
 create mode 100755 scrapy_proj/my_sqlalchemy/scripts/sync_resources.sh

diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e6f5809
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,20 @@
+# Common packages; in most environments these are already installed
+requests
+beautifulsoup4
+lxml
+pymysql
+cloudscraper
+scrapy
+sqlalchemy>=2.0.0
+alembic>=1.12.0
+sqlacodegen>=3.0.0
+
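+# On the NAS, installing scrapy directly with pip fails because its cryptography dependency conflicts with the system package, so everything must be installed and run inside a virtual environment
+# apt update && apt install -y python3.12-venv  # install the venv package; pick the version matching `python3 --version`
+# python3 -m venv ~/sharedata/pyenv # create the virtual environment
+# source ~/sharedata/pyenv/bin/activate # activate the virtual environment
+# pip install -r requirements.txt # install the required packages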
+
+# Activate the virtual environment before every run
+# source ~/sharedata/pyenv/bin/activate
+# export DB_ENV=nas && scrapy crawl u3c3 -a begin='2025-11-09' -a end='2025-11-10' -s STATS_PUSH_MSG=False
diff --git a/scrapy_proj/my_sqlalchemy/migrations/resources/versions/0b2c66f54410_auto_update_from_resources.py b/scrapy_proj/my_sqlalchemy/migrations/resources/versions/0b2c66f54410_auto_update_from_resources.py
new file mode 100644
index 0000000..5f8f2fd
--- /dev/null
+++ b/scrapy_proj/my_sqlalchemy/migrations/resources/versions/0b2c66f54410_auto_update_from_resources.py
@@ -0,0 +1,70 @@
+"""Auto update from resources
+
+Revision ID: 0b2c66f54410
+Revises: 758b3971a51e
+Create Date: 2025-11-10 15:21:58.323573
+
+"""
+from typing import Sequence, Union
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# Revision identifiers, used by Alembic; down_revision names the parent revision this one follows.
+revision: str = '0b2c66f54410'
+down_revision: Union[str, Sequence[str], None] = '758b3971a51e'
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:
+    """Create the clm_keywords, sis and clm_keywords_index tables."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.create_table('clm_keywords',
+    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False, comment='主键ID'),
+    sa.Column('words', sa.String(length=512), nullable=True, comment='关键词（唯一）'),
+    sa.Column('groups', sa.Text(), nullable=True, comment='关键词分组'),
+    sa.Column('tags', sa.Text(), nullable=True, comment='标签'),
+    sa.Column('index_count', sa.Integer(), nullable=True, comment='关联索引数量'),
+    sa.Column('created_at', sa.DateTime(), nullable=True, comment='创建时间（本地时间）'),
+    sa.Column('updated_at', sa.DateTime(), nullable=True, comment='更新时间（本地时间）'),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('words')
+    )
+    op.create_table('sis',
+    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False, comment='主键ID'),
+    sa.Column('plate_name', sa.Text(), nullable=True, comment='板块名称'),
+    sa.Column('title', sa.Text(), nullable=True, comment='标题'),
+    sa.Column('url', sa.String(length=512), nullable=True, comment='资源链接（唯一）'),
+    sa.Column('size_text', sa.Text(), nullable=True, comment='大小文本描述'),
+    sa.Column('size_gb', sa.Float(), nullable=True, comment='大小（GB）'),
+    sa.Column('update_date', sa.Text(), nullable=True, comment='更新日期'),
+    sa.Column('created_at', sa.DateTime(), nullable=True, comment='创建时间（本地时间）'),
+    sa.Column('updated_at', sa.DateTime(), nullable=True, comment='更新时间（本地时间）'),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('url')
+    )
+    op.create_table('clm_keywords_index',
+    sa.Column('id', sa.Integer(), autoincrement=True, nullable=False, comment='主键ID'),
+    sa.Column('words_id', sa.Integer(), nullable=True, comment='关键词ID（外键）'),
+    sa.Column('index_id', sa.Integer(), nullable=True, comment='索引ID（外键）'),
+    sa.Column('wid_iid', sa.String(length=255), nullable=True, comment='关键词与索引的关联标识'),
+    sa.Column('tags', sa.Text(), nullable=True, comment='关联标签'),
+    sa.Column('created_at', sa.DateTime(), nullable=True, comment='创建时间（本地时间）'),
+    sa.Column('updated_at', sa.DateTime(), nullable=True, comment='更新时间（本地时间）'),
+    sa.ForeignKeyConstraint(['index_id'], ['clm_index.id'], ),  # NOTE(review): clm_index is not created in this migration; presumably it already exists - confirm
+    sa.ForeignKeyConstraint(['words_id'], ['clm_keywords.id'], ),
+    sa.PrimaryKeyConstraint('id'),
+    sa.UniqueConstraint('wid_iid')
+    )
+    # ### end Alembic commands ###
+
+
+def downgrade() -> None:
+    """Drop clm_keywords_index, sis and clm_keywords; the table holding the foreign keys goes first."""
+    # ### commands auto generated by Alembic - please adjust! ###
+    op.drop_table('clm_keywords_index')
+    op.drop_table('sis')
+    op.drop_table('clm_keywords')
+    # ### end Alembic commands ###
diff --git a/scrapy_proj/my_sqlalchemy/models/resources.py b/scrapy_proj/my_sqlalchemy/models/resources.py
index eec6e4e..b336aa1 100644
--- a/scrapy_proj/my_sqlalchemy/models/resources.py
+++ b/scrapy_proj/my_sqlalchemy/models/resources.py
@@ -1,5 +1,7 @@
-from sqlalchemy import Column, Integer, Text, String, Float, DateTime, func
+from sqlalchemy import Column, Integer, Text, String, Float, DateTime, ForeignKey, func
 from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import relationship, Mapped
+from typing import List, Optional
 
 # 基础模型基类（如果已有全局 Base 可直接复用）
 ResourceBase = declarative_base()
@@ -26,4 +28,95 @@ class U3C3(ResourceBase):
 
     def __repr__(self):
         """打印实例时显示的信息"""
-        return f"<U3c3(id={self.id}, title='{self.title[:20]}...')>"
\ No newline at end of file
+        return f"<U3c3(id={self.id}, title='{self.title[:20]}...')>"
+
+class Sis(ResourceBase):
+    """ORM model mapped to the MySQL ``sis`` table."""
+    __tablename__ = "sis"  # must stay identical to the existing table name
+
+    # Column definitions (mirror the structure of the existing table)
+    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
+    plate_name = Column(Text, comment="板块名称")
+    title = Column(Text, comment="标题")
+    url = Column(String(512), unique=True, comment="资源链接（唯一）")
+    size_text = Column(Text, comment="大小文本描述")
+    size_gb = Column(Float, comment="大小（GB）")
+    update_date = Column(Text, comment="更新日期")
+    # NOTE: func.now() is used instead of func.datetime(...) because it is more portable on MySQL
+    created_at = Column(DateTime, default=func.now(), comment="创建时间（本地时间）")
+    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), comment="更新时间（本地时间）")
+
+    def __repr__(self):
+        return f"<Sis(id={self.id}, title='{(self.title or '')[:20]}...')>"  # title is nullable; slicing None would raise TypeError
+
+
+class ClmIndex(ResourceBase):
+    """ORM model mapped to the MySQL ``clm_index`` table."""
+    __tablename__ = "clm_index"  # must stay identical to the existing table name
+
+    # Column definitions (mirror the structure of the existing table)
+    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
+    category = Column(Text, comment="分类")
+    title = Column(Text, comment="标题")
+    href = Column(String(512), unique=True, comment="资源链接（唯一）")
+    magnet_href = Column(Text, comment="磁力链接")
+    size_text = Column(Text, comment="大小文本描述")
+    size_gb = Column(Float, comment="大小（GB）")
+    heat = Column(Integer, default=0, comment="热度")
+    add_date = Column(Text, comment="添加日期")
+    last_down_date = Column(Text, comment="最后下载日期")
+    created_at = Column(DateTime, default=func.now(), comment="创建时间（本地时间）")
+    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), comment="更新时间（本地时间）")
+
+    # Relationship: annotated as Mapped[List["ClmKeywordsIndex"]] rather than a bare List["ClmKeywordsIndex"]
+    clm_keywords_index: Mapped[List["ClmKeywordsIndex"]] = relationship(
+        "ClmKeywordsIndex", back_populates="index"
+    )
+
+    def __repr__(self):
+        """Return a short debug representation; a NULL title is rendered as an empty string."""
+        return f"<ClmIndex(id={self.id}, title='{(self.title or '')[:20]}...')>"
+
+
+class ClmKeywords(ResourceBase):
+    """ORM model mapped to the MySQL ``clm_keywords`` table."""
+    __tablename__ = "clm_keywords"  # must stay identical to the existing table name
+
+    # Column definitions (mirror the structure of the existing table)
+    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
+    words = Column(String(512), unique=True, comment="关键词（唯一）")
+    groups = Column(Text, comment="关键词分组")
+    tags = Column(Text, comment="标签")
+    index_count = Column(Integer, default=0, comment="关联索引数量")
+    created_at = Column(DateTime, default=func.now(), comment="创建时间（本地时间）")
+    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), comment="更新时间（本地时间）")
+
+    # Relationship: annotated as Mapped[List["ClmKeywordsIndex"]] rather than a bare List["ClmKeywordsIndex"]
+    clm_keywords_index: Mapped[List["ClmKeywordsIndex"]] = relationship(
+        "ClmKeywordsIndex", back_populates="words"
+    )
+
+    def __repr__(self):
+        return f"<ClmKeywords(id={self.id}, words='{(self.words or '')[:20]}...')>"  # words is nullable; slicing None would raise TypeError
+
+
+
+class ClmKeywordsIndex(ResourceBase):
+    """ORM model mapped to the MySQL ``clm_keywords_index`` table (association table)."""
+    __tablename__ = "clm_keywords_index"  # must stay identical to the existing table name
+
+    # Column definitions (mirror the structure of the existing table)
+    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
+    words_id = Column(Integer, ForeignKey("clm_keywords.id"), comment="关键词ID（外键）")
+    index_id = Column(Integer, ForeignKey("clm_index.id"), comment="索引ID（外键）")
+    wid_iid = Column(String(255), unique=True, comment="关键词与索引的关联标识")
+    tags = Column(Text, comment="关联标签")
+    created_at = Column(DateTime, default=func.now(), comment="创建时间（本地时间）")
+    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), comment="更新时间（本地时间）")
+
+    # Many-to-one relationships; Optional because both foreign-key columns are nullable
+    index: Mapped[Optional["ClmIndex"]] = relationship("ClmIndex", back_populates="clm_keywords_index")
+    words: Mapped[Optional["ClmKeywords"]] = relationship("ClmKeywords", back_populates="clm_keywords_index")
+
+    def __repr__(self):
+        return f"<ClmKeywordsIndex(id={self.id}, words_id={self.words_id}, index_id={self.index_id})>"
diff --git a/scrapy_proj/my_sqlalchemy/scripts/sync_resources.sh b/scrapy_proj/my_sqlalchemy/scripts/sync_resources.sh
new file mode 100755
index 0000000..befd32a
--- /dev/null
+++ b/scrapy_proj/my_sqlalchemy/scripts/sync_resources.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+# Autogenerate an Alembic revision for the resources DB and apply it; abort on the first failing step.
+set -euo pipefail
+cd -- "$(dirname -- "$0")/.."
+alembic -c migrations/resources/alembic.ini revision --autogenerate -m "Auto update from resources"
+alembic -c migrations/resources/alembic.ini upgrade head
+echo "数据库 scrapy 同步完成"