modify scripts

This commit is contained in:
2025-11-10 15:22:45 +08:00
parent 28124c1bff
commit bd6e1b6ed8
4 changed files with 192 additions and 2 deletions

20
requirements.txt Normal file
View File

@ -0,0 +1,20 @@
# 公共包,一般情况下都有了
requests
beautifulsoup4
lxml
pymysql
cloudscraper
scrapy
sqlalchemy>=2.0.0
alembic>=1.12.0
sqlacodegen>=3.0.0
# NAS 环境下直接用 pip 安装 scrapy 会报错:cryptography 与系统自带的版本冲突,因此必须在虚拟环境中安装和运行
# apt update && apt install -y python3.12-venv # 安装 python3.12-venv ,版本对应于 python3 --version
# python3 -m venv ~/sharedata/pyenv # 创建虚拟环境
# source ~/sharedata/pyenv/bin/activate # 激活虚拟环境
# pip install -r requirements.txt # 安装所需要的包
# 每次运行前,都要先激活虚拟环境
# source ~/sharedata/pyenv/bin/activate
# export DB_ENV=nas && scrapy crawl u3c3 -a begin='2025-11-09' -a end='2025-11-10' -s STATS_PUSH_MSG=False

View File

@ -0,0 +1,70 @@
"""Auto update from resources
Revision ID: 0b2c66f54410
Revises: 758b3971a51e
Create Date: 2025-11-10 15:21:58.323573
"""
from typing import Sequence, Union
from alembic import op
import sqlalchemy as sa
# revision identifiers, used by Alembic.
# `revision` is this migration's own ID; `down_revision` is the parent
# revision this migration is applied on top of.
revision: str = '0b2c66f54410'
down_revision: Union[str, Sequence[str], None] = '758b3971a51e'
# No branch labels and no cross-revision dependencies are declared here.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the ``clm_keywords``, ``sis`` and ``clm_keywords_index`` tables.

    ``clm_keywords_index`` is created last because it declares foreign keys
    to ``clm_keywords.id`` (created just above) and to ``clm_index.id``.
    ``clm_index`` is not created by this revision -- presumably it comes from
    an earlier revision; verify against the migration history.
    """
    # ### commands auto generated by Alembic - please adjust! ###

    # Keyword dictionary: one row per unique keyword string.
    op.create_table(
        'clm_keywords',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False, comment='主键ID'),
        sa.Column('words', sa.String(length=512), nullable=True, comment='关键词(唯一)'),
        sa.Column('groups', sa.Text(), nullable=True, comment='关键词分组'),
        sa.Column('tags', sa.Text(), nullable=True, comment='标签'),
        sa.Column('index_count', sa.Integer(), nullable=True, comment='关联索引数量'),
        sa.Column('created_at', sa.DateTime(), nullable=True, comment='创建时间(本地时间)'),
        sa.Column('updated_at', sa.DateTime(), nullable=True, comment='更新时间(本地时间)'),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('words'),
    )

    # Resource table keyed by a unique URL.
    op.create_table(
        'sis',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False, comment='主键ID'),
        sa.Column('plate_name', sa.Text(), nullable=True, comment='板块名称'),
        sa.Column('title', sa.Text(), nullable=True, comment='标题'),
        sa.Column('url', sa.String(length=512), nullable=True, comment='资源链接(唯一)'),
        sa.Column('size_text', sa.Text(), nullable=True, comment='大小文本描述'),
        sa.Column('size_gb', sa.Float(), nullable=True, comment='大小GB'),
        sa.Column('update_date', sa.Text(), nullable=True, comment='更新日期'),
        sa.Column('created_at', sa.DateTime(), nullable=True, comment='创建时间(本地时间)'),
        sa.Column('updated_at', sa.DateTime(), nullable=True, comment='更新时间(本地时间)'),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('url'),
    )

    # Association table linking keywords to index entries.
    op.create_table(
        'clm_keywords_index',
        sa.Column('id', sa.Integer(), autoincrement=True, nullable=False, comment='主键ID'),
        sa.Column('words_id', sa.Integer(), nullable=True, comment='关键词ID外键'),
        sa.Column('index_id', sa.Integer(), nullable=True, comment='索引ID外键'),
        sa.Column('wid_iid', sa.String(length=255), nullable=True, comment='关键词与索引的关联标识'),
        sa.Column('tags', sa.Text(), nullable=True, comment='关联标签'),
        sa.Column('created_at', sa.DateTime(), nullable=True, comment='创建时间(本地时间)'),
        sa.Column('updated_at', sa.DateTime(), nullable=True, comment='更新时间(本地时间)'),
        sa.ForeignKeyConstraint(['index_id'], ['clm_index.id']),
        sa.ForeignKeyConstraint(['words_id'], ['clm_keywords.id']),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('wid_iid'),
    )
    # ### end Alembic commands ###
def downgrade() -> None:
    """Drop the tables created by ``upgrade``.

    ``clm_keywords_index`` goes first because it holds a foreign key to
    ``clm_keywords``; the remaining two tables follow in the original order.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    for table_name in ('clm_keywords_index', 'sis', 'clm_keywords'):
        op.drop_table(table_name)
    # ### end Alembic commands ###

View File

@ -1,5 +1,7 @@
from sqlalchemy import Column, Integer, Text, String, Float, DateTime, func
from sqlalchemy import Column, Integer, Text, String, Float, DateTime, ForeignKey, func
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import relationship, Mapped
from typing import List, Optional
# Declarative base class for the resource models (if a global Base already
# exists, it can be reused instead).
ResourceBase = declarative_base()
@ -27,3 +29,94 @@ class U3C3(ResourceBase):
def __repr__(self):
    """Return the text shown when an instance is printed.

    The title is cut to its first 20 characters. When ``title`` is ``None``
    (e.g. an instance whose title has not been set yet) the result shows
    ``title=None`` instead of raising ``TypeError`` from slicing ``None``.
    """
    if self.title is None:
        return f"<U3c3(id={self.id}, title=None)>"
    return f"<U3c3(id={self.id}, title='{self.title[:20]}...')>"
class Sis(ResourceBase):
    """ORM model mapped to the MySQL ``sis`` table."""

    __tablename__ = "sis"  # kept identical to the existing table name

    # Column definitions (mirror the existing table structure).
    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
    plate_name = Column(Text, comment="板块名称")
    title = Column(Text, comment="标题")
    url = Column(String(512), unique=True, comment="资源链接(唯一)")
    size_text = Column(Text, comment="大小文本描述")
    size_gb = Column(Float, comment="大小GB")
    update_date = Column(Text, comment="更新日期")

    # Timestamps are filled through SQLAlchemy column defaults: func.now() on
    # insert, and again on update for updated_at. The original author notes
    # that func.now() is preferred over func.datetime(...) for MySQL
    # compatibility.
    created_at = Column(DateTime, default=func.now(), comment="创建时间(本地时间)")
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), comment="更新时间(本地时间)")

    def __repr__(self):
        """Return a short debug representation with the title cut to 20 characters.

        ``title`` is a nullable column, so a missing title is rendered as
        ``title=None`` instead of raising ``TypeError`` from slicing ``None``.
        """
        if self.title is None:
            return f"<Sis(id={self.id}, title=None)>"
        return f"<Sis(id={self.id}, title='{self.title[:20]}...')>"
class ClmIndex(ResourceBase):
    """ORM model mapped to the MySQL ``clm_index`` table."""

    __tablename__ = "clm_index"  # kept identical to the existing table name

    # Column definitions (mirror the existing table structure).
    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
    category = Column(Text, comment="分类")
    title = Column(Text, comment="标题")
    href = Column(String(512), unique=True, comment="资源链接(唯一)")
    magnet_href = Column(Text, comment="磁力链接")
    size_text = Column(Text, comment="大小文本描述")
    size_gb = Column(Float, comment="大小GB")
    heat = Column(Integer, default=0, comment="热度")
    add_date = Column(Text, comment="添加日期")
    last_down_date = Column(Text, comment="最后下载日期")

    # Timestamps are filled through SQLAlchemy column defaults: func.now() on
    # insert, and again on update for updated_at.
    created_at = Column(DateTime, default=func.now(), comment="创建时间(本地时间)")
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), comment="更新时间(本地时间)")

    # Relationship to the association rows; annotated as
    # Mapped[List["ClmKeywordsIndex"]] rather than a bare List[...].
    # Paired with ClmKeywordsIndex.index through back_populates.
    clm_keywords_index: Mapped[List["ClmKeywordsIndex"]] = relationship(
        "ClmKeywordsIndex", back_populates="index"
    )

    def __repr__(self):
        """Return a short debug representation with the title cut to 20 characters.

        ``title`` is a nullable column, so a missing title is rendered as
        ``title=None`` instead of raising ``TypeError`` from slicing ``None``.
        """
        if self.title is None:
            return f"<ClmIndex(id={self.id}, title=None)>"
        return f"<ClmIndex(id={self.id}, title='{self.title[:20]}...')>"
class ClmKeywords(ResourceBase):
    """ORM model mapped to the MySQL ``clm_keywords`` table."""

    __tablename__ = "clm_keywords"  # kept identical to the existing table name

    # Column definitions (mirror the existing table structure).
    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
    words = Column(String(512), unique=True, comment="关键词(唯一)")
    groups = Column(Text, comment="关键词分组")
    tags = Column(Text, comment="标签")
    index_count = Column(Integer, default=0, comment="关联索引数量")

    # Timestamps are filled through SQLAlchemy column defaults: func.now() on
    # insert, and again on update for updated_at.
    created_at = Column(DateTime, default=func.now(), comment="创建时间(本地时间)")
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), comment="更新时间(本地时间)")

    # Relationship to the association rows; annotated as
    # Mapped[List["ClmKeywordsIndex"]] rather than a bare List[...].
    # Paired with ClmKeywordsIndex.words through back_populates.
    clm_keywords_index: Mapped[List["ClmKeywordsIndex"]] = relationship(
        "ClmKeywordsIndex", back_populates="words"
    )

    def __repr__(self):
        """Return a short debug representation with the keyword cut to 20 characters.

        ``words`` is a nullable column, so a missing value is rendered as
        ``words=None`` instead of raising ``TypeError`` from slicing ``None``.
        """
        if self.words is None:
            return f"<ClmKeywords(id={self.id}, words=None)>"
        return f"<ClmKeywords(id={self.id}, words='{self.words[:20]}...')>"
class ClmKeywordsIndex(ResourceBase):
    """ORM model mapped to the MySQL ``clm_keywords_index`` association table."""

    __tablename__ = "clm_keywords_index"  # kept identical to the existing table name

    # Column definitions (mirror the existing table structure).
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
        comment="主键ID",
    )
    words_id = Column(
        Integer,
        ForeignKey("clm_keywords.id"),
        comment="关键词ID外键",
    )
    index_id = Column(
        Integer,
        ForeignKey("clm_index.id"),
        comment="索引ID外键",
    )
    wid_iid = Column(
        String(255),
        unique=True,
        comment="关键词与索引的关联标识",
    )
    tags = Column(Text, comment="关联标签")

    # Timestamps come from SQLAlchemy column defaults based on func.now().
    created_at = Column(
        DateTime,
        default=func.now(),
        comment="创建时间(本地时间)",
    )
    updated_at = Column(
        DateTime,
        default=func.now(),
        onupdate=func.now(),
        comment="更新时间(本地时间)",
    )

    # Relationships to the two parent models; each is annotated with Mapped
    # around a single object type and paired with the parent's
    # clm_keywords_index collection through back_populates.
    index: Mapped["ClmIndex"] = relationship(
        "ClmIndex",
        back_populates="clm_keywords_index",
    )
    words: Mapped["ClmKeywords"] = relationship(
        "ClmKeywords",
        back_populates="clm_keywords_index",
    )

    def __repr__(self):
        """Return a debug representation listing the row id and both foreign keys."""
        fields = f"id={self.id}, words_id={self.words_id}, index_id={self.index_id}"
        return f"<ClmKeywordsIndex({fields})>"

View File

@ -0,0 +1,7 @@
#!/bin/bash
# Autogenerate an Alembic revision for the "resources" migrations and apply it.
#
# The script runs from the parent of its own directory so that the relative
# path migrations/resources/alembic.ini resolves.

# Stop at the first failing command (also on unset variables and pipeline
# failures) so that "upgrade head" is not attempted after a failed
# autogenerate, and the success message is printed only when both Alembic
# steps succeeded.
set -euo pipefail

# Quoted so that a checkout path containing spaces still works.
cd "$(dirname "$0")/.."

alembic -c migrations/resources/alembic.ini revision --autogenerate -m "Auto update from resources"
alembic -c migrations/resources/alembic.ini upgrade head

echo "数据库 scrapy 同步完成"