modify scripts
This commit is contained in:
20
requirements.txt
Normal file
20
requirements.txt
Normal file
@ -0,0 +1,20 @@
|
||||
# 公共包,一般情况下都有了
|
||||
requests
|
||||
beautifulsoup4
|
||||
lxml
|
||||
pymysql
|
||||
cloudscraper
|
||||
scrapy
|
||||
sqlalchemy>=2.0.0
|
||||
alembic>=1.12.0
|
||||
sqlacodegen>=3.0.0
|
||||
|
||||
# nas 环境下,直接pip安装 scrapy 会报错,cryptography 与系统的有冲突。所以必须要在 虚拟环境下安装和运行
|
||||
# apt update && apt install -y python3.12-venv # 安装 python3.12-venv ,版本对应于 python3 --version
|
||||
# python3 -m venv ~/sharedata/pyenv # 创建虚拟环境
|
||||
# source ~/sharedata/pyenv/bin/activate # 激活虚拟环境
|
||||
# pip install -r requirements.txt # 安装所需要的包
|
||||
|
||||
# 每次运行前,都要先激活虚拟环境
|
||||
# source ~/sharedata/pyenv/bin/activate
|
||||
# export DB_ENV=nas && scrapy crawl u3c3 -a begin='2025-11-09' -a end='2025-11-10' -s STATS_PUSH_MSG=False
|
||||
@ -0,0 +1,70 @@
|
||||
"""Auto update from resources
|
||||
|
||||
Revision ID: 0b2c66f54410
|
||||
Revises: 758b3971a51e
|
||||
Create Date: 2025-11-10 15:21:58.323573
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = '0b2c66f54410'
|
||||
down_revision: Union[str, Sequence[str], None] = '758b3971a51e'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Upgrade schema: create ``clm_keywords``, ``sis`` and ``clm_keywords_index``."""

    def pk_column():
        # A fresh Column object is built per table; the definition is the same everywhere.
        return sa.Column('id', sa.Integer(), autoincrement=True, nullable=False, comment='主键ID')

    def audit_columns():
        # created_at / updated_at pair used by all three tables.
        return (
            sa.Column('created_at', sa.DateTime(), nullable=True, comment='创建时间(本地时间)'),
            sa.Column('updated_at', sa.DateTime(), nullable=True, comment='更新时间(本地时间)'),
        )

    op.create_table(
        'clm_keywords',
        pk_column(),
        sa.Column('words', sa.String(length=512), nullable=True, comment='关键词(唯一)'),
        sa.Column('groups', sa.Text(), nullable=True, comment='关键词分组'),
        sa.Column('tags', sa.Text(), nullable=True, comment='标签'),
        sa.Column('index_count', sa.Integer(), nullable=True, comment='关联索引数量'),
        *audit_columns(),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('words'),
    )

    op.create_table(
        'sis',
        pk_column(),
        sa.Column('plate_name', sa.Text(), nullable=True, comment='板块名称'),
        sa.Column('title', sa.Text(), nullable=True, comment='标题'),
        sa.Column('url', sa.String(length=512), nullable=True, comment='资源链接(唯一)'),
        sa.Column('size_text', sa.Text(), nullable=True, comment='大小文本描述'),
        sa.Column('size_gb', sa.Float(), nullable=True, comment='大小(GB)'),
        sa.Column('update_date', sa.Text(), nullable=True, comment='更新日期'),
        *audit_columns(),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('url'),
    )

    # Created last: it carries foreign keys to clm_keywords (created above)
    # and to clm_index (not created in this revision).
    op.create_table(
        'clm_keywords_index',
        pk_column(),
        sa.Column('words_id', sa.Integer(), nullable=True, comment='关键词ID(外键)'),
        sa.Column('index_id', sa.Integer(), nullable=True, comment='索引ID(外键)'),
        sa.Column('wid_iid', sa.String(length=255), nullable=True, comment='关键词与索引的关联标识'),
        sa.Column('tags', sa.Text(), nullable=True, comment='关联标签'),
        *audit_columns(),
        sa.ForeignKeyConstraint(['index_id'], ['clm_index.id']),
        sa.ForeignKeyConstraint(['words_id'], ['clm_keywords.id']),
        sa.PrimaryKeyConstraint('id'),
        sa.UniqueConstraint('wid_iid'),
    )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Downgrade schema: drop ``clm_keywords_index``, ``sis`` and ``clm_keywords``."""
    # The association table is dropped first because it holds a foreign key
    # to clm_keywords.
    for table_name in ('clm_keywords_index', 'sis', 'clm_keywords'):
        op.drop_table(table_name)
|
||||
@ -1,5 +1,7 @@
|
||||
from sqlalchemy import Column, Integer, Text, String, Float, DateTime, func
|
||||
from sqlalchemy import Column, Integer, Text, String, Float, DateTime, ForeignKey, func
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from sqlalchemy.orm import relationship, Mapped
|
||||
from typing import List, Optional
|
||||
|
||||
# 基础模型基类(如果已有全局 Base 可直接复用)
|
||||
ResourceBase = declarative_base()
|
||||
@ -26,4 +28,95 @@ class U3C3(ResourceBase):
|
||||
|
||||
def __repr__(self):
    """Return a short debug representation of the instance.

    The title is truncated to its first 20 characters. A missing title
    (``None``) is rendered as an empty string instead of raising
    ``TypeError`` on the slice.
    """
    title = self.title or ""
    return f"<U3c3(id={self.id}, title='{title[:20]}...')>"
|
||||
|
||||
class Sis(ResourceBase):
    """ORM model for the MySQL ``sis`` table."""

    __tablename__ = "sis"  # table name kept identical to the existing table

    # Column definitions (mapped strictly to the existing table structure).
    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
    plate_name = Column(Text, comment="板块名称")
    title = Column(Text, comment="标题")
    url = Column(String(512), unique=True, comment="资源链接(唯一)")
    size_text = Column(Text, comment="大小文本描述")
    size_gb = Column(Float, comment="大小(GB)")
    update_date = Column(Text, comment="更新日期")
    # On MySQL, func.now() is preferred over func.datetime(...) for compatibility.
    created_at = Column(DateTime, default=func.now(), comment="创建时间(本地时间)")
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), comment="更新时间(本地时间)")

    def __repr__(self):
        """Return a short debug representation with the title cut to 20 characters.

        ``title`` is a nullable column, so ``None`` is rendered as an empty
        string instead of raising ``TypeError`` on the slice.
        """
        title = self.title or ""
        return f"<Sis(id={self.id}, title='{title[:20]}...')>"
|
||||
|
||||
|
||||
class ClmIndex(ResourceBase):
    """ORM model for the MySQL ``clm_index`` table."""

    __tablename__ = "clm_index"  # table name kept identical to the existing table

    # Column definitions (mapped strictly to the existing table structure).
    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
    category = Column(Text, comment="分类")
    title = Column(Text, comment="标题")
    href = Column(String(512), unique=True, comment="资源链接(唯一)")
    magnet_href = Column(Text, comment="磁力链接")
    size_text = Column(Text, comment="大小文本描述")
    size_gb = Column(Float, comment="大小(GB)")
    heat = Column(Integer, default=0, comment="热度")
    add_date = Column(Text, comment="添加日期")
    last_down_date = Column(Text, comment="最后下载日期")
    created_at = Column(DateTime, default=func.now(), comment="创建时间(本地时间)")
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), comment="更新时间(本地时间)")

    # Relationship: annotated as Mapped[List["ClmKeywordsIndex"]] rather than a
    # bare List["ClmKeywordsIndex"]; paired with ClmKeywordsIndex.index.
    clm_keywords_index: Mapped[List["ClmKeywordsIndex"]] = relationship(
        "ClmKeywordsIndex", back_populates="index"
    )

    def __repr__(self):
        """Return a short debug representation with the title cut to 20 characters.

        ``title`` is a nullable column, so ``None`` is rendered as an empty
        string instead of raising ``TypeError`` on the slice.
        """
        title = self.title or ""
        return f"<ClmIndex(id={self.id}, title='{title[:20]}...')>"
|
||||
|
||||
|
||||
class ClmKeywords(ResourceBase):
    """ORM model for the MySQL ``clm_keywords`` table."""

    __tablename__ = "clm_keywords"  # table name kept identical to the existing table

    # Column definitions (mapped strictly to the existing table structure).
    id = Column(Integer, primary_key=True, autoincrement=True, comment="主键ID")
    words = Column(String(512), unique=True, comment="关键词(唯一)")
    groups = Column(Text, comment="关键词分组")
    tags = Column(Text, comment="标签")
    index_count = Column(Integer, default=0, comment="关联索引数量")
    created_at = Column(DateTime, default=func.now(), comment="创建时间(本地时间)")
    updated_at = Column(DateTime, default=func.now(), onupdate=func.now(), comment="更新时间(本地时间)")

    # Relationship: annotated as Mapped[List["ClmKeywordsIndex"]] rather than a
    # bare List["ClmKeywordsIndex"]; paired with ClmKeywordsIndex.words.
    clm_keywords_index: Mapped[List["ClmKeywordsIndex"]] = relationship(
        "ClmKeywordsIndex", back_populates="words"
    )

    def __repr__(self):
        """Return a short debug representation with the keyword cut to 20 characters.

        ``words`` is a nullable column, so ``None`` is rendered as an empty
        string instead of raising ``TypeError`` on the slice.
        """
        words = self.words or ""
        return f"<ClmKeywords(id={self.id}, words='{words[:20]}...')>"
|
||||
|
||||
|
||||
|
||||
class ClmKeywordsIndex(ResourceBase):
    """ORM model for the MySQL ``clm_keywords_index`` table (association table)."""

    __tablename__ = "clm_keywords_index"  # table name kept identical to the existing table

    # Column definitions (mapped strictly to the existing table structure).
    id = Column(
        Integer,
        primary_key=True,
        autoincrement=True,
        comment="主键ID",
    )
    words_id = Column(
        Integer,
        ForeignKey("clm_keywords.id"),
        comment="关键词ID(外键)",
    )
    index_id = Column(
        Integer,
        ForeignKey("clm_index.id"),
        comment="索引ID(外键)",
    )
    wid_iid = Column(
        String(255),
        unique=True,
        comment="关键词与索引的关联标识",
    )
    tags = Column(Text, comment="关联标签")
    created_at = Column(
        DateTime,
        default=func.now(),
        comment="创建时间(本地时间)",
    )
    updated_at = Column(
        DateTime,
        default=func.now(),
        onupdate=func.now(),
        comment="更新时间(本地时间)",
    )

    # Relationships: each side is a single object wrapped in Mapped[...].
    index: Mapped["ClmIndex"] = relationship(
        "ClmIndex",
        back_populates="clm_keywords_index",
    )
    words: Mapped["ClmKeywords"] = relationship(
        "ClmKeywords",
        back_populates="clm_keywords_index",
    )

    def __repr__(self):
        """Return a debug representation showing the row id and both foreign keys."""
        fields = f"id={self.id}, words_id={self.words_id}, index_id={self.index_id}"
        return f"<ClmKeywordsIndex({fields})>"
|
||||
|
||||
7
scrapy_proj/my_sqlalchemy/scripts/sync_resources.sh
Executable file
7
scrapy_proj/my_sqlalchemy/scripts/sync_resources.sh
Executable file
@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Autogenerate an Alembic revision for the "resources" migrations and apply it.

# Abort on the first failing command, on unset variables and on pipeline errors,
# so the success message below is only printed when both alembic steps succeed.
set -euo pipefail

# Run from the parent of this script's directory so the relative -c path resolves.
# Quoted so that paths containing spaces work.
cd "$(dirname "$0")/.."

alembic -c migrations/resources/alembic.ini revision --autogenerate -m "Auto update from resources"
alembic -c migrations/resources/alembic.ini upgrade head

echo "数据库 scrapy 同步完成"
|
||||
Reference in New Issue
Block a user