# stock/stockapp/stat_growth_rate.py

"""
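Script Name: stat_growth_rate.py
Description: Compute the price change of the HS300 (CSI 300) constituent stocks over a date range, using forward-adjusted (qfq) prices.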

Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0

Modification History:
    - YYYY-MM-DD [Your Name]:
    - YYYY-MM-DD [Your Name]:
    - YYYY-MM-DD [Your Name]:
"""

import pymysql
import pandas as pd
import time
from datetime import datetime
import logging
import config

# Set up logging through the project-level config helper (the actual handler
# and format configuration lives in config.setup_logging), then use the root
# logger for every message emitted by this script.
config.setup_logging()
logger = logging.getLogger()

def connect_to_db():
    """Open and return a new pymysql connection.

    The connection parameters are taken from ``config.db_config`` and passed
    to ``pymysql.connect`` as keyword arguments. This function does not close
    the connection; that is left to the caller.
    """
    db_settings = config.db_config
    return pymysql.connect(**db_settings)

def get_close_for_date(df, date):
    """Return the ``close`` of the first row whose ``time_key`` equals *date*.

    Args:
        df: DataFrame with at least ``time_key`` and ``close`` columns.
        date: Value compared against the ``time_key`` column with ``==``.

    Returns:
        The ``close`` value of the first matching row, or ``None`` (after
        logging a warning) when no row matches.
    """
    matches = df.loc[df['time_key'] == date]

    # Guard clause: nothing recorded for the requested date.
    if matches.empty:
        logger.warning(f"No data found for date: {date}")
        return None

    first_match = matches.iloc[0]
    return first_match['close']

def get_first_last_close(df):
    """Return the earliest and latest ``close`` values in *df*.

    Rows are ordered by ``time_key``; the ``close`` of the first row after
    sorting is the earliest value and the ``close`` of the last row is the
    latest value.

    Args:
        df: DataFrame with at least ``time_key`` and ``close`` columns.

    Returns:
        A ``(c1, c3)`` tuple: ``c1`` is the close at the smallest ``time_key``
        and ``c3`` the close at the largest. ``(None, None)`` is returned when
        *df* has no rows, so callers can test for ``None`` instead of having
        to catch an ``IndexError`` from positional indexing.
    """
    # An empty frame has no first/last row; report "missing" explicitly.
    if df.empty:
        return None, None

    df_sorted = df.sort_values(by='time_key')
    c1 = df_sorted.iloc[0]['close']   # earliest close
    c3 = df_sorted.iloc[-1]['close']  # latest close
    return c1, c3

def get_max_min_close(df):
    """Return the largest and smallest values of the ``close`` column.

    Args:
        df: DataFrame with a ``close`` column.

    Returns:
        A ``(max_close, min_close)`` tuple.
    """
    closes = df['close']
    highest, lowest = closes.max(), closes.min()
    return highest, lowest

def main():
    """Compute growth statistics for every HS300 constituent and export them.

    Loads ``code, name, time_key, close`` rows from the ``hs300_qfq_his``
    table dated on or after 2021-01-01 and, for each stock code, computes:

    * ``year_growth_rate``: latest close relative to the earliest loaded
      close (despite the key name, the window is everything loaded by the
      query, i.e. since 2021-01-01);
    * ``growth_since_2024_09_23``: latest close relative to the close on
      2024-09-23;
    * ``year_volatility``: maximum close relative to the minimum close.

    Codes with a missing close value (for example no row on the target date)
    are skipped with a warning. The result table is printed and written to
    ``./result/stat_grouth_rate_since2021.csv``. Any exception is logged with
    its traceback instead of being propagated, and the database connection is
    closed in all cases.
    """
    # Bind the name before entering the try block so the finally clause never
    # touches an unbound local when connect_to_db() itself raises.
    connection = None
    try:
        connection = connect_to_db()
        query = """
            SELECT code, name, time_key, close
            FROM hs300_qfq_his
            WHERE time_key >= '2021-01-01 00:00:00'
        """
        df = pd.read_sql(query, connection)

        # Reference date for the "growth since" statistic.
        target_date = '2024-09-23 00:00:00'
        df['time_key'] = pd.to_datetime(df['time_key'])

        results = []

        # All data is already in memory, so the per-code work is purely local
        # computation and needs no throttling between iterations.
        for code, group in df.groupby('code'):
            logger.info("Processing code: %s", code)

            # c1: earliest close, c3: latest close within the loaded window.
            c1, c3 = get_first_last_close(group)

            # c2: close on the target date (None when that date is missing).
            c2 = get_close_for_date(group, target_date)

            if c1 is None or c2 is None or c3 is None:
                logger.warning(f"Skipping code {code} due to missing close values.")
                continue

            # A zero base price would make the ratio meaningless, so report
            # None instead of dividing by it.
            year_growth_rate = (c3 / c1 - 1) if c1 else None
            growth_since_2024_09_23 = (c3 / c2 - 1) if c2 else None

            # c4: maximum close, c5: minimum close within the loaded window.
            c4, c5 = get_max_min_close(group)
            year_volatility = (c4 / c5 - 1) if c4 and c5 else None

            results.append({
                'code': code,
                'name': group['name'].iloc[0],
                'year_growth_rate': year_growth_rate,
                'growth_since_2024_09_23': growth_since_2024_09_23,
                'year_volatility': year_volatility
            })

        # Show the collected statistics and persist them as CSV.
        result_df = pd.DataFrame(results)
        print(result_df)

        result_df.to_csv('./result/stat_grouth_rate_since2021.csv', index=False)

    except Exception as e:
        # logger.exception logs at ERROR level and appends the traceback,
        # which the plain message alone does not provide.
        logger.exception("Error occurred: %s", e)
    finally:
        if connection is not None:
            connection.close()

# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()