"""
Script Name:
Description: Compute the price growth of the hs300 (CSI 300) constituent
             stocks over a date range, using forward-adjusted (qfq) closes.

Author: [Your Name]
Created Date: YYYY-MM-DD
Last Modified: YYYY-MM-DD
Version: 1.0

Modification History:
    - YYYY-MM-DD [Your Name]:
    - YYYY-MM-DD [Your Name]:
    - YYYY-MM-DD [Your Name]:
"""

import logging
import os
import time
from datetime import datetime

import pandas as pd
import pymysql

import config

# Configure logging.
config.setup_logging()
logger = logging.getLogger()


def connect_to_db():
    """Open a new database connection using ``config.db_config``."""
    return pymysql.connect(**config.db_config)


def get_close_for_date(df, date):
    """Return the ``close`` of the first row whose ``time_key`` equals *date*.

    Args:
        df: Frame with ``time_key`` and ``close`` columns.
        date: Value compared against ``time_key`` with ``==``.

    Returns:
        The matching ``close`` value, or ``None`` (after logging a warning)
        when no row matches.
    """
    matches = df[df['time_key'] == date]
    if matches.empty:
        logger.warning(f"No data found for date: {date}")
        return None
    return matches.iloc[0]['close']


def get_first_last_close(df):
    """Return ``(c1, c3)``: the closes at the earliest and latest ``time_key``.

    Args:
        df: Frame with ``time_key`` and ``close`` columns.

    Returns:
        A ``(first_close, last_close)`` tuple, or ``(None, None)`` when *df*
        has no rows (callers already treat ``None`` as "missing").
    """
    if df.empty:
        return None, None
    df_sorted = df.sort_values(by='time_key')
    c1 = df_sorted.iloc[0]['close']   # earliest close
    c3 = df_sorted.iloc[-1]['close']  # latest close
    return c1, c3


def get_max_min_close(df):
    """Return ``(max_close, min_close)`` over the ``close`` column of *df*."""
    max_close = df['close'].max()
    min_close = df['close'].min()
    return max_close, min_close


def _collect_growth_stats(df, target_date):
    """Build one statistics record per stock code found in *df*.

    Args:
        df: Frame with ``code``, ``name``, ``time_key`` and ``close`` columns.
        target_date: Reference date whose close is used as the base of the
            ``growth_since_2024_09_23`` figure.

    Returns:
        A list of dicts with keys ``code``, ``name``, ``year_growth_rate``,
        ``growth_since_2024_09_23`` and ``year_volatility``. Codes lacking
        any of the required closes are skipped with a warning.
    """
    results = []
    for code, group in df.groupby('code'):
        logger.info(f"Processing code: {code}")

        # c1 = earliest close in the loaded window, c3 = latest close.
        c1, c3 = get_first_last_close(group)
        # c2 = close on the reference date.
        c2 = get_close_for_date(group, target_date)

        if c1 is None or c2 is None or c3 is None:
            logger.warning(f"Skipping code {code} due to missing close values.")
            continue

        # Growth over the whole loaded window and growth since the reference
        # date. NOTE: the "year_" prefix is historical; the window is whatever
        # range of rows the caller loaded, not a single calendar year.
        year_growth_rate = (c3 / c1 - 1) if c1 else None
        growth_since_2024_09_23 = (c3 / c2 - 1) if c2 else None

        # Peak-to-trough spread of the close within the loaded window.
        c4, c5 = get_max_min_close(group)
        year_volatility = (c4 / c5 - 1) if c4 and c5 else None

        results.append({
            'code': code,
            'name': group['name'].iloc[0],
            'year_growth_rate': year_growth_rate,
            'growth_since_2024_09_23': growth_since_2024_09_23,
            'year_volatility': year_volatility,
        })
        # No per-code sleep: this loop is pure in-memory computation, so
        # throttling it only adds one idle second per stock.
    return results


def main():
    """Load hs300 qfq closes since 2021-01-01, compute stats, print and save.

    Any exception is logged (with traceback) rather than propagated, and the
    database connection is closed if it was opened.
    """
    # Bound before the try so the finally clause is safe even when the
    # connection attempt itself fails.
    connection = None
    try:
        connection = connect_to_db()
        query = """
        SELECT code, name, time_key, close
        FROM hs300_qfq_his
        WHERE time_key >= '2021-01-01 00:00:00'
        """
        df = pd.read_sql(query, connection)

        # Reference date for the "growth since" figure.
        target_date = '2024-09-23 00:00:00'
        df['time_key'] = pd.to_datetime(df['time_key'])

        results = _collect_growth_stats(df, target_date)

        # Convert the records to a DataFrame and display them.
        result_df = pd.DataFrame(results)
        print(result_df)

        # Persist the result as CSV; create the target directory first so a
        # fresh checkout does not lose the computed result.
        output_path = './result/stat_grouth_rate_since2021.csv'
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        result_df.to_csv(output_path, index=False)
    except Exception as e:
        # Top-level boundary: log with traceback instead of crashing.
        logger.exception(f"Error occurred: {e}")
    finally:
        if connection is not None:
            connection.close()


if __name__ == "__main__":
    main()