From ddaa568380917351526b42e3a174a9b2a100cc8f Mon Sep 17 00:00:00 2001 From: oscar Date: Sun, 2 Mar 2025 17:03:03 +0800 Subject: [PATCH] modify stockapp.em scripts. --- .gitignore | 20 ++++ gitignore | 28 +++++ stockapp/src/config.py | 5 +- stockapp/src/crawling/stock_hist_em.py | 151 ++++++++++++++++++++++++- stockapp/src/stat_growth_em.py | 51 ++++++--- 5 files changed, 240 insertions(+), 15 deletions(-) create mode 100644 gitignore diff --git a/.gitignore b/.gitignore index baf1067..a57550b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,25 @@ # 忽略 log 目录 log/ +scripts/aabook/log/ +scripts/aabook/local/ +scripts/aabook/data/ +scripts/u9a9/torrents/ +scripts/u9a9/log/ +scripts/javdb/log/ +scripts/javhd/result/tmp/ +scripts/javhd/log/ +scripts/iafd/data/tmp/ +scripts/iafd/result/tmp/ +scripts/iafd/result/bak/ +scripts/iafd/result/performers/ +scripts/iafd/log/ +scripts/thelordofporn/log/ +scripts/vixen_group/log/ +scripts/pornhub/log/ + +stockapp/data/ +stockapp/log/ +stockapp/result/ # 忽略 Python 编译文件 *.pyc diff --git a/gitignore b/gitignore new file mode 100644 index 0000000..a57550b --- /dev/null +++ b/gitignore @@ -0,0 +1,28 @@ +# 忽略 log 目录 +log/ +scripts/aabook/log/ +scripts/aabook/local/ +scripts/aabook/data/ +scripts/u9a9/torrents/ +scripts/u9a9/log/ +scripts/javdb/log/ +scripts/javhd/result/tmp/ +scripts/javhd/log/ +scripts/iafd/data/tmp/ +scripts/iafd/result/tmp/ +scripts/iafd/result/bak/ +scripts/iafd/result/performers/ +scripts/iafd/log/ +scripts/thelordofporn/log/ +scripts/vixen_group/log/ +scripts/pornhub/log/ + +stockapp/data/ +stockapp/log/ +stockapp/result/ + +# 忽略 Python 编译文件 +*.pyc + +# 忽略环境配置文件 +.env \ No newline at end of file diff --git a/stockapp/src/config.py b/stockapp/src/config.py index ced8921..0abdf5b 100644 --- a/stockapp/src/config.py +++ b/stockapp/src/config.py @@ -6,7 +6,7 @@ from pathlib import Path # MySQL 配置 db_config = { - 'host': '172.18.0.3', + 'host': '172.18.0.5', 'user': 'root', 'password': 'mysqlpw', 'database': 'stockdb' @@ -14,6 +14,9 @@ db_config = { log_dir_prefix = '../log' +global_share_data_dir = '/root/sharedata' +global_stock_data_dir = '/root/hostdir/stock_data' + # 获取log目录 def get_log_directory(): """ diff --git a/stockapp/src/crawling/stock_hist_em.py b/stockapp/src/crawling/stock_hist_em.py index 7f8e429..72eaec9 100644 --- a/stockapp/src/crawling/stock_hist_em.py +++ b/stockapp/src/crawling/stock_hist_em.py @@ -6,11 +6,96 @@ Desc: 东方财富网-行情首页-沪深京 A 股 """ import requests import pandas as pd +import time from functools import lru_cache -def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048') -> pd.DataFrame: +def fetch_with_retries_em(url, params, max_retries=3, delay=2): + """带重试机制的 GET 请求""" + for attempt in range(max_retries): + try: + response = requests.get(url, params=params, timeout=5) + response.raise_for_status() + return response.json() + except requests.RequestException as e: + print(f"请求失败,第 {attempt + 1} 次重试: {e}") + time.sleep(delay) + return None + +def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048', pz=200) -> pd.DataFrame: + """ + 东方财富网-沪深京 A 股-实时行情 + https://quote.eastmoney.com/center/gridlist.html#hs_a_board + """ + url = "http://82.push2.eastmoney.com/api/qt/clist/get" + pn = 1 # 初始页数 + pn_max = 10000 # 设定初始最大页数 + all_data = [] + + while pn <= pn_max: + params = { + "pn": str(pn), + "pz": str(pz), + "po": "1", + "np": "1", + "ut": "bd1d9ddb04089700cf9c27f6f7426281", + "fltt": "2", + "invt": "2", + "fid": "f3", + "fs": fs, + "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221", + "_": "1623833739532", + } + + data_json = fetch_with_retries_em(url, params) + if not data_json or "data" not in data_json or "diff" not in data_json["data"]: + break + + diff_data = data_json["data"]["diff"] + if not diff_data: + break + + all_data.extend(diff_data) + + # 获取 total 数据来更新 pn_max + if pn == 1: + pn_max = (data_json["data"].get("total", 0) + pz - 1) // pz + + pn += 1 + time.sleep(0.5) # 防止请求过快 + + if not all_data: + return pd.DataFrame() + + temp_df = pd.DataFrame(all_data) + column_map = { + "f2": "最新价", "f3": "涨跌幅", "f4": "涨跌额", "f5": "成交量", "f6": "成交额", "f7": "振幅", "f8": "换手率", + "f9": "市盈率动", "f10": "量比", "f11": "5分钟涨跌", "f12": "代码", "f14": "名称", "f15": "最高", "f16": "最低", + "f17": "今开", "f18": "昨收", "f20": "总市值", "f21": "流通市值", "f22": "涨速", "f23": "市净率", "f24": "60日涨跌幅", + "f25": "年初至今涨跌幅", "f26": "上市时间", "f37": "加权净资产收益率", "f38": "总股本", "f39": "已流通股份", + "f40": "营业收入", "f41": "营业收入同比增长", "f45": "归属净利润", "f46": "归属净利润同比增长", "f48": "每股未分配利润", + "f49": "毛利率", "f57": "资产负债率", "f61": "每股公积金", "f100": "所处行业", "f112": "每股收益", "f113": "每股净资产", + "f114": "市盈率静", "f115": "市盈率TTM", "f221": "报告期" + } + temp_df.rename(columns=column_map, inplace=True) + + numeric_columns = [ + "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅", "换手率", "量比", "今开", "最高", "最低", "昨收", "涨速", "5分钟涨跌", "60日涨跌幅", + "年初至今涨跌幅", "市盈率动", "市盈率TTM", "市盈率静", "市净率", "每股收益", "每股净资产", "每股公积金", "每股未分配利润", + "加权净资产收益率", "毛利率", "资产负债率", "营业收入", "营业收入同比增长", "归属净利润", "归属净利润同比增长", "总股本", "已流通股份", + "总市值", "流通市值" + ] + for col in numeric_columns: + temp_df[col] = pd.to_numeric(temp_df[col], errors="coerce") + + temp_df["报告期"] = pd.to_datetime(temp_df["报告期"], format='%Y%m%d', errors="coerce") + temp_df["上市时间"] = pd.to_datetime(temp_df["上市时间"], format='%Y%m%d', errors="coerce") + + return temp_df + + +def stock_zh_a_spot_em_old(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048') -> pd.DataFrame: """ 东方财富网-沪深京 A 股-实时行情 https://quote.eastmoney.com/center/gridlist.html#hs_a_board @@ -308,6 +393,70 @@ def code_id_map_em_older() -> dict: @lru_cache() def code_id_map_em() -> dict: + url = "http://80.push2.eastmoney.com/api/qt/clist/get" + pz = 200 # 固定每页 200 条 + pn = 1 # 初始页码 + pn_max = 10000 # 预设一个较大的初始值 + + params = { + "pn": str(pn), + "pz": str(pz), + "po": "1", + "np": "1", + "ut": "bd1d9ddb04089700cf9c27f6f7426281", + "fltt": "2", + "invt": "2", + "fid": "f3", + "fs": "", + "fields": "f12,f13", + "_": "1623833739532", + } + + market_fs = { + "china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048", + "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2", + "us": "m:105,m:106,m:107" + } + + code_id_dict = {} + + for market_id, fs in market_fs.items(): + params["fs"] = fs + pn = 1 # 每个市场都从第一页开始 + total = 0 + fetched_cnt = 0 + while pn <= pn_max: + params["pn"] = str(pn) + data_json = fetch_with_retries_em(url, params) + + if not data_json or "data" not in data_json or "diff" not in data_json["data"]: + print(f"市场 {market_id} 数据获取失败或为空,跳过。") + break + + temp_df = pd.DataFrame(data_json["data"]["diff"]) + temp_df["market_id"] = 1 + + # 处理 total 以计算 pn_max + if pn == 1 and "total" in data_json["data"]: + total = int(data_json["data"]["total"]) + pn_max = (total // pz) + 1 # 计算最大页数 + print(f"市场 {market_id} 总数据量: {total}, 需要页数: {pn_max}") + + # 按 f13 进行分组并存入字典 + grouped = temp_df.groupby('f13') + for id, group in grouped: + code_id_dict.update(dict.fromkeys(group["f12"], id)) + fetched_cnt += len(group) + # print(f'获取 {market_id} 股票列表,f13: {id}, 股票数: {len(group)}, 已获取总股票数: {fetched_cnt}, 总股票数: {total}') + + pn += 1 # 翻页继续 + + print(f'获取 {market_id} 已获取总股票数: {fetched_cnt}, 总股票数: {total}') + + return code_id_dict + +@lru_cache() +def code_id_map_em2() -> dict: """ 东方财富-股票和市场代码 http://quote.eastmoney.com/center/gridlist.html#hs_a_board diff --git a/stockapp/src/stat_growth_em.py b/stockapp/src/stat_growth_em.py index 136fd99..b9996d8 100644 --- a/stockapp/src/stat_growth_em.py +++ b/stockapp/src/stat_growth_em.py @@ -32,6 +32,8 @@ config.setup_logging() current_date = datetime.now().strftime("%Y%m%d") current_year = datetime.now().strftime("%Y") +res_dir = config.global_stock_data_dir + # 刷新代码列表,并返回 def flush_code_map(): code_id_map_em_df = his_em.code_id_map_em() @@ -99,20 +101,30 @@ def load_index_codes(): conn = pymysql.connect(**config.db_config) cursor = conn.cursor(pymysql.cursors.DictCursor) #沪深300 - #cursor.execute("SELECT code, code_name FROM hs_index where index_code='000300' ") + #cursor.execute("SELECT code, code_name FROM index_hs where index_code='000300' ") #中证A500 - #cursor.execute("SELECT code, code_name FROM hs_index where index_code='000510' ") + #cursor.execute("SELECT code, code_name FROM index_hs where index_code='000510' ") #沪深300和中证A500的并集,去重 - #cursor.execute("SELECT DISTINCT CONCAT('index-', code) as code, code_name FROM hs_index where index_code IN ('000300', '000510') ") + #cursor.execute("SELECT DISTINCT CONCAT('index-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510') ") #沪深300和中证A500的合并,不去重 - #cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM hs_index where index_code IN ('000300', '000510') ") + cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510') ") #沪深300、中证A500、中证A50、科创芯片、科创创业50,不去重 - cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM hs_index where index_code IN ('000300', '000510', '930050', '000685', '931643') ") + #cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510', '930050', '000685', '931643') ") hs300_data = cursor.fetchall() + + #港股国企指数成分股、恒生科技指数成分股等 + cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hk where index_code IN ('HSCEI', 'HSTECH') ") + hk_data = cursor.fetchall() + + #美股中概股等 + cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_us where index_code IN ('CN_US') ") + us_data = cursor.fetchall() + + cursor.close() conn.close() - return hs300_data + return hs300_data + hk_data + us_data # 读取富途自选股的指定分类股 def load_futu_all_codes(): @@ -172,7 +184,14 @@ def calculate_stock_statistics(market, code, code_name): logging.warning(f"{code}, {code_name} 未找到0923的数据, 以 {defaut_row['日期']} 的数据来代替") row_0923 = defaut_row - # 获取1008收盘价 + # 获取0930收盘价 + try: + row_0930 = data[data['日期'] == '2024-09-30'].iloc[0] + except IndexError: + logging.warning(f"{code}, {code_name} 未找到0930的数据, 以 {defaut_row['日期']} 的数据来代替") + row_0930 = defaut_row + + # 获取1008开盘价、收盘价 try: row_1008 = data[data['日期'] == '2024-10-08'].iloc[0] except IndexError: @@ -198,6 +217,7 @@ def calculate_stock_statistics(market, code, code_name): try: year_increase = (current_row['收盘'] / year_begin_row['收盘'] - 1) growth_0923 = (current_row['收盘'] / row_0923['收盘'] - 1) + growth_0930 = (current_row['收盘'] / row_0930['收盘'] - 1) growth_1008 = (current_row['收盘'] / row_1008['收盘'] - 1) growth_1008_open = (current_row['收盘'] / row_1008['开盘'] - 1) year_amplitude = (year_max_row['收盘'] / year_min_row['收盘'] - 1) @@ -215,6 +235,7 @@ def calculate_stock_statistics(market, code, code_name): current_row['日期'], current_row['收盘'], year_begin_row['日期'], year_begin_row['收盘'], row_0923['日期'], row_0923['收盘'] , + row_0930['日期'], row_0930['收盘'] , row_1008['日期'], row_1008['开盘'] ,row_1008['收盘'] , max_close_row['日期'], max_close_row['收盘'], min_close_row['日期'], min_close_row['收盘'], @@ -222,6 +243,7 @@ def calculate_stock_statistics(market, code, code_name): year_min_row['日期'], year_min_row['收盘'], year_increase, growth_0923 if growth_0923 is not None else 'N/A', + growth_0930 if growth_0930 is not None else 'N/A', growth_1008 if growth_1008 is not None else 'N/A', growth_1008_open if growth_1008_open is not None else 'N/A', year_amplitude, @@ -235,16 +257,18 @@ def calculate_stock_statistics(market, code, code_name): return None # 写入到文件中 -def write_to_csv(results, filename='../stock_statistics.csv'): +def write_to_csv(results, filename): """将所有结果写入CSV文件""" try: with open(filename, mode='w', newline='', encoding='utf-8') as file: writer = csv.writer(file) # 写入表头 writer.writerow([ - "股市", "股票代码", "股票名称", "当前日期", "当前收盘", "年初日期", "年初收盘", "0923日期", "0923收盘", "1008日期", "1008开盘", "1008收盘", - "最高日期", "最高收盘", "最低日期", "最低收盘", "年内最高日期", "年内最高收盘", "年内最低日期", "年内最低收盘", "年内涨幅", "相比0923收盘价涨幅", - "相比1008收盘价涨幅", "相比1008开盘价涨幅", "年内振幅", "最大振幅", "股价自最高点恢复", "市盈率TTM", "市净率", "总市值" + "股市", "股票代码", "股票名称", "当前日期", "当前收盘", "年初日期", "年初收盘", + "0923日期", "0923收盘", "0930日期", "0930收盘", "1008日期", "1008开盘", "1008收盘", + "最高日期", "最高收盘", "最低日期", "最低收盘", "年内最高日期", "年内最高收盘", "年内最低日期", "年内最低收盘", "年内涨幅", + "相比0923收盘价涨幅", "相比0930收盘价涨幅", "相比1008收盘价涨幅", "相比1008开盘价涨幅", + "年内振幅", "最大振幅", "股价自最高点恢复", "市盈率TTM", "市净率", "总市值" ]) # 写入每行数据 for result in results: @@ -273,7 +297,7 @@ def main(list, debug): if snap_data.empty: logging.error(f"fetching snapshot data error!") return - file_name = f'../result/snapshot_em_{current_date}.csv' + file_name = f'{res_dir}/snapshot_em_{current_date}.csv' snap_data.to_csv(file_name, index=False, encoding='utf-8') logging.info(f"市场快照数据已经写入 CSV 文件 {file_name}\n\n") @@ -304,7 +328,7 @@ def main(list, debug): break if all_results: - file_name = f'../result/stock_statistics_{list}_{current_date}' + file_name = f'{res_dir}/stock_statistics_{list}_{current_date}' if debug: file_name = f'{file_name}_debug' file_name = f'{file_name}.csv' @@ -321,4 +345,5 @@ if __name__ == "__main__": args = parser.parse_args() # 调用主函数 + #flush_code_map() main(args.list, args.debug) \ No newline at end of file