From ddaa568380917351526b42e3a174a9b2a100cc8f Mon Sep 17 00:00:00 2001
From: oscar <oscar@easyprompt8.com>
Date: Sun, 2 Mar 2025 17:03:03 +0800
Subject: [PATCH] modify stockapp.em scripts.

---
 .gitignore                             |  20 ++++
 gitignore                              |  28 +++++
 stockapp/src/config.py                 |   5 +-
 stockapp/src/crawling/stock_hist_em.py | 151 ++++++++++++++++++++++++-
 stockapp/src/stat_growth_em.py         |  51 ++++++---
 5 files changed, 240 insertions(+), 15 deletions(-)
 create mode 100644 gitignore

diff --git a/.gitignore b/.gitignore
index baf1067..a57550b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,5 +1,25 @@
 # 忽略 log 目录
 log/
+scripts/aabook/log/
+scripts/aabook/local/
+scripts/aabook/data/
+scripts/u9a9/torrents/
+scripts/u9a9/log/
+scripts/javdb/log/
+scripts/javhd/result/tmp/
+scripts/javhd/log/
+scripts/iafd/data/tmp/
+scripts/iafd/result/tmp/
+scripts/iafd/result/bak/
+scripts/iafd/result/performers/
+scripts/iafd/log/
+scripts/thelordofporn/log/
+scripts/vixen_group/log/
+scripts/pornhub/log/
+
+stockapp/data/
+stockapp/log/
+stockapp/result/
 
 # 忽略 Python 编译文件
 *.pyc
diff --git a/gitignore b/gitignore
new file mode 100644
index 0000000..a57550b
--- /dev/null
+++ b/gitignore
@@ -0,0 +1,28 @@
+# 忽略 log 目录
+log/
+scripts/aabook/log/
+scripts/aabook/local/
+scripts/aabook/data/
+scripts/u9a9/torrents/
+scripts/u9a9/log/
+scripts/javdb/log/
+scripts/javhd/result/tmp/
+scripts/javhd/log/
+scripts/iafd/data/tmp/
+scripts/iafd/result/tmp/
+scripts/iafd/result/bak/
+scripts/iafd/result/performers/
+scripts/iafd/log/
+scripts/thelordofporn/log/
+scripts/vixen_group/log/
+scripts/pornhub/log/
+
+stockapp/data/
+stockapp/log/
+stockapp/result/
+
+# 忽略 Python 编译文件
+*.pyc
+
+# 忽略环境配置文件
+.env
\ No newline at end of file
diff --git a/stockapp/src/config.py b/stockapp/src/config.py
index ced8921..0abdf5b 100644
--- a/stockapp/src/config.py
+++ b/stockapp/src/config.py
@@ -6,7 +6,7 @@ from pathlib import Path
 
 # MySQL 配置
 db_config = {
-    'host': '172.18.0.3',
+    'host': '172.18.0.5',
     'user': 'root',
     'password': 'mysqlpw',
     'database': 'stockdb'
@@ -14,6 +14,9 @@ db_config = {
 
 log_dir_prefix = '../log'
 
+global_share_data_dir = '/root/sharedata'
+global_stock_data_dir = '/root/hostdir/stock_data'
+
 # 获取log目录
 def get_log_directory():
     """
diff --git a/stockapp/src/crawling/stock_hist_em.py b/stockapp/src/crawling/stock_hist_em.py
index 7f8e429..72eaec9 100644
--- a/stockapp/src/crawling/stock_hist_em.py
+++ b/stockapp/src/crawling/stock_hist_em.py
@@ -6,11 +6,96 @@ Desc: 东方财富网-行情首页-沪深京 A 股
 """
 import requests
 import pandas as pd
+import time
 
 from functools import lru_cache
 
 
-def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048') -> pd.DataFrame:
+def fetch_with_retries_em(url, params, max_retries=3, delay=2):
+    """GET `url` with `params` (5 s timeout); return the decoded JSON body, or None once `max_retries` attempts fail."""
+    for attempt in range(max_retries):
+        try:
+            response = requests.get(url, params=params, timeout=5)
+            response.raise_for_status()
+            return response.json()
+        except (requests.RequestException, ValueError) as e:  # ValueError: non-JSON body on older requests releases
+            print(f"请求失败，第 {attempt + 1} 次重试: {e}")
+            time.sleep(delay if attempt + 1 < max_retries else 0)  # no point waiting after the final attempt
+    return None
+
+def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048', pz=200) -> pd.DataFrame:
+    """
+    East Money real-time quotes for the `fs` market filter, fetched `pz` rows per page; empty DataFrame if nothing was fetched.
+    https://quote.eastmoney.com/center/gridlist.html#hs_a_board
+    """
+    url = "http://82.push2.eastmoney.com/api/qt/clist/get"
+    pn = 1  # current page number (1-based)
+    pn_max = 10000  # placeholder upper bound until the first response reports `total`
+    all_data = []
+
+    while pn <= pn_max:
+        params = {
+            "pn": str(pn),
+            "pz": str(pz),
+            "po": "1",
+            "np": "1",
+            "ut": "bd1d9ddb04089700cf9c27f6f7426281",
+            "fltt": "2",
+            "invt": "2",
+            "fid": "f3",
+            "fs": fs,
+            "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221",
+            "_": "1623833739532",
+        }
+
+        data_json = fetch_with_retries_em(url, params)
+        if not data_json or not data_json.get("data") or "diff" not in data_json["data"]:  # also covers "data": null
+            break
+
+        diff_data = data_json["data"]["diff"]
+        if not diff_data:
+            break
+
+        all_data.extend(diff_data)
+
+        # The first page reports `total`; derive the real page count from it (ceiling division)
+        if pn == 1:
+            pn_max = (data_json["data"].get("total", 0) + pz - 1) // pz
+
+        pn += 1
+        time.sleep(0.5)  # throttle so requests are not sent too quickly
+
+    if not all_data:
+        return pd.DataFrame()
+
+    temp_df = pd.DataFrame(all_data)
+    column_map = {
+        "f2": "最新价", "f3": "涨跌幅", "f4": "涨跌额", "f5": "成交量", "f6": "成交额", "f7": "振幅", "f8": "换手率",
+        "f9": "市盈率动", "f10": "量比", "f11": "5分钟涨跌", "f12": "代码", "f14": "名称", "f15": "最高", "f16": "最低",
+        "f17": "今开", "f18": "昨收", "f20": "总市值", "f21": "流通市值", "f22": "涨速", "f23": "市净率", "f24": "60日涨跌幅",
+        "f25": "年初至今涨跌幅", "f26": "上市时间", "f37": "加权净资产收益率", "f38": "总股本", "f39": "已流通股份",
+        "f40": "营业收入", "f41": "营业收入同比增长", "f45": "归属净利润", "f46": "归属净利润同比增长", "f48": "每股未分配利润",
+        "f49": "毛利率", "f57": "资产负债率", "f61": "每股公积金", "f100": "所处行业", "f112": "每股收益", "f113": "每股净资产",
+        "f114": "市盈率静", "f115": "市盈率TTM", "f221": "报告期"
+    }
+    temp_df.rename(columns=column_map, inplace=True)
+
+    numeric_columns = [
+        "最新价", "涨跌幅", "涨跌额", "成交量", "成交额", "振幅", "换手率", "量比", "今开", "最高", "最低", "昨收", "涨速", "5分钟涨跌", "60日涨跌幅",
+        "年初至今涨跌幅", "市盈率动", "市盈率TTM", "市盈率静", "市净率", "每股收益", "每股净资产", "每股公积金", "每股未分配利润",
+        "加权净资产收益率", "毛利率", "资产负债率", "营业收入", "营业收入同比增长", "归属净利润", "归属净利润同比增长", "总股本", "已流通股份",
+        "总市值", "流通市值"
+    ]
+    for col in numeric_columns:
+        temp_df[col] = pd.to_numeric(temp_df[col], errors="coerce")  # unparseable values become NaN
+
+    temp_df["报告期"] = pd.to_datetime(temp_df["报告期"], format='%Y%m%d', errors="coerce")
+    temp_df["上市时间"] = pd.to_datetime(temp_df["上市时间"], format='%Y%m%d', errors="coerce")
+
+    return temp_df
+
+
+def stock_zh_a_spot_em_old(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048') -> pd.DataFrame:
     """
     东方财富网-沪深京 A 股-实时行情
     https://quote.eastmoney.com/center/gridlist.html#hs_a_board
@@ -308,6 +393,70 @@ def code_id_map_em_older() -> dict:
 
 @lru_cache()
 def code_id_map_em() -> dict:
+    """Build a {f12: f13} dict (presumably stock code -> market id; confirm) over the A-share, HK and US lists.
+    Pages through the clist endpoint `pz` rows at a time for each market; lru_cache memoizes the returned dict."""
+    url = "http://80.push2.eastmoney.com/api/qt/clist/get"
+    pz = 200  # fixed page size
+
+    params = {
+        "pn": "1",  # overwritten for every page in the loop below
+        "pz": str(pz),
+        "po": "1",
+        "np": "1",
+        "ut": "bd1d9ddb04089700cf9c27f6f7426281",
+        "fltt": "2",
+        "invt": "2",
+        "fid": "f3",
+        "fs": "",
+        "fields": "f12,f13",
+        "_": "1623833739532",
+    }
+
+    market_fs = {
+        "china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
+        "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
+        "us": "m:105,m:106,m:107"
+    }
+
+    code_id_dict = {}
+
+    for market_id, fs in market_fs.items():
+        params["fs"] = fs
+        pn, pn_max = 1, 10000  # restart paging per market so a previous market's page count never leaks in
+        total = 0
+        fetched_cnt = 0
+        while pn <= pn_max:
+            params["pn"] = str(pn)
+            data_json = fetch_with_retries_em(url, params)
+
+            if not data_json or not data_json.get("data") or not data_json["data"].get("diff"):  # null / missing / empty
+                print(f"市场 {market_id} 数据获取失败或为空，跳过。")
+                break
+
+            # `diff` holds this page's rows; only f12 and f13 are requested via `fields`
+            temp_df = pd.DataFrame(data_json["data"]["diff"])
+
+            # The first page reports `total`; derive the page count from it
+            if pn == 1 and "total" in data_json["data"]:
+                total = int(data_json["data"]["total"])
+                pn_max = (total + pz - 1) // pz  # ceiling division: no extra request when total is a multiple of pz
+                print(f"市场 {market_id} 总数据量: {total}, 需要页数: {pn_max}")
+
+            # Group the page by f13 and record every f12 of a group under that f13 value
+            grouped = temp_df.groupby('f13')
+            for f13_value, group in grouped:
+                code_id_dict.update(dict.fromkeys(group["f12"], f13_value))
+                fetched_cnt += len(group)
+                # an f12 seen again on a later page or market overwrites the earlier entry
+
+            pn += 1  # next page
+
+        print(f'获取 {market_id} 已获取总股票数: {fetched_cnt}, 总股票数: {total}')
+
+    return code_id_dict
+
+@lru_cache()
+def code_id_map_em2() -> dict:
     """
     东方财富-股票和市场代码
     http://quote.eastmoney.com/center/gridlist.html#hs_a_board
diff --git a/stockapp/src/stat_growth_em.py b/stockapp/src/stat_growth_em.py
index 136fd99..b9996d8 100644
--- a/stockapp/src/stat_growth_em.py
+++ b/stockapp/src/stat_growth_em.py
@@ -32,6 +32,8 @@ config.setup_logging()
 current_date = datetime.now().strftime("%Y%m%d")
 current_year = datetime.now().strftime("%Y")
 
+res_dir = config.global_stock_data_dir
+
 # 刷新代码列表，并返回
 def flush_code_map():
     code_id_map_em_df = his_em.code_id_map_em()
@@ -99,20 +101,30 @@ def load_index_codes():
     conn = pymysql.connect(**config.db_config)
     cursor = conn.cursor(pymysql.cursors.DictCursor)
     #沪深300
-    #cursor.execute("SELECT code, code_name FROM hs_index where index_code='000300' ")
+    #cursor.execute("SELECT code, code_name FROM index_hs where index_code='000300' ")
     #中证A500
-    #cursor.execute("SELECT code, code_name FROM hs_index where index_code='000510' ")
+    #cursor.execute("SELECT code, code_name FROM index_hs where index_code='000510' ")
     #沪深300和中证A500的并集，去重
-    #cursor.execute("SELECT DISTINCT CONCAT('index-', code) as code, code_name FROM hs_index where index_code IN ('000300', '000510') ")
+    #cursor.execute("SELECT DISTINCT CONCAT('index-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510') ")
     #沪深300和中证A500的合并，不去重
-    #cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM hs_index where index_code IN ('000300', '000510') ")
+    cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510') ")
     #沪深300、中证A500、中证A50、科创芯片、科创创业50，不去重
-    cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM hs_index where index_code IN ('000300', '000510', '930050', '000685', '931643') ")
+    #cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hs where index_code IN ('000300', '000510', '930050', '000685', '931643') ")
 
     hs300_data = cursor.fetchall()
+
+    #港股国企指数成分股、恒生科技指数成分股等
+    cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_hk where index_code IN ('HSCEI', 'HSTECH') ")
+    hk_data = cursor.fetchall()
+
+    #美股中概股等
+    cursor.execute("SELECT DISTINCT CONCAT(index_code , '-', code) as code, code_name FROM index_us where index_code IN ('CN_US') ")
+    us_data = cursor.fetchall()
+
+
     cursor.close()
     conn.close()
-    return hs300_data
+    return hs300_data + hk_data + us_data
 
 # 读取富途自选股的指定分类股
 def load_futu_all_codes():
@@ -172,7 +184,14 @@ def calculate_stock_statistics(market, code, code_name):
             logging.warning(f"{code}, {code_name} 未找到0923的数据, 以 {defaut_row['日期']} 的数据来代替")
             row_0923 = defaut_row
 
-        # 获取1008收盘价
+        # 获取0930收盘价
+        try:
+            row_0930 = data[data['日期'] == '2024-09-30'].iloc[0]
+        except IndexError:
+            logging.warning(f"{code}, {code_name} 未找到0930的数据, 以 {defaut_row['日期']} 的数据来代替")
+            row_0930 = defaut_row
+
+        # 获取1008开盘价、收盘价
         try:
             row_1008 = data[data['日期'] == '2024-10-08'].iloc[0]
         except IndexError:
@@ -198,6 +217,7 @@ def calculate_stock_statistics(market, code, code_name):
         try:
             year_increase = (current_row['收盘'] / year_begin_row['收盘'] - 1)
             growth_0923 = (current_row['收盘'] / row_0923['收盘'] - 1) 
+            growth_0930 = (current_row['收盘'] / row_0930['收盘'] - 1) 
             growth_1008 = (current_row['收盘'] / row_1008['收盘'] - 1) 
             growth_1008_open = (current_row['收盘'] / row_1008['开盘'] - 1) 
             year_amplitude = (year_max_row['收盘'] / year_min_row['收盘'] - 1)
@@ -215,6 +235,7 @@ def calculate_stock_statistics(market, code, code_name):
             current_row['日期'], current_row['收盘'],
             year_begin_row['日期'], year_begin_row['收盘'],
             row_0923['日期'], row_0923['收盘'] ,
+            row_0930['日期'], row_0930['收盘'] ,
             row_1008['日期'], row_1008['开盘'] ,row_1008['收盘'] ,
             max_close_row['日期'], max_close_row['收盘'],
             min_close_row['日期'], min_close_row['收盘'],
@@ -222,6 +243,7 @@ def calculate_stock_statistics(market, code, code_name):
             year_min_row['日期'], year_min_row['收盘'],
             year_increase,
             growth_0923 if growth_0923 is not None else 'N/A',
+            growth_0930 if growth_0930 is not None else 'N/A',
             growth_1008 if growth_1008 is not None else 'N/A',
             growth_1008_open if growth_1008_open is not None else 'N/A',
             year_amplitude,
@@ -235,16 +257,18 @@ def calculate_stock_statistics(market, code, code_name):
         return None
 
 # 写入到文件中
-def write_to_csv(results, filename='../stock_statistics.csv'):
+def write_to_csv(results, filename):
     """将所有结果写入CSV文件"""
     try:
         with open(filename, mode='w', newline='', encoding='utf-8') as file:
             writer = csv.writer(file)
             # 写入表头
             writer.writerow([
-                "股市", "股票代码", "股票名称", "当前日期", "当前收盘", "年初日期", "年初收盘", "0923日期", "0923收盘", "1008日期", "1008开盘", "1008收盘",
-                "最高日期", "最高收盘", "最低日期", "最低收盘", "年内最高日期", "年内最高收盘", "年内最低日期", "年内最低收盘", "年内涨幅", "相比0923收盘价涨幅",
-                "相比1008收盘价涨幅", "相比1008开盘价涨幅", "年内振幅", "最大振幅", "股价自最高点恢复", "市盈率TTM", "市净率", "总市值"
+                "股市", "股票代码", "股票名称", "当前日期", "当前收盘", "年初日期", "年初收盘", 
+                "0923日期", "0923收盘", "0930日期", "0930收盘", "1008日期", "1008开盘", "1008收盘",
+                "最高日期", "最高收盘", "最低日期", "最低收盘", "年内最高日期", "年内最高收盘", "年内最低日期", "年内最低收盘", "年内涨幅", 
+                "相比0923收盘价涨幅", "相比0930收盘价涨幅", "相比1008收盘价涨幅", "相比1008开盘价涨幅", 
+                "年内振幅", "最大振幅", "股价自最高点恢复", "市盈率TTM", "市净率", "总市值"
             ])
             # 写入每行数据
             for result in results:
@@ -273,7 +297,7 @@ def main(list, debug):
     if snap_data.empty:
         logging.error(f"fetching snapshot data error!")
         return
-    file_name = f'../result/snapshot_em_{current_date}.csv'
+    file_name = f'{res_dir}/snapshot_em_{current_date}.csv'
     snap_data.to_csv(file_name, index=False, encoding='utf-8')
     logging.info(f"市场快照数据已经写入 CSV 文件 {file_name}\n\n")
 
@@ -304,7 +328,7 @@ def main(list, debug):
             break
     
     if all_results:
-        file_name = f'../result/stock_statistics_{list}_{current_date}'
+        file_name = f'{res_dir}/stock_statistics_{list}_{current_date}'
         if debug:
             file_name = f'{file_name}_debug'
         file_name = f'{file_name}.csv'
@@ -321,4 +345,5 @@ if __name__ == "__main__":
     args = parser.parse_args()
     
     # 调用主函数
+    #flush_code_map()
     main(args.list, args.debug)
\ No newline at end of file