diff --git a/src/crawling/stock_hist_em.py b/src/crawling/stock_hist_em.py index e3c75b5..777271a 100644 --- a/src/crawling/stock_hist_em.py +++ b/src/crawling/stock_hist_em.py @@ -33,10 +33,12 @@ def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048', p 东方财富网-沪深京 A 股-实时行情 https://quote.eastmoney.com/center/gridlist.html#hs_a_board """ - url = "http://82.push2.eastmoney.com/api/qt/clist/get" + #url = "http://82.push2.eastmoney.com/api/qt/clist/get" + url = "https://82.push2.eastmoney.com/api/qt/clist/get" pn = 1 # 初始页数 pn_max = 10000 # 设定初始最大页数 all_data = [] + total_lines = 0 while pn <= pn_max: params = { @@ -47,7 +49,7 @@ def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048', p "ut": "bd1d9ddb04089700cf9c27f6f7426281", "fltt": "2", "invt": "2", - "fid": "f3", + "fid": "f12", "fs": fs, "fields": "f2,f3,f4,f5,f6,f7,f8,f9,f10,f11,f12,f14,f15,f16,f17,f18,f20,f21,f22,f23,f24,f25,f26,f37,f38,f39,f40,f41,f45,f46,f48,f49,f57,f61,f100,f112,f113,f114,f115,f221,f13", "_": "1623833739532", @@ -65,13 +67,15 @@ def stock_zh_a_spot_em(fs='m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048', p # 获取 total 数据来更新 pn_max if pn == 1: + total_lines = data_json["data"].get("total", 0) pn_max = (data_json["data"].get("total", 0) + pz - 1) // pz logging.info(f"market: {fs_desc}, total data lines: {data_json['data'].get('total', 0)}, total pages: {pn_max}, curr lines: {len(diff_data)}, page size: {pz}") pn += 1 time.sleep(0.5) # 防止请求过快 - if not all_data: + if not all_data or total_lines < 1 or len(all_data) / total_lines < 0.95: + logging.warning(f"market: {fs_desc}, fetched data lines: {len(all_data)} is less than 95% of total lines: {total_lines}.") return pd.DataFrame() temp_df = pd.DataFrame(all_data) diff --git a/src/static/daily_snap_em.py b/src/static/daily_snap_em.py index 9915f14..ca9fda6 100644 --- a/src/static/daily_snap_em.py +++ b/src/static/daily_snap_em.py @@ -77,6 +77,7 @@ def fetch_snap_all(market_id, trading_date) -> pd.DataFrame: logging.error(f"未找到市场 {market_id} 的数据源配置,请检查 market_fs 配置") return result + # 获取数据,并保证完整性(获取的数据行数不少于总行数的95%) df = his_em.stock_zh_a_spot_em(fs, fs_desc=market_id) if df.empty: logging.warning(f'{market_id} empty data. please check.') @@ -251,18 +252,19 @@ def main(list, args_debug, notify): continue # 获取快照数据 - snap_data = fetch_snap_all(market_id, trading_date) - if snap_data.empty: - logging.error(f"未获取到 {market_id} 市场的快照数据") - continue - if snap_data.empty: - logging.error(f"fetching snapshot data error for {market_id}!") - continue + while True: + snap_data = fetch_snap_all(market_id, trading_date) + if snap_data.empty: + logging.error(f"未获取到 {market_id} 市场的快照数据, 1分钟后重试...\n\n") + time.sleep(60) + continue + else: + break insert_stock_data_to_db(dataframe=snap_data) logging.info(f"成功获取 {market_id} 市场的快照数据,记录数: {len(snap_data)}") if notify: - send_to_wecom(f"fetched {market_id} snap data, counts: {len(snap_data)}") + send_to_wecom(f"fetched [{market_id}] snap data, counts: {len(snap_data)}") em_code_map.update({row['代码']: row['代码前缀'] for _, row in snap_data.iterrows()}) time.sleep(5)