modify scripts
This commit is contained in:
@ -18,6 +18,7 @@ import pymysql
|
||||
import logging
|
||||
import csv
|
||||
import os
|
||||
import re
|
||||
import time
|
||||
from datetime import datetime
|
||||
from futu import OpenQuoteContext, RET_OK # Futu API client
|
||||
@ -26,6 +27,7 @@ import argparse
|
||||
import src.crawling.stock_hist_em as his_em
|
||||
import src.logger.logger as logger
|
||||
import src.config.config as config
|
||||
from src.crawler.zixuan.xueqiu_zixuan import XueQiuStockFetcher
|
||||
|
||||
# 配置日志
|
||||
logger.setup_logging()
|
||||
@ -41,58 +43,36 @@ def flush_code_map():
|
||||
print(code_id_map_em_df)
|
||||
return code_id_map_em_df
|
||||
|
||||
# 获取历史K线,如果失败,就重试
|
||||
def fetch_with_retry(code: str, s_date, e_date, adjust: str = '', max_retries: int = 3) -> pd.DataFrame:
    """Fetch daily K-line history for ``code``, retrying on failure.

    Calls ``his_em.stock_zh_a_hist`` with ``period="daily"`` up to
    ``max_retries`` times. An attempt counts as failed when the call raises
    or returns an empty frame; failed attempts are logged and followed by a
    3 second pause (skipped after the final attempt, since nothing follows).

    Args:
        code: Symbol, passed through as ``symbol``.
        s_date: Start date, passed through as ``start_date``.
        e_date: End date, passed through as ``end_date``.
        adjust: Adjustment flag, passed through unchanged.
        max_retries: Maximum number of attempts. Values <= 0 make no call.

    Returns:
        The first non-empty DataFrame obtained, or an empty DataFrame when
        every attempt failed.
    """
    for attempt in range(1, max_retries + 1):
        try:
            df = his_em.stock_zh_a_hist(
                symbol=code,
                period="daily",
                start_date=s_date,
                end_date=e_date,
                adjust=adjust,
            )
        except Exception as e:
            # Best-effort fetch: keep retrying, but leave a trace of the
            # failure instead of swallowing it silently.
            logging.warning(f'{code} fetch failed (attempt {attempt}/{max_retries}): {e}')
        else:
            if not df.empty:
                return df
            logging.info(f'{code} empty data. retry...')

        if attempt < max_retries:
            time.sleep(3)  # pause before the next attempt

    return pd.DataFrame()
|
||||
|
||||
# 获取所有市场的当年股价快照,带重试机制。
|
||||
def fetch_snap_all(max_retries: int = 3) -> pd.DataFrame:
    """Return today's stock snapshot for all markets, using a local CSV cache.

    If ``{res_dir}/snapshot_em_{current_date}.csv`` exists and can be read,
    its contents are returned. Otherwise each market filter in ``market_fs``
    is fetched through ``his_em.stock_zh_a_spot_em`` (up to ``max_retries``
    attempts per market, 3 seconds apart), the frames are concatenated,
    written to the cache file and returned.

    Args:
        max_retries: Maximum fetch attempts per market.

    Returns:
        The combined snapshot, or an empty DataFrame as soon as any market
        yields no data after all attempts.
    """
    file_name = f'{res_dir}/snapshot_em_{current_date}.csv'
    if os.path.exists(file_name):
        try:
            # Keep stock codes as text so numeric-looking codes do not lose
            # leading zeros on the CSV round-trip; callers use '代码' as a
            # string key.
            snap_data = pd.read_csv(file_name, encoding='utf-8', dtype={'代码': str})
        except Exception as e:
            logging.warning(f"读取本地文件失败: {e},将重新拉取数据\n\n")
        else:
            logging.info(f"load snapshot data from local: {file_name}\n\n")
            return snap_data

    # Market name -> filter string handed to the snapshot fetcher.
    market_fs = {"china_a": "m:0 t:6,m:0 t:80,m:1 t:2,m:1 t:23,m:0 t:81 s:2048",
                 "hk": "m:128 t:3,m:128 t:4,m:128 t:1,m:128 t:2",
                 "us": "m:105,m:106,m:107"}

    frames = []
    for market_id, fs in market_fs.items():
        df = pd.DataFrame()
        for attempt in range(1, max_retries + 1):
            try:
                # NOTE(review): assumes stock_zh_a_spot_em accepts fs_desc,
                # as the later call in the original block does - confirm.
                df = his_em.stock_zh_a_spot_em(fs, fs_desc=market_id)
            except Exception as e:
                logging.warning(f'{market_id} fetching error (attempt {attempt}/{max_retries}): {e}')
                df = pd.DataFrame()
            if not df.empty:
                break
            logging.warning(f'{market_id} empty data. retry...')
            if attempt < max_retries:
                time.sleep(3)  # pause before the next attempt

        if df.empty:
            # A partial snapshot is not useful to callers; give up entirely.
            logging.warning(f'{market_id} empty data. please check.')
            return pd.DataFrame()

        logging.info(f'get {market_id} stock snapshot. stock count: {len(df)}')
        frames.append(df)  # each market is appended exactly once

    result = pd.concat(frames, ignore_index=True)
    result.to_csv(file_name, index=False, encoding='utf-8')
    logging.info(f"get snapshot data and write to file: {file_name}\n\n")

    return result
|
||||
|
||||
@ -127,6 +107,55 @@ def load_index_codes():
|
||||
conn.close()
|
||||
return hs300_data + hk_data + us_data
|
||||
|
||||
def format_stock_code(code):
    """Insert a dot between the letter prefix and the digits of a stock code.

    For example ``"SZ300750"`` becomes ``"SZ.300750"``. A code that is not
    one or more ASCII letters followed by one or more digits (already
    dotted, letters only, digits only, empty, ...) is returned unchanged.
    """
    parsed = re.match(r'^([A-Za-z]+)(\d+)$', code)
    if parsed is None:
        # Not in letters-then-digits form: hand the input back as-is.
        return code

    prefix, digits = parsed.groups()
    return f"{prefix}.{digits}"
|
||||
|
||||
def load_xueqiu_codes():
    """Load the Xueqiu watch-list and return it as market-prefixed codes.

    The login cookie is read from the ``XUEQIU_COOKIES`` environment
    variable instead of being embedded in the source: a session cookie is a
    credential and must not be committed to the repository.

    Each stock returned by ``XueQiuStockFetcher.get_stocks_by_group`` is
    mapped by its ``marketplace`` field: ``cn`` codes go through
    ``format_stock_code``, ``hk`` codes get an ``HK.`` prefix, any other
    non-empty marketplace gets ``US.``, and an empty marketplace leaves the
    symbol untouched.

    Returns:
        A list of ``{'code': ..., 'code_name': ...}`` dicts. The list is
        empty when no cookie is configured or the fetcher returns nothing.
    """
    user_cookies = os.environ.get("XUEQIU_COOKIES", "").strip()
    if not user_cookies:
        logging.error("XUEQIU_COOKIES is not set; cannot load xueqiu watch-list.")
        return []

    fetcher = XueQiuStockFetcher(
        cookies=user_cookies,
        size=1000,
        retry_count=3,
    )
    stocks = fetcher.get_stocks_by_group(
        category=1,  # stocks
        pid=-1,  # all groups
    )
    if not stocks:
        return []

    all_codes = []
    for item in stocks:
        code = item['symbol']
        mkt = item['marketplace']
        market = mkt.lower() if mkt else ''

        if market == 'cn':
            code = format_stock_code(code)
        elif market == 'hk':
            code = f"HK.{code}"
        elif market:
            # Every other known marketplace is treated as US.
            code = f"US.{code}"

        all_codes.append({'code': code, 'code_name': item['name']})

    return all_codes
|
||||
|
||||
# 读取富途自选股的指定分类股
|
||||
def load_futu_all_codes():
|
||||
quote_ctx = OpenQuoteContext(host='127.0.0.1', port=11111)
|
||||
@ -143,14 +172,6 @@ def load_futu_all_codes():
|
||||
|
||||
return stock_data
|
||||
|
||||
# 获取特定的行
|
||||
def get_specific_date_row(data, date):
    """Return the first row of ``data`` whose '日期' value equals ``date``.

    ``data`` is iterated in order and each row is indexed with the key
    '日期'. Returns ``None`` when no row matches.
    """
    return next((entry for entry in data if entry['日期'] == date), None)
|
||||
|
||||
# 获取股票数据,并统计收益率
|
||||
def calculate_stock_statistics(market, code, code_name):
|
||||
try:
|
||||
@ -158,10 +179,16 @@ def calculate_stock_statistics(market, code, code_name):
|
||||
last_year = datetime.now().year - 1
|
||||
last_year_str = str(last_year)
|
||||
|
||||
# 获取历史数据
|
||||
data = fetch_with_retry(code, "20210101", current_date, 'qfq')
|
||||
# 调用 stock_zh_a_hist 获取历史数据
|
||||
data = his_em.stock_zh_a_hist_new(
|
||||
em_symbol=code,
|
||||
period="daily",
|
||||
start_date="20210101",
|
||||
end_date=current_date,
|
||||
adjust='qfq',
|
||||
)
|
||||
if data.empty:
|
||||
logging.warning(f'{code}, {code_name} has no data. skipping...')
|
||||
#logging.warning(f'fetch data for {code}, {code_name} failed. skipping...')
|
||||
return None
|
||||
|
||||
# 获取当前日期的股价
|
||||
@ -173,7 +200,7 @@ def calculate_stock_statistics(market, code, code_name):
|
||||
# 获取年初股价,也就是上一年的最后一个交易日的收盘价
|
||||
year_data = data[data['日期'].str.startswith(last_year_str)]
|
||||
if year_data.empty:
|
||||
logging.warning(f"{code}, {code_name} 未找到上一年的数据 ({last_year_str}), 以 {defaut_row['日期']} 的数据来代替")
|
||||
logging.debug(f"{code}, {code_name} 未找到上一年的数据 ({last_year_str}), 以 {defaut_row['日期']} 的数据来代替")
|
||||
year_begin_row = defaut_row
|
||||
else:
|
||||
year_begin_row = year_data.loc[year_data['日期'].idxmax()]
|
||||
@ -182,35 +209,35 @@ def calculate_stock_statistics(market, code, code_name):
|
||||
try:
|
||||
row_0923 = data[data['日期'] == '2024-09-23'].iloc[0]
|
||||
except IndexError:
|
||||
logging.warning(f"{code}, {code_name} 未找到0923的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
logging.debug(f"{code}, {code_name} 未找到0923的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
row_0923 = defaut_row
|
||||
|
||||
# 获取0930收盘价
|
||||
try:
|
||||
row_0930 = data[data['日期'] == '2024-09-30'].iloc[0]
|
||||
except IndexError:
|
||||
logging.warning(f"{code}, {code_name} 未找到0930的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
logging.debug(f"{code}, {code_name} 未找到0930的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
row_0930 = defaut_row
|
||||
|
||||
# 获取1008开盘价、收盘价
|
||||
try:
|
||||
row_1008 = data[data['日期'] == '2024-10-08'].iloc[0]
|
||||
except IndexError:
|
||||
logging.warning(f"{code}, {code_name} 未找到1008的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
logging.debug(f"{code}, {code_name} 未找到1008的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
row_1008 = defaut_row
|
||||
|
||||
# 获取0403收盘价
|
||||
try:
|
||||
row_0403 = data[data['日期'] == '2025-04-03'].iloc[0]
|
||||
except IndexError:
|
||||
logging.warning(f"{code}, {code_name} 未找到0403的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
logging.debug(f"{code}, {code_name} 未找到0403的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
row_0403 = defaut_row
|
||||
|
||||
# 获取0407收盘价
|
||||
try:
|
||||
row_0407 = data[data['日期'] == '2025-04-07'].iloc[0]
|
||||
except IndexError:
|
||||
logging.warning(f"{code}, {code_name} 未找到0407的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
logging.debug(f"{code}, {code_name} 未找到0407的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
row_0407 = defaut_row
|
||||
|
||||
# 获取2021年以来的最高价
|
||||
@ -221,7 +248,7 @@ def calculate_stock_statistics(market, code, code_name):
|
||||
# 获取年内的最高价、最低价
|
||||
year_data = data[data['日期'].str.startswith(current_year)]
|
||||
if year_data.empty:
|
||||
logging.warning(f"{code}, {code_name} 未找到年内的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
logging.debug(f"{code}, {code_name} 未找到年内的数据, 以 {defaut_row['日期']} 的数据来代替")
|
||||
year_min_row = defaut_row
|
||||
year_max_row = defaut_row
|
||||
else:
|
||||
@ -300,27 +327,29 @@ def write_to_csv(results, filename):
|
||||
# 主函数,执行逻辑
|
||||
def main(list, debug):
|
||||
futu_codes = []
|
||||
xueqiu_codes = []
|
||||
index_codes = []
|
||||
|
||||
if list == 'futu':
|
||||
futu_codes = load_futu_all_codes()
|
||||
elif list == 'xueqiu':
|
||||
xueqiu_codes = load_xueqiu_codes()
|
||||
elif list == 'all':
|
||||
futu_codes = load_futu_all_codes()
|
||||
xueqiu_codes = load_xueqiu_codes()
|
||||
index_codes = load_index_codes()
|
||||
else:
|
||||
index_codes = load_index_codes()
|
||||
codes = futu_codes + index_codes
|
||||
codes = futu_codes + index_codes + xueqiu_codes
|
||||
|
||||
all_results = []
|
||||
|
||||
# 获取快照数据,并保存到文件
|
||||
# 获取快照数据
|
||||
snap_data = fetch_snap_all()
|
||||
if snap_data.empty:
|
||||
logging.error(f"fetching snapshot data error!")
|
||||
return
|
||||
file_name = f'{res_dir}/snapshot_em_{current_date}.csv'
|
||||
snap_data.to_csv(file_name, index=False, encoding='utf-8')
|
||||
logging.info(f"市场快照数据已经写入 CSV 文件 {file_name}\n\n")
|
||||
em_code_map = {row['代码']: row['代码前缀'] for _, row in snap_data.iterrows()}
|
||||
|
||||
for item in codes:
|
||||
code = item['code']
|
||||
@ -331,9 +360,13 @@ def main(list, debug):
|
||||
market, clean_code = code.split(".")
|
||||
except ValueError:
|
||||
logging.error(f"wrong format code: {code}")
|
||||
|
||||
if clean_code not in em_code_map:
|
||||
logging.warning(f"wrong stock code {clean_code}, please check.")
|
||||
continue
|
||||
em_code = f"{em_code_map[clean_code]}.{clean_code}"
|
||||
|
||||
logging.info(f"正在处理股票 {market}.{clean_code}, {code_name}...")
|
||||
result = calculate_stock_statistics(market, clean_code, code_name)
|
||||
result = calculate_stock_statistics(market, em_code, code_name)
|
||||
if result:
|
||||
match = snap_data.loc[snap_data['代码'] == clean_code]
|
||||
if not match.empty: # 如果找到了匹配项
|
||||
@ -344,6 +377,9 @@ def main(list, debug):
|
||||
logging.warning(f'{market}.{clean_code} has no snapshot data.')
|
||||
|
||||
all_results.append(result)
|
||||
logging.info(f"get data succ. {market}.{clean_code}, em_code: {em_code}, name: {code_name}...")
|
||||
else:
|
||||
logging.warning(f"get data faild. {market}.{clean_code}, em_code: {em_code}, name: {code_name}")
|
||||
|
||||
if debug:
|
||||
break
|
||||
@ -367,4 +403,6 @@ if __name__ == "__main__":
|
||||
|
||||
# 调用主函数
|
||||
#flush_code_map()
|
||||
#print(load_futu_all_codes())
|
||||
#print(load_xueqiu_codes())
|
||||
main(args.list, args.debug)
|
||||
Reference in New Issue
Block a user