Python下载或更新通达信股票财报数据文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#!/usr/bin/python3
# coding=utf-8
# 2023-11-15
# name = 下载更新通达信财务报表数据


from pytdx.hq import TdxHq_API ,TDXParams
from pytdx.crawler.history_financial_crawler import HistoryFinancialListCrawler
from pytdx.crawler.history_financial_crawler import HistoryFinancialCrawler
from pytdx.reader import HistoryFinancialReader
from pytdx.crawler.base_crawler import demo_reporthook
import pandas as pd
from datetime import date, datetime, timedelta
import os,re,sys
from chinese_calendar import is_workday

# Module-level pytdx client object, created with the multithread, heartbeat
# and auto_retry options switched on. In this listing it is only used for its
# `to_df` helper, which converts crawler results further down.
api = TdxHq_API(multithread=True, heartbeat=True, auto_retry=True)


下载财务数据

已经存在于本地的则不需要再下载

  1. 查看本地财务数据文件列表，如 ['gpcw20201231.zip']
  2. 将文件名列表转化成日期样式的列表，如 ['20201231', '20210131']
1
2
3
4
5
# Directory that holds the financial-report archives downloaded so far.
fd = "d:\\stock_data\\fdata"
# Keep only the directory entries whose name matches the ".zip" suffix pattern.
filels = list(filter(re.compile(r"\.zip$").search, os.listdir(fd)))
len(filels)

111
1
2
3
4
5
6
# Work out the archive name of the newest reporting period: step back 90 days
# from now and map the calendar quarter of that date to its quarter-end suffix.
tras = {1: "0331", 2: "0630", 3: "0930", 4: "1231"}
tmpdate = pd.Timestamp(datetime.now() - timedelta(days=90))
tmpdate
quarter_suffix = tras.get(tmpdate.quarter)
newreport = f"gpcw{tmpdate.year}{quarter_suffix}.zip"
newreport
Timestamp('2023-08-21 20:33:42.882088')


'gpcw20230930.zip'
1
2
3
4
5
6
7
8
9
# Fetch the remote list of financial-report archives as a DataFrame.
crawler = HistoryFinancialListCrawler()
df_fns = api.to_df(crawler.fetch_and_parse())
# Keep rows whose filesize exceeds 2000 and whose first run of digits in the
# filename compares (as a string) greater than '20000101'.
size_ok = df_fns.filesize > 2000
first_digits = df_fns.filename.map(lambda x: re.findall(r'\d+', x)[0])
sift = size_ok & (first_digits > '20000101')
df_fns = df_fns.loc[sift]
# Remote archives that are not in the local listing still have to be fetched.
downfiles = [fname for fname in df_fns.filename if fname not in filels]
downfiles
[]
1
2
3
4
5
6
7
8
9
10
11
12
13
14
# Download every archive in `downfiles` that is still absent on disk.
if downfiles:
    print("有新季报需要下载!")
    crawler = HistoryFinancialCrawler()
    for name in downfiles:
        target = os.path.join(fd, name)
        # Skip anything that already exists at the target path.
        if not os.path.exists(target):
            crawler.fetch_and_parse(
                reporthook=demo_reporthook,
                filename=name,
                path_to_download=target,
            )
else:
    print("无财报文件需要下载!")
无财报文件需要下载!

生成的updatefnsdata.py文件

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/python3
# coding=utf-8
# 2023-11-15

from pytdx.hq import TdxHq_API ,TDXParams
from pytdx.crawler.history_financial_crawler import HistoryFinancialListCrawler
from pytdx.crawler.history_financial_crawler import HistoryFinancialCrawler
from pytdx.reader import HistoryFinancialReader
from pytdx.crawler.base_crawler import demo_reporthook
import pandas as pd
from datetime import date, datetime, timedelta
import os,re,sys
from chinese_calendar import is_workday

# Module-level pytdx client object, created with the multithread, heartbeat
# and auto_retry options switched on. updatefnsdata() uses its `to_df` helper
# to convert the crawler result into a DataFrame.
api = TdxHq_API(multithread=True, heartbeat=True, auto_retry=True)


def updatefnsdata(fd: str = "d:\\stock_data\\fdata") -> None:
    """Download the financial-report archives that are missing locally.

    The remote archive list is fetched with ``HistoryFinancialListCrawler``,
    filtered to entries whose ``filesize`` exceeds 2000 and whose filename
    date is later than ``20000101``, and compared with the ``.zip`` files
    already present in ``fd``. Each missing archive is downloaded into ``fd``
    with ``HistoryFinancialCrawler``.

    Args:
        fd: Directory holding the local archives; it must already exist.

    Returns:
        None. A status message is printed in both outcomes.
    """
    # Archives already on disk; a set keeps the membership test below cheap.
    localfilels = {i for i in os.listdir(fd) if re.search(r"\.zip$", i)}

    # Remote archive list as a DataFrame.
    netdf = api.to_df(HistoryFinancialListCrawler().fetch_and_parse())

    def report_date(filename):
        # First run of digits in the name (e.g. '20201231' in
        # 'gpcw20201231.zip'). A name without digits yields '', which compares
        # below '20000101' and is therefore filtered out instead of raising.
        found = re.search(r"\d+", filename)
        return found.group() if found else ""

    # Filter on file size and on the date embedded in the filename.
    sift = (netdf.filesize > 2000) & (netdf.filename.map(report_date) > "20000101")
    netfiles = netdf.loc[sift, "filename"].tolist()

    downfiles = [i for i in netfiles if i not in localfilels]
    if not downfiles:
        print("无财报文件需要下载!")
        return

    print("有新季报需要下载!")
    crawler = HistoryFinancialCrawler()
    for name in downfiles:
        target = os.path.join(fd, name)
        # Re-check on disk so an existing file is never fetched again.
        if os.path.exists(target):
            continue
        crawler.fetch_and_parse(
            reporthook=demo_reporthook,
            filename=name,
            path_to_download=target,
        )


if __name__ == '__main__':
    updatefnsdata()
无财报文件需要下载!