0%

Python下载或更新通达信股票财报数据文件

发表于 2023-11-19 更新于 2023-12-18 分类于计算机应用， python 阅读次数：

# !/usr/bin/python3
# coding=utf-8
# 2023-11-15
# name = 下载更新通达信财务报表数据


from pytdx.hq import TdxHq_API ,TDXParams
from pytdx.crawler.history_financial_crawler import HistoryFinancialListCrawler
from pytdx.crawler.history_financial_crawler import HistoryFinancialCrawler
from pytdx.reader import HistoryFinancialReader
from pytdx.crawler.base_crawler import demo_reporthook
import pandas as pd
from datetime import date, datetime, timedelta
import os,re,sys
from chinese_calendar import is_workday

# Module-level pytdx client. In this script it is only used for api.to_df(...),
# which wraps the crawler's fetch_and_parse() result so it can be filtered by
# column (filesize / filename) further down.
api = TdxHq_API(multithread=True, heartbeat=True, auto_retry=True)

下载财务数据

已经存在于本地的则不需要再下载

查看本地财务数据文件列表，如 ['gpcw20201231.zip']
将文件名列表转化成日期样式的列表，如 ['20201231', '20210131']

# Directory holding the financial-report archives already downloaded
fd = "d:\\stock_data\\fdata"
# Keep only the directory entries whose name ends in ".zip"
zip_suffix = re.compile(r"\.zip$")
filels = list(filter(zip_suffix.search, os.listdir(fd)))
len(filels)

# Work out the file name of the most recent report period:
# shift "now" back by 90 days and use the quarter of that shifted date.
tmpdate = pd.Timestamp(datetime.now() - timedelta(days=90))
tmpdate
# Quarter number -> month/day suffix of the quarter's last day
tras = {
    quarter: mmdd
    for quarter, mmdd in enumerate(("0331", "0630", "0930", "1231"), start=1)
}
newreport = "gpcw{}{}.zip".format(tmpdate.year, tras.get(tmpdate.quarter))
newreport

Timestamp('2023-08-21 20:33:42.882088')


'gpcw20230930.zip'

# Fetch the remote list of financial-report files as a DataFrame
crawler = HistoryFinancialListCrawler()
df_fns = api.to_df(crawler.fetch_and_parse())
# Filter on file size and file name: size must exceed 2000, and the first run
# of digits in the name must sort after '20000101' (plain string comparison)
big_enough = df_fns.filesize > 2000
name_digits = df_fns.filename.map(lambda x: re.findall(r'\d+', x)[0])
sift = big_enough & (name_digits > '20000101')
df_fns = df_fns.loc[sift]
# Remote names that are not among the local archives yet
downfiles = [fname for fname in df_fns.filename if fname not in filels]
downfiles

[]

if downfiles:
    print("有新季报需要下载！")
    crawler = HistoryFinancialCrawler()
    for name in downfiles:
        # Full path of the archive inside the local data directory
        target = os.path.join(fd, name)
        # Only fetch files that are not on disk yet
        if not os.path.exists(target):
            crawler.fetch_and_parse(
                reporthook=demo_reporthook,
                filename=name,
                path_to_download=target,
            )
else:
    print("无财报文件需要下载！")

无财报文件需要下载！

生成的`updatefnsdata.py`文件

# !/usr/bin/python3
# coding=utf-8
# 2023-11-15

from pytdx.hq import TdxHq_API ,TDXParams
from pytdx.crawler.history_financial_crawler import HistoryFinancialListCrawler
from pytdx.crawler.history_financial_crawler import HistoryFinancialCrawler
from pytdx.reader import HistoryFinancialReader
from pytdx.crawler.base_crawler import demo_reporthook
import pandas as pd
from datetime import date, datetime, timedelta
import os,re,sys
from chinese_calendar import is_workday

# Module-level pytdx client. updatefnsdata() uses it only for api.to_df(...),
# which wraps the list crawler's fetch_and_parse() result so it can be
# filtered by its filesize / filename columns.
api = TdxHq_API(multithread=True, heartbeat=True, auto_retry=True)


def updatefnsdata(fd="d:\\stock_data\\fdata"):
    """Download the TDX financial-report archives that are not on disk yet.

    The list of available files is fetched with
    ``HistoryFinancialListCrawler``. An entry is kept when its ``filesize``
    is above 2000 and the first run of digits in its ``filename`` compares
    (as a string) greater than ``'20000101'``. Every kept name that has no
    matching ``.zip`` file in ``fd`` is then downloaded into ``fd`` with
    ``HistoryFinancialCrawler``.

    Args:
        fd: Directory that holds the local ``.zip`` archives. It is read
            with ``os.listdir`` and is also the download target. Defaults
            to the path this script was written for.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        OSError: If ``fd`` cannot be listed (for example, it does not exist).
    """
    # Archives already downloaded; a set keeps the membership test cheap.
    localfiles = {entry for entry in os.listdir(fd) if entry.endswith(".zip")}

    # Remote file list as a DataFrame.
    netdf = api.to_df(HistoryFinancialListCrawler().fetch_and_parse())

    def _first_digits(filename):
        # A name without any digits yields "" and is therefore filtered out
        # by the comparison below instead of raising IndexError.
        found = re.search(r"\d+", filename)
        return found.group() if found else ""

    # Filter on file size and on the date embedded in the file name.
    sift = (netdf.filesize > 2000) & (
        netdf.filename.map(_first_digits) > "20000101"
    )
    # Select from netdf, the same frame the mask was built from.
    netfiles = netdf.loc[sift, "filename"].tolist()

    downfiles = [fname for fname in netfiles if fname not in localfiles]
    if not downfiles:
        print("无财报文件需要下载！")
        return

    print("有新季报需要下载！")
    crawler = HistoryFinancialCrawler()
    for name in downfiles:
        target = os.path.join(fd, name)
        # Skip anything that is nonetheless present on disk.
        if os.path.exists(target):
            continue
        crawler.fetch_and_parse(
            reporthook=demo_reporthook,
            filename=name,
            path_to_download=target,
        )
            
# Run the update only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    updatefnsdata()

无财报文件需要下载！