1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
# Default folder holding the downloaded financial-report ``.zip`` archives.
_FNS_DIR = r"D:\stock_data\fdata"
# Default location of the aggregated CSV that the functions below maintain.
_ALLFNS_CSV = 'D:\\stock_data\\allfns.csv'
# Report columns kept from every archive; total_fnsfile() renames them to
# date / EPS / BPS / ROE when it writes the aggregate.
_FNS_COLS = ['report_date', 'col1', 'col4', 'col6']


def _zip_names_after(fnsfold, cutoff):
    """Return the ``.zip`` names in *fnsfold* whose first digit run is > *cutoff*.

    The comparison is a plain string comparison, so it is only meaningful
    when the digit run and *cutoff* are same-width ``YYYYMMDD`` strings.
    Archives whose name contains no digits are skipped: they carry nothing
    to compare, and indexing an empty match list would raise ``IndexError``.
    Names are returned in ``os.listdir`` order.
    """
    selected = []
    for name in os.listdir(fnsfold):
        if not re.search(r'\.zip$', name):
            continue
        digits = re.search(r'\d+', name)
        if digits and digits.group() > cutoff:
            selected.append(name)
    return selected


def updateallfns(file=_ALLFNS_CSV, fnsfold=_FNS_DIR):
    """Update the aggregated financial statement data.

    Reads the aggregated CSV, takes the latest value of its ``date`` column
    and appends (without a header row) the selected columns of every archive
    in *fnsfold* whose file-name date is later than that value.  Nothing is
    written when no archive qualifies.

    Args:
        file: Path of the aggregated CSV; it must already exist.
        fnsfold: Folder containing the report ``.zip`` archives.
    """
    allfns = pd.read_csv(
        file, index_col='date', dtype={'code': str}, parse_dates=['date']
    )
    newdate_allfns = max(allfns.index)
    cutoff = f"{newdate_allfns:%Y%m%d}"

    for filename in _zip_names_after(fnsfold, cutoff):
        zip_path = os.path.join(fnsfold, filename)
        single_fns = HistoryFinancialReader().get_df(zip_path).filter(_FNS_COLS)
        # reset_index() turns the reader's index into an ordinary first
        # column, matching the layout total_fnsfile() writes (index first).
        # NOTE(review): presumably that index is the stock code - confirm.
        single_fns.reset_index().to_csv(
            file, mode='a', header=False, index=False, encoding='utf-8-sig'
        )


def total_fnsfile(fnsfold=_FNS_DIR, out_file=None):
    """Aggregate all financial report data files into one CSV.

    Every archive in *fnsfold* whose file-name date is later than
    ``20131231`` is loaded, reduced to the selected columns, concatenated
    and written to *out_file* with the columns named date, EPS, BPS and ROE.
    Nothing is written when no archive qualifies.

    Args:
        fnsfold: Folder containing the report ``.zip`` archives.
        out_file: Destination CSV; defaults to ``allfns.csv`` under
            ``D:\\stock_data``.
    """
    if out_file is None:
        out_file = os.path.join(r"D:\stock_data", 'allfns.csv')

    filenames = _zip_names_after(fnsfold, '20131231')
    if not filenames:
        # Nothing to aggregate; concatenating an empty list would raise.
        return

    # Collect first and concatenate once: concatenating inside the loop
    # re-copies the accumulated frame on every iteration.
    frames = [
        HistoryFinancialReader().get_df(os.path.join(fnsfold, name)).filter(_FNS_COLS)
        for name in filenames
    ]
    total_fns = pd.concat(frames)
    total_fns.columns = ["date", "EPS", "BPS", "ROE"]
    total_fns.to_csv(out_file, encoding='utf-8-sig')


if __name__ == '__main__':
    # Build the aggregate from scratch on the first run; afterwards only
    # append the archives that are newer than what it already contains.
    if os.path.exists(_ALLFNS_CSV):
        updateallfns()
    else:
        total_fnsfile()
|