大数据选基金
# coding=utf8
"""Rank mixed-type and index funds by their historical gains.

The script downloads the fund list and per-fund net worth history from
api.doctorxiong.club, caches the responses as pickle files under
``data/.cache`` next to this file, scores every fund that already has data
on ``startDate`` and prints the 30 best funds for each of the two scores.
"""
import json
import math
import os
import pickle
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import requests
import retry
from scipy.integrate import quad
from scipy.stats import gaussian_kde
from scipy.stats import norm

token = "XXXXX"
workPath = os.path.abspath(os.path.dirname(__file__))
cachePath = os.path.join(workPath, "data", ".cache")
if not os.path.exists(cachePath):
    os.makedirs(cachePath)

netWorthDate = ""
# A trading day; it is the first date of the data set.
startDate = "2010-01-04"


# Retry the network request when it fails.
@retry.retry(tries=2, delay=1)
def make_request(url):
    """Return the body of a GET request to *url*.

    Raises:
        Exception: when the HTTP status code is not 200 (this is what
            triggers the retry decorator).
    """
    # A timeout keeps one stalled connection from hanging the whole run.
    response = requests.get(url, timeout=10)
    if response.status_code != 200:
        raise Exception('Request failed:{} url:{}'.format(response.status_code, url))
    return response.text


def _mtimeMatches(cacheFile, fmt):
    """Return True when *cacheFile*'s mtime and now() format equally with *fmt*."""
    modified = datetime.fromtimestamp(os.path.getmtime(cacheFile))
    return datetime.now().strftime(fmt) == modified.strftime(fmt)


def dayPolicy(cacheFile):
    """Return True when *cacheFile* was last modified today."""
    return _mtimeMatches(cacheFile, '%Y-%m-%d')


def monthPolicy(cacheFile):
    """Return True when *cacheFile* was last modified in the current month."""
    return _mtimeMatches(cacheFile, '%Y-%m')


def yearPolicy(cacheFile):
    """Return True when *cacheFile* was last modified in the current year."""
    return _mtimeMatches(cacheFile, '%Y')


def getData(url):
    """Fetch *url* and return the ``data`` field of its JSON reply.

    Returns None when the request fails, the reply cannot be parsed, or the
    reply's ``code`` field is not 200. Errors are printed, not raised.
    """
    try:
        con = json.loads(make_request(url))
        if con["code"] == 200:
            return con["data"]
    except Exception as e:
        print(repr(e))
    return None


def _loadCache(cacheFile, policy):
    """Return the object pickled in *cacheFile* if it exists and *policy* accepts it.

    Returns None otherwise.
    NOTE: pickle must only be used on files written by this script.
    """
    if os.path.exists(cacheFile) and policy(cacheFile):
        with open(cacheFile, 'rb') as f:
            return pickle.load(f)
    return None


def _saveCache(cacheFile, obj):
    """Pickle *obj* into *cacheFile*, replacing any previous content."""
    with open(cacheFile, 'wb') as f:
        pickle.dump(obj, f)


def cache_req(url, cacheName, cacheFunc):
    """Return the data for *url*, served from the cache when it is still fresh.

    Args:
        url: address passed to getData() on a cache miss.
        cacheName: base name (without ".pkl") of the cache file.
        cacheFunc: freshness policy called with the cache file path; a true
            result means the cached copy is used.

    Returns:
        The cached or freshly fetched data, or None when fetching failed.
    """
    cacheFile = os.path.join(cachePath, cacheName + ".pkl")
    # Use the cache file when it exists and the policy still accepts its date.
    if os.path.exists(cacheFile) and cacheFunc(cacheFile):
        with open(cacheFile, 'rb') as f:
            return pickle.load(f)
    response = getData(url)
    # Do not cache a failed request: a stored None would be served until the
    # policy expires instead of being retried on the next run.
    if response is not None:
        _saveCache(cacheFile, response)
    return response


# Get the date funds should have been updated on, to guard against dead funds.
def shouldUpdateDate():
    """Return the newest ``netWorthDate`` among the three reference funds.

    Raises:
        AssertionError: when the reference funds cannot be fetched.
    """
    url = "https://api.doctorxiong.club/v1/fund/?token={}&code=007346,000793,481010".format(token)
    response = cache_req(url, "fundUpdateDate", dayPolicy)
    # Raised explicitly so the check also runs under ``python -O``.
    if not response:
        raise AssertionError("获取最新日期失败")
    return max(item["netWorthDate"] for item in response)


# Get mixed-type and index-type funds.
def getHunheFund():
    """Return ``[{"code": ..., "name": ...}]`` for mixed and index funds.

    Funds whose type is exactly "混合型-偏债" are skipped. Returns an empty
    list when the fund list cannot be obtained.
    """
    url = "https://api.doctorxiong.club/v1/fund/all?token={}".format(token)
    infos = cache_req(url, "fundAll", monthPolicy)
    funds = []
    if infos:
        # one data: ["000001","HXCZHH","华夏成长混合","混合型-灵活","HUAXIACHENGZHANGHUNHE"]
        for info in infos:
            fundType = info[3]
            if fundType == "混合型-偏债":
                continue
            if "混合型" in fundType or "指数型" in fundType:
                funds.append(dict(code=info[0], name=info[2]))
    return funds


# Get the funds that already passed the filter.
def getConditionFund():
    """Return the fund list saved this year by saveConditionFund(), or None."""
    return _loadCache(os.path.join(cachePath, "filterFund.pkl"), yearPolicy)


# Save the funds that passed the filter.
def saveConditionFund(funds):
    """Store *funds* so later runs can skip the full filtering pass."""
    _saveCache(os.path.join(cachePath, "filterFund.pkl"), funds)


# Get a fund's cached net worth data.
def getFundNetWorth(code):
    """Return the net worth data cached today for fund *code*, or None."""
    cacheFile = os.path.join(cachePath, "fundNetWorth_{}.pkl".format(code))
    return _loadCache(cacheFile, dayPolicy)


# Save a fund's net worth data.
def saveFundNetWorth(code, netWorthData):
    """Cache *netWorthData* for fund *code*."""
    cacheFile = os.path.join(cachePath, "fundNetWorth_{}.pkl".format(code))
    _saveCache(cacheFile, netWorthData)


# Get a fund's rise/fall data.
def getFundData(code):
    """Download the ``netWorthData`` of fund *code* starting at ``startDate``.

    Returns None when the download fails or the reply has no such field.
    """
    try:
        url = "https://api.doctorxiong.club/v1/fund/detail?token={}&code={}&startDate={}".format(token, code, startDate)
        data = getData(url)
        if data:
            return data["netWorthData"]
    except Exception as e:
        print(repr(e))
    return None


def getRawData(data):
    """Return the third field of every record in *data* as a float."""
    # one data: ["2016-11-10","0.988","0.5086",""]
    return [float(record[2]) for record in data]


def precent(rawData):
    """Divide every value of *rawData* by 100 in place, rounded to 6 digits."""
    rawData[:] = [round(value / 100, 6) for value in rawData]


def getZFA(a):
    """Return the running product of ``1 + v`` over the values *v* of *a*."""
    total = 1
    result = []
    for v in a:
        total = total * (1 + v)
        result.append(total)
    return result


def zf(a, i, j, weight=1):
    """Return the relative change from ``a[i]`` to ``a[j]`` times *weight*, rounded to 4 digits."""
    return round((a[j] / a[i] - 1) * weight, 4)


# Evaluate the data.
# The idea: increasing * increasing = increasing, increasing * decreasing = decreasing.
def reward(data):
    """Return the sum of ``0.618 * (i + 1) * (1 + d)`` over the items of *data*."""
    total = 0
    for i, d in enumerate(data):
        total += 0.618 * (i + 1) * (1 + d)
    return total


# Fit a kernel density function to the samples, then estimate a probability.
def calc_prob(data, begin=0):
    """Return the integral of the Gaussian KDE of *data* over ``[begin, 1]``."""
    kde = gaussian_kde(data)
    return quad(kde, begin, 1)[0]


def getScore(data, n=12, threshold=0.05):
    """Score a cumulative growth series such as the one built by getZFA().

    For every start index ``i`` in ``range(len(data) - n)`` the next *n*
    points are scanned for the first one whose gain over ``data[i]`` exceeds
    *threshold*. That gain is recorded, and the start index contributes
    ``i / (len(data) - n) / (5 + j)`` to the first score, where ``j`` is the
    distance to that point: later start indexes and quicker gains count more.

    Args:
        data: sequence of cumulative growth values.
        n: number of following points scanned from each start index.
        threshold: gain that has to be exceeded (0.05 means 5%).

    Returns:
        None when no start index reaches the threshold (including when
        *data* has at most *n* values); otherwise a dict with "score"
        ([weighted hit score, mean recorded gain]), "min" and "max" of the
        recorded gains, and "len" (the length of *data*).
    """
    gains = []
    # Weighted count of the windows that gained more than the threshold.
    find5 = 0
    windowCount = len(data) - n
    for i in range(windowCount):
        weight = 0
        for j in range(1, n + 1):
            gain = zf(data, i, i + j)
            # The longer it takes to reach the threshold, the lower the weight.
            if gain > threshold:
                weight = 1 / (5 + j)
                gains.append(gain)
                break
        find5 += weight * i / windowCount
    if not gains:
        # min()/max() and the mean below are undefined without samples.
        return None
    return {
        # First score: gains above the threshold, weighted higher the closer
        # they are to the end of the series.
        # Second score: mean gain; it has to be above 0 because
        # (1+0.05)**2 * (1-0.05)**2 = 0.995, i.e. a mean of 0 still loses money.
        "score": [find5, sum(gains) / len(gains)],
        "min": min(gains),
        "max": max(gains),
        "len": len(data),
    }


def getFundScore(fund):
    """Score one fund.

    Net worth data comes from today's cache when available and is downloaded
    otherwise. Downloaded data is written to the cache once the fund has been
    scored, so it is not fetched again on the same day.

    Args:
        fund: dict with at least the key "code".

    Returns:
        ``(score, netWorthData)`` where *score* is the getScore() dict
        extended with the entries of *fund*, or ``(None, None)`` when no data
        is available, the first record is not dated ``startDate``, or the
        series cannot be scored.
    """
    code = fund["code"]
    netWorthData = getFundNetWorth(code)
    fetched = not netWorthData
    if fetched:
        netWorthData = getFundData(code)
    if not netWorthData:
        return None, None
    if netWorthData[0][0] != startDate:
        # The fund did not exist yet on startDate.
        return None, None
    rawData = getRawData(netWorthData)
    # The first 100 records of a new fund are unstable, so drop them.
    rawData = rawData[100:]
    # Strip the percent sign, i.e. divide by 100.
    precent(rawData)
    ret = getScore(getZFA(rawData))
    if ret is None:
        return None, None
    if fetched:
        saveFundNetWorth(code, netWorthData)
    ret.update(fund)
    return ret, netWorthData


def rank():
    """Score all candidate funds and print the rankings.

    Uses the fund list saved by an earlier run when there is one; otherwise
    scores every mixed/index fund and saves the ones that could be scored.
    Prints the top 30 by each score, then every fund ordered by the second
    score. Returns ``{}`` when there is no fund to process, else None.
    """
    fundsFilter = getConditionFund()
    funds = fundsFilter if fundsFilter else getHunheFund()
    if not funds:
        return {}

    fundsInfo = []
    if fundsFilter:
        print('已经过滤完的基金共: {}'.format(len(funds)))
        for fund in funds:
            r, _ = getFundScore(fund)
            if r:
                fundsInfo.append(r)
    else:
        print('正在运行所有基金, 比较慢。总共:{}'.format(len(funds)))
        saveFunds = []
        for fund in funds:
            r, _ = getFundScore(fund)
            if r:
                saveFunds.append(fund)
                print(r)
                fundsInfo.append(r)
            else:
                print("discard ", fund["code"])
        # Remember the funds that qualified so the next run can skip filtering.
        saveConditionFund(saveFunds)

    print('--score0')
    ranks = sorted(fundsInfo, key=lambda x: x["score"][0], reverse=True)
    print(json.dumps(ranks[:30], ensure_ascii=False, indent=2))

    print('--score1')
    ranks = sorted(fundsInfo, key=lambda x: x["score"][1], reverse=True)
    print(json.dumps(ranks[:30], ensure_ascii=False, indent=2))

    print('\n\n--all')
    print(json.dumps(ranks, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    rank()
输出:
已经过滤完的基金共: 359 --score0 [ { "score": [ 57.93579211803641, 0.06230475708502022 ], "min": 0.0501, "max": 0.1197, "len": 3117, "code": "519005", "name": "海富通股票混合" }, { "score": [ 53.65842204129, 0.0653795895096921 ], "min": 0.0501, "max": 0.1347, "len": 3117, "code": "320007", "name": "诺安成长混合" }, { "score": [ 51.67528125800504, 0.0618306209850108 ], "min": 0.0501, "max": 0.108, "len": 3117, "code": "162201", "name": "泰达宏利成长混合" }, { "score": [ 48.746384734296846, 0.06093847850055129 ], "min": 0.0501, "max": 0.1125, "len": 3117, "code": "610004", "name": "信澳中小盘混合" }, { "score": [ 47.72760735859347, 0.06033130165289256 ], "min": 0.0501, "max": 0.1235, "len": 3117, "code": "379010", "name": "上投摩根中小盘混合A" }, { "score": [ 46.71099743577858, 0.059829243353783285 ], "min": 0.0501, "max": 0.1101, "len": 3117, "code": "519110", "name": "浦银安盛价值成长混合A" }, { "score": [ 46.52613441274919, 0.061173489932885856 ], "min": 0.0501, "max": 0.1013, "len": 3117, "code": "519113", "name": "浦银安盛精致生活混合" }, { "score": [ 46.188826492095906, 0.06045589189189196 ], "min": 0.0501, "max": 0.1031, "len": 3117, "code": "310368", "name": "申万菱信竞争优势混合A" }, { "score": [ 45.62630861781332, 0.05915517621145375 ], "min": 0.0501, "max": 0.1107, "len": 3117, "code": "377020", "name": "上投摩根内需动力混合A" }, { "score": [ 44.98871855678978, 0.06026300366300365 ], "min": 0.0501, "max": 0.1098, "len": 3117, "code": "398021", "name": "中海能源策略混合" }, { "score": [ 44.7016311775171, 0.059827283236994225 ], "min": 0.0501, "max": 0.1027, "len": 3117, "code": "162605", "name": "景顺长城鼎益混合(LOF)" }, { "score": [ 44.14293359556362, 0.05992250879249707 ], "min": 0.0501, "max": 0.1076, "len": 3117, "code": "590003", "name": "中邮核心优势灵活配置混合" }, { "score": [ 44.122255758618316, 0.06100240174672486 ], "min": 0.0501, "max": 0.1137, "len": 3117, "code": "519115", "name": "浦银安盛红利精选混合A" }, { "score": [ 43.87496469979353, 0.06002765957446805 ], "min": 0.0501, "max": 0.1095, "len": 3117, "code": "375010", "name": "上投摩根中国优势混合A" }, { "score": [ 
43.77640810848399, 0.0587886178861789 ], "min": 0.0501, "max": 0.109, "len": 3117, "code": "213001", "name": "宝盈鸿利收益灵活配置混合A" }, { "score": [ 43.65363071918554, 0.05966674008810578 ], "min": 0.0501, "max": 0.1151, "len": 3117, "code": "460007", "name": "华泰柏瑞行业领先混合" }, { "score": [ 43.63541768793978, 0.05955972515856231 ], "min": 0.0501, "max": 0.1068, "len": 3117, "code": "519670", "name": "银河行业混合A" }, { "score": [ 43.617768222247484, 0.060576173708920225 ], "min": 0.0501, "max": 0.1274, "len": 3117, "code": "519185", "name": "万家精选混合A" }, { "score": [ 43.351638237674045, 0.06001710037174719 ], "min": 0.0501, "max": 0.108, "len": 3117, "code": "162202", "name": "泰达宏利周期混合" }, { "score": [ 43.24003671935615, 0.05965517621145376 ], "min": 0.0501, "max": 0.1067, "len": 3117, "code": "161810", "name": "银华内需精选混合(LOF)" }, { "score": [ 43.17935731490359, 0.06093830303030296 ], "min": 0.0501, "max": 0.108, "len": 3117, "code": "213002", "name": "宝盈泛沿海增长混合" }, { "score": [ 43.056049381769775, 0.05958938156359397 ], "min": 0.0501, "max": 0.107, "len": 3117, "code": "260108", "name": "景顺长城新兴成长混合A" }, { "score": [ 42.89039086848553, 0.0599118390804597 ], "min": 0.0501, "max": 0.1042, "len": 3117, "code": "210003", "name": "金鹰行业优势混合A" }, { "score": [ 42.8036158970906, 0.059940882002383865 ], "min": 0.0501, "max": 0.1202, "len": 3117, "code": "213003", "name": "宝盈策略增长混合" }, { "score": [ 42.79774808921732, 0.05987403726708082 ], "min": 0.0501, "max": 0.1043, "len": 3117, "code": "460001", "name": "华泰柏瑞盛世中国混合" }, { "score": [ 42.51516852496958, 0.05971209016393446 ], "min": 0.0501, "max": 0.1054, "len": 3117, "code": "270021", "name": "广发聚瑞混合A" }, { "score": [ 42.39922318535345, 0.06156709844559578 ], "min": 0.0501, "max": 0.125, "len": 3117, "code": "161903", "name": "万家行业优选混合(LOF)" }, { "score": [ 42.206412380232365, 0.058928051643192425 ], "min": 0.0501, "max": 0.1003, "len": 3117, "code": "180012", "name": "银华富裕主题混合" }, { "score": [ 41.55985608497045, 0.05932930444697835 ], 
"min": 0.0501, "max": 0.0997, "len": 3117, "code": "161606", "name": "融通行业景气混合A" }, { "score": [ 41.396908254722995, 0.059625191815856786 ], "min": 0.0501, "max": 0.1127, "len": 3117, "code": "050009", "name": "博时新兴成长混合" } ] --score1 [ { "score": [ 53.65842204129, 0.0653795895096921 ], "min": 0.0501, "max": 0.1347, "len": 3117, "code": "320007", "name": "诺安成长混合" }, { "score": [ 57.93579211803641, 0.06230475708502022 ], "min": 0.0501, "max": 0.1197, "len": 3117, "code": "519005", "name": "海富通股票混合" }, { "score": [ 51.67528125800504, 0.0618306209850108 ], "min": 0.0501, "max": 0.108, "len": 3117, "code": "162201", "name": "泰达宏利成长混合" }, { "score": [ 42.39922318535345, 0.06156709844559578 ], "min": 0.0501, "max": 0.125, "len": 3117, "code": "161903", "name": "万家行业优选混合(LOF)" }, { "score": [ 46.52613441274919, 0.061173489932885856 ], "min": 0.0501, "max": 0.1013, "len": 3117, "code": "519113", "name": "浦银安盛精致生活混合" }, { "score": [ 36.79978550340445, 0.06115256064690032 ], "min": 0.0501, "max": 0.1315, "len": 3117, "code": "398041", "name": "中海量化策略混合" }, { "score": [ 44.122255758618316, 0.06100240174672486 ], "min": 0.0501, "max": 0.1137, "len": 3117, "code": "519115", "name": "浦银安盛红利精选混合A" }, { "score": [ 48.746384734296846, 0.06093847850055129 ], "min": 0.0501, "max": 0.1125, "len": 3117, "code": "610004", "name": "信澳中小盘混合" }, { "score": [ 43.17935731490359, 0.06093830303030296 ], "min": 0.0501, "max": 0.108, "len": 3117, "code": "213002", "name": "宝盈泛沿海增长混合" }, { "score": [ 39.04319151741716, 0.06071529808773897 ], "min": 0.0501, "max": 0.1079, "len": 3117, "code": "580003", "name": "东吴行业轮动混合A" }, { "score": [ 37.25803695912039, 0.06066038961038954 ], "min": 0.0501, "max": 0.1242, "len": 3117, "code": "200010", "name": "长城双动力混合A" }, { "score": [ 30.301782173416864, 0.060636724137931024 ], "min": 0.0501, "max": 0.1032, "len": 3117, "code": "620002", "name": "金元顺安成长动力混合" }, { "score": [ 23.91416319408861, 0.06059614093959732 ], "min": 0.0501, "max": 0.1205, "len": 3117, 
"code": "510050", "name": "华夏上证50ETF" }, { "score": [ 37.56211833189599, 0.060577583892617505 ], "min": 0.0501, "max": 0.1119, "len": 3117, "code": "610002", "name": "信澳精华配置混合A" }, { "score": [ 43.617768222247484, 0.060576173708920225 ], "min": 0.0501, "max": 0.1274, "len": 3117, "code": "519185", "name": "万家精选混合A" }, { "score": [ 38.30859897880004, 0.060553058676654246 ], "min": 0.0501, "max": 0.1263, "len": 3117, "code": "519698", "name": "交银先锋混合A" }, { "score": [ 31.077754359298808, 0.06048118279569889 ], "min": 0.0501, "max": 0.1223, "len": 3117, "code": "257040", "name": "国联安红利混合" }, { "score": [ 46.188826492095906, 0.06045589189189196 ], "min": 0.0501, "max": 0.1031, "len": 3117, "code": "310368", "name": "申万菱信竞争优势混合A" }, { "score": [ 34.86096133603381, 0.06045020080321285 ], "min": 0.0501, "max": 0.1045, "len": 3117, "code": "217012", "name": "招商行业领先混合A" }, { "score": [ 20.04354926895067, 0.06044651162790694 ], "min": 0.0501, "max": 0.1165, "len": 3117, "code": "510060", "name": "工银上证央企ETF" }, { "score": [ 38.82634303844098, 0.060423647798742076 ], "min": 0.0501, "max": 0.1099, "len": 3117, "code": "162607", "name": "景顺长城资源垄断混合" }, { "score": [ 40.85328962897163, 0.060384291187739476 ], "min": 0.0501, "max": 0.1105, "len": 3117, "code": "160605", "name": "鹏华中国50混合" }, { "score": [ 47.72760735859347, 0.06033130165289256 ], "min": 0.0501, "max": 0.1235, "len": 3117, "code": "379010", "name": "上投摩根中小盘混合A" }, { "score": [ 40.4102072934958, 0.06031480552070259 ], "min": 0.0501, "max": 0.1255, "len": 3117, "code": "519029", "name": "华夏稳增混合" }, { "score": [ 32.08130571009108, 0.06031412429378535 ], "min": 0.0501, "max": 0.1125, "len": 3118, "code": "260111", "name": "景顺长城公司治理混合" }, { "score": [ 21.250976621470368, 0.06030568383658976 ], "min": 0.0501, "max": 0.1186, "len": 3117, "code": "510010", "name": "交银上证180公司治理ETF" }, { "score": [ 34.59265833063929, 0.06029973924380701 ], "min": 0.0501, "max": 0.1208, "len": 3117, "code": "580002", "name": "东吴双动力混合A" }, { 
"score": [ 21.14246272606389, 0.06026673267326729 ], "min": 0.0501, "max": 0.103, "len": 3117, "code": "350001", "name": "天治财富增长混合" }, { "score": [ 44.98871855678978, 0.06026300366300365 ], "min": 0.0501, "max": 0.1098, "len": 3117, "code": "398021", "name": "中海能源策略混合" }, { "score": [ 25.470707278260562, 0.060261672473867565 ], "min": 0.0501, "max": 0.111, "len": 3117, "code": "350002", "name": "天治低碳经济混合" } ]

浙公网安备 33010602011771号