大数据选基金

#coding=utf8
import json,math,requests
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import gaussian_kde
from scipy.integrate import quad
from scipy.stats import norm
from datetime import datetime
import os
import pickle
import retry
# NOTE(review): looks like a placeholder API token; supply a real one before running.
token = "XXXXX"
# The on-disk cache lives next to this script, under data/.cache.
workPath = os.path.abspath(os.path.dirname(__file__))
cachePath = os.path.join(workPath, "data", ".cache")
# exist_ok=True avoids the check-then-create race of a separate os.path.exists test.
os.makedirs(cachePath, exist_ok=True)

netWorthDate = ""

# This date is a trading day; it is the start date of the data set.
startDate = "2010-01-04"

# Retry the network request when it fails.
@retry.retry(tries=2, delay=1)
def make_request(url, timeout=10):
    """Fetch ``url`` with HTTP GET and return the response body as text.

    Args:
        url: The address to request.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without a
            timeout a stalled connection would block forever and the retry
            decorator would never get a chance to run.

    Returns:
        The response body (``response.text``).

    Raises:
        Exception: If the HTTP status code is not 200. Exceptions raised by
            ``requests`` itself (including timeouts) propagate as well.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        raise Exception('Request failed:{} url:{}'.format(response.status_code, url))
    return response.text

def dayPolicy(cacheFile):
    """Return True when ``cacheFile`` was last modified on today's local date."""
    stamp_format = '%Y-%m-%d'
    today = datetime.now().strftime(stamp_format)
    modified = datetime.fromtimestamp(os.path.getmtime(cacheFile))
    return today == modified.strftime(stamp_format)

def monthPolicy(cacheFile):
    """Return True when ``cacheFile`` was last modified in the current local year and month."""
    stamp_format = '%Y-%m'
    this_month = datetime.now().strftime(stamp_format)
    modified = datetime.fromtimestamp(os.path.getmtime(cacheFile))
    return this_month == modified.strftime(stamp_format)

def yearPolicy(cacheFile):
    """Return True when ``cacheFile`` was last modified in the current local year."""
    stamp_format = '%Y'
    this_year = datetime.now().strftime(stamp_format)
    modified = datetime.fromtimestamp(os.path.getmtime(cacheFile))
    return this_year == modified.strftime(stamp_format)

def getData(url):
    """Request ``url``, decode the JSON body and return its ``"data"`` field.

    Returns None when the decoded ``"code"`` field is not 200, or when the
    request or decoding raises (the exception is printed, not re-raised).
    """
    payload_data = None
    try:
        payload = json.loads(make_request(url))
        if payload["code"] == 200:
            payload_data = payload["data"]
    except Exception as err:
        # Best effort: report the failure and fall through to return None.
        print(repr(err))
    return payload_data

def cache_req(url, cacheName, cacheFunc):
    """Return the data for ``url``, served from a pickle cache when still fresh.

    Args:
        url: Address handed to ``getData`` on a cache miss.
        cacheName: Base name of the cache file; stored as
            ``<cachePath>/<cacheName>.pkl``.
        cacheFunc: Freshness predicate called with the cache file path; a
            truthy result means the cached copy may be used.

    Returns:
        The cached or freshly fetched data, or None when the fetch failed.
    """
    cacheFile = os.path.join(cachePath, cacheName+".pkl")
    # Use the cache only when the file exists and the policy says it is fresh.
    if os.path.exists(cacheFile) and cacheFunc(cacheFile):
        # The cache file is written by this script itself; do not point this
        # at pickle files from an untrusted source.
        with open(cacheFile, 'rb') as f:
            return pickle.load(f)

    response = getData(url)
    # Do not persist a failed fetch: caching None would make every later call
    # in the same policy window return None without retrying the request.
    if response is not None:
        with open(cacheFile, 'wb') as f:
            pickle.dump(response, f)
    return response

def shouldUpdateDate():
    """Return the latest ``netWorthDate`` among a few reference funds.

    Several funds are queried so that the result does not depend on any single
    fund still being alive (per the original author's note).

    Returns:
        The maximum ``netWorthDate`` value found in the response entries.

    Raises:
        AssertionError: If no data could be obtained.
    """
    url = "https://api.doctorxiong.club/v1/fund/?token={}&code=007346,000793,481010".format(token)
    response = cache_req(url, "fundUpdateDate", dayPolicy)
    if not response:
        # Raised explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        raise AssertionError("获取最新日期失败")
    # Take the max over whatever entries came back instead of indexing [0..2],
    # which raised IndexError when fewer than three funds were returned.
    return max(item["netWorthDate"] for item in response)

def getHunheFund():
    """Return hybrid-type and index-type funds as ``{"code", "name"}`` dicts.

    The full fund list is fetched through ``cache_req`` with the monthly cache
    policy. Funds whose category is exactly "混合型-偏债" are excluded. An
    empty list is returned when no fund list is available.
    """
    url = "https://api.doctorxiong.club/v1/fund/all?token={}".format(token)
    infos = cache_req(url, "fundAll", monthPolicy)
    if not infos:
        return []

    # Sample row: ["000001","HXCZHH","华夏成长混合","混合型-灵活","HUAXIACHENGZHANGHUNHE"]
    selected = []
    for row in infos:
        category = row[3]
        if category == "混合型-偏债":
            continue
        if "混合型" in category or "指数型" in category:
            selected.append({"code": row[0], "name": row[2]})
    return selected

def getConditionFund():
    """Load the saved list of qualifying funds, or None if absent or not from this year."""
    cacheFile = os.path.join(cachePath, "filterFund.pkl")
    if not (os.path.exists(cacheFile) and yearPolicy(cacheFile)):
        return None
    with open(cacheFile, 'rb') as stored:
        return pickle.load(stored)

def saveConditionFund(funds):
    """Pickle ``funds`` (the qualifying fund list) to ``filterFund.pkl`` in the cache directory."""
    target = os.path.join(cachePath, "filterFund.pkl")
    with open(target, 'wb') as out:
        pickle.dump(funds, out)

def getFundNetWorth(code):
    """Load the cached net-worth data for fund ``code``, or None if absent or not from today."""
    cacheFile = os.path.join(cachePath, "fundNetWorth_{}.pkl".format(code))
    if not (os.path.exists(cacheFile) and dayPolicy(cacheFile)):
        return None
    with open(cacheFile, 'rb') as stored:
        return pickle.load(stored)

def saveFundNetWorth(code, netWorthData):
    """Pickle ``netWorthData`` for fund ``code`` to ``fundNetWorth_<code>.pkl`` in the cache directory."""
    target = os.path.join(cachePath, "fundNetWorth_{}.pkl".format(code))
    with open(target, 'wb') as out:
        pickle.dump(netWorthData, out)

def getFundData(code):
    """Fetch the ``netWorthData`` field of fund ``code`` starting at ``startDate``.

    Returns None when no data comes back or when an exception occurs (the
    exception is printed, not re-raised).
    """
    net_worth = None
    try:
        url = "https://api.doctorxiong.club/v1/fund/detail?token={}&code={}&startDate={}".format(token, code, startDate)
        detail = getData(url)
        if detail:
            net_worth = detail["netWorthData"]
    except Exception as err:
        print(repr(err))
    return net_worth

def getRawData(data):
    """Return the third column of every row in ``data`` converted to float.

    Sample row: ["2016-11-10","0.988","0.5086",""]
    """
    return [float(row[2]) for row in data]

def precent(rawData):
    """Convert percentages to fractions in place: each item becomes ``round(item / 100, 6)``."""
    for position, value in enumerate(rawData):
        rawData[position] = round(value / 100, 6)
#   rawData[i].to
# print(json.dumps(rawData[:10]))

def getZFA(a):
    """Return the running product of ``(1 + v)`` over ``a``, starting from 1.

    Element k of the result is the compounded growth factor after applying the
    first k+1 fractional changes.
    """
    growth = 1
    compounded = []
    for change in a:
        growth *= 1 + change
        compounded.append(growth)
    return compounded

def zf(a, i, j, weight=1):
    """Return the relative change from ``a[i]`` to ``a[j]``, times ``weight``, rounded to 4 places."""
    relative_change = a[j] / a[i] - 1
    return round(relative_change * weight, 4)

def reward(data):
    """Score ``data`` as the sum of ``0.618 * (position + 1) * (1 + value)``.

    Original author's rationale: multiplying two increasing functions gives an
    increasing function, while increasing times decreasing gives a decreasing one.
    """
    total = 0
    position = 0
    for value in data:
        position += 1
        total += 0.618 * position * (1 + value)
    return total

def calc_prob(data, begin=0):
    """Estimate the probability mass between ``begin`` and 1 for the samples in ``data``.

    A Gaussian kernel density estimate is fitted to ``data`` and the resulting
    density is integrated numerically from ``begin`` to 1.
    """
    density = gaussian_kde(data)
    integral, _abs_error = quad(density, begin, 1)
    return integral

def getScore(data, n=12):
    """Score a cumulative-growth series by how soon a gain above 5% shows up.

    For every start index ``i`` in ``range(len(data) - n)`` the next ``n``
    points are scanned for the first ``j`` whose gain
    ``round(data[i + j] / data[i] - 1, 4)`` exceeds 0.05. Each hit adds
    ``1 / (5 + j) * i / (len(data) - n)`` to the first score, so quicker hits
    and later start indices weigh more, and the gain is collected as a sample.

    Args:
        data: Sequence of cumulative growth factors (as built by ``getZFA``).
        n: Maximum number of points to look ahead from each start index.

    Returns:
        A dict with
        ``"score"``: ``[weighted hit score, mean of the collected gains]``,
        ``"min"`` / ``"max"``: smallest / largest collected gain,
        ``"len"``: ``len(data)``.
        When no gain above 5% is found (including when ``data`` is not longer
        than ``n``) the scores are 0.0 and ``"min"`` / ``"max"`` are None;
        previously this case raised ValueError from ``min([])``.
    """
    gains = []
    # Weighted count of start points that reach a gain above 5%.
    hit_score = 0.0
    window_count = len(data) - n
    for i in range(window_count):
        base = data[i]
        for j in range(1, n + 1):
            # Same value as zf(data, i, i + j) with the default weight of 1.
            gain = round(data[i + j] / base - 1, 4)
            if gain > 0.05:
                # The longer it takes to reach the gain, the lower the weight;
                # start points closer to the end of the series weigh more.
                hit_score += 1 / (5 + j) * i / window_count
                gains.append(gain)
                break

    if not gains:
        return {
            "score": [hit_score, 0.0],
            "min": None,
            "max": None,
            "len": len(data),
        }

    return {
        # score[0]: evaluation of the >5% hits, weighted towards recent data.
        # score[1]: mean gain. Author's note: the mean must be above 0, because
        # (1+0.05)**2 * (1-0.05)**2 = 0.995, i.e. a zero mean still loses money.
        "score": [hit_score, sum(gains) / len(gains)],
        "min": min(gains),
        "max": max(gains),
        "len": len(data),
    }

def getFundScore(fund):
    """Compute the score dict for one fund.

    Net-worth data is taken from today's cache when present, otherwise fetched.

    Args:
        fund: Dict with at least a ``"code"`` key; all its keys are merged
            into the returned score dict.

    Returns:
        ``(score, netWorthData)``, or ``(None, None)`` when no data is
        available or the first record is not dated ``startDate`` (the fund
        did not exist yet on that day).
    """
    code = fund["code"]
    netWorthData = getFundNetWorth(code) or getFundData(code)
    if not netWorthData or netWorthData[0][0] != startDate:
        return None, None

    # The first 100 records of a new fund are unstable, so they are dropped.
    daily_changes = getRawData(netWorthData)[100:]
    # Convert percentages to fractions (divide by 100) in place.
    precent(daily_changes)

    score = getScore(getZFA(daily_changes))
    score.update(fund)
    return score, netWorthData

def rank():
    """Score all candidate funds and print the rankings as JSON.

    Candidates come from the saved qualifying-fund list when one exists for
    this year; otherwise every hybrid/index fund is evaluated and the ones
    that produce a score are saved as the new qualifying list.

    Output: the top 30 by ``score[0]``, the top 30 by ``score[1]``, then the
    full list ordered by ``score[1]``.

    Returns:
        ``{}`` when there are no candidate funds, otherwise None.
    """
    fundsFilter = getConditionFund()
    funds = fundsFilter or getHunheFund()
    if not funds:
        return {}

    if fundsFilter:
        print('已经过滤完的基金共: {}'.format(len(funds)))
    else:
        print('正在运行所有基金, 比较慢。总共:{}'.format(len(funds)))

    fundsInfo = []
    saveFunds = []
    for fund in funds:
        r, netWorthData = getFundScore(fund)
        if r:
            # Always persist the net-worth data. The pre-filtered path used to
            # skip this, so once the daily cache went stale every run
            # downloaded every fund again.
            saveFundNetWorth(fund["code"], netWorthData)
            saveFunds.append(fund)
            if not fundsFilter:
                print (r)
            fundsInfo.append(r)
        elif not fundsFilter:
            print("discard ", fund["code"])

    if not fundsFilter:
        # Remember which funds qualified so the next run can skip the filtering.
        saveConditionFund(saveFunds)

    print('--score0')
    ranks = sorted(fundsInfo, key=lambda x: x["score"][0], reverse=True)
    print(json.dumps(ranks[:30], ensure_ascii=False, indent=2))

    print('--score1')
    ranks = sorted(fundsInfo, key=lambda x: x["score"][1], reverse=True)
    print(json.dumps(ranks[:30], ensure_ascii=False, indent=2))
    print('\n\n--all')
    print(json.dumps(ranks, ensure_ascii=False, indent=2))

# Disabled debugging call left by the author.
# NOTE(review): getFundData formats its argument into the URL as the fund code,
# so a code string rather than a dict is probably intended — confirm before re-enabling.
# getFundData(dict(name="诺安基金", code="320007"))

# Script entry point: compute the fund rankings and print them.
rank()

 

输出:

已经过滤完的基金共: 359
--score0
[
  {
    "score": [
      57.93579211803641,
      0.06230475708502022
    ],
    "min": 0.0501,
    "max": 0.1197,
    "len": 3117,
    "code": "519005",
    "name": "海富通股票混合"
  },
  {
    "score": [
      53.65842204129,
      0.0653795895096921
    ],
    "min": 0.0501,
    "max": 0.1347,
    "len": 3117,
    "code": "320007",
    "name": "诺安成长混合"
  },
  {
    "score": [
      51.67528125800504,
      0.0618306209850108
    ],
    "min": 0.0501,
    "max": 0.108,
    "len": 3117,
    "code": "162201",
    "name": "泰达宏利成长混合"
  },
  {
    "score": [
      48.746384734296846,
      0.06093847850055129
    ],
    "min": 0.0501,
    "max": 0.1125,
    "len": 3117,
    "code": "610004",
    "name": "信澳中小盘混合"
  },
  {
    "score": [
      47.72760735859347,
      0.06033130165289256
    ],
    "min": 0.0501,
    "max": 0.1235,
    "len": 3117,
    "code": "379010",
    "name": "上投摩根中小盘混合A"
  },
  {
    "score": [
      46.71099743577858,
      0.059829243353783285
    ],
    "min": 0.0501,
    "max": 0.1101,
    "len": 3117,
    "code": "519110",
    "name": "浦银安盛价值成长混合A"
  },
  {
    "score": [
      46.52613441274919,
      0.061173489932885856
    ],
    "min": 0.0501,
    "max": 0.1013,
    "len": 3117,
    "code": "519113",
    "name": "浦银安盛精致生活混合"
  },
  {
    "score": [
      46.188826492095906,
      0.06045589189189196
    ],
    "min": 0.0501,
    "max": 0.1031,
    "len": 3117,
    "code": "310368",
    "name": "申万菱信竞争优势混合A"
  },
  {
    "score": [
      45.62630861781332,
      0.05915517621145375
    ],
    "min": 0.0501,
    "max": 0.1107,
    "len": 3117,
    "code": "377020",
    "name": "上投摩根内需动力混合A"
  },
  {
    "score": [
      44.98871855678978,
      0.06026300366300365
    ],
    "min": 0.0501,
    "max": 0.1098,
    "len": 3117,
    "code": "398021",
    "name": "中海能源策略混合"
  },
  {
    "score": [
      44.7016311775171,
      0.059827283236994225
    ],
    "min": 0.0501,
    "max": 0.1027,
    "len": 3117,
    "code": "162605",
    "name": "景顺长城鼎益混合(LOF)"
  },
  {
    "score": [
      44.14293359556362,
      0.05992250879249707
    ],
    "min": 0.0501,
    "max": 0.1076,
    "len": 3117,
    "code": "590003",
    "name": "中邮核心优势灵活配置混合"
  },
  {
    "score": [
      44.122255758618316,
      0.06100240174672486
    ],
    "min": 0.0501,
    "max": 0.1137,
    "len": 3117,
    "code": "519115",
    "name": "浦银安盛红利精选混合A"
  },
  {
    "score": [
      43.87496469979353,
      0.06002765957446805
    ],
    "min": 0.0501,
    "max": 0.1095,
    "len": 3117,
    "code": "375010",
    "name": "上投摩根中国优势混合A"
  },
  {
    "score": [
      43.77640810848399,
      0.0587886178861789
    ],
    "min": 0.0501,
    "max": 0.109,
    "len": 3117,
    "code": "213001",
    "name": "宝盈鸿利收益灵活配置混合A"
  },
  {
    "score": [
      43.65363071918554,
      0.05966674008810578
    ],
    "min": 0.0501,
    "max": 0.1151,
    "len": 3117,
    "code": "460007",
    "name": "华泰柏瑞行业领先混合"
  },
  {
    "score": [
      43.63541768793978,
      0.05955972515856231
    ],
    "min": 0.0501,
    "max": 0.1068,
    "len": 3117,
    "code": "519670",
    "name": "银河行业混合A"
  },
  {
    "score": [
      43.617768222247484,
      0.060576173708920225
    ],
    "min": 0.0501,
    "max": 0.1274,
    "len": 3117,
    "code": "519185",
    "name": "万家精选混合A"
  },
  {
    "score": [
      43.351638237674045,
      0.06001710037174719
    ],
    "min": 0.0501,
    "max": 0.108,
    "len": 3117,
    "code": "162202",
    "name": "泰达宏利周期混合"
  },
  {
    "score": [
      43.24003671935615,
      0.05965517621145376
    ],
    "min": 0.0501,
    "max": 0.1067,
    "len": 3117,
    "code": "161810",
    "name": "银华内需精选混合(LOF)"
  },
  {
    "score": [
      43.17935731490359,
      0.06093830303030296
    ],
    "min": 0.0501,
    "max": 0.108,
    "len": 3117,
    "code": "213002",
    "name": "宝盈泛沿海增长混合"
  },
  {
    "score": [
      43.056049381769775,
      0.05958938156359397
    ],
    "min": 0.0501,
    "max": 0.107,
    "len": 3117,
    "code": "260108",
    "name": "景顺长城新兴成长混合A"
  },
  {
    "score": [
      42.89039086848553,
      0.0599118390804597
    ],
    "min": 0.0501,
    "max": 0.1042,
    "len": 3117,
    "code": "210003",
    "name": "金鹰行业优势混合A"
  },
  {
    "score": [
      42.8036158970906,
      0.059940882002383865
    ],
    "min": 0.0501,
    "max": 0.1202,
    "len": 3117,
    "code": "213003",
    "name": "宝盈策略增长混合"
  },
  {
    "score": [
      42.79774808921732,
      0.05987403726708082
    ],
    "min": 0.0501,
    "max": 0.1043,
    "len": 3117,
    "code": "460001",
    "name": "华泰柏瑞盛世中国混合"
  },
  {
    "score": [
      42.51516852496958,
      0.05971209016393446
    ],
    "min": 0.0501,
    "max": 0.1054,
    "len": 3117,
    "code": "270021",
    "name": "广发聚瑞混合A"
  },
  {
    "score": [
      42.39922318535345,
      0.06156709844559578
    ],
    "min": 0.0501,
    "max": 0.125,
    "len": 3117,
    "code": "161903",
    "name": "万家行业优选混合(LOF)"
  },
  {
    "score": [
      42.206412380232365,
      0.058928051643192425
    ],
    "min": 0.0501,
    "max": 0.1003,
    "len": 3117,
    "code": "180012",
    "name": "银华富裕主题混合"
  },
  {
    "score": [
      41.55985608497045,
      0.05932930444697835
    ],
    "min": 0.0501,
    "max": 0.0997,
    "len": 3117,
    "code": "161606",
    "name": "融通行业景气混合A"
  },
  {
    "score": [
      41.396908254722995,
      0.059625191815856786
    ],
    "min": 0.0501,
    "max": 0.1127,
    "len": 3117,
    "code": "050009",
    "name": "博时新兴成长混合"
  }
]
--score1
[
  {
    "score": [
      53.65842204129,
      0.0653795895096921
    ],
    "min": 0.0501,
    "max": 0.1347,
    "len": 3117,
    "code": "320007",
    "name": "诺安成长混合"
  },
  {
    "score": [
      57.93579211803641,
      0.06230475708502022
    ],
    "min": 0.0501,
    "max": 0.1197,
    "len": 3117,
    "code": "519005",
    "name": "海富通股票混合"
  },
  {
    "score": [
      51.67528125800504,
      0.0618306209850108
    ],
    "min": 0.0501,
    "max": 0.108,
    "len": 3117,
    "code": "162201",
    "name": "泰达宏利成长混合"
  },
  {
    "score": [
      42.39922318535345,
      0.06156709844559578
    ],
    "min": 0.0501,
    "max": 0.125,
    "len": 3117,
    "code": "161903",
    "name": "万家行业优选混合(LOF)"
  },
  {
    "score": [
      46.52613441274919,
      0.061173489932885856
    ],
    "min": 0.0501,
    "max": 0.1013,
    "len": 3117,
    "code": "519113",
    "name": "浦银安盛精致生活混合"
  },
  {
    "score": [
      36.79978550340445,
      0.06115256064690032
    ],
    "min": 0.0501,
    "max": 0.1315,
    "len": 3117,
    "code": "398041",
    "name": "中海量化策略混合"
  },
  {
    "score": [
      44.122255758618316,
      0.06100240174672486
    ],
    "min": 0.0501,
    "max": 0.1137,
    "len": 3117,
    "code": "519115",
    "name": "浦银安盛红利精选混合A"
  },
  {
    "score": [
      48.746384734296846,
      0.06093847850055129
    ],
    "min": 0.0501,
    "max": 0.1125,
    "len": 3117,
    "code": "610004",
    "name": "信澳中小盘混合"
  },
  {
    "score": [
      43.17935731490359,
      0.06093830303030296
    ],
    "min": 0.0501,
    "max": 0.108,
    "len": 3117,
    "code": "213002",
    "name": "宝盈泛沿海增长混合"
  },
  {
    "score": [
      39.04319151741716,
      0.06071529808773897
    ],
    "min": 0.0501,
    "max": 0.1079,
    "len": 3117,
    "code": "580003",
    "name": "东吴行业轮动混合A"
  },
  {
    "score": [
      37.25803695912039,
      0.06066038961038954
    ],
    "min": 0.0501,
    "max": 0.1242,
    "len": 3117,
    "code": "200010",
    "name": "长城双动力混合A"
  },
  {
    "score": [
      30.301782173416864,
      0.060636724137931024
    ],
    "min": 0.0501,
    "max": 0.1032,
    "len": 3117,
    "code": "620002",
    "name": "金元顺安成长动力混合"
  },
  {
    "score": [
      23.91416319408861,
      0.06059614093959732
    ],
    "min": 0.0501,
    "max": 0.1205,
    "len": 3117,
    "code": "510050",
    "name": "华夏上证50ETF"
  },
  {
    "score": [
      37.56211833189599,
      0.060577583892617505
    ],
    "min": 0.0501,
    "max": 0.1119,
    "len": 3117,
    "code": "610002",
    "name": "信澳精华配置混合A"
  },
  {
    "score": [
      43.617768222247484,
      0.060576173708920225
    ],
    "min": 0.0501,
    "max": 0.1274,
    "len": 3117,
    "code": "519185",
    "name": "万家精选混合A"
  },
  {
    "score": [
      38.30859897880004,
      0.060553058676654246
    ],
    "min": 0.0501,
    "max": 0.1263,
    "len": 3117,
    "code": "519698",
    "name": "交银先锋混合A"
  },
  {
    "score": [
      31.077754359298808,
      0.06048118279569889
    ],
    "min": 0.0501,
    "max": 0.1223,
    "len": 3117,
    "code": "257040",
    "name": "国联安红利混合"
  },
  {
    "score": [
      46.188826492095906,
      0.06045589189189196
    ],
    "min": 0.0501,
    "max": 0.1031,
    "len": 3117,
    "code": "310368",
    "name": "申万菱信竞争优势混合A"
  },
  {
    "score": [
      34.86096133603381,
      0.06045020080321285
    ],
    "min": 0.0501,
    "max": 0.1045,
    "len": 3117,
    "code": "217012",
    "name": "招商行业领先混合A"
  },
  {
    "score": [
      20.04354926895067,
      0.06044651162790694
    ],
    "min": 0.0501,
    "max": 0.1165,
    "len": 3117,
    "code": "510060",
    "name": "工银上证央企ETF"
  },
  {
    "score": [
      38.82634303844098,
      0.060423647798742076
    ],
    "min": 0.0501,
    "max": 0.1099,
    "len": 3117,
    "code": "162607",
    "name": "景顺长城资源垄断混合"
  },
  {
    "score": [
      40.85328962897163,
      0.060384291187739476
    ],
    "min": 0.0501,
    "max": 0.1105,
    "len": 3117,
    "code": "160605",
    "name": "鹏华中国50混合"
  },
  {
    "score": [
      47.72760735859347,
      0.06033130165289256
    ],
    "min": 0.0501,
    "max": 0.1235,
    "len": 3117,
    "code": "379010",
    "name": "上投摩根中小盘混合A"
  },
  {
    "score": [
      40.4102072934958,
      0.06031480552070259
    ],
    "min": 0.0501,
    "max": 0.1255,
    "len": 3117,
    "code": "519029",
    "name": "华夏稳增混合"
  },
  {
    "score": [
      32.08130571009108,
      0.06031412429378535
    ],
    "min": 0.0501,
    "max": 0.1125,
    "len": 3118,
    "code": "260111",
    "name": "景顺长城公司治理混合"
  },
  {
    "score": [
      21.250976621470368,
      0.06030568383658976
    ],
    "min": 0.0501,
    "max": 0.1186,
    "len": 3117,
    "code": "510010",
    "name": "交银上证180公司治理ETF"
  },
  {
    "score": [
      34.59265833063929,
      0.06029973924380701
    ],
    "min": 0.0501,
    "max": 0.1208,
    "len": 3117,
    "code": "580002",
    "name": "东吴双动力混合A"
  },
  {
    "score": [
      21.14246272606389,
      0.06026673267326729
    ],
    "min": 0.0501,
    "max": 0.103,
    "len": 3117,
    "code": "350001",
    "name": "天治财富增长混合"
  },
  {
    "score": [
      44.98871855678978,
      0.06026300366300365
    ],
    "min": 0.0501,
    "max": 0.1098,
    "len": 3117,
    "code": "398021",
    "name": "中海能源策略混合"
  },
  {
    "score": [
      25.470707278260562,
      0.060261672473867565
    ],
    "min": 0.0501,
    "max": 0.111,
    "len": 3117,
    "code": "350002",
    "name": "天治低碳经济混合"
  }
]

 

posted @ 2023-03-16 00:40  浪浪辛  阅读(31)  评论(0)    收藏  举报