# -*- coding: utf-8 -*-
import pandas as pd
from scipy.interpolate import lagrange
# Location of the workbook to repair and of the repaired copy to write out.
inputfile = 'F:\\python数据挖掘\\chapter6\\chapter6\\demo\\data\\missing_data.xls'
outputfile = 'F:\\python数据挖掘\\chapter6\\chapter6\\demo\\tmp\\missing_data_sale.xls'

# header=None: no row of the sheet is used as column names, so the columns
# receive integer labels.
data = pd.read_excel(inputfile, header=None)
# Disabled filter kept for reference: it would blank the values of the
# u'销量' column that are below 400 or above 5000.
# data[u'销量'][(data[u'销量']<400)|(data[u'销量']>5000)]=None
def ployinterp(s, n, k=5):
    """Estimate the entry labelled ``n`` of ``s`` by Lagrange interpolation.

    The interpolating polynomial is fitted through the non-null entries whose
    labels lie among the ``k`` labels before and the ``k`` labels after ``n``
    (``n`` itself is excluded) and is then evaluated at ``n``.

    Args:
        s: pandas Series to interpolate in; assumed to carry unique integer
            labels (e.g. the default 0..len-1 index).
        n: Label of the entry to estimate.
        k: Number of neighbouring labels considered on each side of ``n``.

    Returns:
        The value of the interpolating polynomial at ``n``, or NaN when the
        window holds no non-null neighbour to fit through.
    """
    window = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # reindex() yields NaN for labels that are not in the index (negative
    # labels near the start, labels past the end) instead of raising KeyError
    # the way list-based ``s[...]`` selection does.
    neighbours = s.reindex(window)
    known = neighbours[neighbours.notnull()]
    if known.empty:
        # Nothing to fit: lagrange() over zero points evaluates to 0.0, which
        # would be indistinguishable from a genuine value.
        return float("nan")
    return lagrange(list(known.index), list(known))(n)
# Fill every missing cell, column by column, with its interpolated estimate,
# then write the repaired table out.
for i in data.columns:
    column = data[i]
    # Row labels of the gaps, computed once per column instead of re-running
    # isnull() over the whole column for every single row.
    missing_rows = column.index[column.isnull().to_numpy()]
    for j in missing_rows:
        # Write through .at: chained ``data[i][j] = ...`` may assign into a
        # temporary copy and leave the frame itself unchanged.
        # data[i] is re-read on each pass so earlier fills feed later ones.
        data.at[j, i] = ployinterp(data[i], j)
        print(data.at[j, i])
# NOTE(review): recent pandas releases may no longer ship a writer for the
# legacy .xls format; confirm the target extension against the installed
# version.
data.to_excel(outputfile, header=None, index=False)