temp数据预处理--以24h为周期的序列
1.按照周期来截取数据
从数据库加载下来的是每5min取一次mean()得到的一列数据,因此每24h共有24*60/5=288个数据点
首先把这8352个数据(最后一个以倒数第二个填充)改成288*29的形式(8352 = 288×29,即288个时刻为行、29个周期为列)
def split_by_period(values, period=288):
    """Group a flat sequence of samples by their position inside a period.

    Row ``r`` of the result holds ``values[r]``, ``values[r + period]``,
    ``values[r + 2 * period]`` and so on, i.e. every sample that sits at
    offset ``r`` within its period.

    Args:
        values: Sliceable sequence of samples in their original order.
        period: Number of samples per period (288 in this script).

    Returns:
        A list of exactly ``period`` lists. Rows are empty when ``values``
        has fewer than ``period`` items.

    Raises:
        ValueError: If ``period`` is not positive.
    """
    if period <= 0:
        raise ValueError("period must be a positive integer")
    # A strided slice picks up every sample at a given offset in one pass,
    # replacing a full rescan of the data for each of the `period` rows.
    return [list(values[offset::period]) for offset in range(period)]


if __name__ == "__main__":
    # One sample per input line; only the newline is stripped, so blank
    # lines are kept as empty fields and the row alignment is preserved.
    with open('my_data.csv', 'r') as txt:
        temp = [line.strip('\n') for line in txt]

    # One output line per offset within the period. Fields are joined with
    # commas, so rows carry no trailing comma (which would be parsed as an
    # empty extra column) and no empty line is appended after the last row.
    with open('new_data.csv', 'w') as txt1:
        for row in split_by_period(temp, 288):
            txt1.write(','.join(row) + '\n')
2.缺失值和异常处理
https://blog.csdn.net/wangxingfan316/article/details/79363420
import numpy as np
import pandas as pd
import scipy.fftpack as fftpact

# Neither plotting nor the imputer is used by this cleaning step, so a missing
# or incompatible optional package must not stop the script.
try:
    import matplotlib.pyplot as plt
except ImportError:
    plt = None
try:
    # Imputer was removed from sklearn.preprocessing in scikit-learn 0.22.
    from sklearn.preprocessing import Imputer
except ImportError:
    Imputer = None


def meaninstead(list, lower=0, upper=1000):
    """Replace out-of-range cells by the mean of their vertical neighbours.

    A cell is anomalous when its value is ``> upper`` or ``<= lower``. Each
    anomalous cell is overwritten with the mean of the nearest in-range value
    above it and the nearest in-range value below it in the same column. If
    only one such neighbour exists (first/last row, or a run of anomalies
    reaching the edge) that single value is used; if none exists the cell is
    left unchanged. NaN cells are neither replaced nor used as neighbours.

    Args:
        list: Rectangular 2-D table (NumPy array or list of lists) indexed as
            ``table[row][column]``. It is modified in place.
        lower: Values less than or equal to this are anomalous.
        upper: Values greater than this are anomalous.

    Returns:
        The same object that was passed in, after in-place correction.
    """
    table = list  # the parameter name shadows the builtin; use an alias
    n_rows = len(table)
    if n_rows == 0:
        return table
    n_cols = len(table[0])

    for col in range(n_cols):
        # Work from a snapshot of the column so that an already replaced
        # value never feeds into the replacement of a later cell.
        original = [table[row][col] for row in range(n_rows)]
        usable = [lower < value <= upper for value in original]
        for row, value in enumerate(original):
            if not (value > upper or value <= lower):
                continue
            neighbours = []
            for above in range(row - 1, -1, -1):
                if usable[above]:
                    neighbours.append(original[above])
                    break
            for below in range(row + 1, n_rows):
                if usable[below]:
                    neighbours.append(original[below])
                    break
            if neighbours:
                table[row][col] = sum(neighbours) / len(neighbours)
    return table


if __name__ == "__main__":
    # new_data.csv is a bare numeric matrix; header=None keeps its first row
    # as data instead of turning it into column names.
    dataset = pd.read_csv('new_data.csv', header=None)
    cleaned = meaninstead(dataset.to_numpy(dtype=float))
    # A NumPy array has no to_csv(); wrap it in a DataFrame and write it back
    # in the same header-less, index-less layout as the input.
    pd.DataFrame(cleaned).to_csv('clean.csv', index=False, header=False)
3.降噪
https://www.douban.com/note/698037655/?type=like
傅里叶变换降噪失真度的计算
https://max.book118.com/html/2018/1126/8077042115001134.shtm
5.分箱
import numpy as np
import pandas as pd


def quantile_bin_mean(values, q=3, index=1):
    """Return the mean of the values sharing a quantile bin with one element.

    The values are cut into ``q`` equal-frequency bins with ``pandas.qcut``.
    The bin that contains ``values[index]`` is selected and the mean of every
    value assigned to that bin is returned. Bin membership comes from qcut's
    own bin codes, so values equal to the bin's closed right edge are
    included.

    Args:
        values: 1-D sequence of numbers.
        q: Number of quantile bins.
        index: Position of the element whose bin is averaged.

    Returns:
        The mean of the selected bin as a float.

    Raises:
        ValueError: If ``values`` is empty, or ``values[index]`` is NaN and
            therefore belongs to no bin. ``pandas.qcut`` itself also raises
            ValueError when the quantile edges are not unique.
        IndexError: If ``index`` is out of range.
    """
    data = np.asarray(values, dtype=float).ravel()
    if data.size == 0:
        raise ValueError("values is empty")
    codes = pd.qcut(data, q).codes
    target = codes[index]
    if target < 0:
        # qcut marks NaN entries with code -1.
        raise ValueError("reference value is NaN and belongs to no bin")
    return float(data[codes == target].mean())


if __name__ == "__main__":
    dataset = pd.read_csv('june_total.csv')
    list1 = np.array(dataset.values.tolist())
    # Second row of the loaded table, all columns.
    temp = list1[1, 0:]
    print(quantile_bin_mean(temp, 3, 1))

浙公网安备 33010602011771号