数据分析-时间序列、调频、补值
# -*- coding:utf-8 -*-
from datetime import datetime
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Sample query result: a JSON array whose elements are themselves JSON-encoded
# rows. Each row holds the values for c_deviceid, integer1, float1 and c_time,
# listed newest first and 30 seconds apart.
ResultData = "[\"[\\\"asset-8nmfb4hb\\\",0,0,\\\"2022-04-28 15:57:00.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,0,\\\"2022-04-28 15:56:30.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,1,\\\"2022-04-28 15:56:00.000\\\"]\",\"[\\\"asset-8nmfb4hb\\\",0,0.3333333,\\\"2022-04-28 15:55:30.000\\\"]\"]"
# Matching column schema: a JSON array of JSON-encoded objects, one per column,
# each carrying a "Name" and a "Type" field in the same order as the row values.
ResultSchema = "[\"{\\\"Name\\\":\\\"c_deviceid\\\",\\\"Type\\\":\\\"String\\\"}\",\"{\\\"Name\\\":\\\"integer1\\\",\\\"Type\\\":\\\"Int64\\\"}\",\"{\\\"Name\\\":\\\"float1\\\",\\\"Type\\\":\\\"Float32\\\"}\",\"{\\\"Name\\\":\\\"c_time\\\",\\\"Type\\\":\\\"DateTime64(3, 'Asia/Shanghai')\\\"}\"]"
def lists_format_to_json(ResultSchema, ResultData):
    """Convert a double-encoded query result into a list of row dicts.

    Both arguments are JSON text holding an array whose elements are
    themselves JSON-encoded strings: every schema element decodes to an
    object with a ``Name`` key, and every data element decodes to a row
    (a list of column values in schema order).

    The resulting list is also printed to stdout.

    :param ResultSchema: JSON text describing the columns.
    :param ResultData: JSON text holding the rows.
    :return: one ``{column name: value}`` dict per row, in input order.
    :raises json.JSONDecodeError: if any level of the input is not valid JSON.
    :raises KeyError: if a schema entry has no ``Name`` key.
    :raises IndexError: if a row has fewer values than the schema has columns.
    """
    result_data = json.loads(ResultData)
    result_schema = json.loads(ResultSchema)

    # Every element is stored as a string, hence the second json.loads.
    column_names = [json.loads(column)['Name'] for column in result_schema]

    data_schema = []
    for encoded_row in result_data:
        row = json.loads(encoded_row)
        # Look values up by position (rather than zip) so that a row that is
        # too short fails loudly instead of silently dropping columns.
        data_schema.append(
            {name: row[position] for position, name in enumerate(column_names)}
        )

    print('data_schema:', data_schema)
    return data_schema
def generate_sequence_data(data_schema, c_time, key,
                           time_format='%Y-%m-%d %H:%M:%S.%f'):
    """Build a time-indexed ``pandas.Series`` from a list of row dicts.

    The rows are consumed in reverse order, so input sorted newest-first
    yields a series in ascending time order. The series is also printed
    to stdout.

    :param data_schema: iterable of row dicts.
    :param c_time: name of the key holding each row's timestamp string.
    :param key: name of the key holding each row's value.
    :param time_format: ``strptime`` format of the timestamp strings.
    :return: series of the ``key`` values indexed by the parsed timestamps.
    :raises KeyError: if a row lacks ``c_time`` or ``key``.
    :raises ValueError: if a timestamp does not match ``time_format``.
    """
    # Materialise first so any iterable (not only lists) can be reversed.
    rows = list(data_schema)[::-1]
    index = [datetime.strptime(row[c_time], time_format) for row in rows]
    value = [row[key] for row in rows]

    sequence_data = pd.Series(value, index=index)
    print('sequence_data:', sequence_data)
    return sequence_data
def adjust_time_frequent(sequence_data, frequent):
    """Re-index a time series onto a fixed-frequency grid.

    Thin wrapper around ``Series.asfreq``: grid timestamps that were not
    present in the original index get NaN values. The result is also
    printed to stdout.

    Common frequency aliases (optionally prefixed by a multiple, e.g.
    ``'15s'``):

    * ``h``   -- hourly
    * ``min`` -- minutely
    * ``s``   -- secondly

    The older uppercase spellings ``H``, ``T`` and ``S`` are deprecated
    since pandas 2.2.

    :param sequence_data: series with a datetime index.
    :param frequent: target frequency (offset alias string or offset object).
    :return: the series conformed to the new frequency.
    """
    frequent_data = sequence_data.asfreq(frequent)
    print('frequent_data:', frequent_data)
    return frequent_data
def fill_front_data(frequent_data):
    """Forward-fill missing values, print the result and return it.

    :param frequent_data: pandas object that may contain NaN gaps.
    :return: a new object in which every gap takes the last preceding valid
        value; leading gaps stay NaN. The input is left unmodified.
    """
    # ``fillna(method="ffill")`` is deprecated since pandas 2.1;
    # ``ffill()`` is the direct equivalent.
    front_data = frequent_data.ffill()
    print('front_data: ', front_data)
    return front_data
def fill_back_data(frequent_data):
    """Backward-fill missing values, print the result and return it.

    :param frequent_data: pandas object that may contain NaN gaps.
    :return: a new object in which every gap takes the next following valid
        value; trailing gaps stay NaN. The input is left unmodified.
    """
    # ``fillna(method="bfill")`` is deprecated since pandas 2.1;
    # ``bfill()`` is the direct equivalent.
    back_data = frequent_data.bfill()
    print('back_data: ', back_data)
    return back_data
def draw_picture():
    """Plot 20 random daily samples together with their 5-point rolling mean.

    The samples are drawn from a standard normal distribution and indexed by
    consecutive days starting at 2018-12-01. Both curves are drawn on the
    current matplotlib axes and shown with ``plt.show()``.
    """
    # "D" is the canonical calendar-day alias; the lowercase spelling is
    # deprecated in recent pandas releases.
    daily_index = pd.date_range('2018-12-01', periods=20, freq="D")
    tx = pd.Series(np.random.randn(20), index=daily_index)

    # With the default min_periods the first four points of the mean are NaN.
    rm = tx.rolling(window=5, center=False).mean()

    rm.plot()
    tx.plot()
    plt.show()
if __name__ == '__main__':
    # Demo pipeline: decode the sample result, build the time series,
    # re-index it onto a 15-second grid, show both fill strategies, then
    # plot an unrelated random series with its rolling mean.
    data_schema = lists_format_to_json(ResultSchema, ResultData)
    c_time = 'c_time'
    key = 'float1'
    sequence_data = generate_sequence_data(data_schema, c_time, key)
    # Lowercase 's' is the seconds alias accepted across pandas versions;
    # the uppercase 'S' spelling is deprecated since pandas 2.2.
    frequent = '15s'
    frequent_data = adjust_time_frequent(sequence_data, frequent)
    fill_front_data(frequent_data)
    fill_back_data(frequent_data)
    draw_picture()
本文来自博客园,作者:ReluStarry,转载请注明原文链接:https://www.cnblogs.com/relustarry/p/16252520.html

浙公网安备 33010602011771号