连续洗浴事件
#10-1
import pandas as pd
import matplotlib.pyplot as plt
inputfile ='original_data.xls'
data = pd.read_excel(inputfile)
lv_non = pd.value_counts(data['有无水流'])['无']
lv_move = pd.value_counts(data['有无水流'])['有']
fig = plt.figure(figsize = (6 ,5))
plt.rcParams['font.sans-serif'] = 'SimHei'
plt.rcParams['axes.unicode_minus'] = False
plt.bar([0,1], height=[lv_non,lv_move], width=0.4, alpha=0.8, color='skyblue')
plt.xticks([index for index in range(2)], ['无','有'])
plt.xlabel('水流状态')
plt.ylabel('记录数')
plt.title('3109不同水流状态记录数')
plt.show()
plt.close()
water = data['水流量']
fig = plt.figure(figsize = (5 ,8))
plt.boxplot(water,
patch_artist=True,
labels = ['水流量'],
boxprops = {'facecolor':'lightblue'})
plt.title('3109水流量分布箱线图')
plt.grid(axis='y')
plt.show()



#10-5
data = pd.read_csv('water_heart.csv')
sj = pd.read_csv('sj.csv')
data["发生时间"] = pd.to_datetime(data["发生时间"],format="%Y%m%d%H%M%S")
timeDel = pd.Timedelta("0.5 sec")
sj["事件开始时间"] = data.iloc[sj["事件起始编号"]-1,0].values- timeDel
sj["事件结束时间"] = data.iloc[sj["事件终止编号"]-1,0].values + timeDel
sj['洗浴时间点'] = [i.hour for i in sj["事件开始时间"]]
sj["总用水时长"] = np.int64(sj["事件结束时间"] - sj["事件开始时间"])/1000000000 + 1
for i in range(len(data)-1):
if (data.loc[i,"水流量"] != 0) & (data.loc[i + 1,"水流量"] == 0) :
data.loc[i + 1,"停顿开始时间"] = data.loc[i +1, "发生时间"] - timeDel
if (data.loc[i,"水流量"] == 0) & (data.loc[i + 1,"水流量"] != 0) :
data.loc[i,"停顿结束时间"] = data.loc[i , "发生时间"] + timeDel
indStopStart = data.index[data["停顿开始时间"].notnull()]+1
indStopEnd = data.index[data["停顿结束时间"].notnull()]+1
Stop = pd.DataFrame(data={"停顿开始编号":indStopStart[:-1],
"停顿结束编号":indStopEnd[1:]})
Stop["停顿时长"] = np.int64(data.loc[indStopEnd[1:]-1,"停顿结束时间"].values-
data.loc[indStopStart[:-1]-1,"停顿开始时间"].values)/1000000000
for i in range(len(sj)):
Stop.loc[(Stop["停顿开始编号"] > sj.loc[i,"事件起始编号"]) &
(Stop["停顿结束编号"] < sj.loc[i,"事件终止编号"]),"停顿归属事件"]=i+1
Stop = Stop[Stop["停顿归属事件"].notnull()]
stopAgg = Stop.groupby("停顿归属事件").agg({"停顿时长":sum,"停顿开始编号":len})
sj.loc[stopAgg.index - 1,"总停顿时长"] = stopAgg.loc[:,"停顿时长"].values
sj.loc[stopAgg.index-1,"停顿次数"] = stopAgg.loc[:,"停顿开始编号"].values
sj.fillna(0,inplace=True)
stopNo0 = sj["停顿次数"] != 0
sj.loc[stopNo0,"平均停顿时长"] = sj.loc[stopNo0,"总停顿时长"]/sj.loc[stopNo0,"停顿次数"]
sj.fillna(0,inplace=True)
sj["用水时长"] = sj["总用水时长"] - sj["总停顿时长"]
sj["用水/总时长"] = sj["用水时长"] / sj["总用水时长"]
print('用水事件用水时长与频率特征构造完成后数据的特征为:\n',sj.columns)
print('用水事件用水时长与频率特征构造完成后数据的前5行5列特征为:\n',
sj.iloc[:5,:5])

#10-6
data["水流量"] =data["水流量"] /60
sj["总用水量"]=0
for i in range(len(sj)):
Start=sj.loc[i,"事件起始编号"]-1
End=sj.loc[i,"事件终止编号"]-1
if Start != End:
for j in range(Start,End):
if data.loc[j,"水流量"] !=0:
sj.loc[i,"总用水量"] = (data.loc[j+1,"发生时间"] -
data.loc[j,"发生时间"]).seconds*\
data.loc[j,"水流量"] + sj.loc[i,"总用水量"]
sj.loc[i,"总用水量"] = sj.loc[i,"总用水量"] + data.loc[End,"水流量"] * 2
else:
sj.loc[i,"总用水量"] = data.loc[Start,"水流量"]* 2
sj["平均水流量"] = sj["总用水量"] / sj["用水时长"]
sj["水流量波动"]=0
for i in range(len(sj)):
Start= sj.loc[i,"事件起始编号"] - 1
End = sj.loc[i,"事件终止编号"] - 1
for j in range(Start,End + 1):
if data.loc[j,"水流量"] != 0:
slbd = (data.loc[j,"水流量"] - sj.loc[i,"平均水流量"])**2
slsj = (data.loc[j+1,"发生时间"] - data.loc[j,"发生时间"]).seconds
sj.loc[i,"水流量波动"] = slbd * slsj + sj.loc[i,"水流量波动"]
sj.loc[i,"水流量波动"] = sj.loc[i,"水流量波动"] / sj.loc[i,"用水时长"]
sj["停顿时长波动"]=0
for i in range(len(sj)):
if sj.loc[i,"停顿次数"] > 1:
for j in Stop.loc[Stop["停顿归属事件"] == (i+1),"停顿时长"].values:
sj.loc[i,"停顿时长波动"] = ((j - sj.loc[i,"平均停顿时长"])**2) *j+ \
sj.loc[i,"停顿时长波动"]
sj.loc[i,"停顿时长波动"] = sj.loc[i,"停顿时长波动"] / sj.loc[i,"总停顿时长"]
print('用水量和波动属性构造完成后数据的属性为:\n',sj.columns)
print('用水量和波动属性构造完成后数据的前5行5列属性为: \n',sj.iloc[:5,:5])


#10-9
from sklearn.metrics import classification_report
from sklearn.metrics import roc_curve
import joblib
import matplotlib.pyplot as plt
bpnn = joblib.load('water_heater_nnet.m') # 加载模型
y_pred = bpnn.predict(x_stdtest) # 返回预测结果
print('神经网络预测结果评价报告:\n',classification_report(y_test,y_pred))
# 绘制roc曲线图
plt.rcParams['font.sans-serif'] = 'SimHei' # 显示中文
plt.rcParams['axes.unicode_minus'] = False # 显示负号
fpr, tpr, thresholds = roc_curve(y_pred,y_test) # 求出TPR和FPR
plt.figure(figsize=(6,4)) # 创建画布
plt.plot(fpr,tpr) # 绘制曲线
plt.title('3109用户用水事件识别ROC曲线') # 标题
plt.xlabel('FPR') # x轴标签
plt.ylabel('TPR') # y轴标签
plt.savefig('./用户用水事件识别ROC曲线.png') # 保存图片
plt.show() # 显示图形


浙公网安备 33010602011771号