时序数据异常检测相关的问题记录
量化曲线的波动情况,即如何判断曲线是否平滑?
# 对序列做一阶差分(近似求导),统计涨跌方向发生变化的次数,再将其占数据点总数的比例与阈值(0.5)作比较
def is_fluctuating(series, window=86400, threshold=0.5):
    """Report whether the recent part of a time series keeps changing direction.

    The values inside the look-back window are differenced, every step is
    classified as "rising" (difference > 0) or "not rising", and the number
    of places where two consecutive steps disagree is counted. The series is
    considered fluctuating when that count, divided by the number of values
    in the window, exceeds ``threshold``.

    Args:
        series: Sequence of ``(timestamp, value)`` pairs. The timestamp of the
            last element marks the end of the window.
        window: Width of the look-back window, in the unit of the timestamps
            (the default 86400 is presumably one day in seconds).
        threshold: Minimum ratio of direction changes to values for the
            series to count as fluctuating.

    Returns:
        bool: ``True`` when the ratio is strictly greater than ``threshold``;
        ``False`` otherwise, including for an empty series.
    """
    if len(series) == 0:
        return False

    end_ts = series[-1][0]
    values = [point[1] for point in series if point[0] >= end_ts - window]
    if not values:
        # Only reachable with a negative window; avoids dividing by zero.
        return False

    # A flat step (difference == 0) is grouped with falling steps.
    rising = np.diff(values) > 0
    # Compare each step with the next one; a mismatch is one direction change.
    reversals = int(np.count_nonzero(rising[1:] != rising[:-1]))
    return reversals / len(values) > threshold
# 指数加权移动平均
def ewma(series):
    """Smooth a time series with an exponentially weighted moving average.

    Uses pandas' ``ewm`` with ``com=2`` (per the pandas documentation this
    corresponds to a smoothing factor ``alpha = 1 / (1 + com) = 1/3``),
    ``adjust=True``, ``min_periods=0`` and ``ignore_na=False``.

    Args:
        series: Sequence of ``(timestamp, value)`` pairs.

    Returns:
        list: ``[timestamp, smoothed_value]`` pairs in the input order, with
        each smoothed value rounded to two decimals. An empty input yields an
        empty list.
    """
    if len(series) == 0:
        # pd.Series([]) has no numeric dtype on recent pandas versions, so
        # short-circuit instead of handing it to ewm().
        return []

    values = pd.Series([point[1] for point in series])
    smoothed = values.ewm(
        com=2, min_periods=0, adjust=True, ignore_na=False
    ).mean()
    return [
        [point[0], round(value, 2)]
        for point, value in zip(series, smoothed.tolist())
    ]
如何判断两条曲线是否"胶着"在一起?
# 取最近一个周期(1 小时)的数据,分别与昨天、上周同一时段的数据做差,再比较平均相对偏差是否小于阈值
def _window_values(timeseries, end_ts, period):
    """Return the values whose timestamp lies in ``(end_ts - period, end_ts]``."""
    return [
        point[1] for point in timeseries if end_ts - period < point[0] <= end_ts
    ]


def _pad_with_mean(values, length):
    """Return ``values`` extended to ``length`` items by appending its mean."""
    missing = length - len(values)
    if missing <= 0:
        return list(values)
    return list(values) + [float(np.mean(values))] * missing


def _mean_relative_deviation(current, reference):
    """Return the mean of ``|current - reference| / |current|``.

    Points where both the difference and ``current`` are zero contribute 0
    (identical values); points where only ``current`` is zero contribute
    ``inf``.
    """
    diff = np.abs(current - reference)
    denom = np.abs(current)
    deviation = np.full(diff.shape, np.inf)
    deviation[diff == 0] = 0.0
    usable = denom != 0
    deviation[usable] = diff[usable] / denom[usable]
    return float(np.mean(deviation))


def check_intersect(timeseries, period=3600, threshold=0.1):
    """Check whether the latest data overlaps the same slot of a past day.

    The most recent ``period`` of data is compared with the same window one
    day earlier and seven days earlier. The curves are considered to overlap
    when the mean relative deviation from either historical window is below
    ``threshold``.

    Args:
        timeseries: Sequence of ``(timestamp, value)`` pairs; the timestamp of
            the last element marks the end of the current window. The 86400
            day offset suggests timestamps in seconds.
        period: Width of each compared window, in timestamp units.
        threshold: Upper bound (exclusive) on the mean relative deviation.

    Returns:
        bool: ``True`` when the current window is within ``threshold`` of the
        window one day ago or the window seven days ago. ``False`` otherwise,
        and also when the input is empty or any of the three windows holds
        fewer than two points.
    """
    if len(timeseries) == 0:
        return False

    day = 86400
    end_ts = timeseries[-1][0]

    # Step 1: collect the three windows and fill in missing points so the
    # arrays can be subtracted element-wise.
    today = _window_values(timeseries, end_ts, period)
    yesterday = _window_values(timeseries, end_ts - day, period)
    last_week = _window_values(timeseries, end_ts - 7 * day, period)
    if min(len(today), len(yesterday), len(last_week)) < 2:
        return False

    # Heuristic from the original code: more than 2305 points is taken to
    # mean 1-minute data, otherwise 5-minute data.
    granularity = 60 if len(timeseries) > 2305 else 300
    expected = int(np.ceil(period / granularity))
    # Pad to the longest window as well, so a wrong granularity guess or
    # uneven gaps cannot leave the arrays with different shapes.
    target = max(expected, len(today), len(yesterday), len(last_week))

    arr_today = np.asarray(_pad_with_mean(today, target), dtype=float)
    arr_yesterday = np.asarray(_pad_with_mean(yesterday, target), dtype=float)
    arr_week = np.asarray(_pad_with_mean(last_week, target), dtype=float)

    # Step 2: measure how far the current data is from each historical window.
    dev_yesterday = _mean_relative_deviation(arr_today, arr_yesterday)
    dev_week = _mean_relative_deviation(arr_today, arr_week)
    return bool(dev_yesterday < threshold or dev_week < threshold)
判断曲线是否是周期性曲线,并计算出其周期?
参见:判断时序数据的周期性
作者:Standby — 一生热爱名山大川、草原沙漠,还有我们小郭宝贝!
出处:http://www.cnblogs.com/standby/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接,否则保留追究法律责任的权利。
出处:http://www.cnblogs.com/standby/
本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接,否则保留追究法律责任的权利。

浙公网安备 33010602011771号