#!/usr/bin/env python
# coding: utf-8
# In[4]:
import sys
from statistics import median
from statistics import stdev
from scipy.stats import kurtosis,skew
import math
import numpy as np
import os
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import csv
from sklearn import svm
from random import randint
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score
from sklearn.metrics import auc
from sklearn import datasets
from joblib import dump, load
# In[42]:
def feature(FOLDER, label,
            PATH='/home/helong/share/ML/MobiAct_Dataset_v2.0/Annotated Data/',
            OUTPUT_PATH='/home/helong/share/ML/MobiAct_Dataset_v2.0/train_data_trainsform_lstm/'):
    """Extract one statistical feature row per CSV recording in PATH/FOLDER.

    Each file is trimmed to start at the first row whose 'label' column
    equals FOLDER, an acceleration-magnitude column is added, the trimmed
    frame is cached under OUTPUT_PATH/FOLDER/, and up to FALL_SIZE samples
    are reduced to 58 statistics plus the supplied numeric class label.

    Parameters:
        FOLDER (str): activity folder name (e.g. 'WAL'); also the label
            value marking the start of the useful window in each file.
        label (int): numeric class label appended to every feature row.
        PATH (str): root directory with one sub-folder per activity
            (default preserves the original hard-coded location).
        OUTPUT_PATH (str): directory for the cached trimmed CSVs
            (default preserves the original hard-coded location).

    Returns:
        list[list]: one 59-element row (58 features + label) per file.
    """
    FALL_SIZE = 1200  # maximum window length (samples) per recording
    FILE_PATH = PATH + FOLDER
    final = []
    for file in os.listdir(FILE_PATH):
        df = pd.read_csv(os.path.join(FILE_PATH, file))
        # Keep everything from the first row labelled FOLDER onwards.
        df = df[(df['label'] == FOLDER).idxmax():]
        df = df.reset_index(drop=True)
        print(file)
        df["acc_x"] = df["acc_x"].astype('float64')
        df["acc_y"] = df["acc_y"].astype('float64')
        df["acc_z"] = df["acc_z"].astype('float64')
        # Squared magnitude of the acceleration vector; sqrt is applied
        # later only where needed (tilt-angle computation).
        df['mag'] = df['acc_x'] * df['acc_x'] + df['acc_y'] * df['acc_y'] + df['acc_z'] * df['acc_z']

        # Cache the trimmed frame so a rerun can skip the preprocessing.
        OUTPUT_FOLDER_PATH = OUTPUT_PATH + FOLDER
        OUTPUT_FILE_PATH = OUTPUT_PATH + FOLDER + '/' + file
        if not os.path.exists(OUTPUT_FOLDER_PATH):
            os.makedirs(OUTPUT_FOLDER_PATH)
        if os.path.isfile(OUTPUT_FILE_PATH):
            print(OUTPUT_FILE_PATH + " exist , skip...")
        else:
            df.to_csv(OUTPUT_FILE_PATH, index=False)

        df_count = df.shape[0]
        print(df_count)
        # BUG FIX: the original shrank FALL_SIZE itself when it met a short
        # file, which silently truncated the window for every LATER file in
        # the same run. Use a per-file window length instead.
        window = min(FALL_SIZE, df_count)

        X, Y, Z, MAG, ymag = [], [], [], [], []
        for i in range(window):
            # Column positions: 2=acc_x, 3=acc_y, 4=acc_z, 12=mag
            # (assumes the MobiAct 12-column layout — TODO confirm).
            X.append(df.iloc[i, 2])
            Y.append(df.iloc[i, 3])
            Z.append(df.iloc[i, 4])
            MAG.append(df.iloc[i, 12])
            ymag.append(float(Y[i]) / float(math.sqrt(MAG[i])))
        # Tilt angle of the Y axis w.r.t. the total acceleration vector.
        TA = [math.asin(v) for v in ymag]

        avgX = sum(X) / len(X)
        avgY = sum(Y) / len(Y)
        avgZ = sum(Z) / len(Z)
        absX_l = [abs(v) for v in X]
        absY_l = [abs(v) for v in Y]
        absZ_l = [abs(v) for v in Z]
        slope = math.sqrt((max(X) - min(X)) ** 2 + (max(Y) - min(Y)) ** 2 + (max(Z) - min(Z)) ** 2)
        abs_slope = math.sqrt((max(absX_l) - min(absX_l)) ** 2
                              + (max(absY_l) - min(absY_l)) ** 2
                              + (max(absZ_l) - min(absZ_l)) ** 2)
        meanMag = sum(MAG) / len(MAG)

        # Feature order must match the header written by the driver script.
        row = [
            avgX, avgY, avgZ,
            median(X), median(Y), median(Z),
            stdev(X), stdev(Y), stdev(Z),
            skew(X), skew(Y), skew(Z),
            kurtosis(X), kurtosis(Y), kurtosis(Z),
            min(X), min(Y), min(Z),
            max(X), max(Y), max(Z),
            slope,
            sum(TA) / len(TA), stdev(TA), skew(TA), kurtosis(TA),
            # mean absolute deviation per axis
            sum(abs(v - avgX) for v in X) / len(X),
            sum(abs(v - avgY) for v in Y) / len(Y),
            sum(abs(v - avgZ) for v in Z) / len(Z),
            sum(absX_l) / len(absX_l), sum(absY_l) / len(absY_l), sum(absZ_l) / len(absZ_l),
            median(absX_l), median(absY_l), median(absZ_l),
            stdev(absX_l), stdev(absY_l), stdev(absZ_l),
            skew(absX_l), skew(absY_l), skew(absZ_l),
            kurtosis(absX_l), kurtosis(absY_l), kurtosis(absZ_l),
            min(absX_l), min(absY_l), min(absZ_l),
            max(absX_l), max(absY_l), max(absZ_l),
            abs_slope,
            meanMag, stdev(MAG), min(MAG), max(MAG),
            max(MAG) - min(MAG),
            0,        # ZCR_Mag: zero-crossing rate — never computed in the original
            meanMag,  # AvgResAcc: (1/n)*sum(MAG), identical to meanMag
            label,
        ]
        final.append(row)
    return final
# In[59]:
# Feature-extraction driver: rebuild the feature CSV for one activity class.
OUTPUT_PATH = '/home/helong/share/ML/MobiAct_Dataset_v2.0/featured/'
FOLDER = 'WAL'
label = 0
OUTPUT_FILE_PATH = OUTPUT_PATH + FOLDER + '.csv'
# Start from a clean file so a rerun does not append duplicate rows.
if os.path.isfile(OUTPUT_FILE_PATH):
    os.remove(OUTPUT_FILE_PATH)
# Column names, in the exact order feature() emits its values.
HEADER = ['AvgX', 'AvgY', 'AvgZ', 'MedianX', 'MedianY', 'MedianZ', 'StdX',
          'StdY', 'StdZ', 'SkewX', 'SkewY', 'SkewZ', 'KurtosisX', 'KurtosisY', 'KurtosisZ', 'MinX', 'MinY',
          'MinZ', 'MaxX', 'MaxY', 'MaxZ', 'Slope', 'MeanTA', 'StdTA', 'SkewTA', 'KurtosisTA',
          'AbsX', 'AbsY', 'AbsZ', 'AbsMeanX', 'AbsMeanY', 'AbsMeanZ', 'AbsMedianX', 'AbsMedianY', 'AbsMedianZ',
          'AbsStdX', 'AbsStdY', 'AbsStdZ', 'AbsSkewX', 'AbsSkewY', 'AbsSkewZ',
          'AbsKurtosisX', 'AbsKurtosisY', 'AbsKurtosisZ', 'AbsMinX', 'AbsMinY', 'AbsMinZ',
          'AbsMaxX', 'AbsMaxY', 'AbsMaxZ', 'AbsSlope', 'MeanMag',
          'StdMag', 'MinMag', 'MaxMag', 'DiffMinMaxMag', 'ZCR_Mag', 'AverageResultantAcceleration', 'label']
with open(OUTPUT_FILE_PATH, 'a') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n')
    writer.writerow(HEADER)
    lala = feature(FOLDER, label)
    data_len = len(lala)
    for row in lala:
        writer.writerow(row)
    print("total ", data_len, " records process done")
# In[2]:
def get_all_data(PATH='/home/helong/share/ML/MobiAct_Dataset_v2.0/featured/'):
    """Load every feature CSV under PATH into one row-shuffled DataFrame.

    Parameters:
        PATH (str): directory holding one feature CSV per activity class
            (default preserves the original hard-coded location).

    Returns:
        pandas.DataFrame: the first 59 columns (58 features + label) of all
        CSV files concatenated, with rows in random order.
    """
    frames = []
    for f in os.listdir(PATH):
        # Only ingest CSV files; anything else in the folder is skipped.
        if 'csv' in f:
            data = pd.read_csv(os.path.join(PATH, f), index_col=False, low_memory=False)
            frames.append(data.iloc[0:, 0:59])  # 58 features + label column
    # BUG FIX: DataFrame.append was deprecated in pandas 1.4 and removed in
    # 2.0; build the frame list and concatenate once (also O(n) not O(n^2)).
    all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    # BUG FIX: the original called np.random.shuffle(all_data.values), but
    # .values is not guaranteed to be a view, so the shuffle could act on a
    # throwaway copy and leave the rows in file order. sample(frac=1)
    # shuffles rows reliably.
    all_data = all_data.sample(frac=1).reset_index(drop=True)
    return all_data
# In[16]:
#train model
# Train the fall-detection SVM on all featured data and persist it.
all_data = get_all_data()
_all_data_x = []
_all_data_y = []
count = all_data.shape[0]
count1 = all_data.shape[1]
for i in range(0, count):
    _all_data_x.append(all_data.iloc[i, 0:58])   # 58 feature columns
    _all_data_y.append(all_data.iloc[i, 58:59])  # label column
X_train, X_test, y_train, y_test = train_test_split(
    _all_data_x, _all_data_y, test_size=0.2, random_state=42)
clf = svm.SVC(gamma='scale')
clf.fit(X_train, y_train)
test_count = len(X_test)
print(test_count)
y_predict = clf.predict(X_test)
# NOTE(review): ROC-AUC is computed from hard class predictions rather than
# decision_function scores, which coarsens the metric — confirm if intended.
score = roc_auc_score(y_test, y_predict)
print(score)
precision, recall, thresholds = precision_recall_curve(y_test, y_predict)
f1 = f1_score(y_test, y_predict)
# BUG FIX: the original stored this in a variable named `auc`, shadowing
# sklearn.metrics.auc so any later call to auc(...) would raise TypeError.
pr_auc = auc(recall, precision)
print("precision is ", precision, "recall is ", recall, "thresholds is ", thresholds)
print("f1 is ", f1, "auc is ", pr_auc)
print("done...")
# Persist the model; reload later with: clf_load = load('fall_detect_svm.joblib')
dump(clf, 'fall_detect_svm.joblib')
# In[5]:
#load the model to test
# Reload the persisted SVM and score it on freshly loaded data.
all_data = get_all_data()
_all_data_x = []
_all_data_y = []
count = all_data.shape[0]
count1 = all_data.shape[1]
for i in range(0, count):
    _all_data_x.append(all_data.iloc[i, 0:58])   # feature columns
    _all_data_y.append(all_data.iloc[i, 58:59])  # label column
clf_load = load('fall_detect_svm.joblib')
X_train, X_test, y_train, y_test = train_test_split(
    _all_data_x, _all_data_y, test_size=0.1, random_state=42)
# NOTE(review): the score below is computed on the 90% "train" split, which
# very likely overlaps the data the saved model was fitted on, inflating the
# score — confirm whether X_test/y_test was intended.
y_predict = clf_load.predict(X_train)
score = roc_auc_score(y_train, y_predict)
print(score)
# In[ ]: