1 import numpy as np
2 import matplotlib.pyplot as plt
3
4
def is_outlier(points, threshold=3.5):
    """Flag outliers using the modified z-score (median absolute deviation).

    Each observation's Euclidean distance from the per-column median is
    scaled by the median of those distances (the MAD) and compared with
    ``threshold``.

    Parameters
    ----------
    points : array_like
        Observations, indexed along the first axis. A 1-D input of
        length ``n`` is treated as ``n`` one-dimensional observations.
    threshold : float, optional
        Modified z-score above which an observation is flagged.
        Defaults to 3.5.

    Returns
    -------
    numpy.ndarray
        Boolean mask with one entry per observation; ``True`` marks an
        outlier.
    """
    # Accept lists and tuples as well as ndarrays.
    points = np.asarray(points)
    if points.ndim == 1:
        points = points[:, None]

    # Nothing to score: return an empty/all-False mask instead of letting
    # np.median produce NaN (and warnings) on an empty slice.
    if points.size == 0:
        return np.zeros(points.shape[:-1], dtype=bool)

    # Per-column median of the observations.
    median = np.median(points, axis=0)

    # Euclidean distance of every observation from that median.
    diff = np.sqrt(np.sum((points - median) ** 2, axis=-1))

    # Median absolute deviation: the typical distance from the median.
    mad = np.median(diff)

    if mad == 0:
        # More than half of the distances are zero, so the score would be
        # inf (or 0/0). Flag exactly the observations that deviate at all,
        # which is what the division would yield, but without the
        # divide-by-zero warnings.
        return diff > 0

    # 0.6745 is approximately the 0.75 quantile of the standard normal
    # distribution; it rescales the MAD so the score is comparable to an
    # ordinary z-score.
    modified_z_score = 0.6745 * diff / mad

    return modified_z_score > threshold
19
# Sample data: 100 uniform random draws from [0, 1).
x = np.random.random(100)

# Number of bins used for both histograms.
buckets = 50

# Append four extreme values that the filter is expected to remove.
x = np.concatenate((x, [-49, 95, 100, -100]))

# is_outlier() returns a boolean mask with one entry per element of x.
# Indexing an array with a boolean mask keeps only the True positions:
#     a = np.array([1, 2, 3, 4])
#     a[np.array([False, True, True, False])]  ->  array([2, 3])
# Inverting the mask therefore keeps everything that is NOT an outlier.
filtered = x[np.logical_not(is_outlier(x))]

# Start a new figure holding two stacked histograms.
plt.figure()

# Upper panel (2 rows, 1 column, slot 1): the data including outliers.
plt.subplot(2, 1, 1)
plt.hist(x, bins=buckets)
plt.xlabel('Raw')

# Lower panel (slot 2): the data after outlier removal.
plt.subplot(2, 1, 2)
plt.hist(filtered, bins=buckets)
plt.xlabel('Cleaned')

# Display the figure.
plt.show()