1 #coding:utf8
2 import cPickle
3 import gzip
4 import numpy as np
5 from sklearn.svm import libsvm
6
7
class SVM(object):
    """Minimal support-vector classifier on top of scikit-learn's ``libsvm`` bindings.

    The estimator always trains with ``svm_type=0`` (C-SVC in libsvm's
    numbering) and calls ``libsvm.fit`` / ``libsvm.predict`` directly.

    Parameters are stored verbatim on the instance and forwarded to libsvm
    when ``fit`` / ``predict`` are called:

    kernel      -- kernel name passed to libsvm (e.g. 'linear', 'rbf').
    degree      -- polynomial degree forwarded to libsvm.
    gamma       -- kernel coefficient; 'auto' means ``1 / n_features``,
                   anything else is converted with ``float()``.
    coef0, tol, C, nu, epsilon, shrinking, probability, cache_size,
    max_iter    -- forwarded unchanged to ``libsvm.fit``.
    class_weight -- stored, but NOT applied: ``fit`` currently trains with
                   a uniform weight of 1.0 for every class.
    """

    def __init__(self, kernel='rbf', degree=3, gamma='auto',
                 coef0=0.0, tol=1e-3, C=1.0, nu=0., epsilon=0.,
                 shrinking=True, probability=False,
                 cache_size=200, class_weight=None, max_iter=-1):
        self.kernel = kernel
        self.degree = degree
        self.gamma = gamma
        self.coef0 = coef0
        self.tol = tol
        self.C = C
        self.nu = nu
        self.epsilon = epsilon
        self.shrinking = shrinking
        self.probability = probability
        self.cache_size = cache_size
        self.class_weight = class_weight
        self.max_iter = max_iter

    def _resolve_gamma(self, n_features):
        """Return the numeric gamma to hand to libsvm for ``n_features`` columns."""
        if self.gamma == 'auto':
            return 1.0 / n_features
        # Honour an explicitly supplied coefficient instead of silently
        # replacing it with the 'auto' value.
        return float(self.gamma)

    def fit(self, X, y):
        """Train the classifier and return ``self``.

        X -- 2-D array-like of samples (rows) by features (columns).
        y -- 1-D array-like of class labels, one per row of ``X``.

        Raises ValueError when ``X`` is not a non-empty 2-D array or when
        the number of labels differs from the number of samples.

        Sets ``classes_``, ``class_weight_``, ``support_``,
        ``support_vectors_``, ``n_support_``, ``dual_coef_``, ``intercept_``,
        ``probA_``, ``probB_``, ``fit_status_`` and ``shape_fit_``.
        """
        X = np.array(X, dtype=np.float64, order='C')
        if X.ndim != 2 or X.shape[0] == 0 or X.shape[1] == 0:
            raise ValueError(
                "X must be a non-empty 2-D array, got shape %r" % (X.shape,))

        # Encode labels as indices into the sorted array of unique classes.
        cls, y = np.unique(y, return_inverse=True)
        y = np.asarray(y, dtype=np.float64, order='C')
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                "X and y have inconsistent lengths: %d != %d"
                % (X.shape[0], y.shape[0]))

        # Uniform per-class weights; the class_weight argument is not used.
        self.class_weight_ = np.ones(cls.shape[0], dtype=np.float64, order='C')
        self.classes_ = cls

        sample_weight = np.asarray([])
        solver_type = 0  # C-SVC
        self._gamma = self._resolve_gamma(X.shape[1])
        seed = np.random.randint(np.iinfo('i').max)

        (self.support_, self.support_vectors_, self.n_support_,
         self.dual_coef_, self.intercept_, self.probA_,
         self.probB_, self.fit_status_) = libsvm.fit(
            X, y,
            svm_type=solver_type, sample_weight=sample_weight,
            class_weight=self.class_weight_, kernel=self.kernel, C=self.C,
            nu=self.nu, probability=self.probability, degree=self.degree,
            shrinking=self.shrinking, tol=self.tol,
            cache_size=self.cache_size, coef0=self.coef0,
            gamma=self._gamma, epsilon=self.epsilon,
            max_iter=self.max_iter, random_seed=seed)

        self.shape_fit_ = X.shape

        # predict() needs libsvm's own sign convention, so keep untouched
        # private copies before adjusting the public attributes.
        self._intercept_ = self.intercept_.copy()
        self._dual_coef_ = self.dual_coef_
        # The sign flip of the public coefficients applies to the binary
        # case only (as scikit-learn does); multiclass values stay as-is.
        if len(self.classes_) == 2:
            self.intercept_ *= -1
            self.dual_coef_ = -self.dual_coef_
        return self

    def predict(self, X):
        """Return the predicted class label for each row of ``X``.

        X -- 2-D array-like with the same number of columns used in ``fit``.

        libsvm answers with indices into ``classes_`` (the encoding built in
        ``fit``); they are mapped back so callers get the original labels.
        """
        X = np.array(X, dtype=np.float64, order='C')
        svm_type = 0  # C-SVC, must match fit()
        encoded = libsvm.predict(
            X, self.support_, self.support_vectors_, self.n_support_,
            self._dual_coef_, self._intercept_,
            self.probA_, self.probB_, svm_type=svm_type, kernel=self.kernel,
            degree=self.degree, coef0=self.coef0, gamma=self._gamma,
            cache_size=self.cache_size)
        return self.classes_.take(np.asarray(encoded, dtype=np.intp))
65
def load_data(path='../data/mnist.pkl.gz'):
    """Load the three pickled dataset splits from a gzip-compressed file.

    path -- location of the gzipped pickle; defaults to the MNIST archive
            at '../data/mnist.pkl.gz'.

    Returns a ``(training_data, validation_data, test_data)`` tuple exactly
    as stored in the pickle. Raises ValueError if the pickle does not
    unpack into exactly three items.
    """
    f = gzip.open(path, 'rb')
    try:
        # NOTE: unpickling can execute arbitrary code -- only load archives
        # that come from a trusted source.
        training_data, validation_data, test_data = cPickle.load(f)
    finally:
        # Close the handle even when decompression or unpickling fails.
        f.close()
    return (training_data, validation_data, test_data)
71
def svm_test():
    """Train a linear-kernel SVM on 10000 training samples and report accuracy.

    Loads the dataset via ``load_data``, fits on the first 10000 training
    examples, predicts the first 10000 test examples and prints how many
    predictions match the test labels.
    """
    train_set, _validation_set, test_set = load_data()
    test_inputs = test_set[0][:10000]
    test_labels = test_set[1][:10000]

    # Other kernel names: 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed'
    classifier = SVM(kernel='linear')
    classifier.fit(train_set[0][:10000], train_set[1][:10000])

    predicted = [int(label) for label in classifier.predict(test_inputs)]
    num_correct = 0
    for guess, truth in zip(predicted, test_labels):
        num_correct += int(guess == truth)

    print("Baseline classifier using an SVM.")
    # Figures noted by the original author: 0.9172, and 0.9214 with 'rbf'.
    print("%s of %s values correct." % (num_correct, len(test_labels)))
80
if __name__ == "__main__":
    # Run the SVM baseline only when this file is executed as a script.
    svm_test()