机器学习——支持向量机实践

今天，我们来动手实践支持向量机（SVM）。

首先是手动实现线性支持向量机（用 cvxopt 求解对偶二次规划问题）：

from cvxopt import matrix, solvers
import numpy as np
import matplotlib.pyplot as plt
def split_train_test_data(mean1, mean2, sdt, n):
    """Sample two 2-D Gaussian point clouds, plot them, and split them 80/20.

    Each class receives ``int(n/2)`` points whose two coordinates are drawn
    independently from a normal distribution with standard deviation ``sdt``.
    The positive class (label +1) is centred on ``mean1`` and the negative
    class (label -1) on ``mean2``.  The NumPy seed is reset to 529 on every
    call, so identical arguments always produce identical data.  A scatter
    plot of both classes is shown via ``plt.show()`` before returning.

    Args:
        mean1: Mean of both coordinates of the positive class.
        mean2: Mean of both coordinates of the negative class.
        sdt: Standard deviation used for every coordinate.
        n: Total number of samples (half per class).

    Returns:
        ``(F_train, F_test, y_train, y_test)``.  In every array the negative
        samples come first, followed by the positive ones.  The first
        ``int(n/2*0.8)`` samples of each class form the training set and the
        remainder the test set.  Label arrays are column vectors.
    """
    np.random.seed(529)
    per_class = int(n / 2)
    n_train = int(n / 2 * 0.8)

    def draw_column(center):
        # One coordinate column of shape (per_class, 1).
        samples = np.random.normal(loc=center, scale=sdt, size=per_class)
        return samples.reshape(-1, 1)

    # The draw order (positive x1, positive x2, negative x1, negative x2)
    # determines the seeded values, so it must not be rearranged.
    x_p1 = draw_column(mean1)
    x_p2 = draw_column(mean1)
    x_n1 = draw_column(mean2)
    x_n2 = draw_column(mean2)

    X_p = np.hstack((x_p1, x_p2))
    X_n = np.hstack((x_n1, x_n2))
    y_p = np.ones((per_class, 1))
    y_n = -np.ones((per_class, 1))

    # Visual sanity check of the two generated classes.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(x_p1, x_p2, color="#ffb07c", s=100, label="1")
    ax.scatter(x_n1, x_n2, color="#c94cbe", s=100, label="-1")
    plt.legend()
    plt.show()

    # Stack negatives before positives; split each class at the same index.
    F_train = np.vstack((X_n[:n_train], X_p[:n_train]))
    F_test = np.vstack((X_n[n_train:], X_p[n_train:]))
    y_train = np.vstack((y_n[:n_train], y_p[:n_train]))
    y_test = np.vstack((y_n[n_train:], y_p[n_train:]))

    return F_train, F_test, y_train, y_test

# Build the demo data set: positive class centred on 6, negative class on 1,
# standard deviation 2, 50 samples in total.
F_train, F_test, y_train, y_test = split_train_test_data(mean1=6, mean2=1, sdt=2, n=50)

# Report the shape of every split.
for _title, _split in (
    ("训练集规模", F_train),
    ("测试集规模", F_test),
    ("训练集标签", y_train),
    ("测试集标签", y_test),
):
    print(_title, _split.shape)

def train(x, y, C):
    """Fit a soft-margin linear SVM by solving its dual QP with cvxopt.

    The dual problem

        minimise    1/2 * a^T (y y^T * x x^T) a - 1^T a
        subject to  0 <= a_i <= C   and   y^T a = 0

    is handed to ``solvers.qp`` and the primal weight vector and bias are
    recovered from the resulting Lagrange multipliers ``a``.

    Args:
        x: Training samples, one row per sample, shape (n_samples, n_features).
        y: Labels in {-1, +1}; n_samples values given as a column vector or a
            flat sequence.
        C: Upper bound on every Lagrange multiplier (soft-margin penalty).

    Returns:
        ``(w_best, b_best)``: the weight vector of shape (n_features,) and the
        bias as a length-1 array.
    """
    # cvxopt only accepts double-precision matrices, so normalise the inputs.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    n_samples = x.shape[0]

    # Gram matrix of the samples and outer product of the labels.
    gram = x @ x.T
    label_outer = y @ y.T

    # Objective: 1/2 a^T P a + q^T a.
    P = matrix(label_outer * gram)
    q = matrix(-np.ones(n_samples))
    # Equality constraint: y^T a = 0.
    A = matrix(y.reshape(1, -1))
    b_eq = matrix(0.)
    # Inequality constraints G a <= h encode -a <= 0 and a <= C.  They are
    # sized from the data instead of a fixed sample count.
    G = matrix(np.vstack((-np.eye(n_samples), np.eye(n_samples))))
    h = matrix(np.vstack((np.zeros((n_samples, 1)),
                          np.full((n_samples, 1), float(C)))))

    solution = solvers.qp(P, q, G, h, A, b_eq)

    # Lagrange multipliers, one per training sample.
    alpha = np.ravel(solution['x'])

    # w = sum_i a_i * y_i * x_i
    w_best = np.sum(alpha.reshape(-1, 1) * y * x, axis=0)

    # y_j - w.x_j equals the bias only for samples lying exactly on the margin
    # (0 < a_j < C), so average over those.  The tolerance absorbs the
    # solver's numerical noise around the two bounds.
    residual = y.ravel() - x @ w_best
    tol = 1e-5 * max(float(C), 1.0)
    on_margin = (alpha > tol) & (alpha < C - tol)
    if not on_margin.any():
        # Degenerate solution without margin support vectors: fall back to
        # the average over every sample.
        on_margin = np.ones(n_samples, dtype=bool)
    b_best = np.atleast_1d(residual[on_margin].mean())

    return w_best, b_best

# Fit the linear SVM on the training split with C=100.
w, b = train(F_train, y_train, 100)

# Decision boundary w[0]*x + w[1]*y + b = 0, solved for y over the plot range.
x = np.linspace(-6, 10, 50)
y = (-w[0] / w[1] * x - b / w[1]).ravel()

# Colour the training points by their label instead of by a fixed slice
# index, and draw each class with a single scatter call.
_train_is_neg = y_train.ravel() < 0
plt.scatter(F_train[_train_is_neg, 0], F_train[_train_is_neg, 1],
            color="#c94cbe", s=100)
plt.scatter(F_train[~_train_is_neg, 0], F_train[~_train_is_neg, 1],
            color="#ffb07c", s=100)
plt.plot(x, y, color="#087804")
def test(w, b, x):
prediction=np.sign(np.dot(x, w)+b)
return prediction

# Classify the held-out samples and report the share predicted correctly.
prediction = test(w, b, F_test)
num = int(np.sum(np.ravel(prediction) == y_test.ravel()))
acc = 100 * num / (y_test.shape[0])
print("acc = %.2f %%" % acc)

# Decision boundary (w.x + b = 0) and the two margin lines (w.x + b = -1 and
# w.x + b = +1), each solved for the second coordinate.
x = np.linspace(-6, 10, 50)
y = (-w[0] / w[1] * x - b / w[1]).ravel()
y1 = (-w[0] / w[1] * x - (b + 1) / w[1]).ravel()
y2 = (-w[0] / w[1] * x - (b - 1) / w[1]).ravel()

# Colour points by label rather than by fixed slice indices, using one
# scatter call per group.  Circles are training data, stars are test data.
_train_is_neg = y_train.ravel() < 0
_test_is_neg = y_test.ravel() < 0
plt.scatter(F_train[_train_is_neg, 0], F_train[_train_is_neg, 1],
            color="#c94cbe", s=100)
plt.scatter(F_train[~_train_is_neg, 0], F_train[~_train_is_neg, 1],
            color="#ffb07c", s=100)
plt.scatter(F_test[_test_is_neg, 0], F_test[_test_is_neg, 1],
            color="#c94cbe", marker="*", s=100)
plt.scatter(F_test[~_test_is_neg, 0], F_test[~_test_is_neg, 1],
            color="#ffb07c", marker="*", s=100)

plt.plot(x, y, color="#087804")
plt.plot(x, y1, color="#048243", ls='--')
plt.plot(x, y2, color="#048243", ls='--')

然后是使用 sklearn 实现支持向量机：

from sklearn import svm

# Linear soft-margin SVM from scikit-learn, trained on the split built above.
# NOTE: per the scikit-learn docs, `gamma` is the kernel coefficient for the
# 'rbf', 'poly' and 'sigmoid' kernels, so it has no effect with 'linear'.
model = svm.SVC(kernel='linear', C=1, gamma=1)

# SVC expects a flat label vector, so flatten the (n, 1) label column.
model.fit(F_train, y_train.ravel())
# Mean accuracy on the training data.
train_accuracy = model.score(F_train, y_train.ravel())

# Predicted labels for the held-out samples.
predicted = model.predict(F_test)

其中，调整（优化）算法的参数取值能有效地提高模型的性能。SVC 的完整参数如下：

sklearn.svm.SVC(*, C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=None)

其中，对模型性能影响较大的重要参数有“kernel”、“gamma”和“C”：kernel 指定核函数的类型，gamma 是 'rbf'、'poly'、'sigmoid' 核的核系数，C 是正则化（惩罚）参数。

谢谢大家的观看~