5 线性回归

5 线性回归（图 1：线性回归示意图，原文配图占位）

from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

# Load the Boston house-price dataset.
# NOTE(review): load_boston was removed in scikit-learn 1.2; this script
# requires scikit-learn < 1.2 to run as written — confirm the pinned version.
boston = load_boston()
x = boston.data            # feature matrix, one row per house
y = boston.target          # target: median house price
names = boston.feature_names

# Hold out 20% of the samples as a test set; fixed seed for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=22)

# Peek at the first three training samples and their targets.
print('x_train前3行数据为:', x_train[0: 3], '\n', 'y_train前3个数据为:', y_train[0: 3])
x_train前3行数据为: [[2.24236e+00 0.00000e+00 1.95800e+01 0.00000e+00 6.05000e-01 5.85400e+00
  9.18000e+01 2.42200e+00 5.00000e+00 4.03000e+02 1.47000e+01 3.95110e+02
  1.16400e+01]
 [2.61690e-01 0.00000e+00 9.90000e+00 0.00000e+00 5.44000e-01 6.02300e+00
  9.04000e+01 2.83400e+00 4.00000e+00 3.04000e+02 1.84000e+01 3.96300e+02
  1.17200e+01]
 [6.89900e-02 0.00000e+00 2.56500e+01 0.00000e+00 5.81000e-01 5.87000e+00
  6.97000e+01 2.25770e+00 2.00000e+00 1.88000e+02 1.91000e+01 3.89150e+02
  1.43700e+01]] 
 y_train前3个数据为: [22.7 19.4 22. ]
# Fit an ordinary least-squares linear regression on the training split.
from sklearn.linear_model import LinearRegression

# fit() returns the estimator itself, so construction and training chain.
lr_model = LinearRegression().fit(x_train, y_train)

# Report the learned parameters: one coefficient per feature plus an intercept.
print('LinearRegression模型中各特征系数为:\n', lr_model.coef_)
print('LinearRegression模型中截距为:', lr_model.intercept_)

LinearRegression模型中各特征系数为:
 [-1.01199845e-01  4.67962110e-02 -2.06902678e-02  3.58072311e+00
 -1.71288922e+01  3.92207267e+00 -5.67997339e-03 -1.54862273e+00
  2.97156958e-01 -1.00709587e-02 -7.78761318e-01  9.87125185e-03
 -5.25319199e-01]
LinearRegression模型中截距为: 32.428252866991016
# Evaluate OLS on the held-out data: first five predictions and the R^2 score.
lr_test_pred = lr_model.predict(x_test)
print('预测测试集前5个结果为:\n', lr_test_pred[:5])
print('测试集得分为:', lr_model.score(x_test, y_test))

预测测试集前5个结果为:
 [27.99617259 31.37458822 21.16274236 32.97684211 19.85350998]
测试集得分为: 0.7657465943591124
# Ridge regression: least squares with an L2 penalty (default alpha=1.0),
# which shrinks the coefficients toward zero without eliminating them.
from sklearn.linear_model import Ridge

ridge_model = Ridge().fit(x_train, y_train)

# Report the shrunken coefficients and the intercept.
print('Ridge模型中各特征系数为:\n', ridge_model.coef_)
print('Ridge模型中截距为:', ridge_model.intercept_)

Ridge模型中各特征系数为:
 [-0.09480494  0.04771602 -0.05491252  3.31822206 -9.58446843  3.96702534
 -0.01214016 -1.44131977  0.27969468 -0.01070112 -0.69650831  0.01024209
 -0.53636964]
Ridge模型中截距为: 27.28471754760631
# Evaluate Ridge on the held-out data: first five predictions and the R^2 score.
ridge_test_pred = ridge_model.predict(x_test)
print('预测测试集前5个结果为:\n', ridge_test_pred[:5])
print('测试集得分为:', ridge_model.score(x_test, y_test))

预测测试集前5个结果为:
 [28.34867714 31.24127881 21.57471674 32.47910016 20.12179682]
测试集得分为: 0.7630850497410888
# Lasso regression: least squares with an L1 penalty. A fairly strong
# alpha=5 drives most coefficients exactly to zero (sparse model).
from sklearn.linear_model import Lasso

lasso_model = Lasso(alpha=5).fit(x_train, y_train)

# Report the (mostly zero) coefficients and the intercept.
print('Lasso模型中各特征系数为:\n', lasso_model.coef_)
print('Lasso模型中截距为:', lasso_model.intercept_)

Lasso模型中各特征系数为:
 [-0.          0.02579275 -0.          0.          0.          0.
  0.01073689 -0.          0.         -0.00508254 -0.          0.00579759
 -0.74030877]
Lasso模型中截距为: 30.908571385880304
# Evaluate Lasso on the held-out data: first five predictions and the R^2 score.
lasso_test_pred = lasso_model.predict(x_test)
print('预测测试集前5个结果为:\n', lasso_test_pred[:5])
print('测试集得分为:', lasso_model.score(x_test, y_test))

预测测试集前5个结果为:
 [28.62381554 27.34844922 21.61895603 25.09454176 23.13495752]
测试集得分为: 0.5539853847862071

# Plot the three models' test-set predictions against the true targets,
# one subplot per model.
#
# Fix: the original used plt without ever importing matplotlib.pyplot,
# which raises NameError at fig creation.
import matplotlib.pyplot as plt

# Configure a CJK-capable font so the Chinese titles/labels render instead
# of empty boxes, and keep the minus sign displayable with that font.
# NOTE(review): assumes 'SimHei' is installed — substitute a local CJK font
# if it is not available on this machine.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

y_pred1 = lr_model.predict(x_test)
y_pred2 = ridge_model.predict(x_test)
y_pred3 = lasso_model.predict(x_test)

# Shared x-axis: the index of each test sample.
sample_idx = range(y_test.shape[0])

fig = plt.figure(figsize=(10, 8))
# (subplot position, title/legend label, predictions) for each model.
panels = (
    (311, "线性回归", y_pred1),
    (312, "岭回归", y_pred2),
    (313, "Lasso回归", y_pred3),
)
for pos, title, pred in panels:
    ax = fig.add_subplot(pos)
    ax.set_title(title)
    ax.plot(sample_idx, y_test, label="真实值",)
    ax.plot(sample_idx, pred, label=title,)
    ax.legend()

plt.show()

欢迎分享,转载请注明来源:内存溢出

原文地址: http://www.outofmemory.cn/langs/870152.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-13
下一篇 2022-05-13

发表评论

登录后才能评论

评论列表(0条)

保存