from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

# Load the Boston house-price dataset.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and REMOVED in
# 1.2; on modern versions switch to another dataset (e.g.
# fetch_california_housing) or load the data from the original source.
boston = load_boston()
x = boston['data']               # feature matrix (13 features per sample)
y = boston['target']             # regression target: median house value
names = boston['feature_names']  # column names for the features

# Split into train/test sets (80% / 20%); fixed random_state for reproducibility.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=22)
print('x_train前3行数据为:', x_train[0: 3], '\n', 'y_train前3个数据为:', y_train[0: 3])
# Console output pasted from a previous run — commented out so the file
# remains valid Python:
# x_train前3行数据为: [[2.24236e+00 0.00000e+00 1.95800e+01 0.00000e+00 6.05000e-01 5.85400e+00 9.18000e+01 2.42200e+00 5.00000e+00 4.03000e+02 1.47000e+01 3.95110e+02 1.16400e+01] [2.61690e-01 0.00000e+00 9.90000e+00 0.00000e+00 5.44000e-01 6.02300e+00 9.04000e+01 2.83400e+00 4.00000e+00 3.04000e+02 1.84000e+01 3.96300e+02 1.17200e+01] [6.89900e-02 0.00000e+00 2.56500e+01 0.00000e+00 5.81000e-01 5.87000e+00 6.97000e+01 2.25770e+00 2.00000e+00 1.88000e+02 1.91000e+01 3.89150e+02 1.43700e+01]] y_train前3个数据为: [22.7 19.4 22. ]
# Fit an ordinary least-squares linear regression model.
from sklearn.linear_model import LinearRegression

lr_model = LinearRegression()
# Train on the training split.
lr_model.fit(x_train, y_train)
print('LinearRegression模型中各特征系数为:\n', lr_model.coef_)
print('LinearRegression模型中截距为:', lr_model.intercept_)
# Console output pasted from a previous run — commented out so the file
# remains valid Python:
# LinearRegression模型中各特征系数为: [-1.01199845e-01 4.67962110e-02 -2.06902678e-02 3.58072311e+00 -1.71288922e+01 3.92207267e+00 -5.67997339e-03 -1.54862273e+00 2.97156958e-01 -1.00709587e-02 -7.78761318e-01 9.87125185e-03 -5.25319199e-01] LinearRegression模型中截距为: 32.428252866991016
# Predict the first few test samples and report R^2 on the test split.
print('预测测试集前5个结果为:\n', lr_model.predict(x_test)[: 5])
print('测试集得分为:', lr_model.score(x_test, y_test))
# 预测测试集前5个结果为: [27.99617259 31.37458822 21.16274236 32.97684211 19.85350998] 测试集得分为: 0.7657465943591124
# Ridge regression (L2-regularized linear regression, default alpha=1.0).
from sklearn.linear_model import Ridge

ridge_model = Ridge()
ridge_model.fit(x_train, y_train)
print('Ridge模型中各特征系数为:\n', ridge_model.coef_)
print('Ridge模型中截距为:', ridge_model.intercept_)
# Console output pasted from a previous run — commented out so the file
# remains valid Python:
# Ridge模型中各特征系数为: [-0.09480494 0.04771602 -0.05491252 3.31822206 -9.58446843 3.96702534 -0.01214016 -1.44131977 0.27969468 -0.01070112 -0.69650831 0.01024209 -0.53636964] Ridge模型中截距为: 27.28471754760631
# Predict the first few test samples and report R^2 on the test split.
print('预测测试集前5个结果为:\n', ridge_model.predict(x_test)[: 5])
print('测试集得分为:', ridge_model.score(x_test, y_test))
# 预测测试集前5个结果为: [28.34867714 31.24127881 21.57471674 32.47910016 20.12179682] 测试集得分为: 0.7630850497410888
# Lasso regression (L1-regularized). The strong penalty alpha=5 drives most
# coefficients to exactly zero, which explains the much lower test score below.
from sklearn.linear_model import Lasso

lasso_model = Lasso(alpha=5)
lasso_model.fit(x_train, y_train)
print('Lasso模型中各特征系数为:\n', lasso_model.coef_)
print('Lasso模型中截距为:', lasso_model.intercept_)
# Console output pasted from a previous run — commented out so the file
# remains valid Python:
# Lasso模型中各特征系数为: [-0. 0.02579275 -0. 0. 0. 0. 0.01073689 -0. 0. -0.00508254 -0. 0.00579759 -0.74030877] Lasso模型中截距为: 30.908571385880304
# Predict the first few test samples and report R^2 on the test split.
print('预测测试集前5个结果为:\n', lasso_model.predict(x_test)[: 5])
print('测试集得分为:', lasso_model.score(x_test, y_test))
# 预测测试集前5个结果为: [28.62381554 27.34844922 21.61895603 25.09454176 23.13495752] 测试集得分为: 0.5539853847862071
# FIX: matplotlib was never imported anywhere in this script, so every `plt.`
# reference below raised a NameError. Import it here, where it is first used.
import matplotlib.pyplot as plt

# NOTE(review): the titles/labels below are Chinese; matplotlib's default font
# cannot render CJK glyphs (they show as empty boxes). Configure an installed
# CJK font first, e.g. plt.rcParams['font.sans-serif'] = ['SimHei'] — confirm
# which fonts are available on the target machine.

y_pred1 = lr_model.predict(x_test)
y_pred2 = ridge_model.predict(x_test)
y_pred3 = lasso_model.predict(x_test)

# One subplot per model: true test targets overlaid with that model's
# predictions, indexed by sample position.
fig = plt.figure(figsize=(10, 8))
sample_idx = range(y_test.shape[0])
for row, (title, y_pred) in enumerate(
        [("线性回归", y_pred1), ("岭回归", y_pred2), ("Lasso回归", y_pred3)], start=1):
    ax = fig.add_subplot(3, 1, row)
    ax.set_title(title)
    ax.plot(sample_idx, y_test, label="真实值")
    ax.plot(sample_idx, y_pred, label=title)
    ax.legend()
plt.show()
# (Scraped blog footer — not part of the script; kept as a comment so the
# file remains valid Python.)
# 欢迎分享,转载请注明来源:内存溢出
# 评论列表(0条)