# 以下代码用于实现线性回归模型对波士顿房价数据集的预测,请补全空缺部分。
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score
# 1. Load the data
# The CSV must contain a 'medv' column (the house price); every other
# column is used as an input feature.
data = pd.read_csv('boston_housing.csv')
X = data.drop('medv', axis=1)  # feature matrix (price column removed)
y = data['medv']  # target variable (house price)

# 2. Standardize the features
# fit_transform learns each column's mean/std and applies the scaling in a
# single call.
# NOTE(review): the scaler is fit on the full dataset before the split, so
# test-set statistics take part in the scaling. Fit it on X_train only if
# this script is ever adapted to a regularized or otherwise scale-sensitive
# model.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 3. Split into training and test sets (30% held out for testing)
# random_state is fixed so the split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.3, random_state=100
)

# 4. Build and fit the linear regression model on the training portion only
lr = LinearRegression()
lr.fit(X_train, y_train)

# 5. Predict on the held-out test set and evaluate
y_pred = lr.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"均方误差(MSE):{mse:.2f}")
print(f"决定系数(R²):{r2:.2f}")

# 6. Report the fitted coefficients
# Names come from the unscaled DataFrame because X_scaled is produced by the
# scaler and is not indexed by column name here; the column order is the same.
print("模型系数:")
for feature, coef in zip(X.columns, lr.coef_):
    print(f"{feature}: {coef:.3f}")
print(f"截距:{lr.intercept_:.3f}")