安装 Scikit-learn
# matplotlib is not a scikit-learn dependency, but the plotting step below needs it.
pip install scikit-learn matplotlib
导入 Scikit-learn
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
加载数据集
# Load the bundled iris dataset and separate the feature matrix from the target.
iris = datasets.load_iris()
X = iris.data
y = iris.target
数据预处理
# Hold out 20% of the samples for testing; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only, then reuse them on
# the test split so nothing from the test set leaks into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
选择模型
# Create a linear regression estimator with scikit-learn's default settings.
# NOTE(review): the target used in this walkthrough is `iris.target`, presumably
# integer class labels; a classifier may be a better fit. Confirm this is intentional.
model = LinearRegression()
训练模型
# Fit the model on the scaled training features and their targets.
model.fit(X_train_scaled, y_train)
预测
# Predict targets for the held-out, scaled test features.
y_pred = model.predict(X_test_scaled)
评估模型
# Score the predictions against the true test targets.
mse = mean_squared_error(y_test, y_pred)  # mean of squared residuals; lower is better
r2 = r2_score(y_test, y_pred)  # 1.0 would be a perfect fit
print(f"Mean squared error: {mse}")
print(f"Coefficient of determination: {r2}")
可视化
# Order the test samples by the plotted feature (column 0) so the prediction
# line is drawn left to right instead of jumping between points in sample order.
# The model is fitted on every column of X, so the line need not be straight.
order = np.argsort(X_test[:, 0])

plt.scatter(X_test[:, 0], y_test, color='black')
plt.plot(X_test[order, 0], y_pred[order], color='blue', linewidth=3)
plt.show()
模型持久化
from joblib import dump, load

# Save the fitted model to disk.
dump(model, 'model.joblib')

# Load the model back.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
loaded_model = load('model.joblib')
探索更多算法
特征选择和降维
高级主题
超参数调优:使用网格搜索(GridSearchCV)或随机搜索(RandomizedSearchCV)来找到更优的模型参数。
模型评估:使用交叉验证(cross-validation)来更准确地评估模型性能。
管道:构建一个处理/预测流水线,自动化数据预处理、模型训练和预测。