Libraries to import in advance:
numpy
import numpy as np
Plotting
import matplotlib.pyplot as plt
Datasets
from sklearn import datasets
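A minimal loading sketch (the digits dataset is just one example; many of the snippets below assume X and y defined like this):
digits = datasets.load_digits()
X = digits.data
y = digits.target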
Machine learning algorithms:
kNN algorithm
from sklearn.neighbors import KNeighborsClassifier
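Usage sketch (not from the original; assumes X_train, y_train, X_test, y_test from a split such as train_test_split below):
knn_clf = KNeighborsClassifier(n_neighbors=3)
knn_clf.fit(X_train, y_train)
y_predict = knn_clf.predict(X_test)
knn_clf.score(X_test, y_test)  # mean accuracy on the test set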
Linear regression
from sklearn.linear_model import LinearRegression
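Usage sketch (assumes a train/test split; not from the original):
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)
lin_reg.coef_, lin_reg.intercept_  # fitted coefficients and intercept
lin_reg.score(X_test, y_test)      # R^2 on the test set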
PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
pca.fit(X_train)
X_train_reduction = pca.transform(X_train)
X_test_reduction = pca.transform(X_test)
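Two common additions (a sketch, not from the original): inspect how much variance each component keeps, or let PCA choose the number of components for a target variance ratio.
pca.explained_variance_ratio_  # variance ratio explained by each retained component
pca = PCA(0.95)                # keep enough components to explain 95% of the variance
pca.fit(X_train)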
Logistic regression
from sklearn.linear_model import LogisticRegression
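Usage sketch (assumes a binary classification split; not from the original):
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)
log_reg.score(X_test, y_test)
log_reg.predict_proba(X_test)  # predicted class probabilities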
SVM
from sklearn.svm import LinearSVC
SVM with a polynomial kernel
from sklearn.svm import SVC
SVC(kernel='poly', degree=3, C=1.0)  # degree and C are hyperparameters to tune; the values here are illustrative
SVM for regression
from sklearn.svm import LinearSVR
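Usage sketch for the SVM variants above (hyperparameter values are illustrative; SVMs are scale-sensitive, so standardize features first, e.g. with StandardScaler or inside a Pipeline — X_train_standard below is assumed to be standardized):
svc = LinearSVC(C=1.0)
svc.fit(X_train_standard, y_train)
poly_svc = SVC(kernel='poly', degree=3, C=1.0)
poly_svc.fit(X_train_standard, y_train)
svr = LinearSVR(epsilon=0.1, C=1.0)   # for regression, y_train would be continuous targets
svr.fit(X_train_standard, y_train)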
OvO
log_reg = LogisticRegression(multi_class='multinomial', solver='newton-cg')  # note: 'multinomial' is softmax regression rather than true one-vs-one
OvR
from sklearn.multiclass import OneVsRestClassifier
ovr = OneVsRestClassifier(log_reg)
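Usage sketch (assumes a multi-class split); scikit-learn also provides the matching one-vs-one wrapper:
ovr.fit(X_train, y_train)
ovr.score(X_test, y_test)
from sklearn.multiclass import OneVsOneClassifier
ovo = OneVsOneClassifier(log_reg)
ovo.fit(X_train, y_train)
ovo.score(X_test, y_test)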
Decision tree
from sklearn.tree import DecisionTreeClassifier
Decision tree for regression
from sklearn.tree import DecisionTreeRegressor
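Usage sketch (hyperparameter values are illustrative):
dt_clf = DecisionTreeClassifier(max_depth=2, criterion='entropy')
dt_clf.fit(X_train, y_train)
dt_reg = DecisionTreeRegressor()
dt_reg.fit(X_train, y_train)  # for regression, y_train would be continuous targets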
Ensemble learning
from sklearn.ensemble import VotingClassifier
Bagging
from sklearn.ensemble import BaggingClassifier
Random forest
from sklearn.ensemble import RandomForestClassifier
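Usage sketch (constructor arguments are illustrative, not from the original):
voting_clf = VotingClassifier(estimators=[
    ('log_clf', LogisticRegression()),
    ('svm_clf', SVC()),
    ('dt_clf', DecisionTreeClassifier())
], voting='hard')
bagging_clf = BaggingClassifier(DecisionTreeClassifier(),
                                n_estimators=500, max_samples=100,
                                bootstrap=True, oob_score=True)
rf_clf = RandomForestClassifier(n_estimators=500, oob_score=True, n_jobs=-1)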
Data processing and evaluation utilities:
train_test_split
from sklearn.model_selection import train_test_split
Computing accuracy
from sklearn.metrics import accuracy_score
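Usage sketch (test_size and random_state are illustrative):
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=666)
accuracy_score(y_test, y_predict)  # y_predict from some fitted classifier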
Grid search
from sklearn.model_selection import GridSearchCV
param_grid = [
    {'weights': ['uniform'], 'n_neighbors': [i for i in range(1, 11)]},
    {'weights': ['distance'], 'n_neighbors': [i for i in range(1, 11)], 'p': [i for i in range(1, 6)]}
]
knn_clf = KNeighborsClassifier()
grid_search = GridSearchCV(knn_clf, param_grid)
grid_search.fit(X_train, y_train)
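Typical follow-up after fitting (these are standard GridSearchCV attributes):
grid_search.best_params_              # best hyperparameter combination found
grid_search.best_score_               # best cross-validated score
knn_clf = grid_search.best_estimator_ # estimator refit with the best parameters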
Standardization (zero mean, unit variance)
from sklearn.preprocessing import StandardScaler
standardScaler = StandardScaler()
standardScaler.fit(X_train)
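After fitting on the training set, transform both sets with the training-set statistics:
X_train_standard = standardScaler.transform(X_train)
X_test_standard = standardScaler.transform(X_test)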
MSE
from sklearn.metrics import mean_squared_error
MAE
from sklearn.metrics import mean_absolute_error
R squared (R²)
from sklearn.metrics import r2_score
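Usage sketch (assumes y_test and y_predict from a fitted regressor):
mean_squared_error(y_test, y_predict)
mean_absolute_error(y_test, y_predict)
r2_score(y_test, y_predict)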
Adding polynomial features
from sklearn.preprocessing import PolynomialFeatures
poly = PolynomialFeatures(degree=2)  # add polynomial features up to degree 2
poly.fit(X)
X2 = poly.transform(X)
Pipeline
from sklearn.pipeline import Pipeline
# Pass the steps in order: 1. polynomial features  2. standardization  3. linear regression
poly_reg = Pipeline([
    ("poly", PolynomialFeatures(degree=2)),
    ("std_scaler", StandardScaler()),
    ("lin_reg", LinearRegression())
])
poly_reg.fit(X, y)
y_predict = poly_reg.predict(X)
Cross-validation
from sklearn.model_selection import cross_val_score
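Usage sketch (cv=5 is illustrative):
cross_val_score(knn_clf, X_train, y_train, cv=5)  # returns one score per fold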
Ridge regression
from sklearn.linear_model import Ridge
LASSO
from sklearn.linear_model import Lasso
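Usage sketch (the regularization strengths alpha are illustrative):
ridge_reg = Ridge(alpha=1.0)
ridge_reg.fit(X_train, y_train)
lasso_reg = Lasso(alpha=0.01)
lasso_reg.fit(X_train, y_train)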
Plotting a decision boundary
def plot_decision_boundary(model, axis):
    # model: a fitted classifier on two features; axis = [x0_min, x0_max, x1_min, x1_max]
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], int((axis[1]-axis[0])*100)).reshape(-1, 1),
        np.linspace(axis[2], axis[3], int((axis[3]-axis[2])*100)).reshape(-1, 1)
    )
    X_new = np.c_[x0.ravel(), x1.ravel()]
    y_predict = model.predict(X_new)
    zz = y_predict.reshape(x0.shape)

    from matplotlib.colors import ListedColormap
    custom_cmap = ListedColormap(['#EF9A9A', '#FFF59D', '#90CAF9'])
    plt.contourf(x0, x1, zz, cmap=custom_cmap)
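Usage sketch (assumes a classifier fitted on two features; the axis range is illustrative):
plot_decision_boundary(knn_clf, axis=[4, 8, 1.5, 4.5])
plt.scatter(X[y == 0, 0], X[y == 0, 1])
plt.scatter(X[y == 1, 0], X[y == 1, 1])
plt.show()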
Confusion matrix
from sklearn.metrics import confusion_matrix
Precision
from sklearn.metrics import precision_score
Recall
from sklearn.metrics import recall_score
F1 Score
from sklearn.metrics import f1_score
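Usage sketch for a binary problem (for multi-class targets, pass an average= argument to precision, recall, and F1):
confusion_matrix(y_test, y_predict)
precision_score(y_test, y_predict)
recall_score(y_test, y_predict)
f1_score(y_test, y_predict)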
ROC
from sklearn.metrics import roc_curve
ROC AUC score
from sklearn.metrics import roc_auc_score
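Usage sketch (assumes a binary classifier exposing decision_function, e.g. the logistic regression above):
decision_scores = log_reg.decision_function(X_test)
fprs, tprs, thresholds = roc_curve(y_test, decision_scores)
plt.plot(fprs, tprs)
plt.show()
roc_auc_score(y_test, decision_scores)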