更新时间: 试题数量: 购买人数: 提供作者:

有效期: 个月

章节介绍: 共有个章节

收藏
搜索
题库预览
"""Decision tree visualization and feature importance.

Trains a depth-limited decision tree on the scikit-learn breast cancer
dataset, draws the fitted tree with ``plot_tree``, prints accuracy,
precision, recall and F1 on a held-out split, and lists the five features
with the highest importance.
"""

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
# NOTE: export_graphviz is imported but not used by this script; it is kept
# so that any code relying on the name being in scope keeps working.
from sklearn.tree import DecisionTreeClassifier, plot_tree, export_graphviz
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import matplotlib.pyplot as plt

# The figure title and class labels below are Chinese. Matplotlib's default
# font has no CJK glyphs, so list CJK-capable fonts first (the first one that
# is installed is used) and keep the default as the final fallback.
plt.rcParams["font.sans-serif"] = [
    "SimHei",
    "Microsoft YaHei",
    "PingFang SC",
    "Noto Sans CJK SC",
    "WenQuanYi Micro Hei",
    "Arial Unicode MS",
    "DejaVu Sans",
]
# CJK fonts often lack the Unicode minus sign; fall back to ASCII '-'.
plt.rcParams["axes.unicode_minus"] = False

# 1. Load the data.
data = load_breast_cancer()
X, y = data.data, data.target
feature_names = data.feature_names

# 2. Split into train and test sets (25% held out, fixed seed).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=42
)

# 3. Train the decision tree; max_depth=3 keeps the plotted tree readable.
dt = DecisionTreeClassifier(max_depth=3, random_state=42)
dt.fit(X_train, y_train)

# 4. Visualization, method one: plot_tree.
# class_names is matched to the target classes in ascending order; the labels
# assume target 0 = malignant and 1 = benign (verify against the dataset docs).
plt.figure(figsize=(15, 10))
plot_tree(
    dt,
    feature_names=feature_names,
    class_names=['恶性', '良性'],
    filled=True,
    rounded=True,
    fontsize=10,
)
plt.title("决策树结构可视化")
plt.show()

# 5. Evaluate the model on the held-out test set.
y_pred = dt.predict(X_test)
print(f"准确率:{accuracy_score(y_test, y_pred):.4f}")
print(f"精确率:{precision_score(y_test, y_pred):.4f}")
print(f"召回率:{recall_score(y_test, y_pred):.4f}")
print(f"F1分数:{f1_score(y_test, y_pred):.4f}")

# 6. Feature importance: pair each feature name with its importance and sort
# from most to least important.
importances = dt.feature_importances_
feature_importance_dict = dict(zip(feature_names, importances))
sorted_features = sorted(
    feature_importance_dict.items(), key=lambda x: x[1], reverse=True
)

print("\n特征重要性排序(前5):")
for feature, importance in sorted_features[:5]:
    print(f"{feature}:{importance:.4f}")

print(f"\n最重要的两个特征是:'{sorted_features[0][0]}' 和 '{sorted_features[1][0]}'")
1