# Fit one binary (species-vs-rest) model per iris species and print the
# first three rules of each fit. `dataset` and `clf` are not defined in this
# snippet; they must already exist when it runs.
X, y = dataset.data, dataset.target  # loop-invariant, so read once
for idx, species in enumerate(dataset.target_names):
    clf.fit(X, y == idx)  # boolean target: current species vs. all others
    rules = clf.rules_[0:3]
    print('Rules for iris', species)
    for rule in rules:
        print(rule)
    print()
    print(20*'=')
    print()
注意:
如果出现如下错误:ImportError: cannot import name 'six' from 'sklearn.externals'
解决方案:
关于 Python 导入错误:cannot import name 'six' from 'sklearn.externals',云朵君在 Stack Overflow 上找到一个类似的问题:https://stackoverflow.com/questions/61867945/
解决方案如下
import six
import sys

# Register the standalone `six` package under the module name
# 'sklearn.externals.six', so that a later `from sklearn.externals import six`
# style import finds it. This must run BEFORE importing the library that
# triggers the error.
sys.modules['sklearn.externals.six'] = six

# NOTE(review): `mlrose` is the library from the Stack Overflow answer; for
# this article the import to retry is presumably `skrules` -- confirm.
import mlrose
亲测有效!
如果使用“score_top_rules”方法,SkopeRules 也可以用作预测器:
# NOTE: `load_boston` was removed in scikit-learn 1.2; this import only works
# with older scikit-learn releases.
from sklearn.datasets import load_boston
from sklearn.metrics import precision_recall_curve
from matplotlib import pyplot as plt
from skrules import SkopeRules
# `dataset` and `clf` are not defined in this snippet; they must already
# exist when it runs.

# Boolean target: True where the dataset's target value exceeds 25.
X, y = dataset.data, dataset.target > 25

# First half of the samples for fitting, second half for evaluation.
half = len(y) // 2
X_train, y_train = X[:half], y[:half]
X_test, y_test = X[half:], y[half:]

clf.fit(X_train, y_train)
y_score = clf.score_top_rules(X_test)  # Get a risk score for each test example

# Plot precision against recall for the scores obtained on the test half.
precision, recall, _ = precision_recall_curve(y_test, y_score)
plt.plot(recall, precision)
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.title('Precision Recall curve')
plt.show()
实战案例
本案例展示了在著名的泰坦尼克号数据集上使用skope-rules。
skope-rules适用情况:
解决二分类问题
提取可解释的决策规则
本案例分为5个部分
导入相关库
数据准备
模型训练(使用SkopeRules().score_top_rules()方法)
解释 '生存规则'(使用SkopeRules().rules_属性)。
性能分析(使用SkopeRules().predict_top_rules()方法)。
导入相关库
# Import skope-rules
from skrules import SkopeRules

# Import libraries
import pandas as pd
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, precision_recall_curve
from matplotlib import cm
import numpy as np
from sklearn.metrics import confusion_matrix
from IPython.display import display
# Import Titanic data
# The path is relative to the current working directory.
data = pd.read_csv('../data/titanic-train.csv')
# Display train/test performance for each of the first four entries of
# `skope_rules_clf.rules_`. Element 0 of each entry is passed as the query.
# Slicing (rather than indexing 0..3) keeps the loop from raising IndexError
# when fewer than four rules are available.
# `skope_rules_clf`, the train/test splits and
# `compute_train_test_query_performances` are defined outside this snippet.
for rule_number, rule in enumerate(skope_rules_clf.rules_[:4], start=1):
    print('Rule ' + str(rule_number) + ':')
    display(
        compute_train_test_query_performances(
            X_train, y_train, X_test, y_test, rule[0]
        )
    )
# Announce how many rules were used, then report the performance of `y_pred`
# against `y_test`. `n_rule_chosen`, `y_pred` and
# `compute_performances_from_y_pred` are defined outside this snippet.
print(
    'The performances reached with ' + str(n_rule_chosen)
    + ' discovered rules are the following:'
)
compute_performances_from_y_pred(y_test, y_pred, 'test_set')