6.3 估计概率

决策树同样可以估算某个实例属于特定类 k 的概率：首先，沿着决策树找到该实例所在的叶节点，然后返回该节点中类 k 的训练实例占比。例如，假设你发现一朵花，其花瓣长 5cm，宽 1.5cm。相应的叶节点为深度 2 的左侧节点，因此决策树输出如下概率：山鸢尾花，0%（0/54）；变色鸢尾花，90.7%（49/54）；维吉尼亚鸢尾花，9.3%（5/54）。当然，如果你要求它预测类别，那么它应该输出变色鸢尾花（类别 1），因为它的概率最高。

[1]:
# Python ≥3.5 is required
import sys
assert sys.version_info >= (3, 5)

# Scikit-Learn ≥0.20 is required
import re
import sklearn
# Compare the leading (major, minor) numbers as integers. Comparing the raw
# version strings is lexicographic, so e.g. "0.3" would wrongly pass a
# `>= "0.20"` check. An unparseable version yields () and fails the assert.
assert tuple(map(int, re.findall(r"\d+", sklearn.__version__)[:2])) >= (0, 20)

# Common imports
import numpy as np
import os

# Seed NumPy's global random generator so this notebook's output is stable
# across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
# Default font sizes for every figure: 14 for axis labels, 12 for the
# x and y tick labels
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures: <PROJECT_ROOT_DIR>/images/<CHAPTER_ID>
PROJECT_ROOT_DIR = "."
CHAPTER_ID = "decision_trees"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", CHAPTER_ID)
# Create the directory (and any missing parents); no error if it already exists
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure into IMAGES_PATH.

    The file is named ``<fig_id>.<fig_extension>``. A "Saving figure" message
    is printed before the figure is written.

    Args:
        fig_id: Base name of the output file (without extension).
        tight_layout: When true, call ``plt.tight_layout()`` before saving.
        fig_extension: File extension, also passed to savefig as ``format``.
        resolution: Passed to savefig as ``dpi``.
    """
    file_name = fig_id + "." + fig_extension
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)
[2]:
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier

# Load the iris dataset and keep only the last two feature columns
iris = load_iris()
X = iris.data[:, 2:]  # columns 2 onward: petal length and width
y = iris.target  # class labels used as the training target

# Limit the tree to depth 2; random_state is fixed so the fit is reproducible
tree_clf = DecisionTreeClassifier(max_depth=2, random_state=42)
# Left as the cell's last expression so the notebook displays the fitted estimator
tree_clf.fit(X, y)
[2]:
DecisionTreeClassifier(max_depth=2, random_state=42)
[3]:
# Per-class probability estimates for one sample: petal length 5, petal width 1.5
tree_clf.predict_proba([[5, 1.5]])
[3]:
array([[0.        , 0.90740741, 0.09259259]])
[4]:
# Predicted class index for the same sample (petal length 5, petal width 1.5)
tree_clf.predict([[5, 1.5]])
[4]:
array([1])
[ ]: