{
 "metadata": {
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.4-final"
  },
  "orig_nbformat": 2,
  "kernelspec": {
   "name": "Python 3.7.4 64-bit ('venv')",
   "display_name": "Python 3.7.4 64-bit ('venv')",
   "metadata": {
    "interpreter": {
     "hash": "e284c72d79b42194b3fe2a0767ff9cca6d233ae03063bab113c99e4bc6bd25a8"
    }
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2,
 "cells": [
  {
   "source": [
    "# 练习 3-3\n",
    "处理Kaggle上的泰坦尼克数据集"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "'./datasets/titanic'"
      ]
     },
     "metadata": {},
     "execution_count": 1
    }
   ],
   "source": [
    "import os\n",
    "TITANTIC_PATH = os.path.join('./datasets', 'titanic')\n",
    "TITANTIC_PATH"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "def load_titantic_data(filename, titantic_path=TITANTIC_PATH):\n",
    "    filepath = os.path.join(titantic_path, filename)\n",
    "    return pd.read_csv(filepath)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "titanic.zip: Skipping, found more recently modified local copy (use --force to force download)\n"
     ]
    }
   ],
   "source": [
    "# pip install kaggle\n",
    "# 使用kaggle提供的api下载数据\n",
    "!kaggle competitions download -c titanic -p datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data = load_titantic_data('train.csv')\n",
    "test_data = load_titantic_data('test.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "   PassengerId  Survived  Pclass  \\\n",
       "0            1         0       3   \n",
       "1            2         1       1   \n",
       "2            3         1       3   \n",
       "3            4         1       1   \n",
       "4            5         0       3   \n",
       "\n",
       "                                                Name     Sex   Age  SibSp  \\\n",
       "0                            Braund, Mr. Owen Harris    male  22.0      1   \n",
       "1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   \n",
       "2                             Heikkinen, Miss. Laina  female  26.0      0   \n",
       "3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   \n",
       "4                           Allen, Mr. William Henry    male  35.0      0   \n",
       "\n",
       "   Parch            Ticket     Fare Cabin Embarked  \n",
       "0      0         A/5 21171   7.2500   NaN        S  \n",
       "1      0          PC 17599  71.2833   C85        C  \n",
       "2      0  STON/O2. 3101282   7.9250   NaN        S  \n",
       "3      0            113803  53.1000  C123        S  \n",
       "4      0            373450   8.0500   NaN        S  "
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>PassengerId</th>\n      <th>Survived</th>\n      <th>Pclass</th>\n      <th>Name</th>\n      <th>Sex</th>\n      <th>Age</th>\n      <th>SibSp</th>\n      <th>Parch</th>\n      <th>Ticket</th>\n      <th>Fare</th>\n      <th>Cabin</th>\n      <th>Embarked</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>1</td>\n      <td>0</td>\n      <td>3</td>\n      <td>Braund, Mr. Owen Harris</td>\n      <td>male</td>\n      <td>22.0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>A/5 21171</td>\n      <td>7.2500</td>\n      <td>NaN</td>\n      <td>S</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>2</td>\n      <td>1</td>\n      <td>1</td>\n      <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n      <td>female</td>\n      <td>38.0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>PC 17599</td>\n      <td>71.2833</td>\n      <td>C85</td>\n      <td>C</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>3</td>\n      <td>1</td>\n      <td>3</td>\n      <td>Heikkinen, Miss. Laina</td>\n      <td>female</td>\n      <td>26.0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>STON/O2. 3101282</td>\n      <td>7.9250</td>\n      <td>NaN</td>\n      <td>S</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>4</td>\n      <td>1</td>\n      <td>1</td>\n      <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n      <td>female</td>\n      <td>35.0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>113803</td>\n      <td>53.1000</td>\n      <td>C123</td>\n      <td>S</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>5</td>\n      <td>0</td>\n      <td>3</td>\n      <td>Allen, Mr. William Henry</td>\n      <td>male</td>\n      <td>35.0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>373450</td>\n      <td>8.0500</td>\n      <td>NaN</td>\n      <td>S</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 5
    }
   ],
   "source": [
    "train_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "   PassengerId  Pclass                                          Name     Sex  \\\n",
       "0          892       3                              Kelly, Mr. James    male   \n",
       "1          893       3              Wilkes, Mrs. James (Ellen Needs)  female   \n",
       "2          894       2                     Myles, Mr. Thomas Francis    male   \n",
       "3          895       3                              Wirz, Mr. Albert    male   \n",
       "4          896       3  Hirvonen, Mrs. Alexander (Helga E Lindqvist)  female   \n",
       "\n",
       "    Age  SibSp  Parch   Ticket     Fare Cabin Embarked  \n",
       "0  34.5      0      0   330911   7.8292   NaN        Q  \n",
       "1  47.0      1      0   363272   7.0000   NaN        S  \n",
       "2  62.0      0      0   240276   9.6875   NaN        Q  \n",
       "3  27.0      0      0   315154   8.6625   NaN        S  \n",
       "4  22.0      1      1  3101298  12.2875   NaN        S  "
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>PassengerId</th>\n      <th>Pclass</th>\n      <th>Name</th>\n      <th>Sex</th>\n      <th>Age</th>\n      <th>SibSp</th>\n      <th>Parch</th>\n      <th>Ticket</th>\n      <th>Fare</th>\n      <th>Cabin</th>\n      <th>Embarked</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>892</td>\n      <td>3</td>\n      <td>Kelly, Mr. James</td>\n      <td>male</td>\n      <td>34.5</td>\n      <td>0</td>\n      <td>0</td>\n      <td>330911</td>\n      <td>7.8292</td>\n      <td>NaN</td>\n      <td>Q</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>893</td>\n      <td>3</td>\n      <td>Wilkes, Mrs. James (Ellen Needs)</td>\n      <td>female</td>\n      <td>47.0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>363272</td>\n      <td>7.0000</td>\n      <td>NaN</td>\n      <td>S</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>894</td>\n      <td>2</td>\n      <td>Myles, Mr. Thomas Francis</td>\n      <td>male</td>\n      <td>62.0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>240276</td>\n      <td>9.6875</td>\n      <td>NaN</td>\n      <td>Q</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>895</td>\n      <td>3</td>\n      <td>Wirz, Mr. Albert</td>\n      <td>male</td>\n      <td>27.0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>315154</td>\n      <td>8.6625</td>\n      <td>NaN</td>\n      <td>S</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>896</td>\n      <td>3</td>\n      <td>Hirvonen, Mrs. Alexander (Helga E Lindqvist)</td>\n      <td>female</td>\n      <td>22.0</td>\n      <td>1</td>\n      <td>1</td>\n      <td>3101298</td>\n      <td>12.2875</td>\n      <td>NaN</td>\n      <td>S</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 6
    }
   ],
   "source": [
    "test_data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\nRangeIndex: 891 entries, 0 to 890\nData columns (total 12 columns):\n #   Column       Non-Null Count  Dtype  \n---  ------       --------------  -----  \n 0   PassengerId  891 non-null    int64  \n 1   Survived     891 non-null    int64  \n 2   Pclass       891 non-null    int64  \n 3   Name         891 non-null    object \n 4   Sex          891 non-null    object \n 5   Age          714 non-null    float64\n 6   SibSp        891 non-null    int64  \n 7   Parch        891 non-null    int64  \n 8   Ticket       891 non-null    object \n 9   Fare         891 non-null    float64\n 10  Cabin        204 non-null    object \n 11  Embarked     889 non-null    object \ndtypes: float64(2), int64(5), object(5)\nmemory usage: 83.7+ KB\n"
     ]
    }
   ],
   "source": [
    "train_data.info()"
   ]
  },
  {
   "source": [
    "可以看出**Age, Cabin, Embarked**数据是不完全的，特别是的Cabin缺失了74%的数据，没办法只能忽略掉Cabin记录了。Age的缺失的数据可以使用median来代替。\n",
    "\n",
    "而**Name、Ticket**可能有些数字，但是转化为模型可以使用的数字有些棘手，所以也暂时忽略掉相关的记录"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "       PassengerId    Survived      Pclass         Age       SibSp  \\\n",
       "count   891.000000  891.000000  891.000000  714.000000  891.000000   \n",
       "mean    446.000000    0.383838    2.308642   29.699118    0.523008   \n",
       "std     257.353842    0.486592    0.836071   14.526497    1.102743   \n",
       "min       1.000000    0.000000    1.000000    0.420000    0.000000   \n",
       "25%     223.500000    0.000000    2.000000   20.125000    0.000000   \n",
       "50%     446.000000    0.000000    3.000000   28.000000    0.000000   \n",
       "75%     668.500000    1.000000    3.000000   38.000000    1.000000   \n",
       "max     891.000000    1.000000    3.000000   80.000000    8.000000   \n",
       "\n",
       "            Parch        Fare  \n",
       "count  891.000000  891.000000  \n",
       "mean     0.381594   32.204208  \n",
       "std      0.806057   49.693429  \n",
       "min      0.000000    0.000000  \n",
       "25%      0.000000    7.910400  \n",
       "50%      0.000000   14.454200  \n",
       "75%      0.000000   31.000000  \n",
       "max      6.000000  512.329200  "
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>PassengerId</th>\n      <th>Survived</th>\n      <th>Pclass</th>\n      <th>Age</th>\n      <th>SibSp</th>\n      <th>Parch</th>\n      <th>Fare</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>count</th>\n      <td>891.000000</td>\n      <td>891.000000</td>\n      <td>891.000000</td>\n      <td>714.000000</td>\n      <td>891.000000</td>\n      <td>891.000000</td>\n      <td>891.000000</td>\n    </tr>\n    <tr>\n      <th>mean</th>\n      <td>446.000000</td>\n      <td>0.383838</td>\n      <td>2.308642</td>\n      <td>29.699118</td>\n      <td>0.523008</td>\n      <td>0.381594</td>\n      <td>32.204208</td>\n    </tr>\n    <tr>\n      <th>std</th>\n      <td>257.353842</td>\n      <td>0.486592</td>\n      <td>0.836071</td>\n      <td>14.526497</td>\n      <td>1.102743</td>\n      <td>0.806057</td>\n      <td>49.693429</td>\n    </tr>\n    <tr>\n      <th>min</th>\n      <td>1.000000</td>\n      <td>0.000000</td>\n      <td>1.000000</td>\n      <td>0.420000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>25%</th>\n      <td>223.500000</td>\n      <td>0.000000</td>\n      <td>2.000000</td>\n      <td>20.125000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>7.910400</td>\n    </tr>\n    <tr>\n      <th>50%</th>\n      <td>446.000000</td>\n      <td>0.000000</td>\n      <td>3.000000</td>\n      <td>28.000000</td>\n      <td>0.000000</td>\n      <td>0.000000</td>\n      <td>14.454200</td>\n    </tr>\n    <tr>\n      <th>75%</th>\n      <td>668.500000</td>\n      <td>1.000000</td>\n      <td>3.000000</td>\n      <td>38.000000</td>\n      <td>1.000000</td>\n      <td>0.000000</td>\n      <td>31.000000</td>\n    </tr>\n    <tr>\n      <th>max</th>\n      <td>891.000000</td>\n      <td>1.000000</td>\n      <td>3.000000</td>\n      <td>80.000000</td>\n      <td>8.000000</td>\n      <td>6.000000</td>\n      <td>512.329200</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 8
    }
   ],
   "source": [
    "train_data.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0    549\n",
       "1    342\n",
       "Name: Survived, dtype: int64"
      ]
     },
     "metadata": {},
     "execution_count": 9
    }
   ],
   "source": [
    "train_data['Survived'].value_counts()"
   ]
  },
  {
   "source": [
    "- 只有38.34%的人活了下来"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "3    491\n",
       "1    216\n",
       "2    184\n",
       "Name: Pclass, dtype: int64"
      ]
     },
     "metadata": {},
     "execution_count": 10
    }
   ],
   "source": [
    "train_data['Pclass'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "male      577\n",
       "female    314\n",
       "Name: Sex, dtype: int64"
      ]
     },
     "metadata": {},
     "execution_count": 11
    }
   ],
   "source": [
    "train_data['Sex'].value_counts()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "S    644\n",
       "C    168\n",
       "Q     77\n",
       "Name: Embarked, dtype: int64"
      ]
     },
     "metadata": {},
     "execution_count": 12
    }
   ],
   "source": [
    "train_data['Embarked'].value_counts()"
   ]
  },
  {
   "source": [
    "创建一个预处理Pipeline"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.base import BaseEstimator, TransformerMixin\n",
    "\n",
    "class DataFrameSelector(BaseEstimator, TransformerMixin):\n",
    "    def __init__(self, attribute_names):\n",
    "        self.attribute_names = attribute_names\n",
    "\n",
    "    def fit(self, X, y=None):\n",
    "        return self\n",
    "\n",
    "    def transform(self, X):\n",
    "        return X[self.attribute_names]"
   ]
  },
  {
   "source": [
    "创建一个pipeline选出数值属性"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "from sklearn.pipeline import Pipeline\n",
    "try:\n",
    "    from sklearn.impute import SimpleImputer # scikit-learn 0.20+\n",
    "except ImportError:\n",
    "    from sklearn.preprocessing import Imputer as SimpleImputer\n",
    "num_pipeline = Pipeline([\n",
    "    (\"select_numeric\", DataFrameSelector([\"Age\", \"SibSp\", \"Parch\", \"Fare\"])),\n",
    "    (\"imputer\", SimpleImputer(strategy='median'))\n",
    "])"
   ],
   "cell_type": "code",
   "metadata": {},
   "execution_count": 14,
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([[22.    ,  1.    ,  0.    ,  7.25  ],\n",
       "       [38.    ,  1.    ,  0.    , 71.2833],\n",
       "       [26.    ,  0.    ,  0.    ,  7.925 ],\n",
       "       ...,\n",
       "       [28.    ,  1.    ,  2.    , 23.45  ],\n",
       "       [26.    ,  0.    ,  0.    , 30.    ],\n",
       "       [32.    ,  0.    ,  0.    ,  7.75  ]])"
      ]
     },
     "metadata": {},
     "execution_count": 15
    }
   ],
   "source": [
    "num_pipeline.fit_transform(train_data)"
   ]
  },
  {
   "source": [
    "在0.20以前版本的Scikit-Learn需要使用`LabelBinarizer`或`CategoricalEncoder`才能将分类的值转化为one-hot-vector; 0.20+以上的版本可以直接使用`OneHotEncoder`类"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "PassengerId                              891\n",
       "Survived                                   0\n",
       "Pclass                                     3\n",
       "Name           Allen, Miss. Elisabeth Walton\n",
       "Sex                                     male\n",
       "Age                                       24\n",
       "SibSp                                      0\n",
       "Parch                                      0\n",
       "Ticket                              CA. 2343\n",
       "Fare                                    8.05\n",
       "Cabin                                     G6\n",
       "Embarked                                   S\n",
       "dtype: object"
      ]
     },
     "metadata": {},
     "execution_count": 16
    }
   ],
   "source": [
    "most_ = pd.Series([train_data[c].value_counts().index[0] for c in train_data], index=train_data.columns)\n",
    "most_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "PassengerId\nSurvived\nPclass\nName\nSex\nAge\nSibSp\nParch\nTicket\nFare\nCabin\nEmbarked\n"
     ]
    }
   ],
   "source": [
    "for c in train_data:\n",
    "    print(c)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "'Allen, Miss. Elisabeth Walton'"
      ]
     },
     "metadata": {},
     "execution_count": 18
    }
   ],
   "source": [
    "train_data['Name'].value_counts().index[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "class MostFrequentImputer(BaseEstimator, TransformerMixin):\n",
    "    \"\"\"\n",
    "    计算分类中的值出现的次数最多的的类表是多少\n",
    "    示例：\n",
    "        如tain_data['Set'].value_counts():\n",
    "        male      577\n",
    "        female    314\n",
    "        Name: Sex, dtype: int64\n",
    "\n",
    "        则ain_data['Set'].value_counts().index[0]:male\n",
    "    \"\"\"\n",
    "    def fit(self, X, y=None):\n",
    "        self.most_requent_ = pd.Series([X[c].value_counts().index[0] for c in X], index=X.columns)\n",
    "        return self\n",
    "    \n",
    "    def transform(self, X, y=None):\n",
    "        return X.fillna(self.most_requent_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import OneHotEncoder\n"
   ]
  },
  {
   "source": [
    "现在创建一个pipeline来处理分类属性"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_pipeline = Pipeline([\n",
    "    ('select_cat', DataFrameSelector(['Pclass', 'Sex', 'Embarked'])),\n",
    "    ('imputer', MostFrequentImputer()),\n",
    "    ('cat_encoder', OneHotEncoder(sparse=False))\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([[0., 0., 1., ..., 0., 0., 1.],\n",
       "       [1., 0., 0., ..., 1., 0., 0.],\n",
       "       [0., 0., 1., ..., 0., 0., 1.],\n",
       "       ...,\n",
       "       [0., 0., 1., ..., 0., 0., 1.],\n",
       "       [1., 0., 0., ..., 1., 0., 0.],\n",
       "       [0., 0., 1., ..., 0., 1., 0.]])"
      ]
     },
     "metadata": {},
     "execution_count": 22
    }
   ],
   "source": [
    "cat_pipeline.fit_transform(train_data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_data.head()"
   ]
  },
  {
   "source": [
    "**将数字特征和分类特征结合起来**"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "现在可是使用这个preprocess_pipeline将raw data转化为机器学习模型使用的数据了"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.pipeline import FeatureUnion\n",
    "preprocess_pipeline = FeatureUnion(transformer_list=[\n",
    "    ('num_pipeline', num_pipeline),\n",
    "    ('cat_pipeline', cat_pipeline),\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([[22.,  1.,  0., ...,  0.,  0.,  1.],\n",
       "       [38.,  1.,  0., ...,  1.,  0.,  0.],\n",
       "       [26.,  0.,  0., ...,  0.,  0.,  1.],\n",
       "       ...,\n",
       "       [28.,  1.,  2., ...,  0.,  0.,  1.],\n",
       "       [26.,  0.,  0., ...,  1.,  0.,  0.],\n",
       "       [32.,  0.,  0., ...,  0.,  1.,  0.]])"
      ]
     },
     "metadata": {},
     "execution_count": 25
    }
   ],
   "source": [
    "X_train = preprocess_pipeline.fit_transform(train_data)\n",
    "X_train"
   ]
  },
  {
   "source": [
    "**不要忘了训练的标签数据**"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0      0\n",
       "1      1\n",
       "2      1\n",
       "3      1\n",
       "4      0\n",
       "      ..\n",
       "886    0\n",
       "887    1\n",
       "888    0\n",
       "889    1\n",
       "890    0\n",
       "Name: Survived, Length: 891, dtype: int64"
      ]
     },
     "metadata": {},
     "execution_count": 26
    }
   ],
   "source": [
    "y_train = train_data['Survived']\n",
    "y_train"
   ]
  },
  {
   "source": [
    "### 首先使用SVC模型测试一下"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "SVC(gamma='auto')"
      ]
     },
     "metadata": {},
     "execution_count": 27
    }
   ],
   "source": [
    "from sklearn.svm import SVC\n",
    "svm_clf = SVC(gamma='auto')\n",
    "svm_clf.fit(X_train, y_train)"
   ]
  },
  {
   "source": [
    "使用训练好的SVC模型进行预测"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1,\n",
       "       1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1,\n",
       "       1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,\n",
       "       1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 1,\n",
       "       1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,\n",
       "       0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1,\n",
       "       0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1,\n",
       "       0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,\n",
       "       1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0,\n",
       "       1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1,\n",
       "       0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0,\n",
       "       0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1,\n",
       "       1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,\n",
       "       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0,\n",
       "       0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1,\n",
       "       1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1,\n",
       "       0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0])"
      ]
     },
     "metadata": {},
     "execution_count": 28
    }
   ],
   "source": [
    "X_test = preprocess_pipeline.fit_transform(test_data)\n",
    "y_pred = svm_clf.predict(X_test)\n",
    "y_pred"
   ]
  },
  {
   "source": [
    "此时我们可是使用SVC预测的结果按照Kaggle要求的格式构建号CSV文件，上传Kaggle看我们的得分，不过在此之前我们可是使用交叉验证的方法来看看我们的模型表现如何"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([0.66666667, 0.66292135, 0.71910112, 0.74157303, 0.76404494,\n",
       "       0.71910112, 0.7752809 , 0.73033708, 0.74157303, 0.80898876])"
      ]
     },
     "metadata": {},
     "execution_count": 29
    }
   ],
   "source": [
    "from sklearn.model_selection import cross_val_score\n",
    "svm_scores = cross_val_score(svm_clf, X_train, y_train, cv=10)\n",
    "svm_scores"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0.7329588014981274"
      ]
     },
     "metadata": {},
     "execution_count": 30
    }
   ],
   "source": [
    "svm_scores.mean()"
   ]
  },
  {
   "source": [
    "也就是说模型的accuracy只有73.30%，虽然明显要比随你乱猜要好，但是仍然不是一个好的得分。从kaggle的[leaderboard](https://www.kaggle.com/c/titanic/leaderboard)可以看到前面排名几乎都达到了100%， 不过由于可以下载的到[测试集](https://www.encyclopedia-titanica.org/titanic-victims/)，100%的成绩中有比较大的水分，我们无需理会这些"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "### 试试RandomForestClassifier"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "array([0.74444444, 0.79775281, 0.75280899, 0.80898876, 0.88764045,\n",
       "       0.83146067, 0.83146067, 0.7752809 , 0.85393258, 0.84269663])"
      ]
     },
     "metadata": {},
     "execution_count": 31
    }
   ],
   "source": [
    "from sklearn.ensemble import RandomForestClassifier\n",
    "forest_clf = RandomForestClassifier(n_estimators=100, random_state=42)\n",
    "forest_scores = cross_val_score(forest_clf, X_train, y_train, cv=10)\n",
    "forest_scores\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0.8126466916354558"
      ]
     },
     "metadata": {},
     "execution_count": 32
    }
   ],
   "source": [
    "forest_scores.mean()"
   ]
  },
  {
   "source": [
    "可以看到RandomForestClassifier明显好了一下"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "source": [
    "### 试试AdaBoostClassifier"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0.812621722846442"
      ]
     },
     "metadata": {},
     "execution_count": 33
    }
   ],
   "source": [
    "from sklearn.ensemble import AdaBoostClassifier\n",
    "boost_clf = AdaBoostClassifier(n_estimators=100, random_state=42)\n",
    "boost_scores = cross_val_score(boost_clf, X_train, y_train, cv=10)\n",
    "boost_scores.mean()"
   ]
  },
  {
   "source": [
    "### 试试KNeighborsClassifier"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0.7172659176029963"
      ]
     },
     "metadata": {},
     "execution_count": 34
    }
   ],
   "source": [
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "knn_clf = KNeighborsClassifier()\n",
    "knn_scores = cross_val_score(knn_clf, X_train, y_train, cv=10)\n",
    "knn_scores.mean()"
   ]
  },
  {
   "source": [
    "可以看到`RandomForestClassifier`表现的还是不错的，我们来找一下随机森林的最佳参数"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import GridSearchCV\n",
    "params_grid = [{\n",
    "    'n_estimators':[10,20,30, 40, 50],\n",
    "    'criterion':['gini','entropy'],\n",
    "    'max_features':['sqrt','log2']}]\n",
    "grid_search = GridSearchCV(forest_clf, params_grid, cv=5, verbose=3, n_jobs=-1)\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "Fitting 5 folds for each of 20 candidates, totalling 100 fits\n",
      "[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.\n",
      "[Parallel(n_jobs=-1)]: Done  16 tasks      | elapsed:    3.6s\n",
      "[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:    5.8s finished\n"
     ]
    },
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "GridSearchCV(cv=5, estimator=RandomForestClassifier(random_state=42), n_jobs=-1,\n",
       "             param_grid=[{'criterion': ['gini', 'entropy'],\n",
       "                          'max_features': ['sqrt', 'log2'],\n",
       "                          'n_estimators': [10, 20, 30, 40, 50]}],\n",
       "             verbose=3)"
      ]
     },
     "metadata": {},
     "execution_count": 36
    }
   ],
   "source": [
    "grid_search.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "{'criterion': 'entropy', 'max_features': 'sqrt', 'n_estimators': 50}"
      ]
     },
     "metadata": {},
     "execution_count": 37
    }
   ],
   "source": [
    "grid_search.best_params_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0.8182397003745316"
      ]
     },
     "metadata": {},
     "execution_count": 39
    }
   ],
   "source": [
    "forest_clf = RandomForestClassifier(**grid_search.best_params_)\n",
    "forest_scores = cross_val_score(forest_clf, X_train, y_train, cv=10)\n",
    "forest_scores.mean()"
   ]
  },
  {
   "source": [
    "### 实时BaggingClassifier"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "(0.8272034956304619, 0.7498901958778095)"
      ]
     },
     "metadata": {},
     "execution_count": 46
    }
   ],
   "source": [
    "from sklearn.ensemble import BaggingClassifier\n",
    "bagging_clf = BaggingClassifier(forest_clf)\n",
    "bagging_clf_acc_sorces = cross_val_score(bagging_clf, X_train, y_train, cv=10, scoring='accuracy')\n",
    "bagging_clf_f1_sorces = cross_val_score(bagging_clf, X_train, y_train, cv=10, scoring='f1')\n",
    "bagging_clf_acc_sorces.mean(), bagging_clf_f1_sorces.mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {},
   "outputs": [],
   "source": [
    "bagging_grid_params=[{\n",
    "    'base_estimator':[svm_clf, forest_clf],\n",
    "    'n_estimators':[10,20,30, 40, 50],\n",
    "    'max_samples':[0.1, 0.3, 0.5, 0.8, 1.0],\n",
    "    'max_features':[0.1, 0.3, 0.5, 0.8, 1.0]\n",
    "}]\n",
    "bagging_grid_search = GridSearchCV(bagging_clf, bagging_grid_params, cv=5, verbose=3, n_jobs=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "bagging_grid_search.fit(X_train, y_train)"
   ]
  },
  {
   "source": [
    "这里我们不关注每个模型的10折叠的平均分，而是看一下每个模型的每次折叠的箱线图"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "%matplotlib inline\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/plain": "<Figure size 576x288 with 1 Axes>",
      "image/svg+xml": "<?xml version=\"1.0\" encoding=\"utf-8\" standalone=\"no\"?>\n<!DOCTYPE svg PUBLIC \"-//W3C//DTD SVG 1.1//EN\"\n  \"http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd\">\n<!-- Created with matplotlib (https://matplotlib.org/) -->\n<svg height=\"248.518125pt\" version=\"1.1\" viewBox=\"0 0 507.615 248.518125\" width=\"507.615pt\" xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\">\n <metadata>\n  <rdf:RDF xmlns:cc=\"http://creativecommons.org/ns#\" xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n   <cc:Work>\n    <dc:type rdf:resource=\"http://purl.org/dc/dcmitype/StillImage\"/>\n    <dc:date>2020-10-11T22:43:16.300560</dc:date>\n    <dc:format>image/svg+xml</dc:format>\n    <dc:creator>\n     <cc:Agent>\n      <dc:title>Matplotlib v3.3.2, https://matplotlib.org/</dc:title>\n     </cc:Agent>\n    </dc:creator>\n   </cc:Work>\n  </rdf:RDF>\n </metadata>\n <defs>\n  <style type=\"text/css\">*{stroke-linecap:butt;stroke-linejoin:round;}</style>\n </defs>\n <g id=\"figure_1\">\n  <g id=\"patch_1\">\n   <path d=\"M 0 248.518125 \nL 507.615 248.518125 \nL 507.615 0 \nL 0 0 \nz\n\" style=\"fill:none;\"/>\n  </g>\n  <g id=\"axes_1\">\n   <g id=\"patch_2\">\n    <path d=\"M 54.015 224.64 \nL 500.415 224.64 \nL 500.415 7.2 \nL 54.015 7.2 \nz\n\" style=\"fill:#ffffff;\"/>\n   </g>\n   <g id=\"matplotlib.axis_1\">\n    <g id=\"xtick_1\">\n     <g id=\"line2d_1\">\n      <defs>\n       <path d=\"M 0 0 \nL 0 3.5 \n\" id=\"m1c7e321281\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"165.615\" xlink:href=\"#m1c7e321281\" y=\"224.64\"/>\n      </g>\n     </g>\n     <g id=\"text_1\">\n      <!-- SVM -->\n      <g transform=\"translate(154.706406 239.238437)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 53.515625 70.515625 \nL 53.515625 60.890625 \nQ 47.90625 63.578125 42.921875 64.890625 \nQ 37.9375 66.21875 33.296875 66.21875 \nQ 25.25 66.21875 20.875 63.09375 \nQ 16.5 59.96875 16.5 54.203125 \nQ 16.5 49.359375 19.40625 46.890625 \nQ 22.3125 44.4375 30.421875 42.921875 \nL 36.375 41.703125 \nQ 47.40625 39.59375 52.65625 34.296875 \nQ 57.90625 29 57.90625 20.125 \nQ 57.90625 9.515625 50.796875 4.046875 \nQ 43.703125 -1.421875 29.984375 -1.421875 \nQ 24.8125 -1.421875 18.96875 -0.25 \nQ 13.140625 0.921875 6.890625 3.21875 \nL 6.890625 13.375 \nQ 12.890625 10.015625 18.65625 8.296875 \nQ 24.421875 6.59375 29.984375 6.59375 \nQ 38.421875 6.59375 43.015625 9.90625 \nQ 47.609375 13.234375 47.609375 19.390625 \nQ 47.609375 24.75 44.3125 27.78125 \nQ 41.015625 30.8125 33.5 32.328125 \nL 27.484375 33.5 \nQ 16.453125 35.6875 11.515625 40.375 \nQ 6.59375 45.0625 6.59375 53.421875 \nQ 6.59375 63.09375 13.40625 68.65625 \nQ 20.21875 74.21875 32.171875 74.21875 \nQ 37.3125 74.21875 42.625 73.28125 \nQ 47.953125 72.359375 53.515625 70.515625 \nz\n\" id=\"DejaVuSans-83\"/>\n        <path d=\"M 28.609375 0 \nL 0.78125 72.90625 \nL 11.078125 72.90625 \nL 34.1875 11.53125 \nL 57.328125 72.90625 \nL 67.578125 72.90625 \nL 39.796875 0 \nz\n\" id=\"DejaVuSans-86\"/>\n        <path d=\"M 9.8125 72.90625 \nL 24.515625 72.90625 \nL 43.109375 23.296875 \nL 61.8125 72.90625 \nL 76.515625 72.90625 \nL 76.515625 0 \nL 66.890625 0 \nL 66.890625 64.015625 \nL 48.09375 14.015625 \nL 38.1875 14.015625 \nL 19.390625 64.015625 \nL 19.390625 0 \nL 9.8125 0 \nz\n\" id=\"DejaVuSans-77\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-83\"/>\n       <use x=\"63.476562\" xlink:href=\"#DejaVuSans-86\"/>\n       <use x=\"131.884766\" xlink:href=\"#DejaVuSans-77\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"xtick_2\">\n     <g id=\"line2d_2\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"388.815\" xlink:href=\"#m1c7e321281\" y=\"224.64\"/>\n      </g>\n     </g>\n     <g id=\"text_2\">\n      <!-- Random Forest -->\n      <g transform=\"translate(351.189219 239.238437)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 44.390625 34.1875 \nQ 47.5625 33.109375 50.5625 29.59375 \nQ 53.5625 26.078125 56.59375 19.921875 \nL 66.609375 0 \nL 56 0 \nL 46.6875 18.703125 \nQ 43.0625 26.03125 39.671875 28.421875 \nQ 36.28125 30.8125 30.421875 30.8125 \nL 19.671875 30.8125 \nL 19.671875 0 \nL 9.8125 0 \nL 9.8125 72.90625 \nL 32.078125 72.90625 \nQ 44.578125 72.90625 50.734375 67.671875 \nQ 56.890625 62.453125 56.890625 51.90625 \nQ 56.890625 45.015625 53.6875 40.46875 \nQ 50.484375 35.9375 44.390625 34.1875 \nz\nM 19.671875 64.796875 \nL 19.671875 38.921875 \nL 32.078125 38.921875 \nQ 39.203125 38.921875 42.84375 42.21875 \nQ 46.484375 45.515625 46.484375 51.90625 \nQ 46.484375 58.296875 42.84375 61.546875 \nQ 39.203125 64.796875 32.078125 64.796875 \nz\n\" id=\"DejaVuSans-82\"/>\n        <path d=\"M 34.28125 27.484375 \nQ 23.390625 27.484375 19.1875 25 \nQ 14.984375 22.515625 14.984375 16.5 \nQ 14.984375 11.71875 18.140625 8.90625 \nQ 21.296875 6.109375 26.703125 6.109375 \nQ 34.1875 6.109375 38.703125 11.40625 \nQ 43.21875 16.703125 43.21875 25.484375 \nL 43.21875 27.484375 \nz\nM 52.203125 31.203125 \nL 52.203125 0 \nL 43.21875 0 \nL 43.21875 8.296875 \nQ 40.140625 3.328125 35.546875 0.953125 \nQ 30.953125 -1.421875 24.3125 -1.421875 \nQ 15.921875 -1.421875 10.953125 3.296875 \nQ 6 8.015625 6 15.921875 \nQ 6 25.140625 12.171875 29.828125 \nQ 18.359375 34.515625 30.609375 34.515625 \nL 43.21875 34.515625 \nL 43.21875 35.40625 \nQ 43.21875 41.609375 39.140625 45 \nQ 35.0625 48.390625 27.6875 48.390625 \nQ 23 48.390625 18.546875 47.265625 \nQ 14.109375 46.140625 10.015625 43.890625 \nL 10.015625 52.203125 \nQ 14.9375 54.109375 19.578125 55.046875 \nQ 24.21875 56 28.609375 56 \nQ 40.484375 56 46.34375 49.84375 \nQ 52.203125 43.703125 52.203125 31.203125 \nz\n\" id=\"DejaVuSans-97\"/>\n        <path d=\"M 54.890625 33.015625 \nL 54.890625 0 \nL 45.90625 0 \nL 45.90625 32.71875 \nQ 45.90625 40.484375 42.875 44.328125 \nQ 39.84375 48.1875 33.796875 48.1875 \nQ 26.515625 48.1875 22.3125 43.546875 \nQ 18.109375 38.921875 18.109375 30.90625 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 21.34375 51.125 25.703125 53.5625 \nQ 30.078125 56 35.796875 56 \nQ 45.21875 56 50.046875 50.171875 \nQ 54.890625 44.34375 54.890625 33.015625 \nz\n\" id=\"DejaVuSans-110\"/>\n        <path d=\"M 45.40625 46.390625 \nL 45.40625 75.984375 \nL 54.390625 75.984375 \nL 54.390625 0 \nL 45.40625 0 \nL 45.40625 8.203125 \nQ 42.578125 3.328125 38.25 0.953125 \nQ 33.9375 -1.421875 27.875 -1.421875 \nQ 17.96875 -1.421875 11.734375 6.484375 \nQ 5.515625 14.40625 5.515625 27.296875 \nQ 5.515625 40.1875 11.734375 48.09375 \nQ 17.96875 56 27.875 56 \nQ 33.9375 56 38.25 53.625 \nQ 42.578125 51.265625 45.40625 46.390625 \nz\nM 14.796875 27.296875 \nQ 14.796875 17.390625 18.875 11.75 \nQ 22.953125 6.109375 30.078125 6.109375 \nQ 37.203125 6.109375 41.296875 11.75 \nQ 45.40625 17.390625 45.40625 27.296875 \nQ 45.40625 37.203125 41.296875 42.84375 \nQ 37.203125 48.484375 30.078125 48.484375 \nQ 22.953125 48.484375 18.875 42.84375 \nQ 14.796875 37.203125 14.796875 27.296875 \nz\n\" id=\"DejaVuSans-100\"/>\n        <path d=\"M 30.609375 48.390625 \nQ 23.390625 48.390625 19.1875 42.75 \nQ 14.984375 37.109375 14.984375 27.296875 \nQ 14.984375 17.484375 19.15625 11.84375 \nQ 23.34375 6.203125 30.609375 6.203125 \nQ 37.796875 6.203125 41.984375 11.859375 \nQ 46.1875 17.53125 46.1875 27.296875 \nQ 46.1875 37.015625 41.984375 42.703125 \nQ 37.796875 48.390625 30.609375 48.390625 \nz\nM 30.609375 56 \nQ 42.328125 56 49.015625 48.375 \nQ 55.71875 40.765625 55.71875 27.296875 \nQ 55.71875 13.875 49.015625 6.21875 \nQ 42.328125 -1.421875 30.609375 -1.421875 \nQ 18.84375 -1.421875 12.171875 6.21875 \nQ 5.515625 13.875 5.515625 27.296875 \nQ 5.515625 40.765625 12.171875 48.375 \nQ 18.84375 56 30.609375 56 \nz\n\" id=\"DejaVuSans-111\"/>\n        <path d=\"M 52 44.1875 \nQ 55.375 50.25 60.0625 53.125 \nQ 64.75 56 71.09375 56 \nQ 79.640625 56 84.28125 50.015625 \nQ 88.921875 44.046875 88.921875 33.015625 \nL 88.921875 0 \nL 79.890625 0 \nL 79.890625 32.71875 \nQ 79.890625 40.578125 77.09375 44.375 \nQ 74.3125 48.1875 68.609375 48.1875 \nQ 61.625 48.1875 57.5625 43.546875 \nQ 53.515625 38.921875 53.515625 30.90625 \nL 53.515625 0 \nL 44.484375 0 \nL 44.484375 32.71875 \nQ 44.484375 40.625 41.703125 44.40625 \nQ 38.921875 48.1875 33.109375 48.1875 \nQ 26.21875 48.1875 22.15625 43.53125 \nQ 18.109375 38.875 18.109375 30.90625 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 21.1875 51.21875 25.484375 53.609375 \nQ 29.78125 56 35.6875 56 \nQ 41.65625 56 45.828125 52.96875 \nQ 50 49.953125 52 44.1875 \nz\n\" id=\"DejaVuSans-109\"/>\n        <path id=\"DejaVuSans-32\"/>\n        <path d=\"M 9.8125 72.90625 \nL 51.703125 72.90625 \nL 51.703125 64.59375 \nL 19.671875 64.59375 \nL 19.671875 43.109375 \nL 48.578125 43.109375 \nL 48.578125 34.8125 \nL 19.671875 34.8125 \nL 19.671875 0 \nL 9.8125 0 \nz\n\" id=\"DejaVuSans-70\"/>\n        <path d=\"M 41.109375 46.296875 \nQ 39.59375 47.171875 37.8125 47.578125 \nQ 36.03125 48 33.890625 48 \nQ 26.265625 48 22.1875 43.046875 \nQ 18.109375 38.09375 18.109375 28.8125 \nL 18.109375 0 \nL 9.078125 0 \nL 9.078125 54.6875 \nL 18.109375 54.6875 \nL 18.109375 46.1875 \nQ 20.953125 51.171875 25.484375 53.578125 \nQ 30.03125 56 36.53125 56 \nQ 37.453125 56 38.578125 55.875 \nQ 39.703125 55.765625 41.0625 55.515625 \nz\n\" id=\"DejaVuSans-114\"/>\n        <path d=\"M 56.203125 29.59375 \nL 56.203125 25.203125 \nL 14.890625 25.203125 \nQ 15.484375 15.921875 20.484375 11.0625 \nQ 25.484375 6.203125 34.421875 6.203125 \nQ 39.59375 6.203125 44.453125 7.46875 \nQ 49.3125 8.734375 54.109375 11.28125 \nL 54.109375 2.78125 \nQ 49.265625 0.734375 44.1875 -0.34375 \nQ 39.109375 -1.421875 33.890625 -1.421875 \nQ 20.796875 -1.421875 13.15625 6.1875 \nQ 5.515625 13.8125 5.515625 26.8125 \nQ 5.515625 40.234375 12.765625 48.109375 \nQ 20.015625 56 32.328125 56 \nQ 43.359375 56 49.78125 48.890625 \nQ 56.203125 41.796875 56.203125 29.59375 \nz\nM 47.21875 32.234375 \nQ 47.125 39.59375 43.09375 43.984375 \nQ 39.0625 48.390625 32.421875 48.390625 \nQ 24.90625 48.390625 20.390625 44.140625 \nQ 15.875 39.890625 15.1875 32.171875 \nz\n\" id=\"DejaVuSans-101\"/>\n        <path d=\"M 44.28125 53.078125 \nL 44.28125 44.578125 \nQ 40.484375 46.53125 36.375 47.5 \nQ 32.28125 48.484375 27.875 48.484375 \nQ 21.1875 48.484375 17.84375 46.4375 \nQ 14.5 44.390625 14.5 40.28125 \nQ 14.5 37.15625 16.890625 35.375 \nQ 19.28125 33.59375 26.515625 31.984375 \nL 29.59375 31.296875 \nQ 39.15625 29.25 43.1875 25.515625 \nQ 47.21875 21.78125 47.21875 15.09375 \nQ 47.21875 7.46875 41.1875 3.015625 \nQ 35.15625 -1.421875 24.609375 -1.421875 \nQ 20.21875 -1.421875 15.453125 -0.5625 \nQ 10.6875 0.296875 5.421875 2 \nL 5.421875 11.28125 \nQ 10.40625 8.6875 15.234375 7.390625 \nQ 20.0625 6.109375 24.8125 6.109375 \nQ 31.15625 6.109375 34.5625 8.28125 \nQ 37.984375 10.453125 37.984375 14.40625 \nQ 37.984375 18.0625 35.515625 20.015625 \nQ 33.0625 21.96875 24.703125 23.78125 \nL 21.578125 24.515625 \nQ 13.234375 26.265625 9.515625 29.90625 \nQ 5.8125 33.546875 5.8125 39.890625 \nQ 5.8125 47.609375 11.28125 51.796875 \nQ 16.75 56 26.8125 56 \nQ 31.78125 56 36.171875 55.265625 \nQ 40.578125 54.546875 44.28125 53.078125 \nz\n\" id=\"DejaVuSans-115\"/>\n        <path d=\"M 18.3125 70.21875 \nL 18.3125 54.6875 \nL 36.8125 54.6875 \nL 36.8125 47.703125 \nL 18.3125 47.703125 \nL 18.3125 18.015625 \nQ 18.3125 11.328125 20.140625 9.421875 \nQ 21.96875 7.515625 27.59375 7.515625 \nL 36.8125 7.515625 \nL 36.8125 0 \nL 27.59375 0 \nQ 17.1875 0 13.234375 3.875 \nQ 9.28125 7.765625 9.28125 18.015625 \nL 9.28125 47.703125 \nL 2.6875 47.703125 \nL 2.6875 54.6875 \nL 9.28125 54.6875 \nL 9.28125 70.21875 \nz\n\" id=\"DejaVuSans-116\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-82\"/>\n       <use x=\"67.232422\" xlink:href=\"#DejaVuSans-97\"/>\n       <use x=\"128.511719\" xlink:href=\"#DejaVuSans-110\"/>\n       <use x=\"191.890625\" xlink:href=\"#DejaVuSans-100\"/>\n       <use x=\"255.367188\" xlink:href=\"#DejaVuSans-111\"/>\n       <use x=\"316.548828\" xlink:href=\"#DejaVuSans-109\"/>\n       <use x=\"413.960938\" xlink:href=\"#DejaVuSans-32\"/>\n       <use x=\"445.748047\" xlink:href=\"#DejaVuSans-70\"/>\n       <use x=\"499.642578\" xlink:href=\"#DejaVuSans-111\"/>\n       <use x=\"560.824219\" xlink:href=\"#DejaVuSans-114\"/>\n       <use x=\"599.6875\" xlink:href=\"#DejaVuSans-101\"/>\n       <use x=\"661.210938\" xlink:href=\"#DejaVuSans-115\"/>\n       <use x=\"713.310547\" xlink:href=\"#DejaVuSans-116\"/>\n      </g>\n     </g>\n    </g>\n   </g>\n   <g id=\"matplotlib.axis_2\">\n    <g id=\"ytick_1\">\n     <g id=\"line2d_3\">\n      <defs>\n       <path d=\"M 0 0 \nL -3.5 0 \n\" id=\"m996bcf011f\" style=\"stroke:#000000;stroke-width:0.8;\"/>\n      </defs>\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"54.015\" xlink:href=\"#m996bcf011f\" y=\"183.693506\"/>\n      </g>\n     </g>\n     <g id=\"text_3\">\n      <!-- 0.70 -->\n      <g transform=\"translate(24.749375 187.492725)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 31.78125 66.40625 \nQ 24.171875 66.40625 20.328125 58.90625 \nQ 16.5 51.421875 16.5 36.375 \nQ 16.5 21.390625 20.328125 13.890625 \nQ 24.171875 6.390625 31.78125 6.390625 \nQ 39.453125 6.390625 43.28125 13.890625 \nQ 47.125 21.390625 47.125 36.375 \nQ 47.125 51.421875 43.28125 58.90625 \nQ 39.453125 66.40625 31.78125 66.40625 \nz\nM 31.78125 74.21875 \nQ 44.046875 74.21875 50.515625 64.515625 \nQ 56.984375 54.828125 56.984375 36.375 \nQ 56.984375 17.96875 50.515625 8.265625 \nQ 44.046875 -1.421875 31.78125 -1.421875 \nQ 19.53125 -1.421875 13.0625 8.265625 \nQ 6.59375 17.96875 6.59375 36.375 \nQ 6.59375 54.828125 13.0625 64.515625 \nQ 19.53125 74.21875 31.78125 74.21875 \nz\n\" id=\"DejaVuSans-48\"/>\n        <path d=\"M 10.6875 12.40625 \nL 21 12.40625 \nL 21 0 \nL 10.6875 0 \nz\n\" id=\"DejaVuSans-46\"/>\n        <path d=\"M 8.203125 72.90625 \nL 55.078125 72.90625 \nL 55.078125 68.703125 \nL 28.609375 0 \nL 18.3125 0 \nL 43.21875 64.59375 \nL 8.203125 64.59375 \nz\n\" id=\"DejaVuSans-55\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-55\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_2\">\n     <g id=\"line2d_4\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"54.015\" xlink:href=\"#m996bcf011f\" y=\"141.805714\"/>\n      </g>\n     </g>\n     <g id=\"text_4\">\n      <!-- 0.75 -->\n      <g transform=\"translate(24.749375 145.604933)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 10.796875 72.90625 \nL 49.515625 72.90625 \nL 49.515625 64.59375 \nL 19.828125 64.59375 \nL 19.828125 46.734375 \nQ 21.96875 47.46875 24.109375 47.828125 \nQ 26.265625 48.1875 28.421875 48.1875 \nQ 40.625 48.1875 47.75 41.5 \nQ 54.890625 34.8125 54.890625 23.390625 \nQ 54.890625 11.625 47.5625 5.09375 \nQ 40.234375 -1.421875 26.90625 -1.421875 \nQ 22.3125 -1.421875 17.546875 -0.640625 \nQ 12.796875 0.140625 7.71875 1.703125 \nL 7.71875 11.625 \nQ 12.109375 9.234375 16.796875 8.0625 \nQ 21.484375 6.890625 26.703125 6.890625 \nQ 35.15625 6.890625 40.078125 11.328125 \nQ 45.015625 15.765625 45.015625 23.390625 \nQ 45.015625 31 40.078125 35.4375 \nQ 35.15625 39.890625 26.703125 39.890625 \nQ 22.75 39.890625 18.8125 39.015625 \nQ 14.890625 38.140625 10.796875 36.28125 \nz\n\" id=\"DejaVuSans-53\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-55\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-53\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_3\">\n     <g id=\"line2d_5\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"54.015\" xlink:href=\"#m996bcf011f\" y=\"99.917922\"/>\n      </g>\n     </g>\n     <g id=\"text_5\">\n      <!-- 0.80 -->\n      <g transform=\"translate(24.749375 103.717141)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 31.78125 34.625 \nQ 24.75 34.625 20.71875 30.859375 \nQ 16.703125 27.09375 16.703125 20.515625 \nQ 16.703125 13.921875 20.71875 10.15625 \nQ 24.75 6.390625 31.78125 6.390625 \nQ 38.8125 6.390625 42.859375 10.171875 \nQ 46.921875 13.96875 46.921875 20.515625 \nQ 46.921875 27.09375 42.890625 30.859375 \nQ 38.875 34.625 31.78125 34.625 \nz\nM 21.921875 38.8125 \nQ 15.578125 40.375 12.03125 44.71875 \nQ 8.5 49.078125 8.5 55.328125 \nQ 8.5 64.0625 14.71875 69.140625 \nQ 20.953125 74.21875 31.78125 74.21875 \nQ 42.671875 74.21875 48.875 69.140625 \nQ 55.078125 64.0625 55.078125 55.328125 \nQ 55.078125 49.078125 51.53125 44.71875 \nQ 48 40.375 41.703125 38.8125 \nQ 48.828125 37.15625 52.796875 32.3125 \nQ 56.78125 27.484375 56.78125 20.515625 \nQ 56.78125 9.90625 50.3125 4.234375 \nQ 43.84375 -1.421875 31.78125 -1.421875 \nQ 19.734375 -1.421875 13.25 4.234375 \nQ 6.78125 9.90625 6.78125 20.515625 \nQ 6.78125 27.484375 10.78125 32.3125 \nQ 14.796875 37.15625 21.921875 38.8125 \nz\nM 18.3125 54.390625 \nQ 18.3125 48.734375 21.84375 45.5625 \nQ 25.390625 42.390625 31.78125 42.390625 \nQ 38.140625 42.390625 41.71875 45.5625 \nQ 45.3125 48.734375 45.3125 54.390625 \nQ 45.3125 60.0625 41.71875 63.234375 \nQ 38.140625 66.40625 31.78125 66.40625 \nQ 25.390625 66.40625 21.84375 63.234375 \nQ 18.3125 60.0625 18.3125 54.390625 \nz\n\" id=\"DejaVuSans-56\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-56\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_4\">\n     <g id=\"line2d_6\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"54.015\" xlink:href=\"#m996bcf011f\" y=\"58.03013\"/>\n      </g>\n     </g>\n     <g id=\"text_6\">\n      <!-- 0.85 -->\n      <g transform=\"translate(24.749375 61.829349)scale(0.1 -0.1)\">\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-56\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-53\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"ytick_5\">\n     <g id=\"line2d_7\">\n      <g>\n       <use style=\"stroke:#000000;stroke-width:0.8;\" x=\"54.015\" xlink:href=\"#m996bcf011f\" y=\"16.142338\"/>\n      </g>\n     </g>\n     <g id=\"text_7\">\n      <!-- 0.90 -->\n      <g transform=\"translate(24.749375 19.941556)scale(0.1 -0.1)\">\n       <defs>\n        <path d=\"M 10.984375 1.515625 \nL 10.984375 10.5 \nQ 14.703125 8.734375 18.5 7.8125 \nQ 22.3125 6.890625 25.984375 6.890625 \nQ 35.75 6.890625 40.890625 13.453125 \nQ 46.046875 20.015625 46.78125 33.40625 \nQ 43.953125 29.203125 39.59375 26.953125 \nQ 35.25 24.703125 29.984375 24.703125 \nQ 19.046875 24.703125 12.671875 31.3125 \nQ 6.296875 37.9375 6.296875 49.421875 \nQ 6.296875 60.640625 12.9375 67.421875 \nQ 19.578125 74.21875 30.609375 74.21875 \nQ 43.265625 74.21875 49.921875 64.515625 \nQ 56.59375 54.828125 56.59375 36.375 \nQ 56.59375 19.140625 48.40625 8.859375 \nQ 40.234375 -1.421875 26.421875 -1.421875 \nQ 22.703125 -1.421875 18.890625 -0.6875 \nQ 15.09375 0.046875 10.984375 1.515625 \nz\nM 30.609375 32.421875 \nQ 37.25 32.421875 41.125 36.953125 \nQ 45.015625 41.5 45.015625 49.421875 \nQ 45.015625 57.28125 41.125 61.84375 \nQ 37.25 66.40625 30.609375 66.40625 \nQ 23.96875 66.40625 20.09375 61.84375 \nQ 16.21875 57.28125 16.21875 49.421875 \nQ 16.21875 41.5 20.09375 36.953125 \nQ 23.96875 32.421875 30.609375 32.421875 \nz\n\" id=\"DejaVuSans-57\"/>\n       </defs>\n       <use xlink:href=\"#DejaVuSans-48\"/>\n       <use x=\"63.623047\" xlink:href=\"#DejaVuSans-46\"/>\n       <use x=\"95.410156\" xlink:href=\"#DejaVuSans-57\"/>\n       <use x=\"159.033203\" xlink:href=\"#DejaVuSans-48\"/>\n      </g>\n     </g>\n    </g>\n    <g id=\"text_8\">\n     <!-- Accuracy -->\n     <g transform=\"translate(17.837813 147.879375)rotate(-90)scale(0.14 -0.14)\">\n      <defs>\n       <path d=\"M 34.1875 63.1875 \nL 20.796875 26.90625 \nL 47.609375 26.90625 \nz\nM 28.609375 72.90625 \nL 39.796875 72.90625 \nL 67.578125 0 \nL 57.328125 0 \nL 50.6875 18.703125 \nL 17.828125 18.703125 \nL 11.1875 0 \nL 0.78125 0 \nz\n\" id=\"DejaVuSans-65\"/>\n       <path d=\"M 48.78125 52.59375 \nL 48.78125 44.1875 \nQ 44.96875 46.296875 41.140625 47.34375 \nQ 37.3125 48.390625 33.40625 48.390625 \nQ 24.65625 48.390625 19.8125 42.84375 \nQ 14.984375 37.3125 14.984375 27.296875 \nQ 14.984375 17.28125 19.8125 11.734375 \nQ 24.65625 6.203125 33.40625 6.203125 \nQ 37.3125 6.203125 41.140625 7.25 \nQ 44.96875 8.296875 48.78125 10.40625 \nL 48.78125 2.09375 \nQ 45.015625 0.34375 40.984375 -0.53125 \nQ 36.96875 -1.421875 32.421875 -1.421875 \nQ 20.0625 -1.421875 12.78125 6.34375 \nQ 5.515625 14.109375 5.515625 27.296875 \nQ 5.515625 40.671875 12.859375 48.328125 \nQ 20.21875 56 33.015625 56 \nQ 37.15625 56 41.109375 55.140625 \nQ 45.0625 54.296875 48.78125 52.59375 \nz\n\" id=\"DejaVuSans-99\"/>\n       <path d=\"M 8.5 21.578125 \nL 8.5 54.6875 \nL 17.484375 54.6875 \nL 17.484375 21.921875 \nQ 17.484375 14.15625 20.5 10.265625 \nQ 23.53125 6.390625 29.59375 6.390625 \nQ 36.859375 6.390625 41.078125 11.03125 \nQ 45.3125 15.671875 45.3125 23.6875 \nL 45.3125 54.6875 \nL 54.296875 54.6875 \nL 54.296875 0 \nL 45.3125 0 \nL 45.3125 8.40625 \nQ 42.046875 3.421875 37.71875 1 \nQ 33.40625 -1.421875 27.6875 -1.421875 \nQ 18.265625 -1.421875 13.375 4.4375 \nQ 8.5 10.296875 8.5 21.578125 \nz\nM 31.109375 56 \nz\n\" id=\"DejaVuSans-117\"/>\n       <path d=\"M 32.171875 -5.078125 \nQ 28.375 -14.84375 24.75 -17.8125 \nQ 21.140625 -20.796875 15.09375 -20.796875 \nL 7.90625 -20.796875 \nL 7.90625 -13.28125 \nL 13.1875 -13.28125 \nQ 16.890625 -13.28125 18.9375 -11.515625 \nQ 21 -9.765625 23.484375 -3.21875 \nL 25.09375 0.875 \nL 2.984375 54.6875 \nL 12.5 54.6875 \nL 29.59375 11.921875 \nL 46.6875 54.6875 \nL 56.203125 54.6875 \nz\n\" id=\"DejaVuSans-121\"/>\n      </defs>\n      <use xlink:href=\"#DejaVuSans-65\"/>\n      <use x=\"66.658203\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"121.638672\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"176.619141\" xlink:href=\"#DejaVuSans-117\"/>\n      <use x=\"239.998047\" xlink:href=\"#DejaVuSans-114\"/>\n      <use x=\"281.111328\" xlink:href=\"#DejaVuSans-97\"/>\n      <use x=\"342.390625\" xlink:href=\"#DejaVuSans-99\"/>\n      <use x=\"397.371094\" xlink:href=\"#DejaVuSans-121\"/>\n     </g>\n    </g>\n   </g>\n   <g id=\"line2d_8\">\n    <defs>\n     <path d=\"M 0 1.5 \nC 0.397805 1.5 0.77937 1.341951 1.06066 1.06066 \nC 1.341951 0.77937 1.5 0.397805 1.5 0 \nC 1.5 -0.397805 1.341951 -0.77937 1.06066 -1.06066 \nC 0.77937 -1.341951 0.397805 -1.5 0 -1.5 \nC -0.397805 -1.5 -0.77937 -1.341951 -1.06066 -1.06066 \nC -1.341951 -0.77937 -1.5 -0.397805 -1.5 0 \nC -1.5 0.397805 -1.341951 0.77937 -1.06066 1.06066 \nC -0.77937 1.341951 -0.397805 1.5 0 1.5 \nz\n\" id=\"md24f1d5831\" style=\"stroke:#1f77b4;\"/>\n    </defs>\n    <g clip-path=\"url(#p29cc28a635)\">\n     <use style=\"fill:#1f77b4;stroke:#1f77b4;\" x=\"165.615\" xlink:href=\"#md24f1d5831\" y=\"211.618701\"/>\n     <use style=\"fill:#1f77b4;stroke:#1f77b4;\" x=\"165.615\" xlink:href=\"#md24f1d5831\" y=\"214.756364\"/>\n     <use style=\"fill:#1f77b4;stroke:#1f77b4;\" x=\"165.615\" xlink:href=\"#md24f1d5831\" y=\"167.691429\"/>\n     <use style=\"fill:#1f77b4;stroke:#1f77b4;\" x=\"165.615\" xlink:href=\"#md24f1d5831\" y=\"148.865455\"/>\n     <use style=\"fill:#1f77b4;stroke:#1f77b4;\" x=\"165.615\" xlink:href=\"#md24f1d5831\" y=\"130.039481\"/>\n     <use style=\"fill:#1f77b4;stroke:#1f77b4;\" x=\"165.615\" xlink:href=\"#md24f1d5831\" y=\"167.691429\"/>\n     <use style=\"fill:#1f77b4;stroke:#1f77b4;\" x=\"165.615\" xlink:href=\"#md24f1d5831\" y=\"120.626494\"/>\n     <use style=\"fill:#1f77b4;stroke:#1f77b4;\" x=\"165.615\" xlink:href=\"#md24f1d5831\" y=\"158.278442\"/>\n     <use style=\"fill:#1f77b4;stroke:#1f77b4;\" x=\"165.615\" xlink:href=\"#md24f1d5831\" y=\"148.865455\"/>\n     <use style=\"fill:#1f77b4;stroke:#1f77b4;\" x=\"165.615\" xlink:href=\"#md24f1d5831\" y=\"92.387532\"/>\n    </g>\n   </g>\n   <g id=\"line2d_9\">\n    <defs>\n     <path d=\"M 0 1.5 \nC 0.397805 1.5 0.77937 1.341951 1.06066 1.06066 \nC 1.341951 0.77937 1.5 0.397805 1.5 0 \nC 1.5 -0.397805 1.341951 -0.77937 1.06066 -1.06066 \nC 0.77937 -1.341951 0.397805 -1.5 0 -1.5 \nC -0.397805 -1.5 -0.77937 -1.341951 -1.06066 -1.06066 \nC -1.341951 -0.77937 -1.5 -0.397805 -1.5 0 \nC -1.5 0.397805 -1.341951 0.77937 -1.06066 1.06066 \nC -0.77937 1.341951 -0.397805 1.5 0 1.5 \nz\n\" id=\"m15f7559098\" style=\"stroke:#ff7f0e;\"/>\n    </defs>\n    <g clip-path=\"url(#p29cc28a635)\">\n     <use style=\"fill:#ff7f0e;stroke:#ff7f0e;\" x=\"388.815\" xlink:href=\"#m15f7559098\" y=\"127.843117\"/>\n     <use style=\"fill:#ff7f0e;stroke:#ff7f0e;\" x=\"388.815\" xlink:href=\"#m15f7559098\" y=\"82.974545\"/>\n     <use style=\"fill:#ff7f0e;stroke:#ff7f0e;\" x=\"388.815\" xlink:href=\"#m15f7559098\" y=\"130.039481\"/>\n     <use style=\"fill:#ff7f0e;stroke:#ff7f0e;\" x=\"388.815\" xlink:href=\"#m15f7559098\" y=\"64.148571\"/>\n     <use style=\"fill:#ff7f0e;stroke:#ff7f0e;\" x=\"388.815\" xlink:href=\"#m15f7559098\" y=\"17.083636\"/>\n     <use style=\"fill:#ff7f0e;stroke:#ff7f0e;\" x=\"388.815\" xlink:href=\"#m15f7559098\" y=\"54.735584\"/>\n     <use style=\"fill:#ff7f0e;stroke:#ff7f0e;\" x=\"388.815\" xlink:href=\"#m15f7559098\" y=\"92.387532\"/>\n     <use style=\"fill:#ff7f0e;stroke:#ff7f0e;\" x=\"388.815\" xlink:href=\"#m15f7559098\" y=\"130.039481\"/>\n     <use style=\"fill:#ff7f0e;stroke:#ff7f0e;\" x=\"388.815\" xlink:href=\"#m15f7559098\" y=\"82.974545\"/>\n     <use style=\"fill:#ff7f0e;stroke:#ff7f0e;\" x=\"388.815\" xlink:href=\"#m15f7559098\" y=\"64.148571\"/>\n    </g>\n   </g>\n   <g id=\"line2d_10\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 148.875 167.691429 \nL 182.355 167.691429 \nL 182.355 134.745974 \nL 148.875 134.745974 \nL 148.875 167.691429 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_11\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 165.615 167.691429 \nL 165.615 214.756364 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_12\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 165.615 134.745974 \nL 165.615 92.387532 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_13\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 157.245 214.756364 \nL 173.985 214.756364 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_14\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 157.245 92.387532 \nL 173.985 92.387532 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_15\"/>\n   <g id=\"line2d_16\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 372.075 118.979221 \nL 405.555 118.979221 \nL 405.555 64.148571 \nL 372.075 64.148571 \nL 372.075 118.979221 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_17\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 388.815 118.979221 \nL 388.815 130.039481 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_18\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 388.815 64.148571 \nL 388.815 17.083636 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_19\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 380.445 130.039481 \nL 397.185 130.039481 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_20\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 380.445 17.083636 \nL 397.185 17.083636 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_21\"/>\n   <g id=\"line2d_22\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 148.875 153.571948 \nL 182.355 153.571948 \n\" style=\"fill:none;stroke:#ff7f0e;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"line2d_23\">\n    <path clip-path=\"url(#p29cc28a635)\" d=\"M 372.075 82.974545 \nL 405.555 82.974545 \n\" style=\"fill:none;stroke:#ff7f0e;stroke-linecap:square;\"/>\n   </g>\n   <g id=\"patch_3\">\n    <path d=\"M 54.015 224.64 \nL 54.015 7.2 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_4\">\n    <path d=\"M 500.415 224.64 \nL 500.415 7.2 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_5\">\n    <path d=\"M 54.015 224.64 \nL 500.415 224.64 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n   <g id=\"patch_6\">\n    <path d=\"M 54.015 7.2 \nL 500.415 7.2 \n\" style=\"fill:none;stroke:#000000;stroke-linecap:square;stroke-linejoin:miter;stroke-width:0.8;\"/>\n   </g>\n  </g>\n </g>\n <defs>\n  <clipPath id=\"p29cc28a635\">\n   <rect height=\"217.44\" width=\"446.4\" x=\"54.015\" y=\"7.2\"/>\n  </clipPath>\n </defs>\n</svg>\n",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAfwAAAD4CAYAAAAJtFSxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8vihELAAAACXBIWXMAAAsTAAALEwEAmpwYAAAZOklEQVR4nO3df7RdZX3n8feHhBSnCk2AWgfyA1pU0Lqw3IlxWfxRf6HLQrXTNpixxJZhOSPWWrum2NqBolaxP1i00lrKuKAURKq1po6OtYJtWgnk3oIgZKExJSTU1RWStFarhiTf+WPv2xyuN3CSe+65N2e/X2uddfZ+9o/z9ZrL5z7Pec5zUlVIkqTRdtRcFyBJkmafgS9JUgcY+JIkdYCBL0lSBxj4kiR1wMK5LmC2nHDCCbVixYq5LkOSpKGZmJh4pKpOnO7YyAb+ihUrGB8fn+syJEkamiRbD3bMIX1JkjrAwJckqQMMfEmSOsDAlySpA4Ya+EnOSfJAks1JLpnm+PIkn0tyT5LPJzm559gFSb7SPi4YZt2SJB3phhb4SRYAVwOvAs4Azk9yxpTTfhv4k6p6DnA58N722iXApcDzgJXApUkWD6t2SZKOdMPs4a8ENlfVlqraA9wMnDflnDOAW9vt23qOvxL4bFXtqqrdwGeBc4ZQsyR1x7Y7Yf3vNM8aOcP8HP5JwLae/e00PfZeXwReB1wFvBZ4SpLjD3LtSVNfIMlFwEUAy5YtG1jhkjTytt0J158L+/bAgkVwwTpYunKuq9IAzbeFd34Z+ECStcDfAg8D+/q9uKquAa4BGBsbq9koUJKOZEn6O/HXp/bHHqvK/8QeaYY5pP8wsLRn/+S27T9U1T9V1euq6rnAr7Vt/9LPtZKkJ1ZV0z8euoN611Obc9711Gb/YOca9kekYQb+RuC0JKckWQSsBtb1npDkhCSTNb0D+FC7/RngFUkWt5P1XtG2SZIGYenKZhgfHM4fUUML/KraC1xME9SbgFuq6r4klyc5tz3txcADSb4MPBV4T3vtLuBdNH80bAQub9skSYMyGfKG/UjKqA7NjI2NlV+eI0mHJolD9kewJBNVNTbdMVfakySpAwx8SZI6wMCXJKkDDHxJkjrAwJckqQMMfEmSOsDAlySpAwx8SZI6wMCXJKkDDHxJkjrAwJckqQMMfEmSOsDAlySpAwx8SZI6wMCXJKkDDHxJkjrAwJckqQMMfEmSOsDAlySpAwx8SZI6wMCXJKkDDHxJkjrAwJckqQMMfEmSOsDAlySpAwx8SZI6wMCXJKkDDHxJkjrAwJckqQMMfEmSOsDAlySpAwx8SVJj252PfdZIMfAlSU3IX39us339uYb+CBpq4Cc5J8kDSTYnuWSa48uS3JbkriT3JHl1274iybeS3N0+PjjMuiVp5D24Hvbtabb37Wn2NVIWDuuFkiwArgZeDmwHNiZZV1X395z2TuCWqvrDJGcAnwJWtMe+WlVnDqteSeqUFWfDgkXN9oJFzb5GyjB7+CuBzVW1par2ADcD5005p4Bj2+3jgH8aYn2S1F1LV8IF65rtC9Y1+xopwwz8k4BtPfvb27ZelwH/Lcl2mt79W3qOndIO9f9Nkmn/9ExyUZLxJOM7duwYYOmS1AGTIW/Yj6T5NmnvfOC6qjoZeDVwQ5KjgK8By6rqucAvATclOXbqxVV1TVWNVdXYiSeeONTCJUmaz4YZ+A8DS3v2T27bev08cAtAVd0OHAOcUFXfqaqdbfsE8FXg6bNesSRJI2KYgb8ROC3JKUkWAauBdVPOeQh4KUCS02kCf0eSE9tJfyQ5FTgN2DK0yiVJOsINLfCrai9wMfAZYBPNbPz7klyepP3wJ28H/nuSLwIfBtZWVQEvBO5JcjfwUeBNVbVrWLVLUieMX/fYZ42UoX0sD6CqPkUzGa+37X/3bN8PvGCa6z4GfGzWC5Skrhq/Dj751mZ78nls7VxVo1mQpgM9esbGxmp8fHyuy5CkoVqyZAm7d++e0xoWL17Mrl0Ows6FJBNVNTbdsaH28CVJs2v37t0cVkeut4cP8JqrDruHn+SwrtPsMvAlSQfCfdMn4PTzHM4fQQa+JKkxttagH2HzbeEdSZI0Cwx8SZI6wMCXJKkDDHxJkjrAwJckqQMMfEmSOsDAlySpAwx8SZI6wMCXJKkDDHxJkjrAwJckqQMMfEmSOsDAlySpAwx8SVJj/Dq44bXNs0aOX48rSWpC/pNvbba/emvz7FfljpRU1VzXMCvGxsZqfHx8rsuQpOG67Li5rqBx2b/OdQWdlGSiqsamO2YPX5JGSH7j6xxWR663hw/wmqsOu4efhLrssC7VLDLwJUkHwn3TJ+D08xzOH0EGviSpMbbWoB9hfc3ST/ITSRbMdjGSJGl29PuxvBuBh5NckeTps1mQJEkavH4D/weAS4EXAZuS/F2SNyb53tkrTZIkDUpfgV9V/1ZVf1RVq4DnAHcA7wW+luSPk6yazSIlSdLMHPJKe1V1H3AlcA2wCPgZYH2SO5I8Z8D1qYMmtu7m6ts2M7F191yXInXLtjth/e80zxo5fc/ST3I08Frg54CX0vTy3wR8BFgM/Ga7ffrgy1RXTGzdzZprN7Bn734WLTyKGy9cxVnLF891WdLo23YnXH8u7NsDCxbBBetg6cq5rkoD1FfgJ/l94HyggBuAX6qq+3tO+VaSS4B/GnyJGkVJ+jpv7N0HPzaqq0RKc+LB9U3Y177m+cH1Bv6I6XdI/wzgYuCkqpoa9pMeAV4ysMo00qpq2sf4g7t4xjs/BcAz3vkpxh/cddBzJQ3QirObnn0WNM8rzp7rijRgrqWveWdi627GVixh/MFdDudLhyjJ4f9BvO3Opme/4uwZ9e5nVINmZMZr6Sd5D7Ctqj44pf1NNL3+X595mVJjMuQNe2nIlq50GH+E9Tuk/wbgrmnaJ4Cf7ffFkpyT5IEkm9v3/KceX5bktiR3Jbknyat7jr2jve6BJK/s9zUlSVL/s/S/H9gxTftO4Kn93KBdmvdq4OXAdmBjknVT5gO8E7ilqv4wyRnAp4AV7fZq4FnAfwb+OsnTq2pfn/VLktRp/fbwHwKmm8HxQprw7sdKYHNVbamqPcDNwHlTzing2Hb7OA7M+j8PuLmqvlNV/whsbu8nSZL60G8P/4+AK5MsAm5t215Ks9reFX3e4yRgW8/+duB5U865DPirJG8Bvhd4Wc+1G6Zce9LUF0hyEXARwLJly/osS5Kk0ddX4FfV7yQ5Afg9mtX1APYAV1XV+wdYz/nAde3rPR+4Icmz+724qq6hWQGQsbExp4hKktTqe6W9qnpHknfTfCYfYFNVfeMQXuthYGnP/sltW6+fB85pX+/2JMcAJ/R5rSRJOohDWku/qr5ZVRvbx6GEPcBG4LQkp7RvDawG1k055yGatwpIcjpwDM1kwXXA6iTfk+QU4DTAxZ4lSerToayl/xKaIfdlHBjWB6CqfuyJrq+qvUkuBj4DLAA+VFX3JbkcGK+qdcDbgT9O8jaaCXxrq1m94b4ktwD3A3uBNztDX5Kk/vW10l6StcAHgY/TfIHOJ4CnA6cAf1pVF89ijYfFlfaObK7UJR2e+fC7Mx9q6KrHW2mv3yH9XwYurqrzgUeBd1TVc4E/BQ51aF+SJA1Zv4F/KvDX7fZ3gCe32x8A1g64JkmSNGD9Bv5O4Cnt9sPA5EfljgeeNOiiJEnSYPU7aW898ArgXuAW4PeSvJxmRv1nZ6k2SZI0IP0G/sU0H5GDZnW9vcALaML/3bNQlyRJGqAnDPwkC2k+M/8XAFW1n/6X05UkSfPAE76HX1V7gd8Cjp79ciRJ0mzod9LeBuCs2SxEkiTNnn7fw/9j4LeTLAMmgG/2Hqyqfxh0YZIkaXD6Dfyb2uffneZY0SyVK0mS5ql+A/+UWa1CkiTNqr4Cv6q2znYhkiRp9vQV+Ele93jHq+rPB1OOJEmaDf0O6X/0IO2TX4fke/gamImtu//j+azli+e4GunIk2ROX3/xYn9v56O+PpZXVUf1PoBFwPNoltx94WwWqG6Z2LqbNdduAGDNtRv+I/wl9aeqZvQYxD127do1xz8FTaffz+E/RlXtraqNwK8CfzDYktRlG7bsZM/e/QA8unc/G7bsnOOKJGk0HFbg9/gX4AcHUIcEwKpTj2fRwuaf5dELj2LVqcfPcUWSNBoyOYTzuCclPzK1CXga8CsAVXX24EubmbGxsRofH5/rMnQYJrbuZmzFEsYf3OV7+NKQJaGfXND8lGSiqsamO9bvpL1xmgl6U2eCbADeOIPapO8yGfKGvSQNzuEuvLMf2FFV3x5wPZIkaRa48I4kqbHtzgPPS1fObS0auL4m7SV5T5I3TdP+piTvGnxZkqSh2nYnXH9us339uQfCXyOj31n6bwDumqZ9AvjZwZUjPXbhHUlD8uB62Nu+S7v3282+Rkq/gf/9wI5p2ncCTx1cOeo6F96R5si3v86BxVOr3dco6XfS3kPA2cCWKe0vBLYPtCJ12nQL7zhbXxqcfpbdzW98Hbi8fUzPj+4defoN/D8CrkyyCLi1bXsp8F7gitkoTN3kwjvS7DpoUI9fB59864H911wFY2uHUZKGpK+FdwCSvBf4RZp19AH2AFdV1SWzU9rMuPDOkcuFd6Q5Mn4dbPoEnH6eYX+EeryFd/oO/PZG3wuc0e5uqqpvDKC+WWHgH9lc7UuSDt2MV9pL8gPAwqraDmzsaT8ZeLSq/nkglUqSpFnR7yz9PwVeNU37K4EbBleORsGSJUtIMqMHMON7LFmyZI5/EpI0f/Q7aW8MePM07euB3xpcORoFu3fvnhfD8f3MRpakrui3h78Q+J5p2o85SLskSZpH+g38O4D/MU37m+l5T1+SJM1P/Q7p/xpwa5LncOBz+D8G/AjN5/H7kuQc4CpgAXBtVb1vyvErgZe0u/8J+P6q+r722D7g3vbYQ1V1br+vK0lS1/X7bXkbkjwf+F/A69rmfwD+J3BiP/dIsgC4Gng5zep8G5Osq6r7e17nbT3nvwV4bs8tvlVVZ/bzWpIk6bH6HdKnqr5YVWuq6lk0s/O/DHwc+Eyft1gJbK6qLVW1B7gZOO9xzj8f+HC/9UmSpIPrO/CTLEjyuiT/F/hH4CeADwI/1OctTgK29exvb9ume63lwCkcePsA4Jgk40k2JPmJg1x3UXvO+I4d033XjyRJ3fSEQ/pJngFcSPM1uN8EbqLp4b+hdzh+wFYDH62qfT1ty6vq4SSn0swnuLeqvtp7UVVdA1wDzUp7s1SbJElHnMft4SdZD2wAFgM/XVWnVtU7OfAdiofiYWBpz/7Jbdt0VjNlOL+qHm6ftwCf57Hv70uSpMfxREP6zwf+BLiyqv5mhq+1ETgtySntt+6tBtZNPSnJM2n+wLi9p21xku9pt08AXgDM1uiCJEkj54kC/7/QDPv/XZK7krytXVf/kFXVXuBimkl+m4Bbquq+JJcn6f2I3Wrg5nrsUm2nA+NJvgjcBrxvFt9OkCRp5PT1bXlJjgF+Cvg54Edp/lC4hOaz9LtntcLD5LflzZ2ZftPdTXc8xKe/9DVe9eyn8frnLZuzOiTpSDPjb8urqm/TfEnODUl+iGYS39uAdye5taqm+2Id6ZDddMdD/OrHm/WV1n/lEYAZhb4kqdH3x/ImVdXmqrqEZgLeTwN7Bl6VOuvTX/ra4+5Lkg7PIQf+pKraV1WfqKrHWzxHOiSvevbTHndfknR4+l1LXxqKyeH7QbyHL0k6wMDXvPP65y0z6CVpwA57SF+SJB057OFr4OrSY+Gy4+a6jKYOSRJg4GsW5De+Pi8+/56Eumyuq5Ck+cEhfc07E1t3c/Vtm5nYOi/XdJKkI5I9fM0rE1t3s+baDezZu59FC4/ixgtXcdbyxXNdliQd8ezha17ZsGUne/buZ3/Bo3v3s2HLzrkuSZJGgoGveWXVqcezaOFRLAgcvfAoVp16/FyXJEkjwSF9zStnLV/MjReuYsOWnaw69XiH8yVpQAx8zTtnLV9s0EvSgDmkL0lSBxj4kiR1gIEvSVIHGPiSJHWAga9556Y7HuIN/+cObrrjobkuRZJGhrP0Na/cdMdD/OrH7wVg/VceAfCrciVpAAx8zYokA7nPmitgzWFeu3ixH+2TpEkO6WvgquqwHzdu2MryX/kkAMt/5ZPcuGHrYd9r165dc/yTkKT5wx6+5pXJ4fs1V8BvvvaHHc6XpAGxh695ZzLkDXtJGhwDX5KkDjDwJUnqAANfkqQOMPAlSeoAA1+SpA4w8CVJ6gADX5KkDjDwJUnqAANfkqQOGGrgJzknyQNJNie5ZJrjVya5u318Ocm/9By7IMlX2scFw6xbkqQj3dDW0k+yALgaeDmwHdiYZF1V3T95TlW9ref8twDPbbeXAJcCY0ABE+21u4dVvyRJR7Jh9vBXApuraktV7QFuBs57nPPPBz7cbr8S+GxV7WpD/rPAObNarSRJI2SYgX8SsK1nf3vb9l2SLAdOAW49lGuTXJRkPMn4jh07BlK0JEmjYL5O2lsNfLSq9h3KRVV1TVWNVdXYiSeeOEulSZJ05Blm4D8MLO3ZP7ltm85qDgznH+q1kiRpimEG/kbgtCSnJFlEE+rrpp6U5JnAYuD2nubPAK9IsjjJYuAVbZskSerD0GbpV9XeJBfTBPUC4ENVdV+Sy4HxqpoM/9XAzVVVPdfuSvIumj8aAC6vql3Dql2SpCNdenJ1pIyNjdX4+Phcl6HDlIRR/bcpSbMlyURVjU13bL5O2pMkSQNk4EuS1AEGviRJHWDgS5LUAQa+JEkdYOBLktQBBr4kSR1g4EuS1AEGviRJHWDgS5LUAQa+JEkdYOBLktQBBr4kSR1g4EuS1AEGviRJHWDgS5LUAQa+JEkdYOBLktQBBr4kSR1g4EuS1AEGvuadia27H/MsSZo5A1/zysTW3ay5dgMAa67dYOhL0oAY+JpXNmzZyZ69+wF4dO9+NmzZOccVSdJoMPA1r6w69XgWHhUAFhwVVp16/BxXJEmjwcDX/JM89lmSNGML57oAdVP6CPOvvOfVjL3n4MeraoAVSdJoM/A1Jw4W1pOT9h7du5+jFx7FjReu4qzli4dcnSSNHgNf88pZyxdz44Wr2LBlJ6tOPd6wl6QBMfA175y1fLFBL0kD5qQ9SZI6wMCXJKkDDHxJkjrAwJckqQMMfEmSOsDAlySpAzKqq5Ul2QFsnes6dNhOAB6Z6yKkDvJ378i2vKpOnO7AyAa+jmxJxqtqbK7rkLrG373R5ZC+JEkdYOBLktQBBr7mq2vmugCpo/zdG1G+hy9JUgfYw5ckqQMMfEmSOsDA19Al+bUk9yW5J8ndSS5N8t4p55yZZFO7/WCS9VOO353kS8OsW5oNSfZN/ntO8pdJvm9A912b5AODuNeU+34+yQNtzXcn+a+Dfo32dVYkef1s3LurDHwNVZLnA68BfqSqngO8DLgN+Jkpp64GPtyz/5QkS9t7nD6MWqUh+VZVnVlVzwZ2AW+e64L6sKat+cyq+mg/FyRZeIivsQIw8AfIwNewPQ14pKq+A1BVj1TV3wK7kzyv57yf5rGBfwsH/ig4f8oxaVTcDpwEkGRlktuT3JXkC0me0bavTfLnSf5fkq8kef/kxUnemOTLSe4EXtDTviLJre2o2ueSLGvbr0vyh0k2JNmS5MVJPpRkU5Lr+i06yZIkf9Hef0OS57TtlyW5IcnfAzckOTHJx5JsbB8vaM97Uc+IwV1JngK8Dzi7bXvbTH+wAqrKh4+hPYAnA3cDXwb+AHhR2/7LwJXt9ipgvOeaB4FnAF9o9+8CzgC+NNf/e3z4mOkD+Eb7vAD4M+Ccdv9YYGG7/TLgY+32WmALcBxwDM0S4ktp/ph+CDgRWAT8PfCB9pq/BC5ot38O+It2+zrgZiDAecDXgR+m6QxOAGdOU+/ngQfa3+O7geOB3wcubY//GHB3u31Ze58ntfs3AT/abi8DNvXU94J2+8nAQuDFwCfn+v+fUXoc6hCLNCNV9Y0kZwFnAy8BPpLkEuAjwBeSvJ3vHs4H2EkzCrAa2AT8+xDLlmbTk5LcTdOz3wR8tm0/Drg+yWlAAUf3XPO5qvpXgCT3A8tp1sD/fFXtaNs/Ajy9Pf/5wOva7RuA9/fc6y+rqpLcC/xzVd3bXn8fzbD63dPUvKaqxid3kvwo8JMAVXVrkuOTHNseXldV32q3XwackWTy0mOTPJnmj5PfTXIj8OdVtb3nHA2IQ/oauqraV1Wfr6pLgYuBn6yqbcA/Ai+i+Q/HR6a59CPA1Ticr9Hyrao6kya0w4H38N8F3FbNe/s/TtObn/Sdnu19MKPO2+S99k+57/4Z3nfSN3u2jwJW1YH3/0+qqm9U1fuAC4EnAX+f5JkDeF1NYeBrqJI8o+2xTDqTA99q+GHgSmBLVW2f5vKP0/RMPjOrRUpzoKr+HfgF4O3tBLfjgIfbw2v7uMUdwIva3vXRwE/1HPsCzcgZwBpg/dSLZ2h9e1+SvJhmns7Xpznvr4C3TO4kObN9/sGqureqrgA2As8E/g14yoDr7DQDX8P2ZJphyvuT3EPzXvxl7bE/A57FQXrwVfVvVXVFVe0ZSqXSkFXVXcA9NBNT3w+8N8ld9NHTrqqv0fwu3U4zRL6p5/BbgDe2v3NvAN462Mq5DDirvf/7gAsOct4vAGPt5L77gTe17b/YfizxHuBR4NM0P4d9Sb7opL3BcGldSZI6wB6+JEkdYOBLktQBBr4kSR1g4EuS1AEGviRJHWDgS5LUAQa+JEkd8P8BRwxIZRxqTYcAAAAASUVORK5CYII=\n"
     },
     "metadata": {
      "needs_background": "light"
     }
    }
   ],
   "source": [
    "plt.figure(figsize=(8, 4))\n",
    "plt.plot([1]*10, svm_scores, '.')\n",
    "plt.plot([2]*10, forest_scores, '.')\n",
    "plt.boxplot([svm_scores, forest_scores], labels=('SVM', 'Random Forest'))\n",
    "plt.ylabel('Accuracy', fontsize=14)\n",
    "plt.show()"
   ]
  },
  {
   "source": [
    "为了进一步改善结果，可以进行如下操作:\n",
    "- 对更多模型，使用cross validation和grid search调整超参数\n",
    "- 使用更多的特征工程,例如:\n",
    "    - 是**SibSp**和**Parch**的和代替他们\n",
    "    - 尝试找出与**Survived**属性很好相关的部分\n",
    "- 尝试把年龄属性更改为年龄段属性\n"
   ],
   "cell_type": "markdown",
   "metadata": {}
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "           Survived\n",
       "AgeBucket          \n",
       "0.0        0.576923\n",
       "15.0       0.362745\n",
       "30.0       0.423256\n",
       "45.0       0.404494\n",
       "60.0       0.240000\n",
       "75.0       1.000000"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Survived</th>\n    </tr>\n    <tr>\n      <th>AgeBucket</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0.0</th>\n      <td>0.576923</td>\n    </tr>\n    <tr>\n      <th>15.0</th>\n      <td>0.362745</td>\n    </tr>\n    <tr>\n      <th>30.0</th>\n      <td>0.423256</td>\n    </tr>\n    <tr>\n      <th>45.0</th>\n      <td>0.404494</td>\n    </tr>\n    <tr>\n      <th>60.0</th>\n      <td>0.240000</td>\n    </tr>\n    <tr>\n      <th>75.0</th>\n      <td>1.000000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 44
    }
   ],
   "source": [
    "train_data[\"AgeBucket\"] = train_data[\"Age\"] // 15 * 15\n",
    "train_data[[\"AgeBucket\", \"Survived\"]].groupby(['AgeBucket']).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 77,
   "metadata": {},
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "                  Survived\n",
       "RelativesOnboard          \n",
       "0                 0.303538\n",
       "1                 0.552795\n",
       "2                 0.578431\n",
       "3                 0.724138\n",
       "4                 0.200000\n",
       "5                 0.136364\n",
       "6                 0.333333\n",
       "7                 0.000000\n",
       "10                0.000000"
      ],
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Survived</th>\n    </tr>\n    <tr>\n      <th>RelativesOnboard</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0.303538</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>0.552795</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>0.578431</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>0.724138</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>0.200000</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>0.136364</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>0.333333</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>10</th>\n      <td>0.000000</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "metadata": {},
     "execution_count": 77
    }
   ],
   "source": [
    "train_data[\"RelativesOnboard\"] = train_data[\"SibSp\"] + train_data[\"Parch\"]\n",
    "train_data[[\"RelativesOnboard\", \"Survived\"]].groupby(['RelativesOnboard']).mean()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ]
}