diff --git "a/Lecci\303\263n_Random_Forest.ipynb" "b/Lecci\303\263n_Random_Forest.ipynb" new file mode 100644 index 00000000..82f065a2 --- /dev/null +++ "b/Lecci\303\263n_Random_Forest.ipynb" @@ -0,0 +1,817 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "id": "8ECiyrDL33HU" + }, + "outputs": [], + "source": [ + "import numpy as np\n", + "from numpy.random import seed, choice\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from collections import Counter\n", + "from sklearn.datasets import load_breast_cancer\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.metrics import *\n", + "from imblearn.metrics import specificity_score\n", + "import pandas as pd" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "_cq6PlyXx76r" + }, + "source": [ + "# 1.Creación de la clase RandomForest para problemas de clasificación\n", + "\n", + "Cada árbol (un total de n_estimators) se construye con un subconjunto del dataset con reemplazo (Bootstrap) y un subconjunto de variables. La variedad en las predicciones hace que RF sea más efectivo.\n", + "\n", + "La técnica aplicada se conoce como Bagging = Bootstrap + Aggregating." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": { + "id": "cuBn07pYxMt6" + }, + "outputs": [], + "source": [ + "class RandomForestBootstrap:\n", + "\n", + " def __init__(self, n_estimators, random_state, max_depth, min_samples_leaf, max_features, X, y):\n", + " self.n_estimators = n_estimators\n", + " self.random_state = random_state\n", + " self.max_depth = max_depth\n", + " self.min_samples_leaf = min_samples_leaf\n", + " self.max_features = max_features\n", + " self.X = X\n", + " self.y = y\n", + " self.models = []\n", + "\n", + " def get_bootstrap_datasets(self):\n", + " # Método para obtener conjuntos de datos bootstrap\n", + " seed(self.random_state) # Se utiliza una semilla aleatoria para reproducir el experimento\n", + " # Genera los índices bootstrap\n", + " idxs = [choice(len(self.X), len(self.X), replace=True) for _ in range(self.n_estimators)]\n", + " # Selección aleatoria de índices de características (max_features)\n", + " feature_idxs = [choice(range(self.X.shape[1]), self.max_features, replace=False) for _ in range(self.n_estimators)]\n", + " # Retorna los índices de las columnas y los datasets bootstrap como una lista de tuplas (X_bootstrap, y_bootstrap)\n", + " return feature_idxs, [(self.X[idxs[i],:][:,feature_idxs[i]], self.y[idxs[i]]) for i in range(self.n_estimators)]\n", + "\n", + " def fit(self):\n", + " # Método para ajustar el modelo (entrenar los árboles del bosque)\n", + " feature_idxs, data_sets = self.get_bootstrap_datasets()\n", + " for i, data in enumerate(data_sets):\n", + " X, y = data\n", + " # Si es clasificación, se entrena un árbol de clasificación\n", + " self.models.append((feature_idxs[i], DecisionTreeClassifier(max_depth=self.max_depth, min_samples_leaf=self.min_samples_leaf, max_features=self.max_features, random_state=self.random_state).fit(X, y)))\n", + "\n", + " def predict(self, X):\n", + " # Si hay modelos entrenados\n", + " predictions = np.vstack([model.predict(X[:,idxs]) for idxs, model in 
self.models])\n", + " # Se devuelve la clase más común entre las predicciones\n", + " predicciones = [Counter(predictions[:,i]).most_common(1)[0][0] for i in range(predictions.shape[1])]\n", + " return predicciones" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vCNSOLOI_e1m" + }, + "source": [ + "# 2.Uso para una tarea de clasificación con un modelo simple y otro complejo" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": { + "id": "9FkBX_2I_obQ" + }, + "outputs": [], + "source": [ + "# Cargar el conjunto de datos de cáncer de mama\n", + "data = load_breast_cancer()\n", + "X = data.data\n", + "y = data.target\n", + "\n", + "# Dividir los datos en conjuntos de entrenamiento y prueba\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "JHrb0CeUBHNX" + }, + "outputs": [], + "source": [ + "# Crear y entrenar un bosque simple\n", + "rf_simple = RandomForestBootstrap(n_estimators=5, max_depth=2, min_samples_leaf=20, max_features=X_train.shape[1]//2, random_state=42, X=X_train, y=y_train)\n", + "rf_simple.fit()\n", + "\n", + "# Bosque complejo\n", + "rf_complex = RandomForestBootstrap(n_estimators=100, max_depth=20, min_samples_leaf=1, max_features=int(X_train.shape[1]*0.8), random_state=42, X=X_train, y=y_train)\n", + "rf_complex.fit()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "0ijqDEFVFQah" + }, + "outputs": [], + "source": [ + "# Evaluar el modelo complejo en entrenamiento y prueba\n", + "train_pred_complex = rf_complex.predict(X_train)\n", + "test_pred_complex = rf_complex.predict(X_test)\n", + "\n", + "# Evaluar el modelo simple en entrenamiento y prueba\n", + "train_pred_simple = rf_simple.predict(X_train)\n", + "test_pred_simple = rf_simple.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": 
"YyyYE-ZsIV7L" + }, + "outputs": [], + "source": [ + "def get_metrics(y_train, y_test, y_pred_train, y_pred_test):\n", + " # Calcular métricas para el conjunto de entrenamiento\n", + " train_accuracy = accuracy_score(y_train, y_pred_train)\n", + " train_f1 = f1_score(y_train, y_pred_train)\n", + " train_auc = roc_auc_score(y_train, y_pred_train)\n", + " train_precision = precision_score(y_train, y_pred_train)\n", + " train_recall = recall_score(y_train, y_pred_train)\n", + " train_specificity = specificity_score(y_train, y_pred_train)\n", + "\n", + " # Calcular métricas para el conjunto de prueba\n", + " test_accuracy = accuracy_score(y_test, y_pred_test)\n", + " test_f1 = f1_score(y_test, y_pred_test)\n", + " test_auc = roc_auc_score(y_test, y_pred_test)\n", + " test_precision = precision_score(y_test, y_pred_test)\n", + " test_recall = recall_score(y_test, y_pred_test)\n", + " test_specificity = specificity_score(y_test, y_pred_test)\n", + "\n", + " # Calcular la diferencia entre métricas de entrenamiento y prueba\n", + " diff_accuracy = train_accuracy - test_accuracy\n", + " diff_f1 = train_f1 - test_f1\n", + " diff_auc = train_auc - test_auc\n", + " diff_precision = train_precision - test_precision\n", + " diff_recall = train_recall - test_recall\n", + " diff_specificity = train_specificity - test_specificity\n", + "\n", + " # Crear un DataFrame con los resultados\n", + " metrics_df = pd.DataFrame([[train_accuracy, train_f1, train_auc, train_precision, train_recall, train_specificity],[test_accuracy, test_f1, test_auc, test_precision, test_recall, test_specificity],[diff_accuracy, diff_f1, diff_auc, diff_precision, diff_recall, diff_specificity]],\n", + " columns = ['Accuracy', 'F1', 'AUC', 'Precision', 'Recall', 'Specificity'],\n", + " index = ['Train','Test', 'Diferencia'])\n", + "\n", + " return metrics_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, 
+ "id": "Ekaem9mpBC9Z", + "outputId": "62f8b9f3-227e-4209-bd37-14ff4e5f3f8c" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AccuracyF1AUCPrecisionRecallSpecificity
Train1.000001.0000001.0000001.0000001.0000001.000000
Test0.956140.9650350.9510320.9583330.9718310.930233
Diferencia0.043860.0349650.0489680.0416670.0281690.069767
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Accuracy F1 AUC Precision Recall Specificity\n", + "Train 1.00000 1.000000 1.000000 1.000000 1.000000 1.000000\n", + "Test 0.95614 0.965035 0.951032 0.958333 0.971831 0.930233\n", + "Diferencia 0.04386 0.034965 0.048968 0.041667 0.028169 0.069767" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Métricas del modelo complejo\n", + "get_metrics(y_train, y_test, train_pred_complex, test_pred_complex)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 143 + }, + "id": "k1KJp3wBIqYS", + "outputId": "3f48b14d-5242-448e-b547-8458e746d6ff" + }, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AccuracyF1AUCPrecisionRecallSpecificity
Train0.9560440.9646640.9565630.9750000.9545450.958580
Test0.9561400.9655170.9464460.9459460.9859150.906977
Diferencia-0.000096-0.0008530.0101170.029054-0.0313700.051603
\n", + "
\n", + "
\n", + "\n", + "
\n", + " \n", + "\n", + " \n", + "\n", + " \n", + "
\n", + "\n", + "\n", + "
\n", + " \n", + "\n", + "\n", + "\n", + " \n", + "
\n", + "
\n", + "
\n" + ], + "text/plain": [ + " Accuracy F1 AUC Precision Recall Specificity\n", + "Train 0.956044 0.964664 0.956563 0.975000 0.954545 0.958580\n", + "Test 0.956140 0.965517 0.946446 0.945946 0.985915 0.906977\n", + "Diferencia -0.000096 -0.000853 0.010117 0.029054 -0.031370 0.051603" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Métricas del modelo simple\n", + "get_metrics(y_train, y_test, train_pred_simple, test_pred_simple)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NWKZ1XQDJ0JI" + }, + "source": [ + "El modelo simple muestra mucha menos varianza que el modelo complejo y además tiene un sesgo bajo ya que obtiene buenos resultados en ambos conjuntos. El modelo complejo memoriza y tiene alta varianza.\n", + "\n", + "*Si cambias el muestreo (random_state en la función train_test_split) probablemente verás que el modelo simple con 2 estimadores funciona mejor sobre el conjunto de entrenamiento que sobre el conjunto de test, este mejor rendimiento en el conjuto de test puede ser debido al muestreo realizado, ya que a veces el conjunto de entrenamiento puede ser más difícil que el conjunto de prueba debido a la variabilidad en los datos de entrenamiento." 
+ ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.5" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/random.ipynb b/random.ipynb new file mode 100644 index 00000000..2d6cdff1 --- /dev/null +++ b/random.ipynb @@ -0,0 +1,346 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from ydata_profiling import ProfileReport\n", + "import numpy as np\n", + "from sklearn.tree import DecisionTreeClassifier\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "from sklearn.datasets import load_breast_cancer\n", + "from sklearn.model_selection import train_test_split\n", + "from imblearn.metrics import specificity_score\n", + "from sklearn.metrics import *\n", + "\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "metadata": {}, + "outputs": [], + "source": [ + "train_data = pd.read_csv('C:\\\\Users\\\\gamma\\\\Desktop\\\\Bootcamp\\\\13-Tree_decision\\\\tree-decision\\\\src\\\\datos_entrenados.csv')\n", + "test_data = pd.read_csv('C:\\\\Users\\\\gamma\\\\Desktop\\\\Bootcamp\\\\13-Tree_decision\\\\tree-decision\\\\src\\\\datos_testeo.csv')\n" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "metadata": {}, + "outputs": [], + "source": [ + "X_train = train_data.drop([\"Outcome\"], axis = 1)\n", + "y_train = train_data[\"Outcome\"]\n", + "X_test = test_data.drop([\"Outcome\"], axis = 1)\n", + "y_test = test_data[\"Outcome\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
RandomForestClassifier(n_estimators=60, random_state=42)
In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
" + ], + "text/plain": [ + "RandomForestClassifier(n_estimators=60, random_state=42)" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "modelo = RandomForestClassifier(n_estimators=60, random_state=42)\n", + "modelo.fit(X_train, y_train)" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,\n", + " 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0,\n", + " 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1,\n", + " 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,\n", + " 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1,\n", + " 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1,\n", + " 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0],\n", + " dtype=int64)" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_pred = modelo.predict(X_test)\n", + "y_pred" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7727272727272727" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import accuracy_score\n", + "\n", + "accuracy_score(y_test, y_pred)" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "La mejor combinación de pistas es: {'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 60}\n", + "Y con estas pistas, la precisión del detective es del 79.63%\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV, RandomizedSearchCV\n", + "\n", + "# Estas son todas las 
llaves diferentes que vamos a probar en la cerradura del tesoro.\n", + "# 'criterion' es el tipo de pista que el detective prefiere.\n", + "# 'max_depth' es qué tan lejos el detective sigue una sola pista antes de probar otra.\n", + "# 'min_samples_split' es cuántas pistas necesita el detective antes de decidir seguir un camino.\n", + "# 'min_samples_leaf' es el número mínimo de pistas finales que el detective quiere tener para estar seguro de su decisión.\n", + "hyperparams = {\n", + " \"n_estimators\": [40, 50, 60, 100],\n", + " \"criterion\": [\"gini\", \"entropy\"],\n", + " \"max_depth\": [1, 5, 10, 20, 30, 40],\n", + " \"min_samples_split\": [2, 3, 5, 10],\n", + " \"min_samples_leaf\": [2, 3, 4]\n", + "}\n", + "\n", + "# 'GridSearchCV' es como un robot que prueba todas las llaves por ti para ver cuál funciona mejor.\n", + "# Le damos el robot (GridSearchCV), nuestro libro de aventuras (modelo_arbol), todas las llaves diferentes (hyperparams),\n", + "# y le decimos que queremos encontrar la llave que nos da la mejor precisión (scoring='accuracy').\n", + "# El 'cv=10' significa que el robot va a probar cada llave 10 veces para estar realmente seguro de cuál es la mejor.\n", + "grid = GridSearchCV(modelo, hyperparams, scoring='accuracy', cv=10)\n", + "\n", + "# Ahora, le decimos al robot que comience a probar todas las llaves.\n", + "grid.fit(X_train, y_train)\n", + "mejores_parametros = grid.best_params_\n", + "\n", + "# Después de probar todas las llaves, el robot nos dirá cuál es la mejor combinación para abrir el tesoro.\n", + "print(f\"La mejor combinación de pistas es: {mejores_parametros}\")\n", + "print(f\"Y con estas pistas, la precisión del detective es del {grid.best_score_ * 100:.2f}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "La precisión de nuestra llave especial es del 75.32%\n" + ] + } + ], + "source": [ + "# Aquí estamos 
creando un nuevo cofre del tesoro, pero esta vez usando la llave especial que encontramos.\n", + "# 'mejores_parametros' es como las instrucciones secretas de cómo usar la llave.\n", + "# 'random_state = 42' es como un hechizo mágico para asegurarnos de que cada vez que intentemos abrir el cofre, todo suceda de la misma manera.\n", + "modelo_arbol_mejorado = RandomForestClassifier(**mejores_parametros, random_state = 42)\n", + "\n", + "# Ahora, le damos al cofre todas las cosas que sabemos sobre tesoros (X_train) y lo que realmente había dentro de los cofres antiguos (y_train).\n", + "# Esto es para enseñarle al cofre cómo usar la llave especial.\n", + "modelo_arbol_mejorado.fit(X_train, y_train)\n", + "\n", + "# Llegó el momento de la verdad. Vamos a ver si nuestra llave especial puede predecir lo que hay dentro de nuevos cofres del tesoro (X_test).\n", + "y_pred = modelo_arbol_mejorado.predict(X_test)\n", + "\n", + "# Para saber qué tan buena es nuestra llave especial, comparamos lo que predijo con lo que realmente había dentro de los cofres (y_test).\n", + "# Esto es como contar cuántas veces acertamos al adivinar lo que había dentro del cofre.\n", + "accuracy = accuracy_score(y_test, y_pred)\n", + "\n", + "# Finalmente, mostramos qué tan buena fue nuestra llave especial al abrir los cofres.\n", + "print(f\"La precisión de nuestra llave especial es del {accuracy * 100:.2f}%\")" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "metadata": {}, + "outputs": [], + "source": [ + "# Evaluar el modelo complejo en entrenamiento y prueba\n", + "train_pred = modelo_arbol_mejorado.predict(X_train)\n", + "test_pred = modelo_arbol_mejorado.predict(X_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "def get_metrics(y_train, y_test, y_pred_train, y_pred_test):\n", + " # Calcular métricas para el conjunto de entrenamiento\n", + " train_accuracy = accuracy_score(y_train, y_pred_train)\n", + " 
train_f1 = f1_score(y_train, y_pred_train)\n", + " train_auc = roc_auc_score(y_train, y_pred_train)\n", + " train_precision = precision_score(y_train, y_pred_train)\n", + " train_recall = recall_score(y_train, y_pred_train)\n", + " train_specificity = specificity_score(y_train, y_pred_train)\n", + " # Calcular métricas para el conjunto de prueba\n", + " test_accuracy = accuracy_score(y_test, y_pred_test)\n", + " test_f1 = f1_score(y_test, y_pred_test)\n", + " test_auc = roc_auc_score(y_test, y_pred_test)\n", + " test_precision = precision_score(y_test, y_pred_test)\n", + " test_recall = recall_score(y_test, y_pred_test)\n", + " test_specificity = specificity_score(y_test, y_pred_test)\n", + " # Calcular la diferencia entre métricas de entrenamiento y prueba\n", + " diff_accuracy = train_accuracy - test_accuracy\n", + " diff_f1 = train_f1 - test_f1\n", + " diff_auc = train_auc - test_auc\n", + " diff_precision = train_precision - test_precision\n", + " diff_recall = train_recall - test_recall\n", + " diff_specificity = train_specificity - test_specificity\n", + " # Crear un DataFrame con los resultados\n", + " metrics_df = pd.DataFrame([[train_accuracy, train_f1, train_auc, train_precision, train_recall, train_specificity],[test_accuracy, test_f1, test_auc, test_precision, test_recall, test_specificity],[diff_accuracy, diff_f1, diff_auc, diff_precision, diff_recall, diff_specificity]],\n", + " columns = ['Accuracy', 'F1', 'AUC', 'Precision', 'Recall', 'Specificity'],\n", + " index = ['Train','Test', 'Diferencia'])\n", + " return metrics_df" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
AccuracyF1AUCPrecisionRecallSpecificity
Train0.9837130.9763030.9798270.9856460.9671360.992519
Test0.7532470.6607140.7353540.6491230.6727270.797980
Diferencia0.2304670.3155890.2444740.3365230.2944090.194539
\n", + "
" + ], + "text/plain": [ + " Accuracy F1 AUC Precision Recall Specificity\n", + "Train 0.983713 0.976303 0.979827 0.985646 0.967136 0.992519\n", + "Test 0.753247 0.660714 0.735354 0.649123 0.672727 0.797980\n", + "Diferencia 0.230467 0.315589 0.244474 0.336523 0.294409 0.194539" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_metrics(y_train, y_test, train_pred, test_pred)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}