AutoPrognosis - Tutorial on using classifiers with explainers
[ ]:
# Install AutoPrognosis
!pip install autoprognosis
[ ]:
# stdlib
import json
import sys
import warnings
# third party
import numpy as np
import pandas as pd
# Install a blanket "ignore" warnings filter. It is placed before the
# autoprognosis imports below, so it is already active when they run.
warnings.filterwarnings("ignore")
# autoprognosis
import autoprognosis.logger as log
from autoprognosis.studies.classifiers import ClassifierStudy
# Register stderr as a log sink at level "INFO".
log.add(sink=sys.stderr, level="INFO")
Load dataset
AutoPrognosis expects its input as a `pandas.DataFrame`.
For this example, we will use the Breast Cancer Wisconsin Dataset.
[ ]:
# third party
from sklearn.datasets import load_breast_cancer
# Fetch the Breast Cancer Wisconsin data as pandas objects:
# X receives the features, Y the target.
X, Y = load_breast_cancer(
    as_frame=True,
    return_X_y=True,
)
# Show the feature frame as the cell output.
X
Run a study with AutoPrognosis
[ ]:
# Combine features and labels into a single frame. `assign` returns a new
# DataFrame, so X itself is left untouched.
dataset = X.assign(target=Y)
[ ]:
# List available classifiers
# autoprognosis
from autoprognosis.plugins.prediction import Classifiers
# The returned listing is shown as the cell output.
Classifiers().list_available()
[ ]:
# stdlib
from pathlib import Path
# Relative directory handed to the study as its workspace.
workspace = Path("workspace")
study_name = "test_classification_studies"
# Configure the classifier study over `dataset`, whose "target" column holds the labels.
study = ClassifierStudy(
study_name=study_name,
dataset=dataset,
target="target",
num_iter=100, # Tutorial setting: delete this line for better results.
num_study_iter=1, # Tutorial setting: delete this line for better results.
imputers=[], # Dataset is complete, so imputation is not necessary
classifiers=[
"logistic_regression",
"perceptron",
"xgboost",
"decision_trees",
], # Tutorial setting: delete this line for better results.
feature_scaling=[],
score_threshold=0.4,
workspace=workspace,
)
[ ]:
# Execute the study configured in the previous cell.
study.run()
[ ]:
# autoprognosis
from autoprognosis.utils.serialization import load_model_from_file
from autoprognosis.utils.tester import evaluate_estimator
# Model file inside this study's folder of the workspace.
# NOTE(review): presumably written by study.run() — confirm against the ClassifierStudy docs.
model_path = workspace / study_name / "model.p"
model = load_model_from_file(model_path)
[ ]:
# Display the name reported by the loaded model.
model.name()
[ ]:
# Evaluate the loaded model on the full feature frame and labels; the result is the cell output.
evaluate_estimator(model, X, Y)
Interpretability
[ ]:
# autoprognosis absolute
from autoprognosis.plugins.explainers import Explainers
[ ]:
# Explain using Kernel SHAP
explainer = Explainers().get(
    "kernel_shap",
    model,
    X,
    Y,
    feature_names=X.columns,
    task_type="classification",
)
# Plot on a 10% subsample of the rows. The fixed random_state makes the
# subsample (and therefore the figure) identical across Restart & Run All.
explainer.plot(X.sample(frac=0.1, random_state=0))
[ ]:
# Explain using Risk Effect Size
# Rebinds `explainer`, replacing the Kernel SHAP explainer from the previous cell.
explainer = Explainers().get(
"risk_effect_size",
model,
X,
Y,
task_type="classification",
)
# Plot over the full feature frame.
explainer.plot(X)
Value of information
[ ]:
def evaluate_for_effect_size(effect_size):
    """Return the feature labels selected by the risk-effect-size explainer.

    A "risk_effect_size" explainer is built for the notebook-level ``model``,
    ``X`` and ``Y``; its ``explain`` output is then reduced to the labels of
    its index.

    Args:
        effect_size: Threshold passed both to the explainer constructor and to
            ``explain``.

    Returns:
        list: The index labels of the explanation, in the order returned.
    """
    risk_explainer = Explainers().get(
        "risk_effect_size",
        model,
        X,
        Y,
        task_type="classification",
        effect_size=effect_size,
    )
    explanation = risk_explainer.explain(X, effect_size)
    return explanation.index.tolist()
def evaluate_using_important_feature(effect_size):
    """Evaluate the saved model using only the features selected for ``effect_size``.

    The feature subset comes from ``evaluate_for_effect_size``. A fresh copy of
    the model is loaded from ``model_path`` and handed to ``evaluate_estimator``
    together with the column-filtered ``X`` and the labels ``Y``. A short report
    is printed; nothing is returned.

    If no feature is selected for the given threshold, the evaluation is
    skipped (with a printed notice) instead of passing a zero-column frame to
    ``evaluate_estimator``.

    Args:
        effect_size: Threshold forwarded to ``evaluate_for_effect_size``.
    """
    important_features = evaluate_for_effect_size(effect_size)

    # Guard: an empty selection would produce a frame with no columns, which
    # cannot be meaningfully evaluated.
    if not important_features:
        print("\033[1mEvaluation for effect size \033[0m", effect_size)
        print(" >>> no features selected; skipping evaluation")
        print("========================================")
        return

    # Load a fresh model from disk so the notebook-level `model` is not reused here.
    filtered_model = load_model_from_file(model_path)
    X_filtered = X[important_features]
    metrics = evaluate_estimator(
        filtered_model,
        X_filtered,
        Y,
    )

    # The \033[1m ... \033[0m sequences are ANSI codes that render the labels in bold.
    print("\033[1mEvaluation for effect size \033[0m", effect_size)
    print(
        " >>> \033[1mSelected features for effect size\033[0m ", important_features
    )
    print(" >>> \033[1mSelected features count\033[0m ", len(important_features))
    print(" >>> \033[1mEvaluation:\033[0m ")
    print(f" >>>> score = {metrics['str']}")
    print("========================================")
[ ]:
# Evaluate performance for different feature subsets defined by effect size
for effect_size in (0.5, 1.0, 1.5, 2.0):
    evaluate_using_important_feature(effect_size)
Congratulations!
Congratulations on completing this notebook tutorial! If you enjoyed this and would like to learn more about machine learning and AI for medicine, you can do so in the following ways!
Star AutoPrognosis on GitHub
The easiest way to help our community is to star the repos! This helps raise awareness of the tools we’re building.