AutoPrognosis - Tutorial on using classifiers with explainers

[ ]:
# Install AutoPrognosis
!pip install autoprognosis
[ ]:
# stdlib
import json
import sys
import warnings

# third party
import numpy as np
import pandas as pd

warnings.filterwarnings("ignore")

# autoprognosis absolute
import autoprognosis.logger as log
from autoprognosis.studies.classifiers import ClassifierStudy

log.add(sink=sys.stderr, level="INFO")
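
The log.add call above controls how much of the study's progress is printed to stderr. To follow the optimizer in more detail, you can raise the verbosity; a minimal sketch reusing the same log.add API shown above (assuming the standard "DEBUG" level is supported):

[ ]:
# Optional (assumed): more verbose progress output during the study
log.add(sink=sys.stderr, level="DEBUG")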

Load dataset

AutoPrognosis expects pandas.DataFrames as input.
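
If your data lives in NumPy arrays rather than DataFrames, wrap it first. A minimal, self-contained sketch (the toy arrays and feature names below are made up for illustration):

[ ]:
# Hypothetical example: wrapping raw NumPy arrays in the pandas objects
# that AutoPrognosis expects (uses the np/pd imports from the cell above).
X_np = np.random.rand(100, 4)  # toy feature matrix
y_np = np.random.randint(0, 2, size=100)  # toy binary labels

X_df = pd.DataFrame(X_np, columns=[f"feature_{i}" for i in range(X_np.shape[1])])
y_sr = pd.Series(y_np, name="target")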

For this example, we will use the Breast Cancer Wisconsin (Diagnostic) dataset from scikit-learn.

[ ]:
# third party
# Load dataset
from sklearn.datasets import load_breast_cancer

X, Y = load_breast_cancer(return_X_y=True, as_frame=True)

X

Run a study with AutoPrognosis

[ ]:
dataset = X.copy()
dataset["target"] = Y
[ ]:
# List available classifiers

# autoprognosis absolute
from autoprognosis.plugins.prediction import Classifiers

Classifiers().list_available()
[ ]:
# stdlib
from pathlib import Path

workspace = Path("workspace")
study_name = "test_classification_studies"

study = ClassifierStudy(
    study_name=study_name,
    dataset=dataset,
    target="target",
    num_iter=100,  # DELETE THIS LINE FOR BETTER RESULTS.
    num_study_iter=1,  # DELETE THIS LINE FOR BETTER RESULTS.
    imputers=[],  # The dataset is complete, so imputation is not necessary
    classifiers=[
        "logistic_regression",
        "perceptron",
        "xgboost",
        "decision_trees",
    ],  # DELETE THIS LINE FOR BETTER RESULTS.
    feature_scaling=[],
    score_threshold=0.4,
    workspace=workspace,
)
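
The num_iter, num_study_iter, and classifiers caps above keep this demo fast. For a more thorough (and much slower) search, drop them, as the comments suggest; a sketch using only the arguments already shown:

[ ]:
# Sketch of an unrestricted study: no iteration caps and no classifier
# shortlist, so the full default search space is explored.
full_study = ClassifierStudy(
    study_name=study_name,
    dataset=dataset,
    target="target",
    imputers=[],  # dataset is complete
    feature_scaling=[],
    score_threshold=0.4,
    workspace=workspace,
)
# full_study.run()  # expect a much longer runtime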
[ ]:
study.run()
[ ]:
# autoprognosis absolute
from autoprognosis.utils.serialization import load_model_from_file
from autoprognosis.utils.tester import evaluate_estimator

model_path = workspace / study_name / "model.p"

model = load_model_from_file(model_path)
[ ]:
model.name()
[ ]:
evaluate_estimator(model, X, Y)
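
evaluate_estimator returns a dictionary of cross-validation metrics. As the "Value of information" section below shows, the "str" entry holds the formatted scores; a quick way to inspect them:

[ ]:
# Inspect the returned metrics; the "str" key (used again later in this
# notebook) holds the human-readable scores.
metrics = evaluate_estimator(model, X, Y)
print(metrics["str"])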

Interpretability

[ ]:
# autoprognosis absolute
from autoprognosis.plugins.explainers import Explainers
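
Like the classifier plugins earlier, the explainer plugins can be enumerated; a sketch assuming the same list_available API that Classifiers exposes above:

[ ]:
# List the available explainer plugins
Explainers().list_available()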
[ ]:
# Explain using Kernel SHAP
explainer = Explainers().get(
    "kernel_shap",
    model,
    X,
    Y,
    feature_names=X.columns,
    task_type="classification",
)
# Kernel SHAP is expensive, so plot on a 10% subsample
explainer.plot(X.sample(frac=0.1))
[ ]:
# Explain using Risk Effect Size
explainer = Explainers().get(
    "risk_effect_size",
    model,
    X,
    Y,
    task_type="classification",
)

explainer.plot(X)
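
Beyond the plot, you can work with the explainer's raw output directly; the next section relies on explain() returning a pandas object indexed by feature name, which is assumed here as well:

[ ]:
# Inspect the raw effect sizes, using the same explain(X, effect_size)
# call signature as the next section.
effects = explainer.explain(X, 0.5)
print(effects.head())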

Value of information

Finally, we measure how the model performs when its inputs are restricted to the features selected by the Risk Effect Size explainer at increasing effect-size thresholds.

[ ]:
def evaluate_for_effect_size(effect_size):
    """Return the features whose risk effect size meets the given threshold."""
    exp = Explainers().get(
        "risk_effect_size",
        model,
        X,
        Y,
        task_type="classification",
        effect_size=effect_size,
    )

    important_features = exp.explain(X, effect_size).index.tolist()

    return important_features


def evaluate_using_important_features(effect_size):
    """Evaluate the saved model using only the features selected at this effect size."""
    filtered_model = load_model_from_file(model_path)

    important_features = evaluate_for_effect_size(effect_size)
    X_filtered = X[important_features]

    metrics = evaluate_estimator(
        filtered_model,
        X_filtered,
        Y,
    )

    print("\033[1mEvaluation for effect size \033[0m", effect_size)
    print(
        "    >>> \033[1mSelected features for effect size\033[0m ", important_features
    )
    print("    >>> \033[1mSelected features count\033[0m ", len(important_features))
    print("    >>> \033[1mEvaluation:\033[0m ")
    print(f"        >>>> score = {metrics['str']}")
    print("========================================")
[ ]:
# Evaluate performance for different feature subsets defined by effect size
for effect_size in [0.5, 1.0, 1.5, 2.0]:
    evaluate_using_important_features(effect_size)

Congratulations!

Congratulations on completing this notebook tutorial! If you enjoyed it and would like to learn more about machine learning and AI for medicine, you can do so in the following ways.

Star AutoPrognosis on GitHub

The easiest way to help our community is to star the repos! This helps raise awareness of the tools we’re building.

Check out our website and paper for AutoPrognosis

Learn more about our lab and other work