You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
When I generate counterfactual explanations and then look at the factual stored in the returned explanation structure, I observe small differences from the factual I passed in. Namely, the value in column "BP" changes slightly. A minimal working example (MWE) follows:
import os
import random
from urllib.request import urlretrieve
import dice_ml
from lightgbm import LGBMRegressor
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
def diabetes_df():
    """Download the diabetes data set and return a fixed 200-row sample.

    The tab-separated file is downloaded to a local temporary file, the
    ``SEX`` column is converted to a categorical of strings, and 200 rows
    are drawn with ``random_state=1``.
    """
    # safety measure for MacOS, see
    # https://docs.python.org/3/library/urllib.request.html#module-urllib.request
    os.environ["no_proxy"] = "*"

    source_url = "https://www4.stat.ncsu.edu/~boos/var.select/diabetes.tab.txt"
    local_path, _headers = urlretrieve(source_url)

    raw = pd.read_csv(local_path, sep="\t")
    # Cast to str first so that the categories end up being strings.
    as_text = raw.astype({"SEX": str})
    typed = as_text.astype({"SEX": "category"})
    return typed.sample(200, random_state=1)
def data_and_model(df, numerical, categorical, target_column):
    """Fit a preprocessing + LightGBM regression pipeline on ``df``.

    Args:
        df: Data frame holding the feature columns and the target column.
        numerical: Names of the columns to pass through ``StandardScaler``.
        categorical: Names of the columns to pass through ``OneHotEncoder``.
        target_column: Name of the column to predict.

    Returns:
        A ``(X, y, model)`` tuple: ``df`` without the target column, the
        target column itself, and the fitted pipeline.
    """
    np.random.seed(1)

    scaling = Pipeline(steps=[("scaler", StandardScaler())])
    encoding = Pipeline(steps=[("onehot", OneHotEncoder(handle_unknown="ignore"))])
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", scaling, numerical),
            ("cat", encoding, categorical),
        ]
    )

    features = df.drop(target_column, axis=1)
    target = df[target_column]

    pipeline = Pipeline(
        steps=[("preprocessor", preprocessor), ("regressor", LGBMRegressor())]
    )
    fitted = pipeline.fit(features, target)
    return features, target, fitted
# Reproduction script: fit a model, ask DiCE for counterfactuals of one row,
# then print the factual as stored by DiCE next to the row that was passed in.
# Data set
df = diabetes_df()
# Feature columns, split by the preprocessing applied in data_and_model().
numerical = ["AGE", "BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6"]
categorical = ["SEX"]
x_train, y_train, model = data_and_model(df, numerical, categorical, "Y")
# Use the first training row as the single factual (query) instance.
factuals = x_train[0:1]
# Seed both Python's and NumPy's global RNGs before the DiCE calls below.
seed = 5
random.seed(seed)
np.random.seed(seed)
# Ask for counterfactual explanations
# dice_ml.Data is given features and the outcome column "Y" in one frame.
df_for_dice = pd.concat([x_train, y_train], axis=1)
dice_data = dice_ml.Data(dataframe=df_for_dice, continuous_features=numerical, outcome_name="Y")
dice_model = dice_ml.Model(model=model, backend="sklearn", model_type="regressor")
dice_explainer = dice_ml.Dice(dice_data, dice_model, method="genetic")
# "AGE" and "SEX" are deliberately absent from this list.
features_to_vary = ["BMI", "BP", "S1", "S2", "S3", "S4", "S5", "S6"]
explanations = dice_explainer.generate_counterfactuals(
factuals,
total_CFs=5,
desired_range=[60, 90],
features_to_vary=features_to_vary,
posthoc_sparsity_algorithm="binary",
)
# Compare the factual held in the explanation object with the original input
# row; the reported problem is that the two differ in column "BP".
print(explanations.cf_examples_list[0].test_instance_df)
print(factuals)
And here is the output:
AGE SEX BMI BP S1 S2 S3 S4 S5 S6 Y
0 60 1 23.4 76.669998 247 148.0 65.0 3.8 5.1358 77 93.585579
AGE SEX BMI BP S1 S2 S3 S4 S5 S6
246 60 1 23.4 76.67 247 148.0 65.0 3.8 5.1358 77
The text was updated successfully, but these errors were encountered:
Yours looks like a precision issue. Mine looks even worse: the categorical feature totally flipped! See the original factual (blue "V0") and the factual in the prediction (red "V0"). I hope @amit-sharma can shed some light on this. Many thanks.
When I generate counterfactual explanations and then look at the factual stored in the returned explanation structure, I observe small differences from the factual I passed in. Namely, the value in column "BP" changes slightly. A minimal working example (MWE) follows:
And here is the output:
The text was updated successfully, but these errors were encountered: