csce585-mlsystems / project-athena

This is the course project for CSCE585: ML Systems. Students will build their machine learning systems based on the provided infrastructure --- Athena.

Can't Evaluate Generated AEs #22

Closed Rasika-prog closed 4 years ago

Rasika-prog commented 4 years ago

We generated AEs and updated the data-mnist.json file:

"task1_aes" : [
    "FGSM_eps0.1.npy",
    "FGSM_eps0.15.npy",
    "FGSM_eps0.2.npy",
    "FGSM_eps0.25.npy",
    "FGSM_eps0.3.npy",
    "PGD_eps0.1.npy",
    "PGD_eps0.15.npy",
    "PGD_eps0.2.npy",
    "PGD_eps0.25.npy",
    "PGD_eps0.3.npy"
]
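
Before running the notebook, one quick way to confirm these entries resolve correctly is to load the config with plain json and compare each AE array against the labels. A minimal sketch (it assumes the files sit under the config's 'dir' and that 'label_file' points at the matching label array; the relative path depends on the notebook's working directory):

import json
import os

import numpy as np

with open("../src/configs/demo/data-mnist.json") as f:
    data_configs = json.load(f)

labels = np.load(os.path.join(data_configs["dir"], data_configs["label_file"]))
for name in data_configs["task1_aes"]:
    x_adv = np.load(os.path.join(data_configs["dir"], name))
    # each AE file should contain exactly one example per label
    print(name, "AEs:", x_adv.shape, "| labels:", labels.shape)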

However, we get an IndexError when we try to evaluate the generated AEs in the provided 'Task1_GenerateAEs_ZeroKnowledgeModel' Jupyter notebook tutorial.

The last two cells do not work as provided; we modified them as shown below.

Evaluate the generated AEs

# python example: tutorials/eval_model.py
# api: utils.metrics.error_rate
# (os/numpy and the loader helpers are also needed by the cells below;
#  the module paths follow the project layout)
import os

import numpy as np

from utils.file import load_from_json
from utils.model import load_lenet, load_pool
from utils.metrics import error_rate, get_corrections
from models.athena import Ensemble, ENSEMBLE_STRATEGY

copied from tutorials/eval_model.py

def evaluate(trans_configs, model_configs, data_configs, save=False, output_dir=None):
    """
    Apply transformation(s) on images.
    :param trans_configs: dictionary. The collection of the parameterized transformations to test,
        in the form of {configsx: {param: value,}}. The key of a configuration is 'configs'x,
        where 'x' is the id of the corresponding weak defense.
    :param model_configs: dictionary. Defines model-related information,
        such as the location, the undefended model, the file format, etc.
    :param data_configs: dictionary. Defines data-related information,
        such as the location, the file for the true labels, the file for the benign samples,
        and the files for the adversarial examples.
    :param save: boolean. Whether to save the transformed samples.
    :param output_dir: path or str. The location to store the transformed samples.
        It cannot be None when save is True.
    :return:
    """
    # load the baseline defense (PGD-ADT model)
    baseline = load_lenet(file=model_configs.get('pgd_trained'), trans_configs=None,
                          use_logits=False, wrap=False)

    # get the undefended model (UM)
    file = os.path.join(model_configs.get('dir'), model_configs.get('um_file'))
    undefended = load_lenet(file=file,
                            trans_configs=trans_configs.get('configs0'),
                            wrap=True)
    print(">>> um:", type(undefended))

    # load the weak defenses into a pool
    pool, _ = load_pool(trans_configs=trans_configs,
                        model_configs=model_configs,
                        active_list=True,
                        wrap=True)
    # create an AVEP ensemble from the WD pool
    wds = list(pool.values())
    print(">>> wds:", type(wds), type(wds[0]))
    ensemble = Ensemble(classifiers=wds, strategy=ENSEMBLE_STRATEGY.AVEP.value)

    # load the benign samples
    bs_file = os.path.join(data_configs.get('dir'), data_configs.get('bs_file'))
    x_bs = np.load(bs_file)
    img_rows, img_cols = x_bs.shape[1], x_bs.shape[2]

    # load the corresponding true labels
    label_file = os.path.join(data_configs.get('dir'), data_configs.get('label_file'))
    labels = np.load(label_file)

    # get indices of benign samples that are correctly classified by the targeted model
    print(">>> Evaluating UM on [{}], it may take a while...".format(bs_file))
    pred_bs = undefended.predict(x_bs)
    corrections = get_corrections(y_pred=pred_bs, y_true=labels)

    # evaluate the AEs
    results = {}
    ae_list = data_configs.get('task1_aes')
    ae_file = os.path.join(data_configs.get('dir'), ae_list[0])
    x_adv = np.load(ae_file)

    # evaluate the undefended model on the AE
    print(">>> Evaluating UM on [{}], it may take a while...".format(ae_file))
    pred_adv_um = undefended.predict(x_adv)
    err_um = error_rate(y_pred=pred_adv_um, y_true=labels, correct_on_bs=corrections)
    # track the result
    results['UM'] = err_um

    # evaluate the ensemble on the AE
    print(">>> Evaluating ensemble on [{}], it may take a while...".format(ae_file))
    pred_adv_ens = ensemble.predict(x_adv)
    err_ens = error_rate(y_pred=pred_adv_ens, y_true=labels, correct_on_bs=corrections)
    # track the result
    results['Ensemble'] = err_ens

    # evaluate the baseline on the AE
    print(">>> Evaluating baseline model on [{}], it may take a while...".format(ae_file))
    pred_adv_bl = baseline.predict(x_adv)
    err_bl = error_rate(y_pred=pred_adv_bl, y_true=labels, correct_on_bs=corrections)
    # track the result
    results['PGD-ADT'] = err_bl

    # TODO: collect and dump the evaluation results to file(s) such that you can analyze them later.
    print(">>> Evaluations on [{}]:\n{}".format(ae_file, results))

# load experiment configurations
trans_configs = load_from_json("../src/configs/demo/athena-mnist.json")
model_configs = load_from_json("../src/configs/demo/model-mnist.json")
data_configs = load_from_json("../src/configs/demo/data-mnist.json")

output_root = "../results"

# evaluate
evaluate(trans_configs=trans_configs,
         model_configs=model_configs,
         data_configs=data_configs,
         save=True,
         output_dir=output_root)


Loading model [../models/baseline/advTrained-mnist-adtC.h5]...
Loading model [../models/cnn/model-mnist-cnn-clean.h5]...
>>> um: <class 'models.keras.WeakDefense'>
Loading model [../models/cnn/model-mnist-cnn-flip_horizontal.h5]...
Loading model [../models/cnn/model-mnist-cnn-affine_both_stretch.h5]...
Loading model [../models/cnn/model-mnist-cnn-morph_gradient.h5]...
Loaded 3 models.
>>> wds: <class 'list'> <class 'models.keras.WeakDefense'>
>>> Evaluating UM on [../data/subsamples-mnist-ratio_0.1-200530.156.npy], it may take a while...
>>> Evaluating UM on [../data/FGSM_eps0.1.npy], it may take a while...

IndexError                                Traceback (most recent call last)
<ipython-input-...> in <module>
     11                          data_configs=data_configs,
     12                          save=True,
---> 13                          output_dir=output_root)

<ipython-input-...> in evaluate(trans_configs, model_configs, data_configs, save, output_dir)
     69     print(">>> Evaluating UM on [{}], it may take a while...".format(ae_file))
     70     pred_adv_um = undefended.predict(x_adv)
---> 71     err_um = error_rate(y_pred=pred_adv_um, y_true=labels, correct_on_bs=corrections)
     72     # track the result
     73     results['UM'] = err_um

~\PycharmProjects\project-athena-master\src\utils\metrics.py in error_rate(y_pred, y_true, correct_on_bs)
     27     # that is f(x') != f(x).
     28     if correct_on_bs is not None:
---> 29         num_fooled = np.sum([1. for i in range(amount) if (i in correct_on_bs) and (y_pred[i] != y_true[i])])
     30     else:
     31         num_fooled = np.sum([1. for i in range(amount) if (y_pred[i] != y_true[i])])

~\PycharmProjects\project-athena-master\src\utils\metrics.py in <listcomp>(.0)
     27     # that is f(x') != f(x).
     28     if correct_on_bs is not None:
---> 29         num_fooled = np.sum([1. for i in range(amount) if (i in correct_on_bs) and (y_pred[i] != y_true[i])])
     30     else:
     31         num_fooled = np.sum([1. for i in range(amount) if (y_pred[i] != y_true[i])])

IndexError: index 5 is out of bounds for axis 0 with size 5
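
The IndexError itself says that error_rate reads position 5 of an array that only has 5 entries, i.e. y_pred and y_true do not cover the same number of samples. Printing the shapes just before the failing call shows which side is short (a minimal diagnostic sketch):

print("x_adv:", x_adv.shape)              # loaded AEs
print("pred_adv_um:", pred_adv_um.shape)  # predictions on those AEs
print("labels:", labels.shape)            # labels loaded from label_file
print("corrections:", np.asarray(corrections).shape)
# if the AE file was generated from a different subsample than label_file
# describes, these lengths disagree and error_rate indexes past the end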
MENG2010 commented 4 years ago

Incorrect data were fed in, or the data were not processed correctly.
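
In other words, the AE files listed under task1_aes were presumably generated from a different subsample than the one bs_file/label_file describe, so the label array does not line up with the loaded AEs. A defensive check before calling error_rate would make the mismatch fail fast (a sketch; this assertion is not part of utils.metrics):

# sketch: fail fast on mismatched inputs instead of an IndexError inside error_rate
assert len(x_adv) == len(labels), \
    "AE/label mismatch: {} AEs vs {} labels".format(len(x_adv), len(labels))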