Evaluation

Protocols and Metrics Code Examples

# --- CONFIG
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# ---------

# --- TRANSFORMATIONS
train_transform = transforms.Compose([
    RandomCrop(28, padding=4),
    ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
test_transform = transforms.Compose([
    ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])
# ---------

# --- BENCHMARK CREATION
mnist_train = MNIST('./data/mnist', train=True,
                    download=True, transform=train_transform)
mnist_test = MNIST('./data/mnist', train=False,
                   download=True, transform=test_transform)
benchmark = nc_benchmark(
    mnist_train, mnist_test, 5, task_labels=False, seed=1234)
# ---------

# MODEL CREATION
model = SimpleMLP(num_classes=benchmark.n_classes)

# DEFINE THE EVALUATION PLUGIN AND LOGGER
# The evaluation plugin manages the metrics computation.
# It takes as argument a list of metrics and a list of loggers.
# The evaluation plugin calls the loggers to serialize the metrics
# and save them in persistent memory or print them in the standard output.

# log to Tensorboard
tb_logger = TensorboardLogger()

# log to text file
text_logger = TextLogger(open('log.txt', 'a'))

# print to stdout
interactive_logger = InteractiveLogger()

eval_plugin = EvaluationPlugin(
    accuracy_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    loss_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    timing_metrics(epoch=True, epoch_running=True),
    cpu_usage_metrics(experience=True),
    ExperienceForgetting(),
    StreamConfusionMatrix(num_classes=benchmark.n_classes, save_image=False),
    disk_usage_metrics(minibatch=True, epoch=True, experience=True, stream=True),
    loggers=[interactive_logger, text_logger, tb_logger])

# CREATE THE STRATEGY INSTANCE (NAIVE)
cl_strategy = Naive(
    model, SGD(model.parameters(), lr=0.001, momentum=0.9),
    CrossEntropyLoss(), train_mb_size=500, train_epochs=1, test_mb_size=100,
    device=device, evaluator=eval_plugin)

# TRAINING LOOP
print('Starting experiment...')
results = []
for experience in benchmark.train_stream:
    print("Start of experience: ", experience.current_experience)
    print("Current Classes: ", experience.classes_in_this_experience)

    # train returns a dictionary which contains all the metric values
    res = cl_strategy.train(experience, num_workers=4)
    print('Training completed')

    print('Computing accuracy on the whole test set')
    # test also returns a dictionary which contains all the metric values
    results.append(cl_strategy.eval(benchmark.test_stream, num_workers=4))

🤝 Run it on Google Colab

You can run this chapter and play with it on Google Colaboratory:

Notebook currently unavailable.

Last updated