Trouble with ni_benchmark #1539

@Vincent710129

Description

🐛 Describe the bug
When using lists of self-defined training and test PyTorch datasets for domain-incremental learning with ni_benchmark, the target labels are altered.
I prepare training and test data from two domains, both labeled with the same targets in [0, 1, 2], and wrap them with ni_benchmark to perform domain-incremental learning. However, during training the target labels coming from domain 2 appear altered to [3, 4, 5], and the criterion raises an error that the target label is out of bounds for a 3-class output. I want to use the same 3 output neurons to classify the same target labels [0, 1, 2] from the two domains, but I can't achieve this with ni_benchmark.
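A quick way to see what the stream actually serves, once the scenario from the repro below is built, is to dump the unique targets per experience. This is a minimal diagnostic sketch, assuming Avalanche classification dataset items are (x, y, task_label) tuples (the item[1] access below relies on that assumption):

import torch

# Diagnostic sketch: print the unique class labels served per experience.
# generic_scenario is the ni_benchmark object built in the repro below;
# item[1] is assumed to be the class label of each (x, y, task_label) tuple.
for exp in generic_scenario.train_stream:
    ys = torch.as_tensor([item[1] for item in exp.dataset])
    print("experience", exp.current_experience, "->", torch.unique(ys))
    # expected: tensor([0, 1, 2]); observed here: values up to 5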

🐜 To Reproduce

import torch
import argparse
import numpy as np
from torch.utils.data import Dataset
from torch.nn import Module, ReLU, Linear
from avalanche.benchmarks.generators import ni_benchmark
from avalanche.training.supervised import EWC
from avalanche.evaluation.metrics import (forgetting_metrics, accuracy_metrics, loss_metrics, bwt_metrics)
from avalanche.logging import InteractiveLogger
from avalanche.training.plugins import EvaluationPlugin


# Create Pytorch dataset class
class dataSet(Dataset):
    def __init__(self, x, y):
        self.x = torch.as_tensor(x)
        self.targets = torch.as_tensor(y)

    def __len__(self):
        return self.x.shape[0]

    def __getitem__(self, index):
        return self.x[index, :], self.targets[index]


# Create MLP model
class MLP(Module):
    def __init__(self, in_num, out_num, hidden_num1, hidden_num2, hidden_num3):
        super(MLP, self).__init__()
        self.fc1 = Linear(in_features=in_num, out_features=hidden_num1)
        self.relu1 = ReLU()

        self.fc2 = Linear(in_features=hidden_num1, out_features=hidden_num2)
        self.relu2 = ReLU()

        self.fc3 = Linear(in_features=hidden_num2, out_features=hidden_num3)
        self.relu3 = ReLU()

        self.fc4 = Linear(in_features=hidden_num3, out_features=out_num)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)

        x = self.fc2(x)
        x = self.relu2(x)

        x = self.fc3(x)
        x = self.relu3(x)

        x = self.fc4(x)
        return x


def main(args):
    # --- CONFIG
    # HYPERPARAMETERS FOR THE MODEL
    in_num = 155        # the number of input features
    hidden_num1 = 256   # number of neurons in hidden layers
    hidden_num2 = 128   # number of neurons in hidden layers
    hidden_num3 = 128   # number of neurons in hidden layers
    out_num = 3         # number of classes
    # ---------

    # GENERATE TRAINING AND TEST DATASETS FOR 2 DOMAINS
    x_train_1 = np.random.rand(100, 155).astype(np.float32) # domain 1
    x_test_1 = np.random.rand(50, 155).astype(np.float32)   # domain 1
    x_train_2 = np.random.rand(60, 155).astype(np.float32)  # domain 2
    x_test_2 = np.random.rand(30, 155).astype(np.float32)   # domain 2
    # ---------
    
    # GENERATE TARGET LABELS AMONG [0, 1, 2]
    y_train_1 = np.random.randint(3, size=100).astype(np.int_)  # domain 1
    y_test_1 = np.random.randint(3, size=50).astype(np.int_)    # domain 1
    y_train_2 = np.random.randint(3, size=60).astype(np.int_)   # domain 2
    y_test_2 = np.random.randint(3, size=30).astype(np.int_)    # domain 2
    # ---------

    print(np.unique(y_train_1), np.unique(y_test_1), np.unique(y_train_2), np.unique(y_test_2)) 

    # INSTANTIATE PYTORCH DATASETS
    train_1 = dataSet(x_train_1, y_train_1)
    test_1 = dataSet(x_test_1, y_test_1)
    train_2 = dataSet(x_train_2, y_train_2)
    test_2 = dataSet(x_test_2, y_test_2)
    # ---------

    # CHECK IF SELECTED GPU IS AVAILABLE OR USE CPU
    assert args.cuda == -1 or args.cuda >= 0, "cuda must be -1 or >= 0."
    device = torch.device(
        f"cuda:{args.cuda}" if torch.cuda.is_available() and args.cuda >= 0 else "cpu")
    print(f"Using device: {device}")
    # ---------

    # --- SCENARIO CREATION
    generic_scenario = ni_benchmark([train_1, train_2], [test_1, test_2], 2)
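    # NOTE: this call is where the issue appears: the resulting stream serves
    # labels in [0, 5] instead of [0, 2] (see the Screenshots section below)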
    # ---------

    # MODEL CREATION
    model = MLP(in_num, out_num, hidden_num1, hidden_num2, hidden_num3).to(device)
    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum)
    criterion = torch.nn.CrossEntropyLoss()
    # ---------

    # DEFINE THE EVALUATION PLUGIN AND LOGGER
    interactive_logger = InteractiveLogger()

    eval_plugin = EvaluationPlugin(
        accuracy_metrics(minibatch=False, epoch=True, experience=True, stream=True),
        loss_metrics(minibatch=False, epoch=True, experience=True, stream=True),
        forgetting_metrics(experience=True, stream=True),
        bwt_metrics(experience=True, stream=True),
        loggers=[interactive_logger]
    )
    # ---------

    if args.ewc_mode == 'separate':
        args.decay_factor = None

    # CREATE STRATEGY
    strategy = EWC(
        model,
        optimizer,
        criterion,
        args.ewc_lambda,
        args.ewc_mode,
        decay_factor=args.decay_factor,
        train_epochs=args.epochs,
        device=device,
        train_mb_size=args.minibatch_size,
        evaluator=eval_plugin,
    )
    # ---------

    # TRAIN ON THE SELECTED SCENARIO WITH THE CHOSEN STRATEGY
    print("Starting experiment...")
    results = []
    for experience in generic_scenario.train_stream:
        print("Start training on experience", experience.current_experience)
        strategy.train(experience)
        print("End training on experience", experience.current_experience)
        print("Computing accuracy on the test set")
        results.append(strategy.eval(generic_scenario.test_stream))

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--ewc_mode",
        type=str,
        choices=["separate", "online"],
        default="separate",
        help="Choose between EWC and online.",
    )
    parser.add_argument(
        "--ewc_lambda",
        type=float,
        default=0.4,
        help="Penalty hyperparameter for EWC",
    )
    parser.add_argument(
        "--decay_factor",
        type=float,
        default=0.1,
        help="Decay factor for importance " "when ewc_mode is online.",
    )
    parser.add_argument("--optim", type=str, choices=["sgd"], default="sgd", help="Optimizer.")
    parser.add_argument("--lr", type=float, default=1e-3, help="Learning rate.")
    parser.add_argument("--momentum", type=float, default=9e-1, help="Momentum.")
    parser.add_argument(
        "--epochs", type=int, default=300, help="Number of training epochs."
    )
    parser.add_argument(
        "--minibatch_size", type=int, default=128, help="Minibatch size."
    )
    parser.add_argument(
        "--cuda",
        type=int,
        default=0,
        help="Specify GPU id to use. Use CPU if -1.",
    )
    args = parser.parse_args()

    main(args)

🐝 Expected behavior
I expect ni_benchmark to generate a domain-incremental stream from the two PyTorch datasets in each list, namely [train_1, train_2] and [test_1, test_2], while keeping the same target labels [0, 1, 2].
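Until the root cause is clear, one possible stopgap (a sketch under a strong assumption, not the library's intended usage) is to fold the labels back with a modulo inside the criterion. This recovers [0, 1, 2] only if the relabeling really is a fixed +3 offset on domain 2, as the tensor in the Screenshots section suggests:

import torch.nn as nn

# Stopgap sketch. ASSUMPTION: ni_benchmark shifted domain-2 labels by exactly
# the number of classes per domain, so target % n_classes restores the
# original label. Pass an instance of this to EWC instead of the plain
# CrossEntropyLoss.
class ModuloCriterion(nn.Module):
    def __init__(self, n_classes):
        super().__init__()
        self.n_classes = n_classes
        self.ce = nn.CrossEntropyLoss()

    def forward(self, logits, targets):
        return self.ce(logits, targets % self.n_classes)

criterion = ModuloCriterion(3)  # 3 classes per domain in this repro

Note that this only masks the symptom: if the generator assigns offsets differently in other configurations, the modulo would silently merge unrelated labels.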

🐞 Screenshots
If you print self.mb_output and self.mb_y in the criterion function in /avalanche/training/templates/problem_type/supervised_problem.py, you can see that the true target labels self.mb_y are altered during training, as shown below:

self.mb_y = tensor([3, 0, 0, 1, 5, 0, 1, 0, 0, 1, 0, 5, 5, 5, 3, 1, 2, 2, 0, 1, 2, 3, 2, 5,
                    3, 3, 4, 0, 2, 0, 2, 0, 0, 1, 5, 0, 2, 0, 1, 1, 1, 0, 5, 0, 5, 5, 1, 2,
                    3, 1, 5, 1, 1, 5, 0, 1, 3, 0, 5, 3, 2, 4, 3, 3, 3, 4, 0, 2, 0, 2, 2, 1,
                    1, 1, 2, 4, 4, 4, 4, 1])
