From f963267acd5d84a19d388129683496b01b5de4b2 Mon Sep 17 00:00:00 2001
From: Wenqi Li
Date: Sun, 19 Apr 2020 12:59:40 +0100
Subject: [PATCH 1/2] adds integration test classification

---
 .github/workflows/setupapp.yml              |   2 +-
 tests/test_integration_classification_2d.py | 242 ++++++++++++++++++++
 2 files changed, 243 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_integration_classification_2d.py

diff --git a/.github/workflows/setupapp.yml b/.github/workflows/setupapp.yml
index e9c71c361f..f435381967 100644
--- a/.github/workflows/setupapp.yml
+++ b/.github/workflows/setupapp.yml
@@ -18,7 +18,7 @@ jobs:
         which python
         python -m pip install --upgrade pip --no-cache-dir
         python -m pip uninstall -y torch torchvision
-        python -m pip install -q -r requirements.txt --no-cache-dir
+        python -m pip install --upgrade -q -r requirements.txt --no-cache-dir
         python -m pip list
     - name: Run unit tests report coverage
       run: |
diff --git a/tests/test_integration_classification_2d.py b/tests/test_integration_classification_2d.py
new file mode 100644
index 0000000000..d4528e9080
--- /dev/null
+++ b/tests/test_integration_classification_2d.py
@@ -0,0 +1,242 @@
+# Copyright 2020 MONAI Consortium
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#     http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import shutil
+import subprocess
+import tarfile
+import tempfile
+import unittest
+
+import numpy as np
+import torch
+from torch.utils.data import DataLoader
+
+import monai
+from monai.metrics import compute_roc_auc
+from monai.networks.nets import densenet121
+from monai.transforms import (AddChannel, Compose, LoadPNG, RandFlip, RandRotate, RandZoom, Resize, ScaleIntensity,
+                              ToTensor)
+from tests.utils import skip_if_quick
+
+TEST_DATA_URL = 'https://www.dropbox.com/s/5wwskxctvcxiuea/MedNIST.tar.gz'
+
+
+class MedNISTDataset(torch.utils.data.Dataset):
+
+    def __init__(self, image_files, labels, transforms):
+        self.image_files = image_files
+        self.labels = labels
+        self.transforms = transforms
+
+    def __len__(self):
+        return len(self.image_files)
+
+    def __getitem__(self, index):
+        return self.transforms(self.image_files[index]), self.labels[index]
+
+
+def run_training_test(root_dir, train_x, train_y, val_x, val_y, device=torch.device("cuda:0")):
+
+    monai.config.print_config()
+    # define transforms for image and classification
+    train_transforms = Compose([
+        LoadPNG(),
+        AddChannel(),
+        ScaleIntensity(),
+        RandRotate(degrees=15, prob=0.5),
+        RandFlip(spatial_axis=0, prob=0.5),
+        RandZoom(min_zoom=0.9, max_zoom=1.1, prob=0.5),
+        Resize(spatial_size=(64, 64), mode='constant'),
+        ToTensor()
+    ])
+    train_transforms.set_random_state(1234)
+    val_transforms = Compose([LoadPNG(), AddChannel(), ScaleIntensity(), ToTensor()])
+
+    # create train, val data loaders
+    train_ds = MedNISTDataset(train_x, train_y, train_transforms)
+    train_loader = DataLoader(train_ds, batch_size=300, shuffle=True, num_workers=10)
+
+    val_ds = MedNISTDataset(val_x, val_y, val_transforms)
+    val_loader = DataLoader(val_ds, batch_size=300, num_workers=10)
+
+    model = densenet121(
+        spatial_dims=2,
+        in_channels=1,
+        out_channels=len(np.unique(train_y)),
+    ).to(device)
+    loss_function = torch.nn.CrossEntropyLoss()
+    optimizer = torch.optim.Adam(model.parameters(), 1e-5)
+    epoch_num = 4
+    val_interval = 1
+
+    # start training validation
+    best_metric = -1
+    best_metric_epoch = -1
+    epoch_loss_values = list()
+    metric_values = list()
+    model_filename = os.path.join(root_dir, 'best_metric_model.pth')
+    for epoch in range(epoch_num):
+        print('-' * 10)
+        print('Epoch {}/{}'.format(epoch + 1, epoch_num))
+        model.train()
+        epoch_loss = 0
+        step = 0
+        for batch_data in train_loader:
+            step += 1
+            inputs, labels = batch_data[0].to(device), batch_data[1].to(device)
+            optimizer.zero_grad()
+            outputs = model(inputs)
+            loss = loss_function(outputs, labels)
+            loss.backward()
+            optimizer.step()
+            epoch_loss += loss.item()
+        epoch_loss /= step
+        epoch_loss_values.append(epoch_loss)
+        print("epoch %d average loss:%0.4f" % (epoch + 1, epoch_loss))
+
+        if (epoch + 1) % val_interval == 0:
+            model.eval()
+            with torch.no_grad():
+                y_pred = torch.tensor([], dtype=torch.float32, device=device)
+                y = torch.tensor([], dtype=torch.long, device=device)
+                for val_data in val_loader:
+                    val_images, val_labels = val_data[0].to(device), val_data[1].to(device)
+                    y_pred = torch.cat([y_pred, model(val_images)], dim=0)
+                    y = torch.cat([y, val_labels], dim=0)
+                auc_metric = compute_roc_auc(y_pred, y, to_onehot_y=True, add_softmax=True)
+                metric_values.append(auc_metric)
+                acc_value = torch.eq(y_pred.argmax(dim=1), y)
+                acc_metric = acc_value.sum().item() / len(acc_value)
+                if auc_metric > best_metric:
+                    best_metric = auc_metric
+                    best_metric_epoch = epoch + 1
+                    torch.save(model.state_dict(), model_filename)
+                    print('saved new best metric model')
+                print("current epoch %d current AUC: %0.4f current accuracy: %0.4f best AUC: %0.4f at epoch %d" %
+                      (epoch + 1, auc_metric, acc_metric, best_metric, best_metric_epoch))
+    print('train completed, best_metric: %0.4f at epoch: %d' % (best_metric, best_metric_epoch))
+    return epoch_loss_values, best_metric, best_metric_epoch
+
+
+def run_inference_test(root_dir, test_x, test_y, device=torch.device("cuda:0")):
+    # define transforms for image and classification
+    val_transforms = Compose([LoadPNG(), AddChannel(), ScaleIntensity(), ToTensor()])
+    val_ds = MedNISTDataset(test_x, test_y, val_transforms)
+    val_loader = DataLoader(val_ds, batch_size=300, num_workers=10)
+
+    model = densenet121(
+        spatial_dims=2,
+        in_channels=1,
+        out_channels=len(np.unique(test_y)),
+    ).to(device)
+
+    model_filename = os.path.join(root_dir, 'best_metric_model.pth')
+    model.load_state_dict(torch.load(model_filename))
+    model.eval()
+    y_true = list()
+    y_pred = list()
+    with torch.no_grad():
+        for test_data in val_loader:
+            test_images, test_labels = test_data[0].to(device), test_data[1].to(device)
+            pred = model(test_images).argmax(dim=1)
+            for i in range(len(pred)):
+                y_true.append(test_labels[i].item())
+                y_pred.append(pred[i].item())
+    tps = [np.sum((np.asarray(y_true) == idx) & (np.asarray(y_pred) == idx)) for idx in np.unique(test_y)]
+    return tps
+
+
+class IntegrationClassification2D(unittest.TestCase):
+
+    def setUp(self):
+        torch.manual_seed(0)
+        torch.backends.cudnn.deterministic = True
+        torch.backends.cudnn.benchmark = False
+        np.random.seed(0)
+        self.data_dir = tempfile.mkdtemp()
+
+        # download
+        subprocess.call(['wget', '-nv', '-P', self.data_dir, TEST_DATA_URL])
+        dataset_file = os.path.join(self.data_dir, 'MedNIST.tar.gz')
+        assert os.path.exists(dataset_file)
+
+        # extract tarfile
+        datafile = tarfile.open(dataset_file)
+        datafile.extractall(path=self.data_dir)
+        datafile.close()
+
+        # find image files and labels
+        data_dir = os.path.join(self.data_dir, 'MedNIST')
+        class_names = sorted(os.listdir(data_dir))
+        image_files = [[
+            os.path.join(data_dir, class_name, x) for x in sorted(os.listdir(os.path.join(data_dir, class_name)))
+        ] for class_name in class_names]
+        image_file_list, image_classes = [], []
+        for i, class_name in enumerate(class_names):
+            image_file_list.extend(image_files[i])
+            image_classes.extend([i] * len(image_files[i]))
+
+        # split train, val, test
+        valid_frac, test_frac = 0.1, 0.1
+        self.train_x, self.train_y = [], []
+        self.val_x, self.val_y = [], []
+        self.test_x, self.test_y = [], []
+        for i in range(len(image_classes)):
+            rann = np.random.random()
+            if rann < valid_frac:
+                self.val_x.append(image_file_list[i])
+                self.val_y.append(image_classes[i])
+            elif rann < test_frac + valid_frac:
+                self.test_x.append(image_file_list[i])
+                self.test_y.append(image_classes[i])
+            else:
+                self.train_x.append(image_file_list[i])
+                self.train_y.append(image_classes[i])
+
+        np.random.seed(seed=None)
+        self.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu:0')
+
+    def tearDown(self):
+        shutil.rmtree(self.data_dir)
+
+    @skip_if_quick
+    def test_training(self):
+        repeated = []
+        for i in range(2):
+            torch.manual_seed(0)
+
+            repeated.append([])
+            losses, best_metric, best_metric_epoch = \
+                run_training_test(self.data_dir, self.train_x, self.train_y, self.val_x, self.val_y, device=self.device)
+
+            # check training properties
+            np.testing.assert_allclose(
+                losses, [0.8501208358129878, 0.18469145818121113, 0.08108749352158255, 0.04965383692342005], rtol=1e-3)
+            repeated[i].extend(losses)
+            print('best metric', best_metric)
+            np.testing.assert_allclose(best_metric, 0.9999480167572079, rtol=1e-4)
+            repeated[i].append(best_metric)
+            np.testing.assert_allclose(best_metric_epoch, 4)
+            model_file = os.path.join(self.data_dir, 'best_metric_model.pth')
+            self.assertTrue(os.path.exists(model_file))
+
+            infer_metric = run_inference_test(self.data_dir, self.test_x, self.test_y, device=self.device)
+
+            # check inference properties
+            np.testing.assert_allclose(np.asarray(infer_metric), [1036, 895, 982, 1033, 958, 1047])
+            repeated[i].extend(infer_metric)
+
+        np.testing.assert_allclose(repeated[0], repeated[1])
+
+
+if __name__ == '__main__':
+    unittest.main()

From 9cbbc847e0e7219b0cb8fbbcfac15597afd40868 Mon Sep 17 00:00:00 2001
From: Wenqi Li
Date: Sun, 19 Apr 2020 15:11:37 +0100
Subject: [PATCH 2/2] remove unused manual seed

---
 tests/test_integration_classification_2d.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_integration_classification_2d.py b/tests/test_integration_classification_2d.py
index d4528e9080..a740a381b0 100644
--- a/tests/test_integration_classification_2d.py
+++ b/tests/test_integration_classification_2d.py
@@ -158,7 +158,6 @@ def run_inference_test(root_dir, test_x, test_y, device=torch.device("cuda:0")):
 class IntegrationClassification2D(unittest.TestCase):
 
     def setUp(self):
-        torch.manual_seed(0)
         torch.backends.cudnn.deterministic = True
         torch.backends.cudnn.benchmark = False
        np.random.seed(0)
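
Note on the inference check (illustration only, not part of either patch): run_inference_test returns one count per MedNIST class, namely the number of test images whose predicted label equals the true label, so the assertion against [1036, 895, 982, 1033, 958, 1047] compares per-class true-positive counts for the six classes. The snippet below reproduces that counting expression on toy label arrays; the arrays are made up for illustration and are not MedNIST results.

    import numpy as np

    # toy ground-truth and predicted labels over three classes (0, 1, 2)
    y_true = np.asarray([0, 0, 1, 1, 2, 2])
    y_pred = np.asarray([0, 1, 1, 1, 2, 0])

    # per-class true-positive counts, same expression as in run_inference_test
    tps = [np.sum((y_true == idx) & (y_pred == idx)) for idx in np.unique(y_true)]
    print([int(n) for n in tps])  # [1, 2, 1]: one hit for class 0, two for class 1, one for class 2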
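
To exercise the new test locally rather than through the CI workflow, a minimal runner sketch follows. It is hypothetical and not included in the patches; it assumes the MONAI repository root as the working directory (so the tests package is importable), an available CUDA GPU, wget on the PATH, and network access to TEST_DATA_URL. Because setUp downloads and extracts MedNIST and test_training trains for four epochs twice, it runs far longer than an ordinary unit test, and the skip_if_quick decorator may skip it in quick-test configurations.

    # local_integration_run.py -- hypothetical helper, not part of this patch series
    import unittest

    # test case added by PATCH 1/2
    from tests.test_integration_classification_2d import IntegrationClassification2D

    if __name__ == "__main__":
        # collect and run just the integration test case with verbose output
        suite = unittest.TestLoader().loadTestsFromTestCase(IntegrationClassification2D)
        unittest.TextTestRunner(verbosity=2).run(suite)

Running "python -m unittest -v tests.test_integration_classification_2d" from the repository root is equivalent.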