Browse Source

init

master
Artur Feoktistov 7 months ago
commit
c7a6997e9f
11 changed files with 6456 additions and 0 deletions
  1. 4395
    0
      .ipynb_checkpoints/run-checkpoint.ipynb
  2. 35
    0
      README.md
  3. 184
    0
      dataset.py
  4. 246
    0
      main_manual.py
  5. 210
    0
      main_manual_abl_layerlr.py
  6. 208
    0
      main_manual_abl_testrot.py
  7. 183
    0
      model.py
  8. 166
    0
      plot.py
  9. 679
    0
      run.ipynb
  10. 92
    0
      submission.py
  11. 58
    0
      utils.py

+ 4395
- 0
.ipynb_checkpoints/run-checkpoint.ipynb
File diff suppressed because it is too large
View File


+ 35
- 0
README.md View File

@@ -0,0 +1,35 @@
# C-NMC Challenge

This is the code release for the paper:

Prellberg J., Kramer O. (2019) Acute Lymphoblastic Leukemia Classification from Microscopic Images Using Convolutional Neural Networks. In: Gupta A., Gupta R. (eds) ISBI 2019 C-NMC Challenge: Classification in Cancer Cell Imaging. Lecture Notes in Bioengineering. Springer, Singapore

## Usage

Use the script `main_manual.py` to train the model on the dataset. The expected training data layout is described below.

Use the script `submission.py` to apply the trained model to the test data.

## Data Layout

During the challenge, the training data was released in multiple steps, which is why the data layout is a little peculiar.

```
data/fold_0/all/*.bmp
data/fold_0/hem/*.bmp
data/fold_1/...
data/fold_2/...
data/phase2/*.bmp
data/phase3/*.bmp
data/phase2.csv
```

The `fold_0` to `fold_2` folders contain the training images, each with two subdirectories, one per class. The directories `phase2` and `phase3` are the preliminary test set and the final test set, respectively, and contain images numbered starting from `1.bmp`. The labels for the preliminary test set are specified in `phase2.csv`, which looks as follows:

```
Patient_ID,new_names,labels
UID_57_29_1_all.bmp,1.bmp,1
UID_57_22_2_all.bmp,2.bmp,1
UID_57_31_3_all.bmp,3.bmp,1
UID_H49_35_1_hem.bmp,4.bmp,0
```

+ 184
- 0
dataset.py View File

@@ -0,0 +1,184 @@
import re
from collections import defaultdict
from glob import glob
from os.path import join

import pandas as pd
import torch
import torchvision.transforms.functional as TF
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

STD_RES = 450
STD_CENTER_CROP = 300

def file_iter(dataroot):
    """Yield every path that lies exactly three levels below ``dataroot``.

    The glob pattern ``<dataroot>/*/*/*`` is used, so with the layout
    described in the README this visits entries such as
    ``data/fold_0/all/<name>.bmp``.
    """
    yield from glob(join(dataroot, '*', '*', '*'))


def file_match_iter(dataroot):
    """Yield ``(path, match)`` for each training image found under ``dataroot``.

    Paths produced by ``file_iter`` are matched against the pattern
    ``.../<fold>/<hem|all>/UID_<subject>_<image>_<cell>_<all|hem>.bmp``;
    paths that do not match are skipped. The match object exposes the named
    groups ``file``, ``fold``, ``class``, ``subject``, ``image`` and ``cell``.

    Note that the leading ``.*`` is greedy, so the ``fold`` group captures
    only the last character of the fold directory name (``'0'`` for
    ``fold_0``).
    """
    filename_re = re.compile(r'(?P<file>.*(?P<fold>[a-zA-Z0-9_]+)/'
                             r'(?P<class>hem|all)/'
                             r'UID_(?P<subject>H?\d+)_(?P<image>\d+)_(?P<cell>\d+)_(all|hem).bmp)')
    candidates = ((path, filename_re.match(path)) for path in file_iter(dataroot))
    for path, hit in candidates:
        if hit is None:
            continue
        yield path, hit


def _to_numeric_or_keep(column):
    """Return ``column`` converted to a numeric dtype, or unchanged if that fails."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


def to_dataframe(dataroot):
    """Build one dataframe describing all labelled images below ``dataroot``.

    Rows come from two sources:

    * the fold directories, discovered through ``file_match_iter``;
    * the phase2 set listed in ``<dataroot>/phase2.csv``, which is assigned
      to fold ``'3'`` and whose images are addressed as
      ``<dataroot>/phase2/<row number + 1>.bmp``.

    Args:
        dataroot: Root directory of the dataset.

    Returns:
        A ``pandas.DataFrame`` with the columns ``file``, ``fold``,
        ``subject``, ``class``, ``image`` and ``cell``. Columns whose values
        are all numeric are converted to a numeric dtype; the others are left
        as they are.

    Raises:
        ValueError: If a ``Patient_ID`` entry of ``phase2.csv`` does not
            follow the ``UID_<subject>_<image>_<cell>_<all|hem>.bmp`` scheme.
    """
    data = defaultdict(list)
    keys = ['file', 'fold', 'subject', 'class', 'image', 'cell']

    # Load data from the three training folds
    for _, match in file_match_iter(dataroot):
        for key in keys:
            data[key].append(match.group(key))

    # Load data from the phase2 validation set
    phase2 = pd.read_csv(join(dataroot, 'phase2.csv'), header=0, names=['file_id', 'file', 'class'])
    pattern = re.compile(r'UID_(?P<subject>H?\d+)_(?P<image>\d+)_(?P<cell>\d+)_(all|hem).bmp')
    for i, row in phase2.iterrows():
        match = pattern.match(row['file_id'])
        if match is None:
            # Fail with a readable message instead of an AttributeError on None
            raise ValueError(f"Unexpected file id in phase2.csv (row {i}): {row['file_id']!r}")
        data['file'].append(join(dataroot, f'phase2/{i+1}.bmp'))
        data['fold'].append('3')
        data['subject'].append(match.group('subject'))
        data['class'].append('hem' if row['class'] == 0 else 'all')
        data['image'].append(match.group('image'))
        data['cell'].append(match.group('cell'))

    # Convert to dataframe. ``pd.to_numeric(errors='ignore')`` is deprecated,
    # so the "keep the column when it is not numeric" fallback is explicit.
    df = pd.DataFrame(data)
    return df.apply(_to_numeric_or_keep)


class ISBI2019(Dataset):
    """Map-style dataset over a dataframe with ``file`` and ``class`` columns.

    Each item is ``(image, label)`` where the image is opened with PIL from
    the ``file`` column and the label is ``0`` for class ``'hem'`` and ``1``
    for anything else. An optional ``transform`` is applied to the image.
    """

    def __init__(self, df, transform=None):
        super().__init__()
        self.transform = transform
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        # Indices may arrive as tensors; pandas needs a plain int
        row = self.df.iloc[int(index)]
        path, cls_name = row[['file', 'class']]
        img = Image.open(path)
        label = 0 if cls_name == 'hem' else 1
        if self.transform is None:
            return img, label
        return self.transform(img), label


def get_class_weights(df):
    """Return the relative class frequencies as a float32 tensor.

    Element 0 is the fraction of rows whose ``class`` is ``'hem'`` and
    element 1 the fraction whose ``class`` is ``'all'``, each measured as the
    count of non-null ``file`` entries divided by ``len(df)``.
    """
    total = len(df)
    fractions = [
        float(df.loc[df['class'] == name]['file'].count() / total)
        for name in ('hem', 'all')
    ]
    return torch.tensor(fractions, dtype=torch.float32)


def tf_rotation_stack(x, num_rotations=8):
    """Stack ``num_rotations`` evenly spaced rotations of ``x`` into one tensor.

    ``x`` is rotated by ``360 * k / num_rotations`` degrees for
    ``k = 0 .. num_rotations - 1``; every rotated image is converted with
    ``TF.to_tensor`` and the results are stacked along a new leading axis.
    """
    rotated = [
        TF.to_tensor(TF.rotate(x, 360 * k / num_rotations))
        for k in range(num_rotations)
    ]
    return torch.stack(rotated)


def get_tf_train_transform(res):
    """Return the augmenting transform pipeline used for training images.

    The pipeline resizes to ``res``, applies random vertical and horizontal
    flips, a random affine transform (any rotation, up to 20 % translation
    in each direction) and converts the result to a tensor.

    The former ``size_factor`` / ``center_crop`` locals were only used by a
    disabled centre crop and raised ``ZeroDivisionError`` for any ``res``
    larger than ``STD_RES``; they have been removed.

    Args:
        res: Target size handed to ``transforms.Resize``.
    """
    return transforms.Compose([
        transforms.Resize(res),
        transforms.RandomVerticalFlip(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(degrees=360, translate=(0.2, 0.2)),
        transforms.ToTensor(),
    ])


def get_tf_vaild_rot_transform(res):
    """Return the validation transform that yields a stack of rotated views.

    The image is resized to ``res`` and then passed through
    ``tf_rotation_stack``, which performs the tensor conversion itself.

    The former ``size_factor`` / ``center_crop`` locals were only used by a
    disabled centre crop and raised ``ZeroDivisionError`` for any ``res``
    larger than ``STD_RES``; they have been removed.

    Args:
        res: Target size handed to ``transforms.Resize``.
    """
    return transforms.Compose([
        transforms.Resize(res),
        transforms.Lambda(tf_rotation_stack),
    ])

def get_tf_valid_norot_transform(res):
    """Return the validation transform without test-time rotations.

    The image is resized to ``res`` and converted to a tensor.

    The former ``size_factor`` / ``center_crop`` locals were only used by a
    disabled centre crop and raised ``ZeroDivisionError`` for any ``res``
    larger than ``STD_RES``; they have been removed.

    Args:
        res: Target size handed to ``transforms.Resize``.
    """
    return transforms.Compose([
        transforms.Resize(res),
        transforms.ToTensor(),
    ])


def get_dataset(dataroot, folds_train=(0, 1, 2), folds_valid=(3,), tf_train=None, tf_valid=None):
    """Create the training (and optionally validation) dataset.

    Args:
        dataroot: Root directory of the dataset, forwarded to ``to_dataframe``.
        folds_train: Values of the ``fold`` column used for training.
        folds_valid: Values of the ``fold`` column used for validation, or
            ``None`` to build no validation set.
        tf_train: Transform applied to training images (required).
        tf_valid: Transform applied to validation images (required).

    Returns:
        ``(trainset, validset, validset_subjects, class_weights)`` when
        ``folds_valid`` is not ``None``, otherwise ``(trainset, class_weights)``.
        ``class_weights`` is computed from the training rows only.

    Raises:
        ValueError: If ``tf_train`` or ``tf_valid`` is ``None``.
    """
    # The original guard called sys.exit() without importing sys, so it
    # failed with a NameError; raise a proper, catchable error instead.
    if tf_train is None or tf_valid is None:
        raise ValueError("Transformation is None: both tf_train and tf_valid must be given")

    df = to_dataframe(dataroot)
    df_trainset = df.loc[df['fold'].isin(folds_train)]
    trainset = ISBI2019(df_trainset, transform=tf_train)
    class_weights = get_class_weights(df_trainset)

    if folds_valid is None:
        return trainset, class_weights

    df_validset = df.loc[df['fold'].isin(folds_valid)]
    validset_subjects = df_validset['subject'].values
    validset = ISBI2019(df_validset, transform=tf_valid)
    return trainset, validset, validset_subjects, class_weights


if __name__ == '__main__':
    # Diagnostic script: prints the dataframe, the number of examples per
    # fold/class and bounding-box statistics of the non-zero image content.
    import math
    from tqdm import tqdm

    df = to_dataframe('data')
    print(df)
    print("Examples by fold and class")
    print(df.groupby(['fold', 'class'])['file'].count())

    dataset = ISBI2019(df)
    mean_height, mean_width = 0, 0
    measured = 0
    weird_files = []
    empty_files = []
    bound_left, bound_upper, bound_right, bound_lower = math.inf, math.inf, 0, 0
    for i, (img, label) in tqdm(enumerate(dataset), total=len(dataset)):
        bbox = img.getbbox()
        if bbox is None:
            # getbbox() returns None for a completely empty image; such an
            # image has no extent and must not crash the tuple unpacking.
            empty_files.append(df.iloc[i]['file'])
            continue
        left, upper, right, lower = bbox
        # STD_RES is the expected image side length (was hard-coded as 450)
        if left == 0 or upper == 0 or right == STD_RES or lower == STD_RES:
            weird_files.append(df.iloc[i]['file'])
        height = lower - upper
        width = right - left
        # Running means over the images that actually have content
        measured += 1
        mean_height = mean_height + (height - mean_height) / measured
        mean_width = mean_width + (width - mean_width) / measured
        bound_left = min(bound_left, left)
        bound_upper = min(bound_upper, upper)
        bound_right = max(bound_right, right)
        bound_lower = max(bound_lower, lower)
    print(f"mean_height = {mean_height:.2f}")
    print(f"mean_width = {mean_width:.2f}")
    print(f"bound_left = {bound_left:d}")
    print(f"bound_upper = {bound_upper:d}")
    print(f"bound_right = {bound_right:d}")
    print(f"bound_lower = {bound_lower:d}")
    print("Files that max out at least one border:")
    for f in weird_files:
        print(f)
    if empty_files:
        print("Files without any non-zero content:")
        for f in empty_files:
            print(f)

+ 246
- 0
main_manual.py View File

@@ -0,0 +1,246 @@
import argparse
import os
from collections import defaultdict

import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_fscore_support, accuracy_score
from tensorboardX import SummaryWriter
from torch.optim.lr_scheduler import StepLR, LambdaLR
from torch.utils.data import DataLoader
from tqdm import tqdm, trange

from dataset import get_dataset, get_tf_train_transform, get_tf_vaild_rot_transform
from model import get_model
from utils import IncrementalAverage, to_device, set_seeds, unique_string, count_parameters


def evaluate(model, valid_loader, class_weights, device):
    """Evaluate ``model`` on ``valid_loader`` using rotation-averaged logits.

    Every batch is expected to be 5-D
    ``(batch, rotations, channels, height, width)``. The rotations are folded
    into the batch axis for the forward pass and the resulting logits are
    averaged per sample. A logit above 0 counts as a positive prediction.

    Returns:
        ``(loss, confusion_matrix, auc, precision, recall, f1)`` where
        precision, recall and F1 use ``average='weighted'`` and ``loss`` is
        the value of the ``IncrementalAverage`` fed with every batch loss.
    """
    model.eval()

    label_chunks, score_chunks = [], []
    running_loss = IncrementalAverage()
    for batch_img, batch_label in tqdm(valid_loader, leave=False):
        batch_img, batch_label = to_device(device, batch_img, batch_label)
        n_samples, n_rot, channels, height, width = batch_img.size()
        flat = batch_img.view(-1, channels, height, width)
        with torch.no_grad():
            logits = model(flat)
        # Average the logits of all rotated views of the same sample
        logits = logits.view(n_samples, n_rot).mean(1)
        batch_loss = lossfn(logits, batch_label.to(logits.dtype), class_weights)
        label_chunks.append(batch_label.cpu())
        score_chunks.append(logits.cpu())
        running_loss.update(batch_loss.item())

    labels = torch.cat(label_chunks).numpy()
    scores = torch.cat(score_chunks).numpy()
    decisions = scores > 0

    cm = confusion_matrix(labels, decisions)
    auc = roc_auc_score(labels, scores)
    prec, rec, f1, _ = precision_recall_fscore_support(labels, decisions, average='weighted')
    return running_loss.value, cm, auc, prec, rec, f1


def train(model, opt, train_loader, class_weights, device):
    """Run one optimisation pass over ``train_loader``.

    For every batch the model output is flattened to one logit per sample,
    the weighted loss is computed with ``lossfn`` and one optimiser step is
    taken.

    Returns:
        The value of the ``IncrementalAverage`` fed with every batch loss.
    """
    model.train()
    running_loss = IncrementalAverage()
    for batch_img, batch_label in tqdm(train_loader, leave=False):
        batch_img, batch_label = to_device(device, batch_img, batch_label)
        logits = model(batch_img).view(-1)
        batch_loss = lossfn(logits, batch_label.to(logits.dtype), class_weights)
        running_loss.update(batch_loss.item())

        opt.zero_grad()
        batch_loss.backward()
        opt.step()
    return running_loss.value


def lossfn(prediction, target, class_weights):
    """Binary cross-entropy on logits with a class-balancing positive weight.

    The weight of the positive class is ``class_weights[0] / class_weights[1]``,
    broadcast to one entry per element of ``target``.
    """
    weight_ratio = class_weights[0] / class_weights[1]
    positive_weight = weight_ratio.expand(len(target))
    return F.binary_cross_entropy_with_logits(prediction, target, pos_weight=positive_weight)


def schedule(epoch):
    """Return the learning-rate multiplier for ``epoch``.

    The multiplier is 1 for epochs below 2, 0.1 for epochs 2 and 3, and 0.01
    from epoch 4 onwards.
    """
    if epoch < 2:
        return 1
    if epoch < 4:
        return 0.1
    return 0.01


def train_validate(args):
    """Train on the training folds and validate after every epoch.

    The model is evaluated once before training (epoch 0) and after each
    epoch. All metrics are written to TensorBoard and to
    ``<args.out>/results.npz``. Whenever the validation F1 improves, the
    model state dict is saved to ``<args.out>/model.pt``. After training, the
    per-subject accuracy of the final model is stored in
    ``<args.out>/subj_acc.npz``.

    Args:
        args: Parsed command-line arguments; reads ``device``, ``dataroot``,
            ``res``, ``batch_size``, ``epochs`` and ``out``.
    """
    model = get_model().to(args.device)
    print("Model parameters:", count_parameters(model))

    trainset, validset, validset_subjects, class_weights = get_dataset(
        args.dataroot,
        tf_train=get_tf_train_transform(args.res),
        tf_valid=get_tf_vaild_rot_transform(args.res))
    class_weights = class_weights.to(args.device)
    print(f"Trainset length: {len(trainset)}")
    print(f"Validset length: {len(validset)}")
    print(f"class_weights = {class_weights}")

    train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True)
    valid_loader = DataLoader(validset, batch_size=args.batch_size, num_workers=6, shuffle=False)

    # Layer-wise learning rates: small for early layers, large for the new head
    opt = torch.optim.Adam([
        {'params': model.paramgroup01(), 'lr': 1e-6},
        {'params': model.paramgroup234(), 'lr': 1e-4},
        {'params': model.parameters_classifier(), 'lr': 1e-2},
    ])
    # One multiplier function per parameter group
    scheduler = LambdaLR(opt, lr_lambda=[schedule, schedule, schedule])

    summarywriter = SummaryWriter(args.out)
    recorded_data = defaultdict(list)

    def logged_eval(e):
        """Evaluate, log all metrics for epoch ``e`` and return the F1 score."""
        valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader, class_weights, args.device)

        # Derive some accuracy metrics from confusion matrix
        tn, fp, fn, tp = cm.ravel()
        acc = (tp + tn) / cm.sum()
        acc_hem = tn / (tn + fp)
        acc_all = tp / (tp + fn)

        print(f"epoch={e} f1={f1:.4f}")

        # (tensorboard tag, key in results.npz, value). Driving both sinks
        # from one table records every metric exactly once per evaluation;
        # the previous code appended 'tn' twice.
        metrics = [
            ('loss/train', 'loss_train', train_loss),
            ('loss/valid', 'loss_valid', valid_loss),
            ('cm/tn', 'tn', tn),
            ('cm/fp', 'fp', fp),
            ('cm/fn', 'fn', fn),
            ('cm/tp', 'tp', tp),
            ('metrics/precision', 'precision', prec),
            ('metrics/recall', 'recall', rec),
            ('metrics/f1', 'f1', f1),
            ('metrics/auc', 'auc', auc),
            ('acc/acc', 'acc', acc),
            ('acc/hem', 'acc_hem', acc_hem),
            ('acc/all', 'acc_all', acc_all),
        ]
        for tag, key, value in metrics:
            summarywriter.add_scalar(tag, value, e)
            recorded_data[key].append(value)
        np.savez(f'{args.out}/results', **recorded_data)

        return f1

    model = torch.nn.DataParallel(model)
    train_loss = np.nan
    best_val_f1 = logged_eval(0)
    for e in trange(args.epochs, desc='Epoch'):
        train_loss = train(model, opt, train_loader, class_weights, args.device)
        # Advance the schedule after the optimiser steps of this epoch. This
        # yields the same multiplier schedule(e) for epoch e as the previous
        # scheduler.step(e) call, without the deprecated epoch argument.
        scheduler.step()
        val_f1 = logged_eval(e + 1)

        if val_f1 > best_val_f1:
            print(f"New best model at {val_f1:.6f}")
            torch.save(model.state_dict(), f'{args.out}/model.pt')
            best_val_f1 = val_f1

    summarywriter.close()

    # NOTE: uses the weights of the last epoch, not the best checkpoint
    subj_acc = evaluate_subj_acc(model, validset, validset_subjects, args.device)
    np.savez(f'{args.out}/subj_acc', **subj_acc)


def evaluate_subj_acc(model, dataset, subjects, device):
    """Compute the classification accuracy separately for every subject.

    ``dataset`` is iterated one sample at a time in order, in lockstep with
    ``subjects``. Each sample is expected to be a stack of rotated views
    (5-D once batched); the logits of the views are averaged and a mean
    logit above 0 counts as a positive prediction.

    Returns:
        A dict mapping each subject to its accuracy score.
    """
    model.eval()

    preds_by_subject = defaultdict(list)
    labels_by_subject = defaultdict(list)

    loader = DataLoader(dataset, batch_size=1, num_workers=1, shuffle=False)

    for (img, cls), subj in tqdm(zip(loader, subjects), total=len(subjects), leave=False):
        img, cls = to_device(device, img, cls)
        n_samples, n_rot, channels, height, width = img.size()
        with torch.no_grad():
            logits = model(img.view(-1, channels, height, width))
            logits = logits.view(n_samples, n_rot).mean(1)
        labels_by_subject[subj].append(cls.cpu())
        preds_by_subject[subj].append(logits.cpu())

    subj_acc = {}
    for subj, label_chunks in labels_by_subject.items():
        y_true = torch.cat(label_chunks).numpy()
        y_pred = torch.cat(preds_by_subject[subj]).numpy() > 0
        subj_acc[subj] = accuracy_score(y_true, y_pred)

    return subj_acc


def train_test(args):
    """Train on all four folds without validation and save the model.

    The state dict is written to ``<args.out>/model.pt`` after every epoch,
    so the file always holds the most recently trained weights.

    Args:
        args: Parsed command-line arguments; reads ``device``, ``dataroot``,
            ``res``, ``batch_size``, ``epochs`` and ``out``.
    """
    model = get_model().to(args.device)
    print("Model parameters:", count_parameters(model))

    # get_dataset requires a validation transform even though no validation
    # set is built here (folds_valid=None).
    trainset, class_weights = get_dataset(args.dataroot, folds_train=(0, 1, 2, 3),
                                          folds_valid=None,
                                          tf_train=get_tf_train_transform(args.res),
                                          tf_valid=get_tf_vaild_rot_transform(args.res))
    class_weights = class_weights.to(args.device)
    print(f"Trainset length: {len(trainset)}")
    print(f"class_weights = {class_weights}")

    train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True)

    # Layer-wise learning rates: small for early layers, large for the new head
    opt = torch.optim.Adam([
        {'params': model.paramgroup01(), 'lr': 1e-6},
        {'params': model.paramgroup234(), 'lr': 1e-4},
        {'params': model.parameters_classifier(), 'lr': 1e-2},
    ])
    # One multiplier function per parameter group
    scheduler = LambdaLR(opt, lr_lambda=[schedule, schedule, schedule])

    model = torch.nn.DataParallel(model)
    for e in trange(args.epochs, desc='Epoch'):
        train(model, opt, train_loader, class_weights, args.device)
        # Advance the schedule after the optimiser steps of this epoch. This
        # yields the same multiplier schedule(e) for epoch e as the previous
        # scheduler.step(e) call, without the deprecated epoch argument.
        scheduler.step()
        torch.save(model.state_dict(), f'{args.out}/model.pt')


def parse_args():
    """Parse the command-line arguments of the training script.

    Returns:
        The ``argparse.Namespace``. Its ``out`` attribute is the requested
        output folder extended by a sub-folder named by ``unique_string()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataroot', default='data', help='path to dataset')
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--epochs', type=int, default=6)
    parser.add_argument('--seed', default=1, type=int, help='random seed')
    parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu')
    parser.add_argument('--out', default='results', help='output folder')
    # The default is given as an int (was the string '450') to match type=int
    parser.add_argument('--res', type=int, default=450, help='Desired input resolution')
    args = parser.parse_args()
    args.out = os.path.join(args.out, unique_string())
    return args


if __name__ == '__main__':
    # Script entry point: configure the run, then train with validation.
    torch.backends.cudnn.benchmark = True

    args = parse_args()
    print(args)

    set_seeds(args.seed)
    os.makedirs(args.out, exist_ok=True)

    train_validate(args)

+ 210
- 0
main_manual_abl_layerlr.py View File

@@ -0,0 +1,210 @@
import argparse
import os
from collections import defaultdict

import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_fscore_support, accuracy_score
from tensorboardX import SummaryWriter
from torch.optim.lr_scheduler import StepLR, LambdaLR
from torch.utils.data import DataLoader
from tqdm import tqdm, trange

from dataset import get_dataset, get_tf_train_transform, get_tf_vaild_rot_transform
from model import get_model
from utils import IncrementalAverage, to_device, set_seeds, unique_string, count_parameters


def evaluate(model, valid_loader, class_weights, device):
    """Evaluate ``model`` on ``valid_loader`` using rotation-averaged logits.

    Every batch is expected to be 5-D
    ``(batch, rotations, channels, height, width)``. The rotations are folded
    into the batch axis for the forward pass and the resulting logits are
    averaged per sample. A logit above 0 counts as a positive prediction.

    Returns:
        ``(loss, confusion_matrix, auc, precision, recall, f1)`` where
        precision, recall and F1 use ``average='weighted'`` and ``loss`` is
        the value of the ``IncrementalAverage`` fed with every batch loss.
    """
    model.eval()

    collected_labels, collected_scores = [], []
    loss_meter = IncrementalAverage()
    for images, targets in tqdm(valid_loader, leave=False):
        images, targets = to_device(device, images, targets)
        batch, rotations, chan, rows, cols = images.size()
        with torch.no_grad():
            scores = model(images.view(-1, chan, rows, cols))
            # Average the logits of all rotated views of the same sample
            scores = scores.view(batch, rotations).mean(1)
        step_loss = lossfn(scores, targets.to(scores.dtype), class_weights)
        collected_labels.append(targets.cpu())
        collected_scores.append(scores.cpu())
        loss_meter.update(step_loss.item())

    y_true = torch.cat(collected_labels).numpy()
    y_score = torch.cat(collected_scores).numpy()
    y_pred = y_score > 0

    cm = confusion_matrix(y_true, y_pred)
    auc = roc_auc_score(y_true, y_score)
    prec, rec, f1, _ = precision_recall_fscore_support(y_true, y_pred, average='weighted')
    return loss_meter.value, cm, auc, prec, rec, f1


def train(model, opt, train_loader, class_weights, device):
    """Run one optimisation pass over ``train_loader``.

    For every batch the model output is flattened to one logit per sample,
    the weighted loss is computed with ``lossfn`` and one optimiser step is
    taken.

    Returns:
        The value of the ``IncrementalAverage`` fed with every batch loss.
    """
    model.train()
    loss_meter = IncrementalAverage()
    for images, targets in tqdm(train_loader, leave=False):
        images, targets = to_device(device, images, targets)
        scores = model(images).view(-1)
        step_loss = lossfn(scores, targets.to(scores.dtype), class_weights)
        loss_meter.update(step_loss.item())

        opt.zero_grad()
        step_loss.backward()
        opt.step()
    return loss_meter.value


def lossfn(prediction, target, class_weights):
    """Binary cross-entropy on logits with a class-balancing positive weight.

    The weight of the positive class is ``class_weights[0] / class_weights[1]``,
    broadcast to one entry per element of ``target``.
    """
    weight_ratio = class_weights[0] / class_weights[1]
    positive_weight = weight_ratio.expand(len(target))
    return F.binary_cross_entropy_with_logits(prediction, target, pos_weight=positive_weight)


def schedule(epoch):
    """Return the learning-rate multiplier for ``epoch``.

    The multiplier is 1 for epochs below 2, 0.1 for epochs 2 and 3, and 0.01
    from epoch 4 onwards.
    """
    if epoch < 2:
        return 1
    if epoch < 4:
        return 0.1
    return 0.01


def train_validate(args):
    """Train with one shared learning rate and validate after every epoch.

    This ablation uses ``args.lr`` for all three parameter groups. The model
    is evaluated once before training (epoch 0) and after each epoch. All
    metrics are written to TensorBoard and to ``<args.out>/results.npz``.
    After training, the per-subject accuracy is stored in
    ``<args.out>/subj_acc.npz``. No model checkpoint is written.

    Args:
        args: Parsed command-line arguments; reads ``device``, ``dataroot``,
            ``res``, ``batch_size``, ``lr``, ``epochs`` and ``out``.
    """
    model = get_model().to(args.device)
    print("Model parameters:", count_parameters(model))

    trainset, validset, validset_subjects, class_weights = get_dataset(
        args.dataroot,
        tf_train=get_tf_train_transform(args.res),
        tf_valid=get_tf_vaild_rot_transform(args.res))
    class_weights = class_weights.to(args.device)
    print(f"Trainset length: {len(trainset)}")
    print(f"Validset length: {len(validset)}")
    print(f"class_weights = {class_weights}")

    train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True)
    valid_loader = DataLoader(validset, batch_size=args.batch_size, num_workers=6, shuffle=False)

    # Same parameter groups as the main script, but one learning rate for all
    opt = torch.optim.Adam([
        {'params': model.paramgroup01(), 'lr': args.lr},
        {'params': model.paramgroup234(), 'lr': args.lr},
        {'params': model.parameters_classifier(), 'lr': args.lr},
    ])
    # One multiplier function per parameter group
    scheduler = LambdaLR(opt, lr_lambda=[schedule, schedule, schedule])

    summarywriter = SummaryWriter(args.out)
    recorded_data = defaultdict(list)

    def logged_eval(e):
        """Evaluate and log all metrics for epoch ``e``."""
        valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader, class_weights, args.device)

        # Derive some accuracy metrics from confusion matrix
        tn, fp, fn, tp = cm.ravel()
        acc = (tp + tn) / cm.sum()
        acc_hem = tn / (tn + fp)
        acc_all = tp / (tp + fn)

        print(f"epoch={e} f1={f1:.4f}")

        # (tensorboard tag, key in results.npz, value). Driving both sinks
        # from one table records every metric exactly once per evaluation;
        # the previous code appended 'tn' twice.
        metrics = [
            ('loss/train', 'loss_train', train_loss),
            ('loss/valid', 'loss_valid', valid_loss),
            ('cm/tn', 'tn', tn),
            ('cm/fp', 'fp', fp),
            ('cm/fn', 'fn', fn),
            ('cm/tp', 'tp', tp),
            ('metrics/precision', 'precision', prec),
            ('metrics/recall', 'recall', rec),
            ('metrics/f1', 'f1', f1),
            ('metrics/auc', 'auc', auc),
            ('acc/acc', 'acc', acc),
            ('acc/hem', 'acc_hem', acc_hem),
            ('acc/all', 'acc_all', acc_all),
        ]
        for tag, key, value in metrics:
            summarywriter.add_scalar(tag, value, e)
            recorded_data[key].append(value)
        np.savez(f'{args.out}/results', **recorded_data)

    model = torch.nn.DataParallel(model)
    train_loss = np.nan
    logged_eval(0)
    for e in trange(args.epochs, desc='Epoch'):
        train_loss = train(model, opt, train_loader, class_weights, args.device)
        # Advance the schedule after the optimiser steps of this epoch. This
        # yields the same multiplier schedule(e) for epoch e as the previous
        # scheduler.step(e) call, without the deprecated epoch argument.
        scheduler.step()
        logged_eval(e + 1)

    summarywriter.close()

    subj_acc = evaluate_subj_acc(model, validset, validset_subjects, args.device)
    np.savez(f'{args.out}/subj_acc', **subj_acc)


def evaluate_subj_acc(model, dataset, subjects, device):
    """Compute the classification accuracy separately for every subject.

    ``dataset`` is iterated one sample at a time in order, in lockstep with
    ``subjects``. Each sample is expected to be a stack of rotated views
    (5-D once batched); the logits of the views are averaged and a mean
    logit above 0 counts as a positive prediction.

    Returns:
        A dict mapping each subject to its accuracy score.
    """
    model.eval()

    scores_of = defaultdict(list)
    targets_of = defaultdict(list)

    loader = DataLoader(dataset, batch_size=1, num_workers=1, shuffle=False)

    for (images, target), subject in tqdm(zip(loader, subjects), total=len(subjects), leave=False):
        images, target = to_device(device, images, target)
        batch, rotations, chan, rows, cols = images.size()
        with torch.no_grad():
            score = model(images.view(-1, chan, rows, cols))
            score = score.view(batch, rotations).mean(1)
        targets_of[subject].append(target.cpu())
        scores_of[subject].append(score.cpu())

    subj_acc = {}
    for subject, target_chunks in targets_of.items():
        y_true = torch.cat(target_chunks).numpy()
        y_pred = torch.cat(scores_of[subject]).numpy() > 0
        subj_acc[subject] = accuracy_score(y_true, y_pred)

    return subj_acc


def parse_args():
    """Parse the command-line arguments of the learning-rate ablation script.

    Returns:
        The ``argparse.Namespace``. Its ``out`` attribute is the requested
        output folder extended by a sub-folder named by ``unique_string()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataroot', default='data', help='path to dataset')
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--epochs', type=int, default=6)
    parser.add_argument('--seed', default=1, type=int, help='random seed')
    parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu')
    parser.add_argument('--out', default='results', help='output folder')
    # The default is given as an int (was the string '450') to match type=int
    parser.add_argument('--res', type=int, default=450, help='Desired input resolution')
    args = parser.parse_args()
    args.out = os.path.join(args.out, unique_string())
    return args


if __name__ == '__main__':
    # Script entry point: configure the run, then train with validation.
    torch.backends.cudnn.benchmark = True

    args = parse_args()
    print(args)

    set_seeds(args.seed)
    os.makedirs(args.out, exist_ok=True)

    train_validate(args)

+ 208
- 0
main_manual_abl_testrot.py View File

@@ -0,0 +1,208 @@
import argparse
import os
from collections import defaultdict

import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_fscore_support, accuracy_score
from tensorboardX import SummaryWriter
from torch.optim.lr_scheduler import StepLR, LambdaLR
from torch.utils.data import DataLoader
from tqdm import tqdm, trange

from dataset import get_dataset, get_tf_valid_norot_transform, get_tf_train_transform
from model import get_model
from utils import IncrementalAverage, to_device, set_seeds, unique_string, count_parameters


def evaluate(model, valid_loader, class_weights, device):
    """Evaluate ``model`` on ``valid_loader`` without test-time rotations.

    The model output of every batch is flattened to one logit per sample. A
    logit above 0 counts as a positive prediction.

    Returns:
        ``(loss, confusion_matrix, auc, precision, recall, f1)`` where
        precision, recall and F1 use ``average='weighted'`` and ``loss`` is
        the value of the ``IncrementalAverage`` fed with every batch loss.
    """
    model.eval()

    label_chunks, score_chunks = [], []
    running_loss = IncrementalAverage()
    for batch_img, batch_label in tqdm(valid_loader, leave=False):
        batch_img, batch_label = to_device(device, batch_img, batch_label)
        with torch.no_grad():
            logits = model(batch_img).view(-1)
        batch_loss = lossfn(logits, batch_label.to(logits.dtype), class_weights)
        label_chunks.append(batch_label.cpu())
        score_chunks.append(logits.cpu())
        running_loss.update(batch_loss.item())

    labels = torch.cat(label_chunks).numpy()
    scores = torch.cat(score_chunks).numpy()
    decisions = scores > 0

    cm = confusion_matrix(labels, decisions)
    auc = roc_auc_score(labels, scores)
    prec, rec, f1, _ = precision_recall_fscore_support(labels, decisions, average='weighted')
    return running_loss.value, cm, auc, prec, rec, f1


def train(model, opt, train_loader, class_weights, device):
    """Run one optimisation pass over ``train_loader``.

    For every batch the model output is flattened to one logit per sample,
    the weighted loss is computed with ``lossfn`` and one optimiser step is
    taken.

    Returns:
        The value of the ``IncrementalAverage`` fed with every batch loss.
    """
    model.train()
    running_loss = IncrementalAverage()
    for batch_img, batch_label in tqdm(train_loader, leave=False):
        batch_img, batch_label = to_device(device, batch_img, batch_label)
        logits = model(batch_img).view(-1)
        batch_loss = lossfn(logits, batch_label.to(logits.dtype), class_weights)
        running_loss.update(batch_loss.item())

        opt.zero_grad()
        batch_loss.backward()
        opt.step()
    return running_loss.value


def lossfn(prediction, target, class_weights):
    """Binary cross-entropy on logits with a class-balancing positive weight.

    The weight of the positive class is ``class_weights[0] / class_weights[1]``,
    broadcast to one entry per element of ``target``.
    """
    weight_ratio = class_weights[0] / class_weights[1]
    positive_weight = weight_ratio.expand(len(target))
    return F.binary_cross_entropy_with_logits(prediction, target, pos_weight=positive_weight)


def schedule(epoch):
    """Return the learning-rate multiplier for ``epoch``.

    The multiplier is 1 for epochs below 2, 0.1 for epochs 2 and 3, and 0.01
    from epoch 4 onwards.
    """
    if epoch < 2:
        return 1
    if epoch < 4:
        return 0.1
    return 0.01


def train_validate(args):
    """Train and validate without test-time rotation averaging.

    This ablation validates on single, unrotated views. The model is
    evaluated once before training (epoch 0) and after each epoch. All
    metrics are written to TensorBoard and to ``<args.out>/results.npz``.
    After the last epoch the model state dict is saved to
    ``<args.out>/model.pt`` and the per-subject accuracy is stored in
    ``<args.out>/subj_acc.npz``.

    Args:
        args: Parsed command-line arguments; reads ``device``, ``dataroot``,
            ``res``, ``batch_size``, ``epochs`` and ``out``.
    """
    model = get_model().to(args.device)
    print("Model parameters:", count_parameters(model))

    trainset, validset, validset_subjects, class_weights = get_dataset(
        args.dataroot,
        tf_valid=get_tf_valid_norot_transform(args.res),
        tf_train=get_tf_train_transform(args.res))
    class_weights = class_weights.to(args.device)
    print(f"Trainset length: {len(trainset)}")
    print(f"Validset length: {len(validset)}")
    print(f"class_weights = {class_weights}")

    train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True)
    valid_loader = DataLoader(validset, batch_size=args.batch_size, num_workers=6, shuffle=False)

    # Layer-wise learning rates: small for early layers, large for the new head
    opt = torch.optim.Adam([
        {'params': model.paramgroup01(), 'lr': 1e-6},
        {'params': model.paramgroup234(), 'lr': 1e-4},
        {'params': model.parameters_classifier(), 'lr': 1e-2},
    ])
    # One multiplier function per parameter group
    scheduler = LambdaLR(opt, lr_lambda=[schedule, schedule, schedule])

    summarywriter = SummaryWriter(args.out)
    recorded_data = defaultdict(list)

    def logged_eval(e):
        """Evaluate and log all metrics for epoch ``e``."""
        valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader, class_weights, args.device)

        # Derive some accuracy metrics from confusion matrix
        tn, fp, fn, tp = cm.ravel()
        acc = (tp + tn) / cm.sum()
        acc_hem = tn / (tn + fp)
        acc_all = tp / (tp + fn)

        print(f"epoch={e} f1={f1:.4f}")

        # (tensorboard tag, key in results.npz, value). Driving both sinks
        # from one table records every metric exactly once per evaluation;
        # the previous code appended 'tn' twice.
        metrics = [
            ('loss/train', 'loss_train', train_loss),
            ('loss/valid', 'loss_valid', valid_loss),
            ('cm/tn', 'tn', tn),
            ('cm/fp', 'fp', fp),
            ('cm/fn', 'fn', fn),
            ('cm/tp', 'tp', tp),
            ('metrics/precision', 'precision', prec),
            ('metrics/recall', 'recall', rec),
            ('metrics/f1', 'f1', f1),
            ('metrics/auc', 'auc', auc),
            ('acc/acc', 'acc', acc),
            ('acc/hem', 'acc_hem', acc_hem),
            ('acc/all', 'acc_all', acc_all),
        ]
        for tag, key, value in metrics:
            summarywriter.add_scalar(tag, value, e)
            recorded_data[key].append(value)
        np.savez(f'{args.out}/results', **recorded_data)

    model = torch.nn.DataParallel(model)
    train_loss = np.nan
    logged_eval(0)
    for e in trange(args.epochs, desc='Epoch'):
        train_loss = train(model, opt, train_loader, class_weights, args.device)
        # Advance the schedule after the optimiser steps of this epoch. This
        # yields the same multiplier schedule(e) for epoch e as the previous
        # scheduler.step(e) call, without the deprecated epoch argument.
        scheduler.step()
        logged_eval(e + 1)

    torch.save(model.state_dict(), f'{args.out}/model.pt')
    summarywriter.close()

    subj_acc = evaluate_subj_acc(model, validset, validset_subjects, args.device)
    np.savez(f'{args.out}/subj_acc', **subj_acc)


def evaluate_subj_acc(model, dataset, subjects, device):
    """Compute the classification accuracy separately for every subject.

    ``dataset`` is iterated one sample at a time in order, in lockstep with
    ``subjects``. A mean logit above 0 counts as a positive prediction.

    Both input layouts are supported:

    * 5-D batches ``(batch, rotations, channels, height, width)``: the
      logits of all rotated views are averaged per sample;
    * 4-D batches ``(batch, channels, height, width)``: the single logit of
      each sample is used. This script validates with the no-rotation
      transform, which produces this layout; the previous unconditional
      5-value unpacking failed on it.

    Args:
        model: Network returning one logit per input image.
        dataset: Dataset yielding ``(image, label)`` pairs.
        subjects: Sequence with the subject of every dataset item, in order.
        device: Device the data is moved to.

    Returns:
        A dict mapping each subject to its accuracy score.
    """
    model.eval()

    subj_pred = defaultdict(list)
    subj_label = defaultdict(list)

    dataloader = DataLoader(dataset, batch_size=1, num_workers=1, shuffle=False)

    for (img, cls), subj in tqdm(zip(dataloader, subjects), total=len(subjects), leave=False):
        img, cls = to_device(device, img, cls)
        with torch.no_grad():
            if img.dim() == 5:
                bs, nrot, c, h, w = img.size()
                cls_hat = model(img.view(-1, c, h, w))
                cls_hat = cls_hat.view(bs, nrot).mean(1)
            else:
                cls_hat = model(img).view(-1)
        subj_label[subj].append(cls.cpu())
        subj_pred[subj].append(cls_hat.cpu())

    subj_acc = {}
    for subj, label_chunks in subj_label.items():
        y_true = torch.cat(label_chunks).numpy()
        y_pred = torch.cat(subj_pred[subj]).numpy() > 0
        subj_acc[subj] = accuracy_score(y_true, y_pred)

    return subj_acc


def parse_args():
    """Parse the command-line arguments of the test-rotation ablation script.

    Returns:
        The ``argparse.Namespace``. Its ``out`` attribute is the requested
        output folder extended by a sub-folder named by ``unique_string()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataroot', default='data', help='path to dataset')
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--epochs', type=int, default=6)
    parser.add_argument('--seed', default=1, type=int, help='random seed')
    parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu')
    parser.add_argument('--out', default='results', help='output folder')
    # The default is given as an int (was the string '450') to match type=int
    parser.add_argument('--res', type=int, default=450, help='Desired input resolution')
    args = parser.parse_args()
    args.out = os.path.join(args.out, unique_string())
    return args


if __name__ == '__main__':
    # Script entry point: configure the run, then train with validation.
    torch.backends.cudnn.benchmark = True

    args = parse_args()
    print(args)

    set_seeds(args.seed)
    os.makedirs(args.out, exist_ok=True)

    train_validate(args)

+ 183
- 0
model.py View File

@@ -0,0 +1,183 @@
# Code adapted from: https://github.com/Cadene/pretrained-models.pytorch
import math
from collections import OrderedDict
from itertools import chain

import torch.nn as nn
from torch.utils import model_zoo

from utils import Flatten


class SEModule(nn.Module):
    """Channel gate: rescales the input by per-channel factors in (0, 1).

    The input is average-pooled to 1x1, passed through two 1x1 convolutions
    (``channels -> channels // reduction -> channels``) with a ReLU in
    between, squashed by a sigmoid and multiplied onto the original input.
    """

    def __init__(self, channels, reduction):
        super().__init__()
        squeezed = channels // reduction
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.fc1 = nn.Conv2d(channels, squeezed, kernel_size=1, padding=0)
        self.relu = nn.ReLU(inplace=True)
        self.fc2 = nn.Conv2d(squeezed, channels, kernel_size=1, padding=0)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        gate = self.fc1(self.avg_pool(x))
        gate = self.fc2(self.relu(gate))
        return x * self.sigmoid(gate)


class SEResNeXtBottleneck(nn.Module):
    """
    ResNeXt bottleneck type C with a Squeeze-and-Excitation module.

    The main branch is 1x1 conv -> grouped 3x3 conv -> 1x1 conv (each followed
    by batch norm), gated by an ``SEModule`` and added to the shortcut, which
    is either the input itself or ``downsample(input)``.
    """
    expansion = 4

    def __init__(self, inplanes, planes, groups, reduction, stride=1, downsample=None, base_width=4):
        super().__init__()
        # Width of the grouped 3x3 convolution
        width = math.floor(planes * (base_width / 64)) * groups
        out_channels = planes * 4
        self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, stride=1)
        self.bn1 = nn.BatchNorm2d(width)
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        self.conv3 = nn.Conv2d(width, out_channels, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU(inplace=True)
        self.se_module = SEModule(out_channels, reduction=reduction)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Project the shortcut only when a downsample module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)

        out = self.se_module(out) + shortcut
        return self.relu(out)


class SENet(nn.Module):
    """Squeeze-and-Excitation backbone with a single-output head.

    The network consists of a stem (``layer0``), four residual stages
    (``layer1`` to ``layer4``) and the head ``cls``, which applies global
    average pooling, flattens and maps to one output value per image.
    """

    def __init__(self, block, layers, groups, reduction, inplanes=128,
                 downsample_kernel_size=3, downsample_padding=1):
        super().__init__()
        self.inplanes = inplanes

        stem = OrderedDict([
            ('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False)),
            ('bn1', nn.BatchNorm2d(inplanes)),
            ('relu1', nn.ReLU(inplace=True)),
            # To preserve compatibility with Caffe weights `ceil_mode=True`
            # is used instead of `padding=1`.
            ('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)),
        ])
        self.layer0 = nn.Sequential(stem)

        shared = dict(groups=groups, reduction=reduction)
        # The first stage keeps the resolution and always uses a 1x1 shortcut
        self.layer1 = self._make_layer(block, planes=64, blocks=layers[0],
                                       downsample_kernel_size=1, downsample_padding=0,
                                       **shared)
        # The remaining stages halve the resolution with a configurable shortcut
        strided = dict(stride=2,
                       downsample_kernel_size=downsample_kernel_size,
                       downsample_padding=downsample_padding,
                       **shared)
        self.layer2 = self._make_layer(block, planes=128, blocks=layers[1], **strided)
        self.layer3 = self._make_layer(block, planes=256, blocks=layers[2], **strided)
        self.layer4 = self._make_layer(block, planes=512, blocks=layers[3], **strided)
        self.cls = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            Flatten(),
            nn.Linear(512 * block.expansion, 1)
        )

    def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
                    downsample_kernel_size=1, downsample_padding=0):
        """Build one stage of ``blocks`` blocks and update ``self.inplanes``."""
        out_planes = planes * block.expansion
        downsample = None
        # A projection shortcut is needed whenever shape or resolution changes
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes,
                          kernel_size=downsample_kernel_size, stride=stride,
                          padding=downsample_padding, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, groups, reduction, stride, downsample)]
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes, groups, reduction) for _ in range(1, blocks))

        return nn.Sequential(*stage)

    def paramgroup01(self):
        """Iterate over the parameters of ``layer0`` and ``layer1``."""
        return chain(
            self.layer0.parameters(),
            self.layer1.parameters(),
        )

    def paramgroup234(self):
        """Iterate over the parameters of ``layer2``, ``layer3`` and ``layer4``."""
        return chain(
            self.layer2.parameters(),
            self.layer3.parameters(),
            self.layer4.parameters(),
        )

    def parameters_classifier(self):
        """Iterate over the parameters of the head ``cls``."""
        return self.cls.parameters()

    def forward(self, x):
        for stage in (self.layer0, self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        return self.cls(x)


def get_model():
    """Build the SE-ResNeXt50 (32x4d) network and load pretrained weights.

    The network is an ``SENet`` made of ``SEResNeXtBottleneck`` blocks in a
    3-4-6-3 layout. The checkpoint is fetched through ``model_zoo.load_url``
    and loaded with ``strict=False``, so checkpoint and model keys that do
    not match each other are skipped instead of raising.
    """
    weights_url = 'http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth'

    net = SENet(
        SEResNeXtBottleneck,
        [3, 4, 6, 3],
        groups=32,
        reduction=16,
        inplanes=64,
        downsample_kernel_size=1,
        downsample_padding=0,
    )
    pretrained = model_zoo.load_url(weights_url)
    net.load_state_dict(pretrained, strict=False)
    return net

+ 166
- 0
plot.py View File

@@ -0,0 +1,166 @@
from glob import glob

import numpy as np
import matplotlib.pyplot as plt
from os.path import join

from scipy.stats import mannwhitneyu

# Experiments to report: maps a display label (printed above each table and
# used as the line label in the plots) to the directory that is searched for
# per-run '<root>/*/results.npz' files. The commented-out entries are further
# experiment directories that can be enabled by uncommenting them.
dataroots = {
'PROPOSAL' : 'results',
#'model_cnmc_res_128' : 'results/model_cnmc_res_128',
#'model_cnmc_res_224' : 'results/model_cnmc_res_224',
#'model_cnmc_res_256' : 'results/model_cnmc_res_256',
#'model_cnmc_res_450' : 'results/model_cnmc_res_450',
#'model_cnmc_res_450_blue_only' : 'results/model_cnmc_res_450_blue_only',
#'model_cnmc_res_450_green_only' : 'results/model_cnmc_res_450_green_only',
#'model_cnmc_res_450_red_only' : 'results/model_cnmc_res_450_red_only',
#'model_cnmc_res_450_no_blue' : 'results/model_cnmc_res_450_no_blue',
#'model_cnmc_res_450_no_green' : 'results/model_cnmc_res_450_no_green',
#'model_cnmc_res_450_no_red' : 'results/model_cnmc_res_450_no_red',
#'model_cnmc_res_450_grayscale' : 'results/model_cnmc_res_450_grayscale',
}




def get_values(dataroot, key):
    """Stack the array stored under ``key`` from every run below ``dataroot``.

    Each immediate subdirectory of ``dataroot`` that contains a
    ``results.npz`` file counts as one run.

    Args:
        dataroot: Directory whose subdirectories hold ``results.npz`` files.
        key: Name of the array to read from each archive.

    Returns:
        An array whose first axis indexes the runs, in sorted path order.

    Raises:
        FileNotFoundError: If no ``results.npz`` is found below ``dataroot``.
    """
    # Sorted so that the run order is the same on every call: callers index
    # the result of one call with per-run indices computed from another.
    npz_paths = sorted(glob(join(dataroot, '*', 'results.npz')))
    if not npz_paths:
        raise FileNotFoundError(
            "no '*/results.npz' files found below {!r}".format(dataroot))

    vals = []
    for path in npz_paths:
        # The context manager closes the archive once the array is read.
        with np.load(path) as recorded_data:
            vals.append(recorded_data[key])
    return np.stack(vals, 0)


def plot_mean_std(dataroot, key, ax, **kwargs):
    """Draw the across-run mean of ``key`` with a +/- one std band on ``ax``.

    The values come from ``get_values(dataroot, key)``; mean and standard
    deviation are taken over its first axis. ``kwargs`` are forwarded to
    ``ax.plot``.
    """
    runs = get_values(dataroot, key)
    center = np.mean(runs, 0)
    spread = np.std(runs, 0)
    xs = np.arange(len(center))

    # Drop the first entry so that we have nicely zoomed plots.
    shown = slice(1, None)
    xs, center, spread = xs[shown], center[shown], spread[shown]

    lower = center - spread
    upper = center + spread
    ax.plot(xs, center, **kwargs)
    ax.fill_between(xs, lower, upper, alpha=0.2)


def plot3(key, ax):
    """Plot ``key`` on ``ax`` for every entry of ``dataroots``, labelled by its key."""
    for label, root in dataroots.items():
        plot_mean_std(root, key, ax, label=label)


def print_final_min_mean_max(dataroot, key, model_epochs):
    """Print min, mean ± std and max of ``key`` (times 100) over all runs.

    For run ``i`` the value at index ``model_epochs[i]`` is used. The three
    fields are printed on one line, separated by tabs.
    """
    percent = get_values(dataroot, key) * 100
    run_index = np.arange(len(percent))
    selected = percent[run_index, model_epochs]

    lowest = np.min(selected)
    average = np.mean(selected)
    spread = np.std(selected)
    highest = np.max(selected)
    print(f'{lowest:.2f}', f'{average:.2f} ± {spread:.2f}', f'{highest:.2f}', sep='\t')


def print_final_table(dataroot):
    """Print one summary line per metric for the runs below ``dataroot``.

    Each run is evaluated at the index where its 'f1' values are highest.
    """
    best_model_epochs = np.argmax(get_values(dataroot, 'f1'), axis=1)

    metrics_in_order = ('acc', 'acc_all', 'acc_hem', 'f1', 'precision', 'recall')
    for metric in metrics_in_order:
        print_final_min_mean_max(dataroot, metric, best_model_epochs)


def get_best_f1_scores(dataroot):
    """Return, for each run below ``dataroot``, its highest recorded 'f1' value."""
    scores = get_values(dataroot, 'f1')
    best_columns = np.argmax(scores, axis=1)
    rows = np.arange(len(scores))
    return scores[rows, best_columns]


def is_statistically_greater(dataroot1, dataroot2):
    """Mann-Whitney U test of best F1 scores, ``dataroot1`` against ``dataroot2``.

    Uses the one-sided alternative 'greater' and returns the ``(u, p)`` pair
    produced by ``mannwhitneyu``.
    """
    first, second = (get_best_f1_scores(root) for root in (dataroot1, dataroot2))
    u, p = mannwhitneyu(first, second, alternative='greater')
    return u, p


######

# Print the summary table of every configured experiment, each preceded by
# its label and followed by a blank line.
for label, root in dataroots.items():
    print(label)
    print_final_table(root)
    print()


######

#print("MWU-Test of PROPOSAL > NOSPECLR")
#print(is_statistically_greater(dataroots['PROPOSAL'], dataroots['NOSPECLR']))
#print()
#print("MWU-Test of PROPOSAL > NOROT")
#print(is_statistically_greater(dataroots['PROPOSAL'], dataroots['NOROT']))

######

fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(9, 5))

# (panel title, key in results.npz) in row-major panel order.
panels = [
    ('Accuracy', 'acc'),
    ('Sensitivity', 'acc_all'),
    ('Specificity', 'acc_hem'),
    ('F1 score', 'f1'),
    ('Precision', 'precision'),
    ('Recall', 'recall'),
]
for panel, (title, key) in zip(ax.flat, panels):
    panel.set_title(title)
    plot3(key, panel)

# Every panel draws the same labelled lines, so the legend entries are taken
# from a single panel. Calling fig.legend() without handles would collect
# them from all six axes and repeat each label six times.
handles, labels = ax[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc='lower center', ncol=3)
fig.tight_layout()
# Leave room below the panels for the figure-level legend.
fig.subplots_adjust(bottom=0.12)
fig.savefig('results/plot_ablations.pdf')

######
# Directory of the single run whose per-subject accuracy and training curves
# are plotted below.
npload = 'results/model_cnmc_res_128'
npload_sub = npload + '/subj_acc.npz'
npload_res = npload + '/results.npz'

# Read everything that is needed inside the context manager so the archive
# is closed again instead of staying open for the rest of the script.
with np.load(npload_sub) as subj_acc:
    subj = sorted(subj_acc.keys())
    acc = [subj_acc[k] for k in subj]

# One bar per key of the archive, in sorted key order.
fig, ax = plt.subplots(figsize=(9, 2))
ax.bar(range(len(acc)), acc, width=0.3, tick_label=subj)
fig.tight_layout()
fig.savefig('results/plot_subj_acc.pdf')

######

# Load the curves inside a context manager so the archive is closed again.
with np.load(npload_res) as data:
    loss_train = data['loss_train']
    # The first entry of the validation series is skipped; the x axis below
    # therefore starts at 1 for these two curves.
    loss_valid = data['loss_valid'][1:]
    f1_valid = data['f1'][1:]

fig, ax = plt.subplots(ncols=3, figsize=(9, 2))
ax[0].plot(range(len(loss_train)), loss_train)
ax[0].set_title("Training set loss")
ax[1].plot(range(1, len(loss_valid) + 1), loss_valid)
ax[1].set_title("Preliminary test set loss")
ax[2].plot(range(1, len(f1_valid) + 1), f1_valid)
ax[2].set_title("Preliminary test set F1-score")
fig.tight_layout()
fig.savefig('results/plot_curves.pdf')

######

plt.show()

+ 679
- 0
run.ipynb View File

@@ -0,0 +1,679 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "cd8aaf96",
"metadata": {},
"outputs": [],
"source": [
"!pip install pandas tqdm"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "26bd5e25",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results"
]
},
{
"cell_type": "code",
"execution_count": 55,
"id": "b753e6b8",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:41, 1.43it/s] \n",
"Positive: 1234\n",
"Negative: 633\n",
"AUC: 0.8797024225483345\n"
]
}
],
"source": [
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/20220216T154306Z.AZHL\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3246460b",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC\n",
"# res : 32\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 32"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8a953a39",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC\n",
"# res : 128\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 128"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12c15b33",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC\n",
"# res : 224\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 224"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "08ba15b4",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC\n",
"# res : 256\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 256"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3cf25ec3",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "73b9d9d3",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC_Grayscale\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_grayscale\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce16353c",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC_no_red\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_red\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "959ab837",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC_no_green\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_green\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "879beb46",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC_no_blue\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_blue\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6d545dce",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC_red_only\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_red_only\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "25480226",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC_green_only\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_green_only\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a064d169",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC_blue_only\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_blue_only\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8d53828a",
"metadata": {},
"outputs": [],
"source": [
"# TRAIN\n",
"# dataset : CNMC\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
]
},
{
"cell_type": "code",
"execution_count": 183,
"id": "ea9c2f23",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PROPOSAL\n",
"68.51\t83.57 ± 5.16\t89.61\n",
"84.33\t89.06 ± 2.09\t92.95\n",
"38.73\t73.26 ± 11.77\t84.72\n",
"66.76\t83.35 ± 5.61\t89.57\n",
"66.81\t83.36 ± 5.60\t89.55\n",
"68.51\t83.57 ± 5.16\t89.61\n",
"\n",
"Figure(900x500)\n",
"Figure(900x200)\n",
"Figure(900x200)\n"
]
}
],
"source": [
"# PLOT\n",
"# dataset : CNMC\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 plot.py"
]
},
{
"cell_type": "markdown",
"id": "8c92073d",
"metadata": {},
"source": [
"# EVALUATION"
]
},
{
"cell_type": "code",
"execution_count": 135,
"id": "b25a4267",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:05, 11.69it/s] \n",
"Positive: 1425\n",
"Negative: 442\n",
"AUC: 0.6153299354864846\n"
]
}
],
"source": [
"# EVALUATION \n",
"# dataset : CNMC\n",
"# res : 32\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_32\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 32"
]
},
{
"cell_type": "code",
"execution_count": 136,
"id": "b14e3e67",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:09, 6.24it/s] \n",
"Positive: 1315\n",
"Negative: 552\n",
"AUC: 0.7711131113339208\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC\n",
"# res : 128\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_128\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 128"
]
},
{
"cell_type": "code",
"execution_count": 137,
"id": "dfb25744",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:14, 4.19it/s] \n",
"Positive: 1262\n",
"Negative: 605\n",
"AUC: 0.8143717274835677\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC\n",
"# res : 224\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_224\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 224"
]
},
{
"cell_type": "code",
"execution_count": 138,
"id": "68600db4",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:41, 1.44it/s] \n",
"Positive: 1195\n",
"Negative: 672\n",
"AUC: 0.8400701597139936\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC\n",
"# res : 256\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_256\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 256"
]
},
{
"cell_type": "code",
"execution_count": 139,
"id": "71a5547e",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:41, 1.42it/s] \n",
"Positive: 1241\n",
"Negative: 626\n",
"AUC: 0.8813918512441892\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 450"
]
},
{
"cell_type": "code",
"execution_count": 154,
"id": "58450362",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:41, 1.42it/s] \n",
"Positive: 1261\n",
"Negative: 606\n",
"AUC: 0.8045073375262055\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC_Grayscale\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_grayscale\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_grayscale/phase2\" --batch-size 32 --res 450"
]
},
{
"cell_type": "code",
"execution_count": 155,
"id": "48c40f18",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:44, 1.33it/s] \n",
"Positive: 1178\n",
"Negative: 689\n",
"AUC: 0.8661869929814967\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC_no_red\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_no_red\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_red/phase2\" --batch-size 32 --res 450"
]
},
{
"cell_type": "code",
"execution_count": 156,
"id": "b6ad9232",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:52, 1.12it/s] \n",
"Positive: 1266\n",
"Negative: 601\n",
"AUC: 0.8018310900454735\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC_no_green\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_no_green\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_green/phase2\" --batch-size 32 --res 450"
]
},
{
"cell_type": "code",
"execution_count": 157,
"id": "1ba76d51",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:48, 1.23it/s] \n",
"Positive: 1248\n",
"Negative: 619\n",
"AUC: 0.8570821813062721\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC_no_blue\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_no_blue\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_blue/phase2\" --batch-size 32 --res 450"
]
},
{
"cell_type": "code",
"execution_count": 158,
"id": "05cfaf9c",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:52, 1.12it/s] \n",
"Positive: 1239\n",
"Negative: 628\n",
"AUC: 0.8013924335875389\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC_red_only\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_red_only\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_red_only/phase2\" --batch-size 32 --res 450"
]
},
{
"cell_type": "code",
"execution_count": 159,
"id": "1ad09456",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:52, 1.13it/s] \n",
"Positive: 1221\n",
"Negative: 646\n",
"AUC: 0.8590070792695896\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC_green_only\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_green_only\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_green_only/phase2\" --batch-size 32 --res 450"
]
},
{
"cell_type": "code",
"execution_count": 160,
"id": "41e8d3a0",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [00:52, 1.12it/s] \n",
"Positive: 1255\n",
"Negative: 612\n",
"AUC: 0.8268636253152251\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC_blue_only\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_blue_only\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_blue_only/phase2\" --batch-size 32 --res 450"
]
},
{
"cell_type": "code",
"execution_count": 186,
"id": "88bc18db",
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading model\n",
"Classifying\n",
"59it [01:24, 1.43s/it] \n",
"Positive: 1235\n",
"Negative: 632\n",
"AUC: 0.8588406050294211\n"
]
}
],
"source": [
"# EVALUATION\n",
"# dataset : CNMC-blackborder\n",
"# res : 450\n",
"# epochs : 100\n",
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_w_blackborder\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 450"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ec31125a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

+ 92
- 0
submission.py View File

@@ -0,0 +1,92 @@
import argparse
import os
import zipfile
from os.path import join

import torch
from PIL import Image
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm
import numpy as np

from model import get_model
from dataset import get_tf_vaild_rot_transform

from sklearn import metrics
import matplotlib.pyplot as plt
import csv
from sklearn.metrics import roc_curve, roc_auc_score

class OrderedImages(Dataset):
    """Dataset over images named ``1.bmp``, ``2.bmp``, ... inside ``root``.

    Item ``index`` is the image ``<index + 1>.bmp`` passed through
    ``transform``.

    Args:
        root: Directory containing the numbered ``.bmp`` files.
        transform: Callable applied to each opened image.
        length: Number of images to expose. When ``None`` (the default) it
            is the number of files in ``root`` named ``<digits>.bmp``, which
            assumes the files are numbered consecutively starting at 1.

    Raises:
        OSError: From ``os.listdir`` when ``length`` is ``None`` and ``root``
            cannot be listed.
    """

    def __init__(self, root, transform, length=None):
        super().__init__()
        self.root = root
        self.transform = transform
        if length is None:
            # Derive the size from the directory instead of hard-coding the
            # size of one particular test set.
            length = sum(
                1 for name in os.listdir(root)
                if name.endswith('.bmp') and name[:-len('.bmp')].isdigit()
            )
        self.length = length

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        # File names are 1-based while dataset indices are 0-based. The image
        # is handed to the transform as opened (no explicit RGB conversion).
        img = Image.open(os.path.join(self.root, f'{index + 1}.bmp'))
        return self.transform(img)

VALIDATION_ALL = 1219
VALIDATION_HEM = 648
parser = argparse.ArgumentParser()
parser.add_argument('--batch-size', type=int, default=64)
parser.add_argument('--modelroot', default='results/20190313T101236Z.LGJL', help='path to model')
parser.add_argument('--dataroot', default='data/phase3', help='path to dataset')
parser.add_argument('--res', type=int, default=450, help='Desired input resolution')
parser.add_argument('--labels', default=r'/home/feoktistovar67431/data/resources/phase2_labels.csv',
                    help='path to a CSV with the true labels, used only for the AUC report')
args = parser.parse_args()

dataset = OrderedImages(args.dataroot, get_tf_vaild_rot_transform(args.res))

print("Loading model")
model = get_model().to('cuda:0')
model = torch.nn.DataParallel(model)
model.load_state_dict(torch.load(join(args.modelroot, 'model.pt')))
model.eval()

dataloader = DataLoader(dataset, batch_size=args.batch_size, num_workers=6)

print("Classifying")
all_scores = []
# tqdm takes the number of batches from len(dataloader), which also counts
# the final, smaller batch.
for x in tqdm(dataloader):
    x = x.to('cuda:0')
    # Each sample is a stack of `nrot` views of one image; fold the views
    # into the batch axis for the forward pass and average the outputs per
    # image afterwards.
    bs, nrot, c, h, w = x.size()
    with torch.no_grad():
        y = model(x.view(-1, c, h, w))
        y = y.view(bs, nrot).mean(1)
    all_scores.append(y.cpu())

all_scores = torch.cat(all_scores)
# A positive mean output is classified as the positive class.
all_labels = all_scores > 0
print("Positive:", all_labels.sum().item())
print("Negative:", len(all_labels) - all_labels.sum().item())

if os.path.isfile(args.labels):
    with open(args.labels, "r", newline='') as label_file:
        # Rows are kept exactly as parsed.
        # NOTE(review): this presumably expects one label per row and no
        # header - confirm against the actual file.
        true_labels = list(csv.reader(label_file, delimiter=','))
    if len(true_labels) == len(all_scores):
        # Show the area under the ROC curve. It is computed from the
        # continuous scores: thresholded 0/1 predictions give a single
        # operating point, not a ranking.
        print(f'AUC: {roc_auc_score(true_labels, all_scores.numpy())}')
    else:
        print(f'Skipping AUC: {len(true_labels)} label rows for {len(all_scores)} predictions')
else:
    print(f'Skipping AUC: label file {args.labels} not found')

csv_path = join(args.modelroot, 'submission.csv')
zip_path = join(args.modelroot, 'submission.zip')
np.savetxt(csv_path, all_labels.cpu().numpy(), '%d')
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write(csv_path, 'isbi_valid.predict')

+ 58
- 0
utils.py View File

@@ -0,0 +1,58 @@
import pickle
import random
import string
from datetime import datetime

import torch
import torch.nn as nn


class IncrementalAverage:
    """Running mean that is updated one observation at a time.

    ``value`` holds the mean of all observations passed to ``update`` so far
    (0 before the first one) and ``counter`` holds how many there were.
    """

    def __init__(self):
        self.counter = 0
        self.value = 0

    def update(self, x):
        """Fold the observation ``x`` into the running mean."""
        self.counter += 1
        # Move the mean towards x by 1/counter of the current distance.
        step = (x - self.value) / self.counter
        self.value += step


class Flatten(nn.Module):
    """Module that views its input as ``(x.size(0), -1)``."""

    def forward(self, x):
        leading = x.size(0)
        return x.view(leading, -1)


class SizePrinter(nn.Module):
    """Debugging module: prints the size of its input and returns it unchanged."""

    def forward(self, x):
        shape = x.size()
        print(shape)
        return x


def count_parameters(model, grad_only=True):
    """Return the total number of elements in the parameters of ``model``.

    With ``grad_only`` set (the default), parameters whose ``requires_grad``
    is false are left out of the count.
    """
    total = 0
    for param in model.parameters():
        if grad_only and not param.requires_grad:
            continue
        total += param.numel()
    return total


def to_device(device, *tensors):
    """Call ``.to(device)`` on every given tensor and return the results as a tuple."""
    moved = [tensor.to(device) for tensor in tensors]
    return tuple(moved)


def loop_iter(iter):
    """Yield the items of ``iter`` over and over again.

    The iterable is re-iterated from the start each time it is exhausted.
    The generator ends as soon as a full pass yields nothing (an empty
    iterable, or a one-shot iterator that has already been consumed), because
    looping on would otherwise spin forever without producing an item.

    Args:
        iter: The iterable to cycle through.

    Yields:
        The items of ``iter``, pass after pass.
    """
    while True:
        produced = False
        for item in iter:
            produced = True
            yield item
        if not produced:
            return


def unique_string():
    """Return an identifier of the form ``<YYYYmmddTHHMMSSZ>.<XXXX>``.

    The first part is the current UTC time and the second part is four
    uppercase ASCII letters drawn with the ``random`` module, so the suffix
    depends on that module's current seed state.
    """
    # Imported locally because the module only imports `datetime` itself.
    from datetime import timezone

    # The trailing 'Z' denotes UTC, so the timestamp has to be taken in UTC
    # rather than in local time.
    stamp = datetime.now(timezone.utc).strftime('%Y%m%dT%H%M%SZ')
    suffix = ''.join(random.choice(string.ascii_uppercase) for _ in range(4))
    return '{}.{}'.format(stamp, suffix)


def set_seeds(seed):
    """Seed Python's ``random`` module, torch and all CUDA devices with ``seed``."""
    seeders = (random.seed, torch.manual_seed, torch.cuda.manual_seed_all)
    for seeder in seeders:
        seeder(seed)


def pickle_dump(obj, file):
    """Pickle ``obj`` into the file at path ``file``, overwriting it."""
    with open(file, mode='wb') as handle:
        pickle.Pickler(handle).dump(obj)

Loading…
Cancel
Save