@@ -0,0 +1,35 @@ | |||
# C-NMC Challenge

This is the code release for the paper:

Prellberg J., Kramer O. (2019) Acute Lymphoblastic Leukemia Classification from Microscopic Images Using Convolutional Neural Networks. In: Gupta A., Gupta R. (eds) ISBI 2019 C-NMC Challenge: Classification in Cancer Cell Imaging. Lecture Notes in Bioengineering. Springer, Singapore

## Usage

Use the script `main_manual.py` to train the model on the dataset. The expected training data layout is described below. The most important options are `--dataroot`, `--batch-size`, `--epochs`, `--seed`, `--res` and `--out`.

Use the script `submission.py` to apply the trained model to the test data. The accompanying notebook contains example invocations of both scripts.

## Data Layout

The training data was released in multiple steps during the challenge, which is why the data layout is a little peculiar.

```
data/fold_0/all/*.bmp
data/fold_0/hem/*.bmp
data/fold_1/...
data/fold_2/...
data/phase2/*.bmp
data/phase3/*.bmp
data/phase2.csv
```

The `fold_0` to `fold_2` folders contain the training images, each with two subdirectories, one per class. The directories `phase2` and `phase3` hold the preliminary test set and the final test set, respectively, and contain images numbered starting from `1.bmp`. The labels for the preliminary test set are given in `phase2.csv`, which looks as follows:

```
Patient_ID,new_names,labels
UID_57_29_1_all.bmp,1.bmp,1
UID_57_22_2_all.bmp,2.bmp,1
UID_57_31_3_all.bmp,3.bmp,1
UID_H49_35_1_hem.bmp,4.bmp,0
```
@@ -0,0 +1,184 @@ | |||
import re | |||
from collections import defaultdict | |||
from glob import glob | |||
from os.path import join | |||
import pandas as pd | |||
import torch | |||
import torchvision.transforms.functional as TF | |||
from PIL import Image | |||
from torch.utils.data import Dataset | |||
from torchvision import transforms | |||
STD_RES = 450 | |||
STD_CENTER_CROP = 300 | |||
def file_iter(dataroot): | |||
for file in glob(join(dataroot, '*', '*', '*')): | |||
yield file | |||
def file_match_iter(dataroot): | |||
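    # Note: the greedy '.*' in the pattern leaves only the trailing characters of the
    # fold directory for the 'fold' group (e.g. '0' for 'fold_0'), which lines up with
    # the numeric fold ids 0-2 expected by get_dataset below.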
pattern = re.compile(r'(?P<file>.*(?P<fold>[a-zA-Z0-9_]+)/' | |||
r'(?P<class>hem|all)/' | |||
r'UID_(?P<subject>H?\d+)_(?P<image>\d+)_(?P<cell>\d+)_(all|hem).bmp)') | |||
for file in file_iter(dataroot): | |||
match = pattern.match(file) | |||
if match is not None: | |||
yield file, match | |||
def to_dataframe(dataroot): | |||
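    # Builds one row per image with the columns listed in `keys` below; folds 0-2 come
    # from the training folders and fold 3 from the phase2 preliminary test set.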
data = defaultdict(list) | |||
keys = ['file', 'fold', 'subject', 'class', 'image', 'cell'] | |||
# Load data from the three training folds | |||
for file, match in file_match_iter(dataroot): | |||
for key in keys: | |||
data[key].append(match.group(key)) | |||
# Load data from the phase2 validation set | |||
phase2 = pd.read_csv(join(dataroot, 'phase2.csv'), header=0, names=['file_id', 'file', 'class']) | |||
pattern = re.compile(r'UID_(?P<subject>H?\d+)_(?P<image>\d+)_(?P<cell>\d+)_(all|hem).bmp') | |||
for i, row in phase2.iterrows(): | |||
match = pattern.match(row['file_id']) | |||
data['file'].append(join(dataroot, f'phase2/{i+1}.bmp')) | |||
data['fold'].append('3') | |||
data['subject'].append(match.group('subject')) | |||
data['class'].append('hem' if row['class'] == 0 else 'all') | |||
data['image'].append(match.group('image')) | |||
data['cell'].append(match.group('cell')) | |||
# Convert to dataframe | |||
df = pd.DataFrame(data) | |||
df = df.apply(pd.to_numeric, errors='ignore') | |||
return df | |||
class ISBI2019(Dataset): | |||
def __init__(self, df, transform=None): | |||
super().__init__() | |||
self.transform = transform | |||
self.df = df | |||
def __len__(self): | |||
return len(self.df) | |||
def __getitem__(self, index): | |||
        # Convert tensor indices to plain ints so that pandas .iloc indexing works correctly
index = int(index) | |||
file, cls = self.df.iloc[index][['file', 'class']] | |||
img = Image.open(file)#.convert('RGB') | |||
cls = 0 if cls == 'hem' else 1 | |||
if self.transform is not None: | |||
img = self.transform(img) | |||
return img, cls | |||
def get_class_weights(df): | |||
class_weights = torch.FloatTensor([ | |||
df.loc[df['class'] == 'hem']['file'].count() / len(df), | |||
df.loc[df['class'] == 'all']['file'].count() / len(df), | |||
]).to(dtype=torch.float32) | |||
return class_weights | |||
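# Test-time augmentation helper: returns a stack of `num_rotations` rotated copies of
# the input image as a tensor of shape (num_rotations, C, H, W). The evaluation code
# averages the model logits over this rotation dimension.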
def tf_rotation_stack(x, num_rotations=8): | |||
xs = [] | |||
for i in range(num_rotations): | |||
angle = 360 * i / num_rotations | |||
xrot = TF.rotate(x, angle) | |||
xrot = TF.to_tensor(xrot) | |||
xs.append(xrot) | |||
xs = torch.stack(xs) | |||
return xs | |||
def get_tf_train_transform(res): | |||
size_factor = int(STD_RES/res) | |||
center_crop = int(STD_CENTER_CROP/size_factor) | |||
tf_train = transforms.Compose([ | |||
transforms.Resize(res), | |||
#transforms.CenterCrop(center_crop), | |||
transforms.RandomVerticalFlip(), | |||
transforms.RandomHorizontalFlip(), | |||
transforms.RandomAffine(degrees=360, translate=(0.2, 0.2)), | |||
# transforms.Lambda(tf_rotation_stack), | |||
transforms.ToTensor(), | |||
]) | |||
return tf_train | |||
def get_tf_vaild_rot_transform(res): | |||
size_factor = int(STD_RES/res) | |||
center_crop = int(STD_CENTER_CROP/size_factor) | |||
tf_valid_rot = transforms.Compose([ | |||
transforms.Resize(res), | |||
#transforms.CenterCrop(center_crop), | |||
transforms.Lambda(tf_rotation_stack), | |||
]) | |||
return tf_valid_rot | |||
def get_tf_valid_norot_transform(res): | |||
size_factor = int(STD_RES/res) | |||
center_crop = int(STD_CENTER_CROP/size_factor) | |||
tf_valid_norot = transforms.Compose([ | |||
transforms.Resize(res), | |||
#transforms.CenterCrop(center_crop), | |||
transforms.ToTensor(), | |||
]) | |||
return tf_valid_norot | |||
def get_dataset(dataroot, folds_train=(0, 1, 2), folds_valid=(3,), tf_train=None, tf_valid=None): | |||
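    # Folds 0-2 are the original training releases; fold 3 is the phase2 preliminary
    # test set (see to_dataframe). With folds_valid=None only a training set is built.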
    if tf_train is None or tf_valid is None:
        raise ValueError("Both tf_train and tf_valid transforms must be provided")
df = to_dataframe(dataroot) | |||
df_trainset = df.loc[df['fold'].isin(folds_train)] | |||
trainset = ISBI2019(df_trainset, transform=tf_train) | |||
class_weights = get_class_weights(df_trainset) | |||
if folds_valid is not None: | |||
df_validset = df.loc[df['fold'].isin(folds_valid)] | |||
validset_subjects = df_validset['subject'].values | |||
validset = ISBI2019(df_validset, transform=tf_valid) | |||
return trainset, validset, validset_subjects, class_weights | |||
else: | |||
return trainset, class_weights | |||
if __name__ == '__main__': | |||
import math | |||
from tqdm import tqdm | |||
df = to_dataframe('data') | |||
print(df) | |||
print("Examples by fold and class") | |||
print(df.groupby(['fold', 'class'])['file'].count()) | |||
dataset = ISBI2019(df) | |||
mean_height, mean_width = 0, 0 | |||
weird_files = [] | |||
bound_left, bound_upper, bound_right, bound_lower = math.inf, math.inf, 0, 0 | |||
for i, (img, label) in tqdm(enumerate(dataset), total=len(dataset)): | |||
left, upper, right, lower = img.getbbox() | |||
if left == 0 or upper == 0 or right == 450 or lower == 450: | |||
weird_files.append(df.iloc[i]['file']) | |||
height = lower - upper | |||
width = right - left | |||
mean_height = mean_height + (height - mean_height) / (i + 1) | |||
mean_width = mean_width + (width - mean_width) / (i + 1) | |||
bound_left = min(bound_left, left) | |||
bound_upper = min(bound_upper, upper) | |||
bound_right = max(bound_right, right) | |||
bound_lower = max(bound_lower, lower) | |||
print(f"mean_height = {mean_height:.2f}") | |||
print(f"mean_width = {mean_width:.2f}") | |||
print(f"bound_left = {bound_left:d}") | |||
print(f"bound_upper = {bound_upper:d}") | |||
print(f"bound_right = {bound_right:d}") | |||
print(f"bound_lower = {bound_lower:d}") | |||
print("Files that max out at least one border:") | |||
for f in weird_files: | |||
print(f) |
@@ -0,0 +1,246 @@ | |||
import argparse | |||
import os | |||
from collections import defaultdict | |||
import numpy as np | |||
import torch | |||
import torch.nn.functional as F | |||
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_fscore_support, accuracy_score | |||
from tensorboardX import SummaryWriter | |||
from torch.optim.lr_scheduler import StepLR, LambdaLR | |||
from torch.utils.data import DataLoader | |||
from tqdm import tqdm, trange | |||
from dataset import get_dataset, get_tf_train_transform, get_tf_vaild_rot_transform | |||
from model import get_model | |||
from utils import IncrementalAverage, to_device, set_seeds, unique_string, count_parameters | |||
def evaluate(model, valid_loader, class_weights, device): | |||
model.eval() | |||
all_labels = [] | |||
all_preds = [] | |||
loss_avg = IncrementalAverage() | |||
for img, label in tqdm(valid_loader, leave=False): | |||
img, label = to_device(device, img, label) | |||
bs, nrot, c, h, w = img.size() | |||
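        # The validation transform produces a stack of rotated copies per image;
        # run all rotations through the network and average the logits.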
with torch.no_grad(): | |||
pred = model(img.view(-1, c, h, w)) | |||
pred = pred.view(bs, nrot).mean(1) | |||
loss = lossfn(pred, label.to(pred.dtype), class_weights) | |||
all_labels.append(label.cpu()) | |||
all_preds.append(pred.cpu()) | |||
loss_avg.update(loss.item()) | |||
all_labels = torch.cat(all_labels).numpy() | |||
all_preds = torch.cat(all_preds).numpy() | |||
all_preds_binary = all_preds > 0 | |||
cm = confusion_matrix(all_labels, all_preds_binary) | |||
auc = roc_auc_score(all_labels, all_preds) | |||
prec, rec, f1, _ = precision_recall_fscore_support(all_labels, all_preds_binary, average='weighted') | |||
return loss_avg.value, cm, auc, prec, rec, f1 | |||
def train(model, opt, train_loader, class_weights, device): | |||
model.train() | |||
loss_avg = IncrementalAverage() | |||
for img, label in tqdm(train_loader, leave=False): | |||
img, label = to_device(device, img, label) | |||
pred = model(img) | |||
pred = pred.view(-1) | |||
loss = lossfn(pred, label.to(pred.dtype), class_weights) | |||
loss_avg.update(loss.item()) | |||
opt.zero_grad() | |||
loss.backward() | |||
opt.step() | |||
return loss_avg.value | |||
def lossfn(prediction, target, class_weights): | |||
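    # class_weights holds the dataset fractions [hem, all]; weighting the positive
    # (ALL) class by hem/all counteracts the class imbalance in the BCE loss.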
pos_weight = (class_weights[0] / class_weights[1]).expand(len(target)) | |||
return F.binary_cross_entropy_with_logits(prediction, target, pos_weight=pos_weight) | |||
def schedule(epoch): | |||
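    # Learning-rate multiplier: 1x for the first two epochs, 0.1x for the next two,
    # and 0.01x afterwards (applied to every parameter group via LambdaLR).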
if epoch < 2: | |||
ub = 1 | |||
elif epoch < 4: | |||
ub = 0.1 | |||
else: | |||
ub = 0.01 | |||
return ub | |||
def train_validate(args): | |||
model = get_model().to(args.device) | |||
print("Model parameters:", count_parameters(model)) | |||
trainset, validset, validset_subjects, class_weights = get_dataset(args.dataroot, | |||
tf_train=get_tf_train_transform(args.res), | |||
tf_valid=get_tf_vaild_rot_transform(args.res)) | |||
class_weights = class_weights.to(args.device) | |||
print(f"Trainset length: {len(trainset)}") | |||
print(f"Validset length: {len(validset)}") | |||
print(f"class_weights = {class_weights}") | |||
train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True) | |||
valid_loader = DataLoader(validset, batch_size=args.batch_size, num_workers=6, shuffle=False) | |||
opt = torch.optim.Adam([ | |||
{'params': model.paramgroup01(), 'lr': 1e-6}, | |||
{'params': model.paramgroup234(), 'lr': 1e-4}, | |||
{'params': model.parameters_classifier(), 'lr': 1e-2}, | |||
]) | |||
scheduler = LambdaLR(opt, lr_lambda=[lambda e: schedule(e), | |||
lambda e: schedule(e), | |||
lambda e: schedule(e)]) | |||
summarywriter = SummaryWriter(args.out) | |||
recorded_data = defaultdict(list) | |||
def logged_eval(e): | |||
valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader, class_weights, args.device) | |||
# Derive some accuracy metrics from confusion matrix | |||
tn, fp, fn, tp = cm.ravel() | |||
acc = (tp + tn) / cm.sum() | |||
acc_hem = tn / (tn + fp) | |||
acc_all = tp / (tp + fn) | |||
print(f"epoch={e} f1={f1:.4f}") | |||
summarywriter.add_scalar('loss/train', train_loss, e) | |||
summarywriter.add_scalar('loss/valid', valid_loss, e) | |||
summarywriter.add_scalar('cm/tn', tn, e) | |||
summarywriter.add_scalar('cm/fp', fp, e) | |||
summarywriter.add_scalar('cm/fn', fn, e) | |||
summarywriter.add_scalar('cm/tp', tp, e) | |||
summarywriter.add_scalar('metrics/precision', prec, e) | |||
summarywriter.add_scalar('metrics/recall', rec, e) | |||
summarywriter.add_scalar('metrics/f1', f1, e) | |||
summarywriter.add_scalar('metrics/auc', auc, e) | |||
summarywriter.add_scalar('acc/acc', acc, e) | |||
summarywriter.add_scalar('acc/hem', acc_hem, e) | |||
summarywriter.add_scalar('acc/all', acc_all, e) | |||
recorded_data['loss_train'].append(train_loss) | |||
recorded_data['loss_valid'].append(valid_loss) | |||
        recorded_data['tn'].append(tn)
recorded_data['fp'].append(fp) | |||
recorded_data['fn'].append(fn) | |||
recorded_data['tp'].append(tp) | |||
recorded_data['precision'].append(prec) | |||
recorded_data['recall'].append(rec) | |||
recorded_data['f1'].append(f1) | |||
recorded_data['auc'].append(auc) | |||
recorded_data['acc'].append(acc) | |||
recorded_data['acc_hem'].append(acc_hem) | |||
recorded_data['acc_all'].append(acc_all) | |||
np.savez(f'{args.out}/results', **recorded_data) | |||
return f1 | |||
model = torch.nn.DataParallel(model) | |||
train_loss = np.nan | |||
best_val_f1 = logged_eval(0) | |||
for e in trange(args.epochs, desc='Epoch'): | |||
scheduler.step(e) | |||
train_loss = train(model, opt, train_loader, class_weights, args.device) | |||
val_f1 = logged_eval(e + 1) | |||
if val_f1 > best_val_f1: | |||
print(f"New best model at {val_f1:.6f}") | |||
torch.save(model.state_dict(), f'{args.out}/model.pt') | |||
best_val_f1 = val_f1 | |||
summarywriter.close() | |||
subj_acc = evaluate_subj_acc(model, validset, validset_subjects, args.device) | |||
np.savez(f'{args.out}/subj_acc', **subj_acc) | |||
def evaluate_subj_acc(model, dataset, subjects, device): | |||
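    # Per-subject accuracy on the validation set; predictions are again averaged
    # over the rotation stack before thresholding at logit 0.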
model.eval() | |||
subj_pred = defaultdict(list) | |||
subj_label = defaultdict(list) | |||
dataloader = DataLoader(dataset, batch_size=1, num_workers=1, shuffle=False) | |||
for (img, cls), subj in tqdm(zip(dataloader, subjects), total=len(subjects), leave=False): | |||
img, cls = to_device(device, img, cls) | |||
bs, nrot, c, h, w = img.size() | |||
with torch.no_grad(): | |||
cls_hat = model(img.view(-1, c, h, w)) | |||
cls_hat = cls_hat.view(bs, nrot).mean(1) | |||
subj_label[subj].append(cls.cpu()) | |||
subj_pred[subj].append(cls_hat.cpu()) | |||
for k in subj_label: | |||
subj_label[k] = torch.cat(subj_label[k]).numpy() | |||
subj_pred[k] = torch.cat(subj_pred[k]).numpy() > 0 | |||
subj_acc = {} | |||
for k in subj_label: | |||
subj_acc[k] = accuracy_score(subj_label[k], subj_pred[k]) | |||
return subj_acc | |||
def train_test(args): | |||
model = get_model().to(args.device) | |||
print("Model parameters:", count_parameters(model)) | |||
trainset, class_weights = get_dataset(args.dataroot, folds_train=(0, 1, 2, 3), | |||
folds_valid=None, | |||
tf_train=get_tf_train_transform(args.res), | |||
tf_valid=get_tf_vaild_rot_transform(args.res)) | |||
class_weights = class_weights.to(args.device) | |||
print(f"Trainset length: {len(trainset)}") | |||
print(f"class_weights = {class_weights}") | |||
train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True) | |||
opt = torch.optim.Adam([ | |||
{'params': model.paramgroup01(), 'lr': 1e-6}, | |||
{'params': model.paramgroup234(), 'lr': 1e-4}, | |||
{'params': model.parameters_classifier(), 'lr': 1e-2}, | |||
]) | |||
scheduler = LambdaLR(opt, lr_lambda=[lambda e: schedule(e), | |||
lambda e: schedule(e), | |||
lambda e: schedule(e)]) | |||
model = torch.nn.DataParallel(model) | |||
for e in trange(args.epochs, desc='Epoch'): | |||
scheduler.step(e) | |||
train(model, opt, train_loader, class_weights, args.device) | |||
torch.save(model.state_dict(), f'{args.out}/model.pt') | |||
def parse_args(): | |||
parser = argparse.ArgumentParser() | |||
parser.add_argument('--dataroot', default='data', help='path to dataset') | |||
parser.add_argument('--batch-size', type=int, default=16) | |||
parser.add_argument('--epochs', type=int, default=6) | |||
parser.add_argument('--seed', default=1, type=int, help='random seed') | |||
parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu') | |||
parser.add_argument('--out', default='results', help='output folder') | |||
    parser.add_argument('--res', type=int, default=450, help='desired input resolution')
args = parser.parse_args() | |||
args.out = os.path.join(args.out, unique_string()) | |||
return args | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
print(args) | |||
os.makedirs(args.out, exist_ok=True) | |||
set_seeds(args.seed) | |||
torch.backends.cudnn.benchmark = True | |||
train_validate(args) |
@@ -0,0 +1,210 @@ | |||
import argparse | |||
import os | |||
from collections import defaultdict | |||
import numpy as np | |||
import torch | |||
import torch.nn.functional as F | |||
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_fscore_support, accuracy_score | |||
from tensorboardX import SummaryWriter | |||
from torch.optim.lr_scheduler import StepLR, LambdaLR | |||
from torch.utils.data import DataLoader | |||
from tqdm import tqdm, trange | |||
from dataset import get_dataset, get_tf_train_transform, get_tf_vaild_rot_transform | |||
from model import get_model | |||
from utils import IncrementalAverage, to_device, set_seeds, unique_string, count_parameters | |||
def evaluate(model, valid_loader, class_weights, device): | |||
model.eval() | |||
all_labels = [] | |||
all_preds = [] | |||
loss_avg = IncrementalAverage() | |||
for img, label in tqdm(valid_loader, leave=False): | |||
img, label = to_device(device, img, label) | |||
bs, nrot, c, h, w = img.size() | |||
with torch.no_grad(): | |||
pred = model(img.view(-1, c, h, w)) | |||
pred = pred.view(bs, nrot).mean(1) | |||
loss = lossfn(pred, label.to(pred.dtype), class_weights) | |||
all_labels.append(label.cpu()) | |||
all_preds.append(pred.cpu()) | |||
loss_avg.update(loss.item()) | |||
all_labels = torch.cat(all_labels).numpy() | |||
all_preds = torch.cat(all_preds).numpy() | |||
all_preds_binary = all_preds > 0 | |||
cm = confusion_matrix(all_labels, all_preds_binary) | |||
auc = roc_auc_score(all_labels, all_preds) | |||
prec, rec, f1, _ = precision_recall_fscore_support(all_labels, all_preds_binary, average='weighted') | |||
return loss_avg.value, cm, auc, prec, rec, f1 | |||
def train(model, opt, train_loader, class_weights, device): | |||
model.train() | |||
loss_avg = IncrementalAverage() | |||
for img, label in tqdm(train_loader, leave=False): | |||
img, label = to_device(device, img, label) | |||
pred = model(img) | |||
pred = pred.view(-1) | |||
loss = lossfn(pred, label.to(pred.dtype), class_weights) | |||
loss_avg.update(loss.item()) | |||
opt.zero_grad() | |||
loss.backward() | |||
opt.step() | |||
return loss_avg.value | |||
def lossfn(prediction, target, class_weights): | |||
pos_weight = (class_weights[0] / class_weights[1]).expand(len(target)) | |||
return F.binary_cross_entropy_with_logits(prediction, target, pos_weight=pos_weight) | |||
def schedule(epoch): | |||
if epoch < 2: | |||
ub = 1 | |||
elif epoch < 4: | |||
ub = 0.1 | |||
else: | |||
ub = 0.01 | |||
return ub | |||
def train_validate(args): | |||
model = get_model().to(args.device) | |||
print("Model parameters:", count_parameters(model)) | |||
trainset, validset, validset_subjects, class_weights = get_dataset(args.dataroot, | |||
tf_train=get_tf_train_transform(args.res), | |||
tf_valid=get_tf_vaild_rot_transform(args.res)) | |||
class_weights = class_weights.to(args.device) | |||
print(f"Trainset length: {len(trainset)}") | |||
print(f"Validset length: {len(validset)}") | |||
print(f"class_weights = {class_weights}") | |||
train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True) | |||
valid_loader = DataLoader(validset, batch_size=args.batch_size, num_workers=6, shuffle=False) | |||
opt = torch.optim.Adam([ | |||
{'params': model.paramgroup01(), 'lr': args.lr}, | |||
{'params': model.paramgroup234(), 'lr': args.lr}, | |||
{'params': model.parameters_classifier(), 'lr': args.lr}, | |||
]) | |||
scheduler = LambdaLR(opt, lr_lambda=[lambda e: schedule(e), | |||
lambda e: schedule(e), | |||
lambda e: schedule(e)]) | |||
summarywriter = SummaryWriter(args.out) | |||
recorded_data = defaultdict(list) | |||
def logged_eval(e): | |||
valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader, class_weights, args.device) | |||
# Derive some accuracy metrics from confusion matrix | |||
tn, fp, fn, tp = cm.ravel() | |||
acc = (tp + tn) / cm.sum() | |||
acc_hem = tn / (tn + fp) | |||
acc_all = tp / (tp + fn) | |||
print(f"epoch={e} f1={f1:.4f}") | |||
summarywriter.add_scalar('loss/train', train_loss, e) | |||
summarywriter.add_scalar('loss/valid', valid_loss, e) | |||
summarywriter.add_scalar('cm/tn', tn, e) | |||
summarywriter.add_scalar('cm/fp', fp, e) | |||
summarywriter.add_scalar('cm/fn', fn, e) | |||
summarywriter.add_scalar('cm/tp', tp, e) | |||
summarywriter.add_scalar('metrics/precision', prec, e) | |||
summarywriter.add_scalar('metrics/recall', rec, e) | |||
summarywriter.add_scalar('metrics/f1', f1, e) | |||
summarywriter.add_scalar('metrics/auc', auc, e) | |||
summarywriter.add_scalar('acc/acc', acc, e) | |||
summarywriter.add_scalar('acc/hem', acc_hem, e) | |||
summarywriter.add_scalar('acc/all', acc_all, e) | |||
recorded_data['loss_train'].append(train_loss) | |||
recorded_data['loss_valid'].append(valid_loss) | |||
        recorded_data['tn'].append(tn)
recorded_data['fp'].append(fp) | |||
recorded_data['fn'].append(fn) | |||
recorded_data['tp'].append(tp) | |||
recorded_data['precision'].append(prec) | |||
recorded_data['recall'].append(rec) | |||
recorded_data['f1'].append(f1) | |||
recorded_data['auc'].append(auc) | |||
recorded_data['acc'].append(acc) | |||
recorded_data['acc_hem'].append(acc_hem) | |||
recorded_data['acc_all'].append(acc_all) | |||
np.savez(f'{args.out}/results', **recorded_data) | |||
model = torch.nn.DataParallel(model) | |||
train_loss = np.nan | |||
logged_eval(0) | |||
for e in trange(args.epochs, desc='Epoch'): | |||
scheduler.step(e) | |||
train_loss = train(model, opt, train_loader, class_weights, args.device) | |||
logged_eval(e + 1) | |||
summarywriter.close() | |||
subj_acc = evaluate_subj_acc(model, validset, validset_subjects, args.device) | |||
np.savez(f'{args.out}/subj_acc', **subj_acc) | |||
def evaluate_subj_acc(model, dataset, subjects, device): | |||
model.eval() | |||
subj_pred = defaultdict(list) | |||
subj_label = defaultdict(list) | |||
dataloader = DataLoader(dataset, batch_size=1, num_workers=1, shuffle=False) | |||
for (img, cls), subj in tqdm(zip(dataloader, subjects), total=len(subjects), leave=False): | |||
img, cls = to_device(device, img, cls) | |||
bs, nrot, c, h, w = img.size() | |||
with torch.no_grad(): | |||
cls_hat = model(img.view(-1, c, h, w)) | |||
cls_hat = cls_hat.view(bs, nrot).mean(1) | |||
subj_label[subj].append(cls.cpu()) | |||
subj_pred[subj].append(cls_hat.cpu()) | |||
for k in subj_label: | |||
subj_label[k] = torch.cat(subj_label[k]).numpy() | |||
subj_pred[k] = torch.cat(subj_pred[k]).numpy() > 0 | |||
subj_acc = {} | |||
for k in subj_label: | |||
subj_acc[k] = accuracy_score(subj_label[k], subj_pred[k]) | |||
return subj_acc | |||
def parse_args(): | |||
parser = argparse.ArgumentParser() | |||
parser.add_argument('--dataroot', default='data', help='path to dataset') | |||
parser.add_argument('--lr', type=float, default=1e-4) | |||
parser.add_argument('--batch-size', type=int, default=16) | |||
parser.add_argument('--epochs', type=int, default=6) | |||
parser.add_argument('--seed', default=1, type=int, help='random seed') | |||
parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu') | |||
parser.add_argument('--out', default='results', help='output folder') | |||
    parser.add_argument('--res', type=int, default=450, help='desired input resolution')
args = parser.parse_args() | |||
args.out = os.path.join(args.out, unique_string()) | |||
return args | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
print(args) | |||
os.makedirs(args.out, exist_ok=True) | |||
set_seeds(args.seed) | |||
torch.backends.cudnn.benchmark = True | |||
train_validate(args) |
@@ -0,0 +1,208 @@ | |||
import argparse | |||
import os | |||
from collections import defaultdict | |||
import numpy as np | |||
import torch | |||
import torch.nn.functional as F | |||
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_fscore_support, accuracy_score | |||
from tensorboardX import SummaryWriter | |||
from torch.optim.lr_scheduler import StepLR, LambdaLR | |||
from torch.utils.data import DataLoader | |||
from tqdm import tqdm, trange | |||
from dataset import get_dataset, get_tf_valid_norot_transform, get_tf_train_transform | |||
from model import get_model | |||
from utils import IncrementalAverage, to_device, set_seeds, unique_string, count_parameters | |||
def evaluate(model, valid_loader, class_weights, device): | |||
model.eval() | |||
all_labels = [] | |||
all_preds = [] | |||
loss_avg = IncrementalAverage() | |||
for img, label in tqdm(valid_loader, leave=False): | |||
img, label = to_device(device, img, label) | |||
with torch.no_grad(): | |||
pred = model(img).view(-1) | |||
loss = lossfn(pred, label.to(pred.dtype), class_weights) | |||
all_labels.append(label.cpu()) | |||
all_preds.append(pred.cpu()) | |||
loss_avg.update(loss.item()) | |||
all_labels = torch.cat(all_labels).numpy() | |||
all_preds = torch.cat(all_preds).numpy() | |||
all_preds_binary = all_preds > 0 | |||
cm = confusion_matrix(all_labels, all_preds_binary) | |||
auc = roc_auc_score(all_labels, all_preds) | |||
prec, rec, f1, _ = precision_recall_fscore_support(all_labels, all_preds_binary, average='weighted') | |||
return loss_avg.value, cm, auc, prec, rec, f1 | |||
def train(model, opt, train_loader, class_weights, device): | |||
model.train() | |||
loss_avg = IncrementalAverage() | |||
for img, label in tqdm(train_loader, leave=False): | |||
img, label = to_device(device, img, label) | |||
pred = model(img) | |||
pred = pred.view(-1) | |||
loss = lossfn(pred, label.to(pred.dtype), class_weights) | |||
loss_avg.update(loss.item()) | |||
opt.zero_grad() | |||
loss.backward() | |||
opt.step() | |||
return loss_avg.value | |||
def lossfn(prediction, target, class_weights): | |||
pos_weight = (class_weights[0] / class_weights[1]).expand(len(target)) | |||
return F.binary_cross_entropy_with_logits(prediction, target, pos_weight=pos_weight) | |||
def schedule(epoch): | |||
if epoch < 2: | |||
ub = 1 | |||
elif epoch < 4: | |||
ub = 0.1 | |||
else: | |||
ub = 0.01 | |||
return ub | |||
def train_validate(args): | |||
model = get_model().to(args.device) | |||
print("Model parameters:", count_parameters(model)) | |||
trainset, validset, validset_subjects, class_weights = get_dataset(args.dataroot, | |||
tf_valid=get_tf_valid_norot_transform(args.res), | |||
tf_train=get_tf_train_transform(args.res)) | |||
class_weights = class_weights.to(args.device) | |||
print(f"Trainset length: {len(trainset)}") | |||
print(f"Validset length: {len(validset)}") | |||
print(f"class_weights = {class_weights}") | |||
train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True) | |||
valid_loader = DataLoader(validset, batch_size=args.batch_size, num_workers=6, shuffle=False) | |||
opt = torch.optim.Adam([ | |||
{'params': model.paramgroup01(), 'lr': 1e-6}, | |||
{'params': model.paramgroup234(), 'lr': 1e-4}, | |||
{'params': model.parameters_classifier(), 'lr': 1e-2}, | |||
]) | |||
scheduler = LambdaLR(opt, lr_lambda=[lambda e: schedule(e), | |||
lambda e: schedule(e), | |||
lambda e: schedule(e)]) | |||
summarywriter = SummaryWriter(args.out) | |||
recorded_data = defaultdict(list) | |||
def logged_eval(e): | |||
valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader, class_weights, args.device) | |||
# Derive some accuracy metrics from confusion matrix | |||
tn, fp, fn, tp = cm.ravel() | |||
acc = (tp + tn) / cm.sum() | |||
acc_hem = tn / (tn + fp) | |||
acc_all = tp / (tp + fn) | |||
print(f"epoch={e} f1={f1:.4f}") | |||
summarywriter.add_scalar('loss/train', train_loss, e) | |||
summarywriter.add_scalar('loss/valid', valid_loss, e) | |||
summarywriter.add_scalar('cm/tn', tn, e) | |||
summarywriter.add_scalar('cm/fp', fp, e) | |||
summarywriter.add_scalar('cm/fn', fn, e) | |||
summarywriter.add_scalar('cm/tp', tp, e) | |||
summarywriter.add_scalar('metrics/precision', prec, e) | |||
summarywriter.add_scalar('metrics/recall', rec, e) | |||
summarywriter.add_scalar('metrics/f1', f1, e) | |||
summarywriter.add_scalar('metrics/auc', auc, e) | |||
summarywriter.add_scalar('acc/acc', acc, e) | |||
summarywriter.add_scalar('acc/hem', acc_hem, e) | |||
summarywriter.add_scalar('acc/all', acc_all, e) | |||
recorded_data['loss_train'].append(train_loss) | |||
recorded_data['loss_valid'].append(valid_loss) | |||
        recorded_data['tn'].append(tn)
recorded_data['fp'].append(fp) | |||
recorded_data['fn'].append(fn) | |||
recorded_data['tp'].append(tp) | |||
recorded_data['precision'].append(prec) | |||
recorded_data['recall'].append(rec) | |||
recorded_data['f1'].append(f1) | |||
recorded_data['auc'].append(auc) | |||
recorded_data['acc'].append(acc) | |||
recorded_data['acc_hem'].append(acc_hem) | |||
recorded_data['acc_all'].append(acc_all) | |||
np.savez(f'{args.out}/results', **recorded_data) | |||
model = torch.nn.DataParallel(model) | |||
train_loss = np.nan | |||
logged_eval(0) | |||
for e in trange(args.epochs, desc='Epoch'): | |||
scheduler.step(e) | |||
train_loss = train(model, opt, train_loader, class_weights, args.device) | |||
logged_eval(e + 1) | |||
torch.save(model.state_dict(), f'{args.out}/model.pt') | |||
summarywriter.close() | |||
subj_acc = evaluate_subj_acc(model, validset, validset_subjects, args.device) | |||
np.savez(f'{args.out}/subj_acc', **subj_acc) | |||
def evaluate_subj_acc(model, dataset, subjects, device): | |||
model.eval() | |||
subj_pred = defaultdict(list) | |||
subj_label = defaultdict(list) | |||
dataloader = DataLoader(dataset, batch_size=1, num_workers=1, shuffle=False) | |||
for (img, cls), subj in tqdm(zip(dataloader, subjects), total=len(subjects), leave=False): | |||
img, cls = to_device(device, img, cls) | |||
        # The no-rotation validation transform yields plain (bs, c, h, w) batches here,
        # so there is no rotation stack to average over.
        with torch.no_grad():
            cls_hat = model(img).view(-1)
subj_label[subj].append(cls.cpu()) | |||
subj_pred[subj].append(cls_hat.cpu()) | |||
for k in subj_label: | |||
subj_label[k] = torch.cat(subj_label[k]).numpy() | |||
subj_pred[k] = torch.cat(subj_pred[k]).numpy() > 0 | |||
subj_acc = {} | |||
for k in subj_label: | |||
subj_acc[k] = accuracy_score(subj_label[k], subj_pred[k]) | |||
return subj_acc | |||
def parse_args(): | |||
parser = argparse.ArgumentParser() | |||
parser.add_argument('--dataroot', default='data', help='path to dataset') | |||
parser.add_argument('--batch-size', type=int, default=16) | |||
parser.add_argument('--epochs', type=int, default=6) | |||
parser.add_argument('--seed', default=1, type=int, help='random seed') | |||
parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu') | |||
parser.add_argument('--out', default='results', help='output folder') | |||
    parser.add_argument('--res', type=int, default=450, help='desired input resolution')
args = parser.parse_args() | |||
args.out = os.path.join(args.out, unique_string()) | |||
return args | |||
if __name__ == '__main__': | |||
args = parse_args() | |||
print(args) | |||
os.makedirs(args.out, exist_ok=True) | |||
set_seeds(args.seed) | |||
torch.backends.cudnn.benchmark = True | |||
train_validate(args) |
@@ -0,0 +1,183 @@ | |||
# Code adapted from: https://github.com/Cadene/pretrained-models.pytorch | |||
import math | |||
from collections import OrderedDict | |||
from itertools import chain | |||
import torch.nn as nn | |||
from torch.utils import model_zoo | |||
from utils import Flatten | |||
class SEModule(nn.Module): | |||
def __init__(self, channels, reduction): | |||
super(SEModule, self).__init__() | |||
self.avg_pool = nn.AdaptiveAvgPool2d(1) | |||
self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, padding=0) | |||
self.relu = nn.ReLU(inplace=True) | |||
self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, padding=0) | |||
self.sigmoid = nn.Sigmoid() | |||
def forward(self, x): | |||
module_input = x | |||
x = self.avg_pool(x) | |||
x = self.fc1(x) | |||
x = self.relu(x) | |||
x = self.fc2(x) | |||
x = self.sigmoid(x) | |||
return module_input * x | |||
class SEResNeXtBottleneck(nn.Module): | |||
""" | |||
ResNeXt bottleneck type C with a Squeeze-and-Excitation module. | |||
""" | |||
expansion = 4 | |||
def __init__(self, inplanes, planes, groups, reduction, stride=1, downsample=None, base_width=4): | |||
super(SEResNeXtBottleneck, self).__init__() | |||
width = math.floor(planes * (base_width / 64)) * groups | |||
self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, stride=1) | |||
self.bn1 = nn.BatchNorm2d(width) | |||
self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False) | |||
self.bn2 = nn.BatchNorm2d(width) | |||
self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False) | |||
self.bn3 = nn.BatchNorm2d(planes * 4) | |||
self.relu = nn.ReLU(inplace=True) | |||
self.se_module = SEModule(planes * 4, reduction=reduction) | |||
self.downsample = downsample | |||
self.stride = stride | |||
def forward(self, x): | |||
residual = x | |||
out = self.conv1(x) | |||
out = self.bn1(out) | |||
out = self.relu(out) | |||
out = self.conv2(out) | |||
out = self.bn2(out) | |||
out = self.relu(out) | |||
out = self.conv3(out) | |||
out = self.bn3(out) | |||
if self.downsample is not None: | |||
residual = self.downsample(x) | |||
out = self.se_module(out) + residual | |||
out = self.relu(out) | |||
return out | |||
class SENet(nn.Module): | |||
def __init__(self, block, layers, groups, reduction, inplanes=128, | |||
downsample_kernel_size=3, downsample_padding=1): | |||
super(SENet, self).__init__() | |||
self.inplanes = inplanes | |||
layer0_modules = [ | |||
('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False)), | |||
('bn1', nn.BatchNorm2d(inplanes)), | |||
('relu1', nn.ReLU(inplace=True)), | |||
# To preserve compatibility with Caffe weights `ceil_mode=True` | |||
# is used instead of `padding=1`. | |||
('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True)) | |||
] | |||
self.layer0 = nn.Sequential(OrderedDict(layer0_modules)) | |||
self.layer1 = self._make_layer( | |||
block, | |||
planes=64, | |||
blocks=layers[0], | |||
groups=groups, | |||
reduction=reduction, | |||
downsample_kernel_size=1, | |||
downsample_padding=0 | |||
) | |||
self.layer2 = self._make_layer( | |||
block, | |||
planes=128, | |||
blocks=layers[1], | |||
stride=2, | |||
groups=groups, | |||
reduction=reduction, | |||
downsample_kernel_size=downsample_kernel_size, | |||
downsample_padding=downsample_padding | |||
) | |||
self.layer3 = self._make_layer( | |||
block, | |||
planes=256, | |||
blocks=layers[2], | |||
stride=2, | |||
groups=groups, | |||
reduction=reduction, | |||
downsample_kernel_size=downsample_kernel_size, | |||
downsample_padding=downsample_padding | |||
) | |||
self.layer4 = self._make_layer( | |||
block, | |||
planes=512, | |||
blocks=layers[3], | |||
stride=2, | |||
groups=groups, | |||
reduction=reduction, | |||
downsample_kernel_size=downsample_kernel_size, | |||
downsample_padding=downsample_padding | |||
) | |||
self.cls = nn.Sequential( | |||
nn.AdaptiveAvgPool2d(1), | |||
Flatten(), | |||
nn.Linear(512 * block.expansion, 1) | |||
) | |||
def _make_layer(self, block, planes, blocks, groups, reduction, stride=1, | |||
downsample_kernel_size=1, downsample_padding=0): | |||
downsample = None | |||
if stride != 1 or self.inplanes != planes * block.expansion: | |||
downsample = nn.Sequential( | |||
nn.Conv2d(self.inplanes, planes * block.expansion, | |||
kernel_size=downsample_kernel_size, stride=stride, | |||
padding=downsample_padding, bias=False), | |||
nn.BatchNorm2d(planes * block.expansion), | |||
) | |||
layers = [block(self.inplanes, planes, groups, reduction, stride, downsample)] | |||
self.inplanes = planes * block.expansion | |||
for i in range(1, blocks): | |||
layers.append(block(self.inplanes, planes, groups, reduction)) | |||
return nn.Sequential(*layers) | |||
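    # The three parameter groups below correspond to the three learning rates used by
    # the Adam optimizer in the training scripts: early layers, later layers, and the
    # freshly initialised classifier head.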
def paramgroup01(self): | |||
return chain( | |||
self.layer0.parameters(), | |||
self.layer1.parameters(), | |||
) | |||
def paramgroup234(self): | |||
return chain( | |||
self.layer2.parameters(), | |||
self.layer3.parameters(), | |||
self.layer4.parameters(), | |||
) | |||
def parameters_classifier(self): | |||
return self.cls.parameters() | |||
def forward(self, x): | |||
x = self.layer0(x) | |||
x = self.layer1(x) | |||
x = self.layer2(x) | |||
x = self.layer3(x) | |||
x = self.layer4(x) | |||
c = self.cls(x) | |||
return c | |||
def get_model(): | |||
model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16, inplanes=64, | |||
downsample_kernel_size=1, downsample_padding=0) | |||
checkpoint = model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth') | |||
model.load_state_dict(checkpoint, strict=False) | |||
return model |
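

# Minimal smoke test (a sketch; assumes network access to download the pretrained
# weights referenced above): builds the model and checks the single-logit output shape.
if __name__ == '__main__':
    import torch

    net = get_model()
    net.eval()
    with torch.no_grad():
        out = net(torch.randn(2, 3, 128, 128))
    print(out.shape)  # expected: torch.Size([2, 1])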
@@ -0,0 +1,166 @@ | |||
from glob import glob | |||
import numpy as np | |||
import matplotlib.pyplot as plt | |||
from os.path import join | |||
from scipy.stats import mannwhitneyu | |||
dataroots = { | |||
'PROPOSAL' : 'results', | |||
#'model_cnmc_res_128' : 'results/model_cnmc_res_128', | |||
#'model_cnmc_res_224' : 'results/model_cnmc_res_224', | |||
#'model_cnmc_res_256' : 'results/model_cnmc_res_256', | |||
#'model_cnmc_res_450' : 'results/model_cnmc_res_450', | |||
#'model_cnmc_res_450_blue_only' : 'results/model_cnmc_res_450_blue_only', | |||
#'model_cnmc_res_450_green_only' : 'results/model_cnmc_res_450_green_only', | |||
#'model_cnmc_res_450_red_only' : 'results/model_cnmc_res_450_red_only', | |||
#'model_cnmc_res_450_no_blue' : 'results/model_cnmc_res_450_no_blue', | |||
#'model_cnmc_res_450_no_green' : 'results/model_cnmc_res_450_no_green', | |||
#'model_cnmc_res_450_no_red' : 'results/model_cnmc_res_450_no_red', | |||
#'model_cnmc_res_450_grayscale' : 'results/model_cnmc_res_450_grayscale', | |||
} | |||
def get_values(dataroot, key): | |||
npzs = list(glob(join(dataroot, '*', 'results.npz'))) | |||
vals = [] | |||
for f in npzs: | |||
recorded_data = np.load(f) | |||
val = recorded_data[key] | |||
vals.append(val) | |||
vals = np.stack(vals, 0) | |||
return vals | |||
def plot_mean_std(dataroot, key, ax, **kwargs): | |||
vals = get_values(dataroot, key) | |||
mean = np.mean(vals, 0) | |||
std = np.std(vals, 0) | |||
epochs = np.arange(len(mean)) | |||
# Offset by 1 so that we have nicely zoomed plots | |||
mean = mean[1:] | |||
std = std[1:] | |||
epochs = epochs[1:] | |||
ax.plot(epochs, mean, **kwargs) | |||
ax.fill_between(epochs, mean - std, mean + std, alpha=0.2) | |||
def plot3(key, ax): | |||
for k, v in dataroots.items(): | |||
plot_mean_std(v, key, ax, label=k) | |||
def print_final_min_mean_max(dataroot, key, model_epochs): | |||
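    # For each run, take the value of `key` at the given epoch index, then report
    # min / mean ± std / max in percent.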
vals = get_values(dataroot, key) * 100 | |||
vals = vals[np.arange(len(vals)), model_epochs] | |||
min = np.min(vals) | |||
mean = np.mean(vals) | |||
std = np.std(vals) | |||
max = np.max(vals) | |||
print(f'{min:.2f}', f'{mean:.2f} ± {std:.2f}', f'{max:.2f}', sep='\t') | |||
def print_final_table(dataroot): | |||
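    # Evaluate each run at its best-F1 epoch. Rows printed: accuracy, sensitivity (ALL),
    # specificity (HEM), F1, precision, recall.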
best_model_epochs = np.argmax(get_values(dataroot, 'f1'), axis=1) | |||
print_final_min_mean_max(dataroot, 'acc', best_model_epochs) | |||
print_final_min_mean_max(dataroot, 'acc_all', best_model_epochs) | |||
print_final_min_mean_max(dataroot, 'acc_hem', best_model_epochs) | |||
print_final_min_mean_max(dataroot, 'f1', best_model_epochs) | |||
print_final_min_mean_max(dataroot, 'precision', best_model_epochs) | |||
print_final_min_mean_max(dataroot, 'recall', best_model_epochs) | |||
def get_best_f1_scores(dataroot): | |||
f1_scores = get_values(dataroot, 'f1') | |||
best_model_epochs = np.argmax(f1_scores, axis=1) | |||
return f1_scores[np.arange(len(f1_scores)), best_model_epochs] | |||
def is_statistically_greater(dataroot1, dataroot2): | |||
# Tests if F1-score of dataroot1 is greater than dataroot2 | |||
a = get_best_f1_scores(dataroot1) | |||
b = get_best_f1_scores(dataroot2) | |||
u, p = mannwhitneyu(a, b, alternative='greater') | |||
return u, p | |||
###### | |||
for k, v in dataroots.items(): | |||
print(k) | |||
print_final_table(v) | |||
print() | |||
###### | |||
#print("MWU-Test of PROPOSAL > NOSPECLR") | |||
#print(is_statistically_greater(dataroots['PROPOSAL'], dataroots['NOSPECLR'])) | |||
#print() | |||
#print("MWU-Test of PROPOSAL > NOROT") | |||
#print(is_statistically_greater(dataroots['PROPOSAL'], dataroots['NOROT'])) | |||
###### | |||
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(9, 5)) | |||
ax[0, 0].set_title('Accuracy') | |||
plot3('acc', ax[0, 0]) | |||
ax[0, 1].set_title('Sensitivity') | |||
plot3('acc_all', ax[0, 1]) | |||
ax[0, 2].set_title('Specificity') | |||
plot3('acc_hem', ax[0, 2]) | |||
ax[1, 0].set_title('F1 score') | |||
plot3('f1', ax[1, 0]) | |||
ax[1, 1].set_title('Precision') | |||
plot3('precision', ax[1, 1]) | |||
ax[1, 2].set_title('Recall') | |||
plot3('recall', ax[1, 2]) | |||
fig.legend(loc='lower center', ncol=3) | |||
fig.tight_layout() | |||
fig.subplots_adjust(bottom=0.12) | |||
fig.savefig('results/plot_ablations.pdf') | |||
###### | |||
npload = 'results/model_cnmc_res_128'
npload_sub = npload + '/subj_acc.npz'
npload_res = npload + '/results.npz'
subj_acc = np.load(npload_sub) | |||
subj = list(sorted(subj_acc.keys())) | |||
acc = [subj_acc[k] for k in subj] | |||
fig, ax = plt.subplots(figsize=(9, 2)) | |||
ax.bar(range(len(acc)), acc, width=0.3, tick_label=subj) | |||
fig.tight_layout() | |||
fig.savefig('results/plot_subj_acc.pdf') | |||
###### | |||
data = np.load(npload_res) | |||
loss_train = data['loss_train'] | |||
loss_valid = data['loss_valid'][1:] | |||
f1_valid = data['f1'][1:] | |||
fig, ax = plt.subplots(ncols=3, figsize=(9, 2)) | |||
ax[0].plot(range(len(loss_train)), loss_train) | |||
ax[0].set_title("Training set loss") | |||
ax[1].plot(range(1, len(loss_valid) + 1), loss_valid) | |||
ax[1].set_title("Preliminary test set loss") | |||
ax[2].plot(range(1, len(f1_valid) + 1), f1_valid) | |||
ax[2].set_title("Preliminary test set F1-score") | |||
fig.tight_layout() | |||
fig.savefig('results/plot_curves.pdf') | |||
###### | |||
plt.show() |
@@ -0,0 +1,679 @@ | |||
{ | |||
"cells": [ | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "cd8aaf96", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"!pip install pandas tqdm" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "26bd5e25", | |||
"metadata": { | |||
"scrolled": true | |||
}, | |||
"outputs": [], | |||
"source": [ | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 55, | |||
"id": "b753e6b8", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:41, 1.43it/s] \n", | |||
"Positive: 1234\n", | |||
"Negative: 633\n", | |||
"AUC: 0.8797024225483345\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/20220216T154306Z.AZHL\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "3246460b", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC\n", | |||
"# res : 32\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 32" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "8a953a39", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC\n", | |||
"# res : 128\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 128" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "12c15b33", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC\n", | |||
"# res : 224\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 224" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "08ba15b4", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC\n", | |||
"# res : 256\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 256" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "3cf25ec3", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "73b9d9d3", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC_Grayscale\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_grayscale\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "ce16353c", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC_no_red\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_red\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "959ab837", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC_no_green\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_green\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "879beb46", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC_no_blue\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_blue\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "6d545dce", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC_red_only\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_red_only\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "25480226", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC_green_only\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_green_only\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "a064d169", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC_blue_only\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_blue_only\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": null, | |||
"id": "8d53828a", | |||
"metadata": {}, | |||
"outputs": [], | |||
"source": [ | |||
"# TRAIN\n", | |||
"# dataset : CNMC\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 183, | |||
"id": "ea9c2f23", | |||
"metadata": {}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"PROPOSAL\n", | |||
"68.51\t83.57 ± 5.16\t89.61\n", | |||
"84.33\t89.06 ± 2.09\t92.95\n", | |||
"38.73\t73.26 ± 11.77\t84.72\n", | |||
"66.76\t83.35 ± 5.61\t89.57\n", | |||
"66.81\t83.36 ± 5.60\t89.55\n", | |||
"68.51\t83.57 ± 5.16\t89.61\n", | |||
"\n", | |||
"Figure(900x500)\n", | |||
"Figure(900x200)\n", | |||
"Figure(900x200)\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# PLOT\n", | |||
"# dataset : CNMC\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 plot.py" | |||
] | |||
}, | |||
{ | |||
"cell_type": "markdown", | |||
"id": "8c92073d", | |||
"metadata": {}, | |||
"source": [ | |||
"# EVALUATION" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 135, | |||
"id": "b25a4267", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:05, 11.69it/s] \n", | |||
"Positive: 1425\n", | |||
"Negative: 442\n", | |||
"AUC: 0.6153299354864846\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION \n", | |||
"# dataset : CNMC\n", | |||
"# res : 32\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_32\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 32" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 136, | |||
"id": "b14e3e67", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:09, 6.24it/s] \n", | |||
"Positive: 1315\n", | |||
"Negative: 552\n", | |||
"AUC: 0.7711131113339208\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC\n", | |||
"# res : 128\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_128\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 128" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 137, | |||
"id": "dfb25744", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:14, 4.19it/s] \n", | |||
"Positive: 1262\n", | |||
"Negative: 605\n", | |||
"AUC: 0.8143717274835677\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC\n", | |||
"# res : 224\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_224\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 224" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 138, | |||
"id": "68600db4", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:41, 1.44it/s] \n", | |||
"Positive: 1195\n", | |||
"Negative: 672\n", | |||
"AUC: 0.8400701597139936\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC\n", | |||
"# res : 256\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_256\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 256" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 139, | |||
"id": "71a5547e", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:41, 1.42it/s] \n", | |||
"Positive: 1241\n", | |||
"Negative: 626\n", | |||
"AUC: 0.8813918512441892\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 154, | |||
"id": "58450362", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:41, 1.42it/s] \n", | |||
"Positive: 1261\n", | |||
"Negative: 606\n", | |||
"AUC: 0.8045073375262055\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC_Grayscale\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_grayscale\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_grayscale/phase2\" --batch-size 32 --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 155, | |||
"id": "48c40f18", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:44, 1.33it/s] \n", | |||
"Positive: 1178\n", | |||
"Negative: 689\n", | |||
"AUC: 0.8661869929814967\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC_no_red\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_no_red\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_red/phase2\" --batch-size 32 --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 156, | |||
"id": "b6ad9232", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:52, 1.12it/s] \n", | |||
"Positive: 1266\n", | |||
"Negative: 601\n", | |||
"AUC: 0.8018310900454735\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC_no_green\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_no_green\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_green/phase2\" --batch-size 32 --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 157, | |||
"id": "1ba76d51", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:48, 1.23it/s] \n", | |||
"Positive: 1248\n", | |||
"Negative: 619\n", | |||
"AUC: 0.8570821813062721\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC_no_blue\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_no_blue\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_blue/phase2\" --batch-size 32 --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 158, | |||
"id": "05cfaf9c", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:52, 1.12it/s] \n", | |||
"Positive: 1239\n", | |||
"Negative: 628\n", | |||
"AUC: 0.8013924335875389\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC_red_only\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_red_only\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_red_only/phase2\" --batch-size 32 --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 159, | |||
"id": "1ad09456", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:52, 1.13it/s] \n", | |||
"Positive: 1221\n", | |||
"Negative: 646\n", | |||
"AUC: 0.8590070792695896\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC_green_only\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_green_only\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_green_only/phase2\" --batch-size 32 --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 160, | |||
"id": "41e8d3a0", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [00:52, 1.12it/s] \n", | |||
"Positive: 1255\n", | |||
"Negative: 612\n", | |||
"AUC: 0.8268636253152251\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC_blue_only\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_blue_only\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_blue_only/phase2\" --batch-size 32 --res 450" | |||
] | |||
}, | |||
{ | |||
"cell_type": "code", | |||
"execution_count": 186, | |||
"id": "88bc18db", | |||
"metadata": { | |||
"scrolled": false | |||
}, | |||
"outputs": [ | |||
{ | |||
"name": "stdout", | |||
"output_type": "stream", | |||
"text": [ | |||
"Loading model\n", | |||
"Classifying\n", | |||
"59it [01:24, 1.43s/it] \n", | |||
"Positive: 1235\n", | |||
"Negative: 632\n", | |||
"AUC: 0.8588406050294211\n" | |||
] | |||
} | |||
], | |||
"source": [ | |||
"# EVALUATION\n", | |||
"# dataset : CNMC-blackborder\n", | |||
"# res : 450\n", | |||
"# epochs : 100\n", | |||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_w_blackborder\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 450" | |||
] | |||
 }
], | |||
"metadata": { | |||
"kernelspec": { | |||
"display_name": "Python 3", | |||
"language": "python", | |||
"name": "python3" | |||
}, | |||
"language_info": { | |||
"codemirror_mode": { | |||
"name": "ipython", | |||
"version": 3 | |||
}, | |||
"file_extension": ".py", | |||
"mimetype": "text/x-python", | |||
"name": "python", | |||
"nbconvert_exporter": "python", | |||
"pygments_lexer": "ipython3", | |||
"version": "3.6.9" | |||
} | |||
}, | |||
"nbformat": 4, | |||
"nbformat_minor": 5 | |||
} |
@@ -0,0 +1,92 @@ | |||
import argparse | |||
import os | |||
import zipfile | |||
from os.path import join | |||
import torch | |||
from PIL import Image | |||
from torch.utils.data import DataLoader, Dataset | |||
from tqdm import tqdm | |||
import numpy as np | |||
from model import get_model | |||
from dataset import get_tf_vaild_rot_transform | |||
from sklearn import metrics | |||
import matplotlib.pyplot as plt | |||
import csv | |||
from sklearn.metrics import roc_curve, roc_auc_score | |||
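# Test images are loaded strictly in numeric order (1.bmp, 2.bmp, ...) so that the predicted
# labels line up with the expected submission file order.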
class OrderedImages(Dataset): | |||
def __init__(self, root, transform): | |||
super().__init__() | |||
self.root = root | |||
self.transform = transform | |||
def __len__(self): | |||
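        # Hard-coded size of the preliminary test set (phase2): 1867 images named 1.bmp .. 1867.bmp.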
return 1867 | |||
def __getitem__(self, index): | |||
img = Image.open(os.path.join(self.root, f'{index + 1}.bmp'))#.convert('RGB') | |||
return self.transform(img) | |||
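# Class counts of the preliminary test set: 1219 ALL (positive) and 648 HEM (negative) images.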
VALIDATION_ALL = 1219 | |||
VALIDATION_HEM = 648 | |||
parser = argparse.ArgumentParser() | |||
parser.add_argument('--batch-size', type=int, default=64) | |||
parser.add_argument('--modelroot', default='results/20190313T101236Z.LGJL', help='path to model') | |||
parser.add_argument('--dataroot', default='data/phase3', help='path to dataset') | |||
parser.add_argument('--res', type=int, default=450, help='desired input resolution')
args = parser.parse_args() | |||
dataset = OrderedImages(args.dataroot, get_tf_vaild_rot_transform(args.res)) | |||
print(f"Loading model") | |||
model = get_model().to('cuda:0') | |||
model = torch.nn.DataParallel(model) | |||
model.load_state_dict(torch.load(join(args.modelroot, 'model.pt'))) | |||
model.eval() | |||
dataloader = DataLoader(dataset, batch_size=args.batch_size, num_workers=6) | |||
print("Classifying") | |||
all_labels = [] | |||
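# Each batch element is a stack of rotated views of one image (test-time augmentation);
# the model outputs over all rotations are averaged and the mean logit is thresholded at 0.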
for x in tqdm(dataloader, total=(len(dataset) + args.batch_size - 1) // args.batch_size):
x = x.to('cuda:0') | |||
bs, nrot, c, h, w = x.size() | |||
with torch.no_grad(): | |||
y = model(x.view(-1, c, h, w)) | |||
y = y.view(bs, nrot).mean(1) | |||
labels = y > 0 | |||
all_labels.append(labels) | |||
all_labels = torch.cat(all_labels) | |||
print("Positive:", all_labels.sum().item()) | |||
print("Negative:", len(all_labels) - all_labels.sum().item()) | |||
# Load the ground-truth labels for the preliminary test set and report the area under the ROC curve.
with open(r'/home/feoktistovar67431/data/resources/phase2_labels.csv', 'r') as label_file:
    true_labels = [int(row[0]) for row in csv.reader(label_file, delimiter=',') if row and row[0].isdigit()]
print(f'AUC: {roc_auc_score(true_labels, all_labels.cpu())}')
# Optional diagnostic: plot the ROC curve. Only thresholded predictions are collected above,
# so this yields a coarse three-point curve; it is saved next to the model for quick inspection.
fpr, tpr, _ = roc_curve(true_labels, all_labels.cpu().numpy())
plt.plot(fpr, tpr)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.savefig(join(args.modelroot, 'roc_curve.png'))
plt.close()
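# Write the predictions in the challenge submission format: one label per line, zipped as isbi_valid.predict.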
csv_path = join(args.modelroot, 'submission.csv') | |||
zip_path = join(args.modelroot, 'submission.zip') | |||
np.savetxt(csv_path, all_labels.cpu().numpy(), '%d') | |||
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf: | |||
zipf.write(csv_path, 'isbi_valid.predict') |
@@ -0,0 +1,58 @@ | |||
import pickle | |||
import random | |||
import string | |||
from datetime import datetime | |||
import torch | |||
import torch.nn as nn | |||
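# Running average that is updated one value at a time without storing previous values.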
class IncrementalAverage: | |||
def __init__(self): | |||
self.value = 0 | |||
self.counter = 0 | |||
def update(self, x): | |||
self.counter += 1 | |||
self.value += (x - self.value) / self.counter | |||
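# Flattens each sample in a batch into a single feature vector (keeps the batch dimension).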
class Flatten(nn.Module): | |||
def forward(self, x): | |||
return x.view(x.size(0), -1) | |||
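# Debugging helper that prints the shape of the tensor passing through and returns it unchanged.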
class SizePrinter(nn.Module): | |||
def forward(self, x): | |||
print(x.size()) | |||
return x | |||
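# Counts model parameters; with grad_only=True only trainable parameters are included.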
def count_parameters(model, grad_only=True): | |||
return sum(p.numel() for p in model.parameters() if not grad_only or p.requires_grad) | |||
def to_device(device, *tensors): | |||
return tuple(x.to(device) for x in tensors) | |||
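# Repeats a re-iterable (e.g. a DataLoader) indefinitely by restarting it after every full pass.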
def loop_iter(iter): | |||
while True: | |||
for item in iter: | |||
yield item | |||
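# Builds a unique run identifier such as 20190313T101236Z.LGJL from a timestamp and four random letters.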
def unique_string(): | |||
return '{}.{}'.format(datetime.now().strftime('%Y%m%dT%H%M%SZ'), | |||
''.join(random.choice(string.ascii_uppercase) for _ in range(4))) | |||
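# Seeds Python's and PyTorch's (CPU and all CUDA devices) random number generators for reproducibility.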
def set_seeds(seed): | |||
random.seed(seed) | |||
torch.manual_seed(seed) | |||
torch.cuda.manual_seed_all(seed) | |||
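# Serializes an object to the given path with pickle.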
def pickle_dump(obj, file): | |||
with open(file, 'wb') as f: | |||
pickle.dump(obj, f) |