commit c7a6997e9f
init
.ipynb_checkpoints/run-checkpoint.ipynb (new file, 4395 lines)
File diff suppressed because it is too large.
README.md (new file, 35 lines)
@@ -0,0 +1,35 @@
# C-NMC Challenge

This is the code release for the paper:

Prellberg J., Kramer O. (2019) Acute Lymphoblastic Leukemia Classification from Microscopic Images Using Convolutional Neural Networks. In: Gupta A., Gupta R. (eds) ISBI 2019 C-NMC Challenge: Classification in Cancer Cell Imaging. Lecture Notes in Bioengineering. Springer, Singapore.

## Usage

Use the script `main_manual.py` to train the model on the dataset. The expected training data layout is described below.

Use the script `submission.py` to apply the trained model to the test data.
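For example, training and evaluation can be launched as follows (a sketch based on the argument parser in `main_manual.py` and the invocations recorded in `run.ipynb`; the paths and the `<run-id>` directory are placeholders to adapt to your setup):

```
python3 main_manual.py --dataroot data --batch-size 32 --epochs 100 --res 450 --out results
python3 submission.py --modelroot results/<run-id> --dataroot data/phase2 --batch-size 32 --res 450
```

`main_manual.py` writes its checkpoint and metrics into a uniquely named subdirectory of `--out`, which is what `--modelroot` should point to.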
## Data Layout

The training data was released in multiple steps during the challenge, which is why the data layout is a little peculiar.

```
data/fold_0/all/*.bmp
data/fold_0/hem/*.bmp
data/fold_1/...
data/fold_2/...
data/phase2/*.bmp
data/phase3/*.bmp
data/phase2.csv
```

The `fold_0` to `fold_2` folders contain the training images, with one subdirectory per class. The directories `phase2` and `phase3` hold the preliminary test set and the final test set, respectively, and contain images numbered starting from `1.bmp`. The labels for the preliminary test set are given in `phase2.csv`, which looks as follows:

```
Patient_ID,new_names,labels
UID_57_29_1_all.bmp,1.bmp,1
UID_57_22_2_all.bmp,2.bmp,1
UID_57_31_3_all.bmp,3.bmp,1
UID_H49_35_1_hem.bmp,4.bmp,0
```
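As a quick sanity check of this layout, the `dataset.py` module can be used directly to index the images (a minimal sketch; it assumes the data directory is named `data` and that the repository root is on the Python path):

```python
from dataset import ISBI2019, to_dataframe

df = to_dataframe('data')                             # one row per image; folds 0-2 plus phase2 as fold 3
print(df.groupby(['fold', 'class'])['file'].count())  # examples per fold and class

ds = ISBI2019(df)                                     # yields (PIL image, label) pairs; 0 = hem, 1 = all
img, label = ds[0]
```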
dataset.py (new file, 184 lines)
@@ -0,0 +1,184 @@
import re
import sys
from collections import defaultdict
from glob import glob
from os.path import join

import pandas as pd
import torch
import torchvision.transforms.functional as TF
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

STD_RES = 450
STD_CENTER_CROP = 300


def file_iter(dataroot):
    for file in glob(join(dataroot, '*', '*', '*')):
        yield file


def file_match_iter(dataroot):
    pattern = re.compile(r'(?P<file>.*(?P<fold>[a-zA-Z0-9_]+)/'
                         r'(?P<class>hem|all)/'
                         r'UID_(?P<subject>H?\d+)_(?P<image>\d+)_(?P<cell>\d+)_(all|hem).bmp)')
    for file in file_iter(dataroot):
        match = pattern.match(file)
        if match is not None:
            yield file, match


def to_dataframe(dataroot):
    data = defaultdict(list)
    keys = ['file', 'fold', 'subject', 'class', 'image', 'cell']

    # Load data from the three training folds
    for file, match in file_match_iter(dataroot):
        for key in keys:
            data[key].append(match.group(key))

    # Load data from the phase2 validation set
    phase2 = pd.read_csv(join(dataroot, 'phase2.csv'), header=0, names=['file_id', 'file', 'class'])
    pattern = re.compile(r'UID_(?P<subject>H?\d+)_(?P<image>\d+)_(?P<cell>\d+)_(all|hem).bmp')
    for i, row in phase2.iterrows():
        match = pattern.match(row['file_id'])
        data['file'].append(join(dataroot, f'phase2/{i+1}.bmp'))
        data['fold'].append('3')
        data['subject'].append(match.group('subject'))
        data['class'].append('hem' if row['class'] == 0 else 'all')
        data['image'].append(match.group('image'))
        data['cell'].append(match.group('cell'))

    # Convert to dataframe
    df = pd.DataFrame(data)
    df = df.apply(pd.to_numeric, errors='ignore')
    return df


class ISBI2019(Dataset):
    def __init__(self, df, transform=None):
        super().__init__()
        self.transform = transform
        self.df = df

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):
        # Convert tensors to int because pandas screws up otherwise
        index = int(index)
        file, cls = self.df.iloc[index][['file', 'class']]
        img = Image.open(file)  # .convert('RGB')
        cls = 0 if cls == 'hem' else 1
        if self.transform is not None:
            img = self.transform(img)
        return img, cls


def get_class_weights(df):
    # Fraction of samples per class, ordered (hem, all)
    class_weights = torch.FloatTensor([
        df.loc[df['class'] == 'hem']['file'].count() / len(df),
        df.loc[df['class'] == 'all']['file'].count() / len(df),
    ]).to(dtype=torch.float32)
    return class_weights


def tf_rotation_stack(x, num_rotations=8):
    xs = []
    for i in range(num_rotations):
        angle = 360 * i / num_rotations
        xrot = TF.rotate(x, angle)
        xrot = TF.to_tensor(xrot)
        xs.append(xrot)
    xs = torch.stack(xs)
    return xs


def get_tf_train_transform(res):
    size_factor = int(STD_RES / res)
    center_crop = int(STD_CENTER_CROP / size_factor)
    tf_train = transforms.Compose([
        transforms.Resize(res),
        # transforms.CenterCrop(center_crop),
        transforms.RandomVerticalFlip(),
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(degrees=360, translate=(0.2, 0.2)),
        # transforms.Lambda(tf_rotation_stack),
        transforms.ToTensor(),
    ])
    return tf_train


def get_tf_vaild_rot_transform(res):
    size_factor = int(STD_RES / res)
    center_crop = int(STD_CENTER_CROP / size_factor)
    tf_valid_rot = transforms.Compose([
        transforms.Resize(res),
        # transforms.CenterCrop(center_crop),
        transforms.Lambda(tf_rotation_stack),
    ])
    return tf_valid_rot


def get_tf_valid_norot_transform(res):
    size_factor = int(STD_RES / res)
    center_crop = int(STD_CENTER_CROP / size_factor)
    tf_valid_norot = transforms.Compose([
        transforms.Resize(res),
        # transforms.CenterCrop(center_crop),
        transforms.ToTensor(),
    ])
    return tf_valid_norot


def get_dataset(dataroot, folds_train=(0, 1, 2), folds_valid=(3,), tf_train=None, tf_valid=None):
    if tf_train is None or tf_valid is None:
        sys.exit("Transformation is None")
    df = to_dataframe(dataroot)
    df_trainset = df.loc[df['fold'].isin(folds_train)]
    trainset = ISBI2019(df_trainset, transform=tf_train)
    class_weights = get_class_weights(df_trainset)

    if folds_valid is not None:
        df_validset = df.loc[df['fold'].isin(folds_valid)]
        validset_subjects = df_validset['subject'].values
        validset = ISBI2019(df_validset, transform=tf_valid)
        return trainset, validset, validset_subjects, class_weights
    else:
        return trainset, class_weights


if __name__ == '__main__':
    import math
    from tqdm import tqdm

    df = to_dataframe('data')
    print(df)
    print("Examples by fold and class")
    print(df.groupby(['fold', 'class'])['file'].count())

    dataset = ISBI2019(df)
    mean_height, mean_width = 0, 0
    weird_files = []
    bound_left, bound_upper, bound_right, bound_lower = math.inf, math.inf, 0, 0
    for i, (img, label) in tqdm(enumerate(dataset), total=len(dataset)):
        left, upper, right, lower = img.getbbox()
        if left == 0 or upper == 0 or right == 450 or lower == 450:
            weird_files.append(df.iloc[i]['file'])
        height = lower - upper
        width = right - left
        mean_height = mean_height + (height - mean_height) / (i + 1)
        mean_width = mean_width + (width - mean_width) / (i + 1)
        bound_left = min(bound_left, left)
        bound_upper = min(bound_upper, upper)
        bound_right = max(bound_right, right)
        bound_lower = max(bound_lower, lower)
    print(f"mean_height = {mean_height:.2f}")
    print(f"mean_width = {mean_width:.2f}")
    print(f"bound_left = {bound_left:d}")
    print(f"bound_upper = {bound_upper:d}")
    print(f"bound_right = {bound_right:d}")
    print(f"bound_lower = {bound_lower:d}")
    print("Files that max out at least one border:")
    for f in weird_files:
        print(f)
main_manual.py (new file, 246 lines)
@@ -0,0 +1,246 @@
import argparse
import os
from collections import defaultdict

import numpy as np
import torch
import torch.nn.functional as F
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_fscore_support, accuracy_score
from tensorboardX import SummaryWriter
from torch.optim.lr_scheduler import StepLR, LambdaLR
from torch.utils.data import DataLoader
from tqdm import tqdm, trange

from dataset import get_dataset, get_tf_train_transform, get_tf_vaild_rot_transform
from model import get_model
from utils import IncrementalAverage, to_device, set_seeds, unique_string, count_parameters


def evaluate(model, valid_loader, class_weights, device):
    model.eval()

    all_labels = []
    all_preds = []
    loss_avg = IncrementalAverage()
    for img, label in tqdm(valid_loader, leave=False):
        img, label = to_device(device, img, label)
        bs, nrot, c, h, w = img.size()
        with torch.no_grad():
            pred = model(img.view(-1, c, h, w))
            pred = pred.view(bs, nrot).mean(1)
            loss = lossfn(pred, label.to(pred.dtype), class_weights)
        all_labels.append(label.cpu())
        all_preds.append(pred.cpu())
        loss_avg.update(loss.item())

    all_labels = torch.cat(all_labels).numpy()
    all_preds = torch.cat(all_preds).numpy()
    all_preds_binary = all_preds > 0  # logit > 0 corresponds to probability > 0.5

    cm = confusion_matrix(all_labels, all_preds_binary)
    auc = roc_auc_score(all_labels, all_preds)
    prec, rec, f1, _ = precision_recall_fscore_support(all_labels, all_preds_binary, average='weighted')
    return loss_avg.value, cm, auc, prec, rec, f1


def train(model, opt, train_loader, class_weights, device):
    model.train()
    loss_avg = IncrementalAverage()
    for img, label in tqdm(train_loader, leave=False):
        img, label = to_device(device, img, label)
        pred = model(img)
        pred = pred.view(-1)
        loss = lossfn(pred, label.to(pred.dtype), class_weights)
        loss_avg.update(loss.item())

        opt.zero_grad()
        loss.backward()
        opt.step()
    return loss_avg.value


def lossfn(prediction, target, class_weights):
    pos_weight = (class_weights[0] / class_weights[1]).expand(len(target))
    return F.binary_cross_entropy_with_logits(prediction, target, pos_weight=pos_weight)


def schedule(epoch):
    if epoch < 2:
        ub = 1
    elif epoch < 4:
        ub = 0.1
    else:
        ub = 0.01
    return ub


def train_validate(args):
    model = get_model().to(args.device)
    print("Model parameters:", count_parameters(model))

    trainset, validset, validset_subjects, class_weights = get_dataset(args.dataroot,
                                                                       tf_train=get_tf_train_transform(args.res),
                                                                       tf_valid=get_tf_vaild_rot_transform(args.res))
    class_weights = class_weights.to(args.device)
    print(f"Trainset length: {len(trainset)}")
    print(f"Validset length: {len(validset)}")
    print(f"class_weights = {class_weights}")

    train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True)
    valid_loader = DataLoader(validset, batch_size=args.batch_size, num_workers=6, shuffle=False)

    opt = torch.optim.Adam([
        {'params': model.paramgroup01(), 'lr': 1e-6},
        {'params': model.paramgroup234(), 'lr': 1e-4},
        {'params': model.parameters_classifier(), 'lr': 1e-2},
    ])
    scheduler = LambdaLR(opt, lr_lambda=[lambda e: schedule(e),
                                         lambda e: schedule(e),
                                         lambda e: schedule(e)])

    summarywriter = SummaryWriter(args.out)
    recorded_data = defaultdict(list)

    def logged_eval(e):
        valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader, class_weights, args.device)

        # Derive some accuracy metrics from confusion matrix
        tn, fp, fn, tp = cm.ravel()
        acc = (tp + tn) / cm.sum()
        acc_hem = tn / (tn + fp)
        acc_all = tp / (tp + fn)

        print(f"epoch={e} f1={f1:.4f}")

        summarywriter.add_scalar('loss/train', train_loss, e)
        summarywriter.add_scalar('loss/valid', valid_loss, e)
        summarywriter.add_scalar('cm/tn', tn, e)
        summarywriter.add_scalar('cm/fp', fp, e)
        summarywriter.add_scalar('cm/fn', fn, e)
        summarywriter.add_scalar('cm/tp', tp, e)
        summarywriter.add_scalar('metrics/precision', prec, e)
        summarywriter.add_scalar('metrics/recall', rec, e)
        summarywriter.add_scalar('metrics/f1', f1, e)
        summarywriter.add_scalar('metrics/auc', auc, e)
        summarywriter.add_scalar('acc/acc', acc, e)
        summarywriter.add_scalar('acc/hem', acc_hem, e)
        summarywriter.add_scalar('acc/all', acc_all, e)

        recorded_data['loss_train'].append(train_loss)
        recorded_data['loss_valid'].append(valid_loss)
        recorded_data['tn'].append(tn)
        recorded_data['fp'].append(fp)
        recorded_data['fn'].append(fn)
        recorded_data['tp'].append(tp)
        recorded_data['precision'].append(prec)
        recorded_data['recall'].append(rec)
        recorded_data['f1'].append(f1)
        recorded_data['auc'].append(auc)
        recorded_data['acc'].append(acc)
        recorded_data['acc_hem'].append(acc_hem)
        recorded_data['acc_all'].append(acc_all)
        np.savez(f'{args.out}/results', **recorded_data)

        return f1

    model = torch.nn.DataParallel(model)
    train_loss = np.nan
    best_val_f1 = logged_eval(0)
    for e in trange(args.epochs, desc='Epoch'):
        scheduler.step(e)
        train_loss = train(model, opt, train_loader, class_weights, args.device)
        val_f1 = logged_eval(e + 1)

        if val_f1 > best_val_f1:
            print(f"New best model at {val_f1:.6f}")
            torch.save(model.state_dict(), f'{args.out}/model.pt')
            best_val_f1 = val_f1

    summarywriter.close()

    subj_acc = evaluate_subj_acc(model, validset, validset_subjects, args.device)
    np.savez(f'{args.out}/subj_acc', **subj_acc)


def evaluate_subj_acc(model, dataset, subjects, device):
    model.eval()

    subj_pred = defaultdict(list)
    subj_label = defaultdict(list)

    dataloader = DataLoader(dataset, batch_size=1, num_workers=1, shuffle=False)

    for (img, cls), subj in tqdm(zip(dataloader, subjects), total=len(subjects), leave=False):
        img, cls = to_device(device, img, cls)
        bs, nrot, c, h, w = img.size()
        with torch.no_grad():
            cls_hat = model(img.view(-1, c, h, w))
            cls_hat = cls_hat.view(bs, nrot).mean(1)
        subj_label[subj].append(cls.cpu())
        subj_pred[subj].append(cls_hat.cpu())

    for k in subj_label:
        subj_label[k] = torch.cat(subj_label[k]).numpy()
        subj_pred[k] = torch.cat(subj_pred[k]).numpy() > 0

    subj_acc = {}
    for k in subj_label:
        subj_acc[k] = accuracy_score(subj_label[k], subj_pred[k])

    return subj_acc


def train_test(args):
    model = get_model().to(args.device)
    print("Model parameters:", count_parameters(model))

    trainset, class_weights = get_dataset(args.dataroot, folds_train=(0, 1, 2, 3),
                                          folds_valid=None,
                                          tf_train=get_tf_train_transform(args.res),
                                          tf_valid=get_tf_vaild_rot_transform(args.res))
    class_weights = class_weights.to(args.device)
    print(f"Trainset length: {len(trainset)}")
    print(f"class_weights = {class_weights}")

    train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True)

    opt = torch.optim.Adam([
        {'params': model.paramgroup01(), 'lr': 1e-6},
        {'params': model.paramgroup234(), 'lr': 1e-4},
        {'params': model.parameters_classifier(), 'lr': 1e-2},
    ])
    scheduler = LambdaLR(opt, lr_lambda=[lambda e: schedule(e),
                                         lambda e: schedule(e),
                                         lambda e: schedule(e)])

    model = torch.nn.DataParallel(model)
    for e in trange(args.epochs, desc='Epoch'):
        scheduler.step(e)
        train(model, opt, train_loader, class_weights, args.device)
    torch.save(model.state_dict(), f'{args.out}/model.pt')


def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataroot', default='data', help='path to dataset')
    parser.add_argument('--batch-size', type=int, default=16)
    parser.add_argument('--epochs', type=int, default=6)
    parser.add_argument('--seed', default=1, type=int, help='random seed')
    parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu')
    parser.add_argument('--out', default='results', help='output folder')
    parser.add_argument('--res', type=int, default=450, help='Desired input resolution')
    args = parser.parse_args()
    args.out = os.path.join(args.out, unique_string())
    return args


if __name__ == '__main__':
    args = parse_args()
    print(args)

    os.makedirs(args.out, exist_ok=True)
    set_seeds(args.seed)
    torch.backends.cudnn.benchmark = True

    train_validate(args)
main_manual_abl_layerlr.py (new file, 210 lines)
@@ -0,0 +1,210 @@
|
||||
import argparse
|
||||
import os
|
||||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_fscore_support, accuracy_score
|
||||
from tensorboardX import SummaryWriter
|
||||
from torch.optim.lr_scheduler import StepLR, LambdaLR
|
||||
from torch.utils.data import DataLoader
|
||||
from tqdm import tqdm, trange
|
||||
|
||||
from dataset import get_dataset, get_tf_train_transform, get_tf_vaild_rot_transform
|
||||
from model import get_model
|
||||
from utils import IncrementalAverage, to_device, set_seeds, unique_string, count_parameters
|
||||
|
||||
|
||||
def evaluate(model, valid_loader, class_weights, device):
|
||||
model.eval()
|
||||
|
||||
all_labels = []
|
||||
all_preds = []
|
||||
loss_avg = IncrementalAverage()
|
||||
for img, label in tqdm(valid_loader, leave=False):
|
||||
img, label = to_device(device, img, label)
|
||||
bs, nrot, c, h, w = img.size()
|
||||
with torch.no_grad():
|
||||
pred = model(img.view(-1, c, h, w))
|
||||
pred = pred.view(bs, nrot).mean(1)
|
||||
loss = lossfn(pred, label.to(pred.dtype), class_weights)
|
||||
all_labels.append(label.cpu())
|
||||
all_preds.append(pred.cpu())
|
||||
loss_avg.update(loss.item())
|
||||
|
||||
all_labels = torch.cat(all_labels).numpy()
|
||||
all_preds = torch.cat(all_preds).numpy()
|
||||
all_preds_binary = all_preds > 0
|
||||
|
||||
cm = confusion_matrix(all_labels, all_preds_binary)
|
||||
auc = roc_auc_score(all_labels, all_preds)
|
||||
prec, rec, f1, _ = precision_recall_fscore_support(all_labels, all_preds_binary, average='weighted')
|
||||
return loss_avg.value, cm, auc, prec, rec, f1
|
||||
|
||||
|
||||
def train(model, opt, train_loader, class_weights, device):
|
||||
model.train()
|
||||
loss_avg = IncrementalAverage()
|
||||
for img, label in tqdm(train_loader, leave=False):
|
||||
img, label = to_device(device, img, label)
|
||||
pred = model(img)
|
||||
pred = pred.view(-1)
|
||||
loss = lossfn(pred, label.to(pred.dtype), class_weights)
|
||||
loss_avg.update(loss.item())
|
||||
|
||||
opt.zero_grad()
|
||||
loss.backward()
|
||||
opt.step()
|
||||
return loss_avg.value
|
||||
|
||||
|
||||
def lossfn(prediction, target, class_weights):
|
||||
pos_weight = (class_weights[0] / class_weights[1]).expand(len(target))
|
||||
return F.binary_cross_entropy_with_logits(prediction, target, pos_weight=pos_weight)
|
||||
|
||||
|
||||
def schedule(epoch):
|
||||
if epoch < 2:
|
||||
ub = 1
|
||||
elif epoch < 4:
|
||||
ub = 0.1
|
||||
else:
|
||||
ub = 0.01
|
||||
return ub
|
||||
|
||||
|
||||
def train_validate(args):
|
||||
model = get_model().to(args.device)
|
||||
print("Model parameters:", count_parameters(model))
|
||||
|
||||
trainset, validset, validset_subjects, class_weights = get_dataset(args.dataroot,
|
||||
tf_train=get_tf_train_transform(args.res),
|
||||
tf_valid=get_tf_vaild_rot_transform(args.res))
|
||||
class_weights = class_weights.to(args.device)
|
||||
print(f"Trainset length: {len(trainset)}")
|
||||
print(f"Validset length: {len(validset)}")
|
||||
print(f"class_weights = {class_weights}")
|
||||
|
||||
train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True)
|
||||
valid_loader = DataLoader(validset, batch_size=args.batch_size, num_workers=6, shuffle=False)
|
||||
|
||||
opt = torch.optim.Adam([
|
||||
{'params': model.paramgroup01(), 'lr': args.lr},
|
||||
{'params': model.paramgroup234(), 'lr': args.lr},
|
||||
{'params': model.parameters_classifier(), 'lr': args.lr},
|
||||
])
|
||||
scheduler = LambdaLR(opt, lr_lambda=[lambda e: schedule(e),
|
||||
lambda e: schedule(e),
|
||||
lambda e: schedule(e)])
|
||||
|
||||
summarywriter = SummaryWriter(args.out)
|
||||
recorded_data = defaultdict(list)
|
||||
|
||||
def logged_eval(e):
|
||||
valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader, class_weights, args.device)
|
||||
|
||||
# Derive some accuracy metrics from confusion matrix
|
||||
tn, fp, fn, tp = cm.ravel()
|
||||
acc = (tp + tn) / cm.sum()
|
||||
acc_hem = tn / (tn + fp)
|
||||
acc_all = tp / (tp + fn)
|
||||
|
||||
print(f"epoch={e} f1={f1:.4f}")
|
||||
|
||||
summarywriter.add_scalar('loss/train', train_loss, e)
|
||||
summarywriter.add_scalar('loss/valid', valid_loss, e)
|
||||
summarywriter.add_scalar('cm/tn', tn, e)
|
||||
summarywriter.add_scalar('cm/fp', fp, e)
|
||||
summarywriter.add_scalar('cm/fn', fn, e)
|
||||
summarywriter.add_scalar('cm/tp', tp, e)
|
||||
summarywriter.add_scalar('metrics/precision', prec, e)
|
||||
summarywriter.add_scalar('metrics/recall', rec, e)
|
||||
summarywriter.add_scalar('metrics/f1', f1, e)
|
||||
summarywriter.add_scalar('metrics/auc', auc, e)
|
||||
summarywriter.add_scalar('acc/acc', acc, e)
|
||||
summarywriter.add_scalar('acc/hem', acc_hem, e)
|
||||
summarywriter.add_scalar('acc/all', acc_all, e)
|
||||
|
||||
recorded_data['loss_train'].append(train_loss)
|
||||
recorded_data['loss_valid'].append(valid_loss)
|
||||
recorded_data['tn'].append(tn)
|
||||
recorded_data['fp'].append(fp)
|
||||
recorded_data['fn'].append(fn)
|
||||
recorded_data['tp'].append(tp)
|
||||
recorded_data['precision'].append(prec)
|
||||
recorded_data['recall'].append(rec)
|
||||
recorded_data['f1'].append(f1)
|
||||
recorded_data['auc'].append(auc)
|
||||
recorded_data['acc'].append(acc)
|
||||
recorded_data['acc_hem'].append(acc_hem)
|
||||
recorded_data['acc_all'].append(acc_all)
|
||||
np.savez(f'{args.out}/results', **recorded_data)
|
||||
|
||||
model = torch.nn.DataParallel(model)
|
||||
train_loss = np.nan
|
||||
logged_eval(0)
|
||||
for e in trange(args.epochs, desc='Epoch'):
|
||||
scheduler.step(e)
|
||||
train_loss = train(model, opt, train_loader, class_weights, args.device)
|
||||
logged_eval(e + 1)
|
||||
|
||||
summarywriter.close()
|
||||
|
||||
subj_acc = evaluate_subj_acc(model, validset, validset_subjects, args.device)
|
||||
np.savez(f'{args.out}/subj_acc', **subj_acc)
|
||||
|
||||
|
||||
def evaluate_subj_acc(model, dataset, subjects, device):
|
||||
model.eval()
|
||||
|
||||
subj_pred = defaultdict(list)
|
||||
subj_label = defaultdict(list)
|
||||
|
||||
dataloader = DataLoader(dataset, batch_size=1, num_workers=1, shuffle=False)
|
||||
|
||||
for (img, cls), subj in tqdm(zip(dataloader, subjects), total=len(subjects), leave=False):
|
||||
img, cls = to_device(device, img, cls)
|
||||
bs, nrot, c, h, w = img.size()
|
||||
with torch.no_grad():
|
||||
cls_hat = model(img.view(-1, c, h, w))
|
||||
cls_hat = cls_hat.view(bs, nrot).mean(1)
|
||||
subj_label[subj].append(cls.cpu())
|
||||
subj_pred[subj].append(cls_hat.cpu())
|
||||
|
||||
for k in subj_label:
|
||||
subj_label[k] = torch.cat(subj_label[k]).numpy()
|
||||
subj_pred[k] = torch.cat(subj_pred[k]).numpy() > 0
|
||||
|
||||
subj_acc = {}
|
||||
for k in subj_label:
|
||||
subj_acc[k] = accuracy_score(subj_label[k], subj_pred[k])
|
||||
|
||||
return subj_acc
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--dataroot', default='data', help='path to dataset')
|
||||
parser.add_argument('--lr', type=float, default=1e-4)
|
||||
parser.add_argument('--batch-size', type=int, default=16)
|
||||
parser.add_argument('--epochs', type=int, default=6)
|
||||
parser.add_argument('--seed', default=1, type=int, help='random seed')
|
||||
parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu')
|
||||
parser.add_argument('--out', default='results', help='output folder')
|
||||
parser.add_argument('--res', type=int, default=450, help='Desired input resolution')
|
||||
args = parser.parse_args()
|
||||
args.out = os.path.join(args.out, unique_string())
|
||||
return args
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = parse_args()
|
||||
print(args)
|
||||
|
||||
os.makedirs(args.out, exist_ok=True)
|
||||
set_seeds(args.seed)
|
||||
torch.backends.cudnn.benchmark = True
|
||||
|
||||
train_validate(args)
|
main_manual_abl_testrot.py (new file, 208 lines)
@@ -0,0 +1,208 @@
|
||||
import argparse
|
||||
import os
|
||||
from collections import defaultdict
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
import torch.nn.functional as F
|
||||
from sklearn.metrics import roc_auc_score, confusion_matrix, precision_recall_fscore_support, accuracy_score
|
||||
from tensorboardX import SummaryWriter
|
||||
from torch.optim.lr_scheduler import StepLR, LambdaLR
|
||||
from torch.utils.data import DataLoader
|
||||
from tqdm import tqdm, trange
|
||||
|
||||
from dataset import get_dataset, get_tf_valid_norot_transform, get_tf_train_transform
|
||||
from model import get_model
|
||||
from utils import IncrementalAverage, to_device, set_seeds, unique_string, count_parameters
|
||||
|
||||
|
||||
def evaluate(model, valid_loader, class_weights, device):
|
||||
model.eval()
|
||||
|
||||
all_labels = []
|
||||
all_preds = []
|
||||
loss_avg = IncrementalAverage()
|
||||
for img, label in tqdm(valid_loader, leave=False):
|
||||
img, label = to_device(device, img, label)
|
||||
with torch.no_grad():
|
||||
pred = model(img).view(-1)
|
||||
loss = lossfn(pred, label.to(pred.dtype), class_weights)
|
||||
all_labels.append(label.cpu())
|
||||
all_preds.append(pred.cpu())
|
||||
loss_avg.update(loss.item())
|
||||
|
||||
all_labels = torch.cat(all_labels).numpy()
|
||||
all_preds = torch.cat(all_preds).numpy()
|
||||
all_preds_binary = all_preds > 0
|
||||
|
||||
cm = confusion_matrix(all_labels, all_preds_binary)
|
||||
auc = roc_auc_score(all_labels, all_preds)
|
||||
prec, rec, f1, _ = precision_recall_fscore_support(all_labels, all_preds_binary, average='weighted')
|
||||
return loss_avg.value, cm, auc, prec, rec, f1
|
||||
|
||||
|
||||
def train(model, opt, train_loader, class_weights, device):
|
||||
model.train()
|
||||
loss_avg = IncrementalAverage()
|
||||
for img, label in tqdm(train_loader, leave=False):
|
||||
img, label = to_device(device, img, label)
|
||||
pred = model(img)
|
||||
pred = pred.view(-1)
|
||||
loss = lossfn(pred, label.to(pred.dtype), class_weights)
|
||||
loss_avg.update(loss.item())
|
||||
|
||||
opt.zero_grad()
|
||||
loss.backward()
|
||||
opt.step()
|
||||
return loss_avg.value
|
||||
|
||||
|
||||
def lossfn(prediction, target, class_weights):
|
||||
pos_weight = (class_weights[0] / class_weights[1]).expand(len(target))
|
||||
return F.binary_cross_entropy_with_logits(prediction, target, pos_weight=pos_weight)
|
||||
|
||||
|
||||
def schedule(epoch):
|
||||
if epoch < 2:
|
||||
ub = 1
|
||||
elif epoch < 4:
|
||||
ub = 0.1
|
||||
else:
|
||||
ub = 0.01
|
||||
return ub
|
||||
|
||||
|
||||
def train_validate(args):
|
||||
model = get_model().to(args.device)
|
||||
print("Model parameters:", count_parameters(model))
|
||||
|
||||
trainset, validset, validset_subjects, class_weights = get_dataset(args.dataroot,
|
||||
tf_valid=get_tf_valid_norot_transform(args.res),
|
||||
tf_train=get_tf_train_transform(args.res))
|
||||
class_weights = class_weights.to(args.device)
|
||||
print(f"Trainset length: {len(trainset)}")
|
||||
print(f"Validset length: {len(validset)}")
|
||||
print(f"class_weights = {class_weights}")
|
||||
|
||||
train_loader = DataLoader(trainset, batch_size=args.batch_size, num_workers=6, shuffle=True, drop_last=True)
|
||||
valid_loader = DataLoader(validset, batch_size=args.batch_size, num_workers=6, shuffle=False)
|
||||
|
||||
opt = torch.optim.Adam([
|
||||
{'params': model.paramgroup01(), 'lr': 1e-6},
|
||||
{'params': model.paramgroup234(), 'lr': 1e-4},
|
||||
{'params': model.parameters_classifier(), 'lr': 1e-2},
|
||||
])
|
||||
scheduler = LambdaLR(opt, lr_lambda=[lambda e: schedule(e),
|
||||
lambda e: schedule(e),
|
||||
lambda e: schedule(e)])
|
||||
|
||||
summarywriter = SummaryWriter(args.out)
|
||||
recorded_data = defaultdict(list)
|
||||
|
||||
def logged_eval(e):
|
||||
valid_loss, cm, auc, prec, rec, f1 = evaluate(model, valid_loader, class_weights, args.device)
|
||||
|
||||
# Derive some accuracy metrics from confusion matrix
|
||||
tn, fp, fn, tp = cm.ravel()
|
||||
acc = (tp + tn) / cm.sum()
|
||||
acc_hem = tn / (tn + fp)
|
||||
acc_all = tp / (tp + fn)
|
||||
|
||||
print(f"epoch={e} f1={f1:.4f}")
|
||||
|
||||
summarywriter.add_scalar('loss/train', train_loss, e)
|
||||
summarywriter.add_scalar('loss/valid', valid_loss, e)
|
||||
summarywriter.add_scalar('cm/tn', tn, e)
|
||||
summarywriter.add_scalar('cm/fp', fp, e)
|
||||
summarywriter.add_scalar('cm/fn', fn, e)
|
||||
summarywriter.add_scalar('cm/tp', tp, e)
|
||||
summarywriter.add_scalar('metrics/precision', prec, e)
|
||||
summarywriter.add_scalar('metrics/recall', rec, e)
|
||||
summarywriter.add_scalar('metrics/f1', f1, e)
|
||||
summarywriter.add_scalar('metrics/auc', auc, e)
|
||||
summarywriter.add_scalar('acc/acc', acc, e)
|
||||
summarywriter.add_scalar('acc/hem', acc_hem, e)
|
||||
summarywriter.add_scalar('acc/all', acc_all, e)
|
||||
|
||||
recorded_data['loss_train'].append(train_loss)
|
||||
recorded_data['loss_valid'].append(valid_loss)
|
||||
recorded_data['tn'].append(tn)
|
||||
recorded_data['fp'].append(fp)
|
||||
recorded_data['fn'].append(fn)
|
||||
recorded_data['tp'].append(tp)
|
||||
recorded_data['precision'].append(prec)
|
||||
recorded_data['recall'].append(rec)
|
||||
recorded_data['f1'].append(f1)
|
||||
recorded_data['auc'].append(auc)
|
||||
recorded_data['acc'].append(acc)
|
||||
recorded_data['acc_hem'].append(acc_hem)
|
||||
recorded_data['acc_all'].append(acc_all)
|
||||
np.savez(f'{args.out}/results', **recorded_data)
|
||||
|
||||
model = torch.nn.DataParallel(model)
|
||||
train_loss = np.nan
|
||||
logged_eval(0)
|
||||
for e in trange(args.epochs, desc='Epoch'):
|
||||
scheduler.step(e)
|
||||
train_loss = train(model, opt, train_loader, class_weights, args.device)
|
||||
logged_eval(e + 1)
|
||||
|
||||
torch.save(model.state_dict(), f'{args.out}/model.pt')
|
||||
summarywriter.close()
|
||||
|
||||
subj_acc = evaluate_subj_acc(model, validset, validset_subjects, args.device)
|
||||
np.savez(f'{args.out}/subj_acc', **subj_acc)
|
||||
|
||||
|
||||
def evaluate_subj_acc(model, dataset, subjects, device):
|
||||
model.eval()
|
||||
|
||||
subj_pred = defaultdict(list)
|
||||
subj_label = defaultdict(list)
|
||||
|
||||
dataloader = DataLoader(dataset, batch_size=1, num_workers=1, shuffle=False)
|
||||
|
||||
for (img, cls), subj in tqdm(zip(dataloader, subjects), total=len(subjects), leave=False):
|
||||
img, cls = to_device(device, img, cls)
|
||||
# This ablation uses the no-rotation validation transform, so batches are plain (B, C, H, W) tensors
with torch.no_grad():
cls_hat = model(img).view(-1)
|
||||
subj_label[subj].append(cls.cpu())
|
||||
subj_pred[subj].append(cls_hat.cpu())
|
||||
|
||||
for k in subj_label:
|
||||
subj_label[k] = torch.cat(subj_label[k]).numpy()
|
||||
subj_pred[k] = torch.cat(subj_pred[k]).numpy() > 0
|
||||
|
||||
subj_acc = {}
|
||||
for k in subj_label:
|
||||
subj_acc[k] = accuracy_score(subj_label[k], subj_pred[k])
|
||||
|
||||
return subj_acc
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = argparse.ArgumentParser()
|
||||
parser.add_argument('--dataroot', default='data', help='path to dataset')
|
||||
parser.add_argument('--batch-size', type=int, default=16)
|
||||
parser.add_argument('--epochs', type=int, default=6)
|
||||
parser.add_argument('--seed', default=1, type=int, help='random seed')
|
||||
parser.add_argument('--device', default='cuda' if torch.cuda.is_available() else 'cpu')
|
||||
parser.add_argument('--out', default='results', help='output folder')
|
||||
parser.add_argument('--res', type=int, default=450, help='Desired input resolution')
|
||||
args = parser.parse_args()
|
||||
args.out = os.path.join(args.out, unique_string())
|
||||
return args
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = parse_args()
|
||||
print(args)
|
||||
|
||||
os.makedirs(args.out, exist_ok=True)
|
||||
set_seeds(args.seed)
|
||||
torch.backends.cudnn.benchmark = True
|
||||
|
||||
train_validate(args)
|
model.py (new file, 183 lines)
@@ -0,0 +1,183 @@
|
||||
# Code adapted from: https://github.com/Cadene/pretrained-models.pytorch
|
||||
import math
|
||||
from collections import OrderedDict
|
||||
from itertools import chain
|
||||
|
||||
import torch.nn as nn
|
||||
from torch.utils import model_zoo
|
||||
|
||||
from utils import Flatten
|
||||
|
||||
|
||||
class SEModule(nn.Module):
|
||||
def __init__(self, channels, reduction):
|
||||
super(SEModule, self).__init__()
|
||||
self.avg_pool = nn.AdaptiveAvgPool2d(1)
|
||||
self.fc1 = nn.Conv2d(channels, channels // reduction, kernel_size=1, padding=0)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.fc2 = nn.Conv2d(channels // reduction, channels, kernel_size=1, padding=0)
|
||||
self.sigmoid = nn.Sigmoid()
|
||||
|
||||
def forward(self, x):
|
||||
module_input = x
|
||||
x = self.avg_pool(x)
|
||||
x = self.fc1(x)
|
||||
x = self.relu(x)
|
||||
x = self.fc2(x)
|
||||
x = self.sigmoid(x)
|
||||
return module_input * x
|
||||
|
||||
|
||||
class SEResNeXtBottleneck(nn.Module):
|
||||
"""
|
||||
ResNeXt bottleneck type C with a Squeeze-and-Excitation module.
|
||||
"""
|
||||
expansion = 4
|
||||
|
||||
def __init__(self, inplanes, planes, groups, reduction, stride=1, downsample=None, base_width=4):
|
||||
super(SEResNeXtBottleneck, self).__init__()
|
||||
width = math.floor(planes * (base_width / 64)) * groups
|
||||
self.conv1 = nn.Conv2d(inplanes, width, kernel_size=1, bias=False, stride=1)
|
||||
self.bn1 = nn.BatchNorm2d(width)
|
||||
self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)
|
||||
self.bn2 = nn.BatchNorm2d(width)
|
||||
self.conv3 = nn.Conv2d(width, planes * 4, kernel_size=1, bias=False)
|
||||
self.bn3 = nn.BatchNorm2d(planes * 4)
|
||||
self.relu = nn.ReLU(inplace=True)
|
||||
self.se_module = SEModule(planes * 4, reduction=reduction)
|
||||
self.downsample = downsample
|
||||
self.stride = stride
|
||||
|
||||
def forward(self, x):
|
||||
residual = x
|
||||
|
||||
out = self.conv1(x)
|
||||
out = self.bn1(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv2(out)
|
||||
out = self.bn2(out)
|
||||
out = self.relu(out)
|
||||
|
||||
out = self.conv3(out)
|
||||
out = self.bn3(out)
|
||||
|
||||
if self.downsample is not None:
|
||||
residual = self.downsample(x)
|
||||
|
||||
out = self.se_module(out) + residual
|
||||
out = self.relu(out)
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class SENet(nn.Module):
|
||||
def __init__(self, block, layers, groups, reduction, inplanes=128,
|
||||
downsample_kernel_size=3, downsample_padding=1):
|
||||
super(SENet, self).__init__()
|
||||
self.inplanes = inplanes
|
||||
|
||||
layer0_modules = [
|
||||
('conv1', nn.Conv2d(3, inplanes, kernel_size=7, stride=2, padding=3, bias=False)),
|
||||
('bn1', nn.BatchNorm2d(inplanes)),
|
||||
('relu1', nn.ReLU(inplace=True)),
|
||||
# To preserve compatibility with Caffe weights `ceil_mode=True`
|
||||
# is used instead of `padding=1`.
|
||||
('pool', nn.MaxPool2d(3, stride=2, ceil_mode=True))
|
||||
]
|
||||
self.layer0 = nn.Sequential(OrderedDict(layer0_modules))
|
||||
self.layer1 = self._make_layer(
|
||||
block,
|
||||
planes=64,
|
||||
blocks=layers[0],
|
||||
groups=groups,
|
||||
reduction=reduction,
|
||||
downsample_kernel_size=1,
|
||||
downsample_padding=0
|
||||
)
|
||||
self.layer2 = self._make_layer(
|
||||
block,
|
||||
planes=128,
|
||||
blocks=layers[1],
|
||||
stride=2,
|
||||
groups=groups,
|
||||
reduction=reduction,
|
||||
downsample_kernel_size=downsample_kernel_size,
|
||||
downsample_padding=downsample_padding
|
||||
)
|
||||
self.layer3 = self._make_layer(
|
||||
block,
|
||||
planes=256,
|
||||
blocks=layers[2],
|
||||
stride=2,
|
||||
groups=groups,
|
||||
reduction=reduction,
|
||||
downsample_kernel_size=downsample_kernel_size,
|
||||
downsample_padding=downsample_padding
|
||||
)
|
||||
self.layer4 = self._make_layer(
|
||||
block,
|
||||
planes=512,
|
||||
blocks=layers[3],
|
||||
stride=2,
|
||||
groups=groups,
|
||||
reduction=reduction,
|
||||
downsample_kernel_size=downsample_kernel_size,
|
||||
downsample_padding=downsample_padding
|
||||
)
|
||||
self.cls = nn.Sequential(
|
||||
nn.AdaptiveAvgPool2d(1),
|
||||
Flatten(),
|
||||
nn.Linear(512 * block.expansion, 1)
|
||||
)
|
||||
|
||||
def _make_layer(self, block, planes, blocks, groups, reduction, stride=1,
|
||||
downsample_kernel_size=1, downsample_padding=0):
|
||||
downsample = None
|
||||
if stride != 1 or self.inplanes != planes * block.expansion:
|
||||
downsample = nn.Sequential(
|
||||
nn.Conv2d(self.inplanes, planes * block.expansion,
|
||||
kernel_size=downsample_kernel_size, stride=stride,
|
||||
padding=downsample_padding, bias=False),
|
||||
nn.BatchNorm2d(planes * block.expansion),
|
||||
)
|
||||
|
||||
layers = [block(self.inplanes, planes, groups, reduction, stride, downsample)]
|
||||
self.inplanes = planes * block.expansion
|
||||
for i in range(1, blocks):
|
||||
layers.append(block(self.inplanes, planes, groups, reduction))
|
||||
|
||||
return nn.Sequential(*layers)
|
||||
|
||||
def paramgroup01(self):
|
||||
return chain(
|
||||
self.layer0.parameters(),
|
||||
self.layer1.parameters(),
|
||||
)
|
||||
|
||||
def paramgroup234(self):
|
||||
return chain(
|
||||
self.layer2.parameters(),
|
||||
self.layer3.parameters(),
|
||||
self.layer4.parameters(),
|
||||
)
|
||||
|
||||
def parameters_classifier(self):
|
||||
return self.cls.parameters()
|
||||
|
||||
def forward(self, x):
|
||||
x = self.layer0(x)
|
||||
x = self.layer1(x)
|
||||
x = self.layer2(x)
|
||||
x = self.layer3(x)
|
||||
x = self.layer4(x)
|
||||
c = self.cls(x)
|
||||
return c
|
||||
|
||||
|
||||
def get_model():
|
||||
model = SENet(SEResNeXtBottleneck, [3, 4, 6, 3], groups=32, reduction=16, inplanes=64,
|
||||
downsample_kernel_size=1, downsample_padding=0)
|
||||
checkpoint = model_zoo.load_url('http://data.lip6.fr/cadene/pretrainedmodels/se_resnext50_32x4d-a260b3a4.pth')
|
||||
model.load_state_dict(checkpoint, strict=False)
|
||||
return model
|
plot.py (new file, 166 lines)
@@ -0,0 +1,166 @@
|
||||
from glob import glob
|
||||
|
||||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
from os.path import join
|
||||
|
||||
from scipy.stats import mannwhitneyu
|
||||
|
||||
dataroots = {
|
||||
'PROPOSAL' : 'results',
|
||||
#'model_cnmc_res_128' : 'results/model_cnmc_res_128',
|
||||
#'model_cnmc_res_224' : 'results/model_cnmc_res_224',
|
||||
#'model_cnmc_res_256' : 'results/model_cnmc_res_256',
|
||||
#'model_cnmc_res_450' : 'results/model_cnmc_res_450',
|
||||
#'model_cnmc_res_450_blue_only' : 'results/model_cnmc_res_450_blue_only',
|
||||
#'model_cnmc_res_450_green_only' : 'results/model_cnmc_res_450_green_only',
|
||||
#'model_cnmc_res_450_red_only' : 'results/model_cnmc_res_450_red_only',
|
||||
#'model_cnmc_res_450_no_blue' : 'results/model_cnmc_res_450_no_blue',
|
||||
#'model_cnmc_res_450_no_green' : 'results/model_cnmc_res_450_no_green',
|
||||
#'model_cnmc_res_450_no_red' : 'results/model_cnmc_res_450_no_red',
|
||||
#'model_cnmc_res_450_grayscale' : 'results/model_cnmc_res_450_grayscale',
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
def get_values(dataroot, key):
|
||||
npzs = list(glob(join(dataroot, '*', 'results.npz')))
|
||||
vals = []
|
||||
for f in npzs:
|
||||
recorded_data = np.load(f)
|
||||
val = recorded_data[key]
|
||||
vals.append(val)
|
||||
vals = np.stack(vals, 0)
|
||||
return vals
|
||||
|
||||
|
||||
def plot_mean_std(dataroot, key, ax, **kwargs):
|
||||
vals = get_values(dataroot, key)
|
||||
mean = np.mean(vals, 0)
|
||||
std = np.std(vals, 0)
|
||||
epochs = np.arange(len(mean))
|
||||
|
||||
# Offset by 1 so that we have nicely zoomed plots
|
||||
mean = mean[1:]
|
||||
std = std[1:]
|
||||
epochs = epochs[1:]
|
||||
|
||||
ax.plot(epochs, mean, **kwargs)
|
||||
ax.fill_between(epochs, mean - std, mean + std, alpha=0.2)
|
||||
|
||||
|
||||
def plot3(key, ax):
|
||||
for k, v in dataroots.items():
|
||||
plot_mean_std(v, key, ax, label=k)
|
||||
|
||||
|
||||
def print_final_min_mean_max(dataroot, key, model_epochs):
|
||||
vals = get_values(dataroot, key) * 100
|
||||
vals = vals[np.arange(len(vals)), model_epochs]
|
||||
min = np.min(vals)
|
||||
mean = np.mean(vals)
|
||||
std = np.std(vals)
|
||||
max = np.max(vals)
|
||||
print(f'{min:.2f}', f'{mean:.2f} ± {std:.2f}', f'{max:.2f}', sep='\t')
|
||||
|
||||
|
||||
def print_final_table(dataroot):
|
||||
best_model_epochs = np.argmax(get_values(dataroot, 'f1'), axis=1)
|
||||
|
||||
print_final_min_mean_max(dataroot, 'acc', best_model_epochs)
|
||||
print_final_min_mean_max(dataroot, 'acc_all', best_model_epochs)
|
||||
print_final_min_mean_max(dataroot, 'acc_hem', best_model_epochs)
|
||||
print_final_min_mean_max(dataroot, 'f1', best_model_epochs)
|
||||
print_final_min_mean_max(dataroot, 'precision', best_model_epochs)
|
||||
print_final_min_mean_max(dataroot, 'recall', best_model_epochs)
|
||||
|
||||
|
||||
def get_best_f1_scores(dataroot):
|
||||
f1_scores = get_values(dataroot, 'f1')
|
||||
best_model_epochs = np.argmax(f1_scores, axis=1)
|
||||
return f1_scores[np.arange(len(f1_scores)), best_model_epochs]
|
||||
|
||||
|
||||
def is_statistically_greater(dataroot1, dataroot2):
|
||||
# Tests if F1-score of dataroot1 is greater than dataroot2
|
||||
a = get_best_f1_scores(dataroot1)
|
||||
b = get_best_f1_scores(dataroot2)
|
||||
u, p = mannwhitneyu(a, b, alternative='greater')
|
||||
return u, p
|
||||
|
||||
|
||||
######
|
||||
|
||||
for k, v in dataroots.items():
|
||||
print(k)
|
||||
print_final_table(v)
|
||||
print()
|
||||
|
||||
|
||||
######
|
||||
|
||||
#print("MWU-Test of PROPOSAL > NOSPECLR")
|
||||
#print(is_statistically_greater(dataroots['PROPOSAL'], dataroots['NOSPECLR']))
|
||||
#print()
|
||||
#print("MWU-Test of PROPOSAL > NOROT")
|
||||
#print(is_statistically_greater(dataroots['PROPOSAL'], dataroots['NOROT']))
|
||||
|
||||
######
|
||||
|
||||
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(9, 5))
|
||||
|
||||
ax[0, 0].set_title('Accuracy')
|
||||
plot3('acc', ax[0, 0])
|
||||
|
||||
ax[0, 1].set_title('Sensitivity')
|
||||
plot3('acc_all', ax[0, 1])
|
||||
|
||||
ax[0, 2].set_title('Specificity')
|
||||
plot3('acc_hem', ax[0, 2])
|
||||
|
||||
ax[1, 0].set_title('F1 score')
|
||||
plot3('f1', ax[1, 0])
|
||||
|
||||
ax[1, 1].set_title('Precision')
|
||||
plot3('precision', ax[1, 1])
|
||||
|
||||
ax[1, 2].set_title('Recall')
|
||||
plot3('recall', ax[1, 2])
|
||||
|
||||
fig.legend(loc='lower center', ncol=3)
|
||||
fig.tight_layout()
|
||||
fig.subplots_adjust(bottom=0.12)
|
||||
fig.savefig('results/plot_ablations.pdf')
|
||||
|
||||
######
|
||||
npload = 'results/model_cnmc_res_128'
npload_sub = npload + '/subj_acc.npz'
npload_res = npload + '/results.npz'
|
||||
subj_acc = np.load(npload_sub)
|
||||
subj = list(sorted(subj_acc.keys()))
|
||||
acc = [subj_acc[k] for k in subj]
|
||||
fig, ax = plt.subplots(figsize=(9, 2))
|
||||
ax.bar(range(len(acc)), acc, width=0.3, tick_label=subj)
|
||||
fig.tight_layout()
|
||||
fig.savefig('results/plot_subj_acc.pdf')
|
||||
|
||||
######
|
||||
|
||||
data = np.load(npload_res)
|
||||
loss_train = data['loss_train']
|
||||
loss_valid = data['loss_valid'][1:]
|
||||
f1_valid = data['f1'][1:]
|
||||
fig, ax = plt.subplots(ncols=3, figsize=(9, 2))
|
||||
ax[0].plot(range(len(loss_train)), loss_train)
|
||||
ax[0].set_title("Training set loss")
|
||||
ax[1].plot(range(1, len(loss_valid) + 1), loss_valid)
|
||||
ax[1].set_title("Preliminary test set loss")
|
||||
ax[2].plot(range(1, len(f1_valid) + 1), f1_valid)
|
||||
ax[2].set_title("Preliminary test set F1-score")
|
||||
fig.tight_layout()
|
||||
fig.savefig('results/plot_curves.pdf')
|
||||
|
||||
######
|
||||
|
||||
plt.show()
|
run.ipynb (new file, 679 lines)
@@ -0,0 +1,679 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "cd8aaf96",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install pandas tqdm"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "26bd5e25",
|
||||
"metadata": {
|
||||
"scrolled": true
|
||||
},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 55,
|
||||
"id": "b753e6b8",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:41, 1.43it/s] \n",
|
||||
"Positive: 1234\n",
|
||||
"Negative: 633\n",
|
||||
"AUC: 0.8797024225483345\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/20220216T154306Z.AZHL\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3246460b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 32\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 32"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8a953a39",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 128\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 128"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "12c15b33",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 224\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 224"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "08ba15b4",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 256\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 256"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3cf25ec3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "73b9d9d3",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC_Grayscale\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_grayscale\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ce16353c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC_no_red\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_red\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "959ab837",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC_no_green\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_green\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "879beb46",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC_no_blue\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_blue\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "6d545dce",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC_red_only\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_red_only\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "25480226",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC_green_only\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_green_only\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "a064d169",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC_blue_only\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_blue_only\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8d53828a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"# TRAIN\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 main_manual.py --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC\" --batch-size 32 --epochs 100 --seed 30042022 --device cuda --out results --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 183,
|
||||
"id": "ea9c2f23",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"PROPOSAL\n",
|
||||
"68.51\t83.57 ± 5.16\t89.61\n",
|
||||
"84.33\t89.06 ± 2.09\t92.95\n",
|
||||
"38.73\t73.26 ± 11.77\t84.72\n",
|
||||
"66.76\t83.35 ± 5.61\t89.57\n",
|
||||
"66.81\t83.36 ± 5.60\t89.55\n",
|
||||
"68.51\t83.57 ± 5.16\t89.61\n",
|
||||
"\n",
|
||||
"Figure(900x500)\n",
|
||||
"Figure(900x200)\n",
|
||||
"Figure(900x200)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# PLOT\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 plot.py"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "8c92073d",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# EVALUATION"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 135,
|
||||
"id": "b25a4267",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:05, 11.69it/s] \n",
|
||||
"Positive: 1425\n",
|
||||
"Negative: 442\n",
|
||||
"AUC: 0.6153299354864846\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION \n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 32\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_32\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 32"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 136,
|
||||
"id": "b14e3e67",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:09, 6.24it/s] \n",
|
||||
"Positive: 1315\n",
|
||||
"Negative: 552\n",
|
||||
"AUC: 0.7711131113339208\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 128\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_128\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 128"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 137,
|
||||
"id": "dfb25744",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:14, 4.19it/s] \n",
|
||||
"Positive: 1262\n",
|
||||
"Negative: 605\n",
|
||||
"AUC: 0.8143717274835677\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 224\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_224\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 224"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 138,
|
||||
"id": "68600db4",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:41, 1.44it/s] \n",
|
||||
"Positive: 1195\n",
|
||||
"Negative: 672\n",
|
||||
"AUC: 0.8400701597139936\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 256\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_256\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 256"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 139,
|
||||
"id": "71a5547e",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:41, 1.42it/s] \n",
|
||||
"Positive: 1241\n",
|
||||
"Negative: 626\n",
|
||||
"AUC: 0.8813918512441892\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 154,
|
||||
"id": "58450362",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:41, 1.42it/s] \n",
|
||||
"Positive: 1261\n",
|
||||
"Negative: 606\n",
|
||||
"AUC: 0.8045073375262055\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC_Grayscale\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_grayscale\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_grayscale/phase2\" --batch-size 32 --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 155,
|
||||
"id": "48c40f18",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:44, 1.33it/s] \n",
|
||||
"Positive: 1178\n",
|
||||
"Negative: 689\n",
|
||||
"AUC: 0.8661869929814967\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC_no_red\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_no_red\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_red/phase2\" --batch-size 32 --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 156,
|
||||
"id": "b6ad9232",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:52, 1.12it/s] \n",
|
||||
"Positive: 1266\n",
|
||||
"Negative: 601\n",
|
||||
"AUC: 0.8018310900454735\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC_no_green\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_no_green\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_green/phase2\" --batch-size 32 --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 157,
|
||||
"id": "1ba76d51",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:48, 1.23it/s] \n",
|
||||
"Positive: 1248\n",
|
||||
"Negative: 619\n",
|
||||
"AUC: 0.8570821813062721\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC_no_blue\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_no_blue\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_no_blue/phase2\" --batch-size 32 --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 158,
|
||||
"id": "05cfaf9c",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:52, 1.12it/s] \n",
|
||||
"Positive: 1239\n",
|
||||
"Negative: 628\n",
|
||||
"AUC: 0.8013924335875389\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC_red_only\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_red_only\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_red_only/phase2\" --batch-size 32 --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 159,
|
||||
"id": "1ad09456",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:52, 1.13it/s] \n",
|
||||
"Positive: 1221\n",
|
||||
"Negative: 646\n",
|
||||
"AUC: 0.8590070792695896\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC_green_only\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_green_only\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_green_only/phase2\" --batch-size 32 --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 160,
|
||||
"id": "41e8d3a0",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [00:52, 1.12it/s] \n",
|
||||
"Positive: 1255\n",
|
||||
"Negative: 612\n",
|
||||
"AUC: 0.8268636253152251\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC_blue_only\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_blue_only\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC_blue_only/phase2\" --batch-size 32 --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 186,
|
||||
"id": "88bc18db",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Loading model\n",
|
||||
"Classifying\n",
|
||||
"59it [01:24, 1.43s/it] \n",
|
||||
"Positive: 1235\n",
|
||||
"Negative: 632\n",
|
||||
"AUC: 0.8588406050294211\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# EVALUATION\n",
|
||||
"# dataset : CNMC-blackborder\n",
|
||||
"# res : 450\n",
|
||||
"# epochs : 100\n",
|
||||
"!python3 submission.py --modelroot \"/home/feoktistovar67431/isbi2019cancer-master/results/model_cnmc_res_450_w_blackborder\" --dataroot \"/home/feoktistovar67431/data/isbi2019/CNMC/phase2\" --batch-size 32 --res 450"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "ec31125a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.9"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
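
The evaluation cells above repeat the same `submission.py` call once per input resolution and once per color-channel variant of the data, changing only `--modelroot`, `--dataroot` and `--res`. A small driver script can run the whole sweep in one go; this is a convenience sketch rather than part of the repository, and the model and dataset directory names are simply copied from the cells above.

```python
# Hypothetical helper (not in this commit): re-run the evaluation cells above as one sweep.
import subprocess

RESULTS = "/home/feoktistovar67431/isbi2019cancer-master/results"
DATA = "/home/feoktistovar67431/data/isbi2019"

runs = [
    # (model directory under RESULTS, dataset directory under DATA, input resolution)
    ("model_cnmc_res_32", "CNMC", 32),
    ("model_cnmc_res_128", "CNMC", 128),
    ("model_cnmc_res_224", "CNMC", 224),
    ("model_cnmc_res_256", "CNMC", 256),
    ("model_cnmc_res_450", "CNMC", 450),
    ("model_cnmc_res_450_grayscale", "CNMC_grayscale", 450),
    ("model_cnmc_res_450_no_red", "CNMC_no_red", 450),
    ("model_cnmc_res_450_no_green", "CNMC_no_green", 450),
    ("model_cnmc_res_450_no_blue", "CNMC_no_blue", 450),
    ("model_cnmc_res_450_red_only", "CNMC_red_only", 450),
    ("model_cnmc_res_450_green_only", "CNMC_green_only", 450),
    ("model_cnmc_res_450_blue_only", "CNMC_blue_only", 450),
    ("model_cnmc_res_450_w_blackborder", "CNMC", 450),
]

for model_dir, data_dir, res in runs:
    subprocess.run([
        "python3", "submission.py",
        "--modelroot", f"{RESULTS}/{model_dir}",
        "--dataroot", f"{DATA}/{data_dir}/phase2",
        "--batch-size", "32",
        "--res", str(res),
    ], check=True)
```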
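
The `CNMC_grayscale`, `CNMC_no_*` and `CNMC_*_only` directories referenced above are copies of the original images with individual color channels converted or zeroed out. The preprocessing that produced them is not included in this commit; the following is only a minimal sketch of one plausible way to build such a copy, assuming the same flat `*.bmp` layout as the original `phase2` folder.

```python
# Minimal sketch (assumption): build a channel-ablated copy of one image directory.
# The exact preprocessing used for CNMC_no_red etc. is not part of this commit.
import os
from glob import glob

import numpy as np
from PIL import Image


def ablate_channels(src_dir, dst_dir, keep=(False, True, True)):
    """Copy every .bmp from src_dir to dst_dir, zeroing the RGB channels where keep[c] is False."""
    os.makedirs(dst_dir, exist_ok=True)
    for path in glob(os.path.join(src_dir, "*.bmp")):
        arr = np.array(Image.open(path).convert("RGB"))
        for c, keep_c in enumerate(keep):
            if not keep_c:
                arr[..., c] = 0
        Image.fromarray(arr).save(os.path.join(dst_dir, os.path.basename(path)))


# Example: a "no_red" copy of the preliminary test set.
# ablate_channels("data/phase2", "data_no_red/phase2", keep=(False, True, True))
```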
|
92
submission.py
Normal file
92
submission.py
Normal file
@ -0,0 +1,92 @@
|
||||
import argparse
import csv
import os
import zipfile
from os.path import join

import matplotlib.pyplot as plt
import numpy as np
import torch
from PIL import Image
from sklearn import metrics
from sklearn.metrics import roc_auc_score, roc_curve
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm

from dataset import get_tf_vaild_rot_transform
from model import get_model


class OrderedImages(Dataset):
    """Test-set images named 1.bmp, 2.bmp, ... without labels."""

    def __init__(self, root, transform):
        super().__init__()
        self.root = root
        self.transform = transform

    def __len__(self):
        return 1867

    def __getitem__(self, index):
        img = Image.open(os.path.join(self.root, f'{index + 1}.bmp'))  # .convert('RGB')
        return self.transform(img)


# Preliminary test set (phase2): 1219 ALL and 648 HEM images, 1867 in total.
VALIDATION_ALL = 1219
VALIDATION_HEM = 648

parser = argparse.ArgumentParser()
parser.add_argument('--batch-size', type=int, default=64)
parser.add_argument('--modelroot', default='results/20190313T101236Z.LGJL', help='path to model')
parser.add_argument('--dataroot', default='data/phase3', help='path to dataset')
parser.add_argument('--res', type=int, default=450, help='Desired input resolution')
args = parser.parse_args()

dataset = OrderedImages(args.dataroot, get_tf_vaild_rot_transform(args.res))

print("Loading model")
model = get_model().to('cuda:0')
model = torch.nn.DataParallel(model)
model.load_state_dict(torch.load(join(args.modelroot, 'model.pt')))
model.eval()

dataloader = DataLoader(dataset, batch_size=args.batch_size, num_workers=6)

print("Classifying")
all_labels = []
for x in tqdm(dataloader, total=len(dataset) // args.batch_size):
    x = x.to('cuda:0')
    # The validation transform returns several rotated crops per image (test-time augmentation).
    bs, nrot, c, h, w = x.size()
    with torch.no_grad():
        y = model(x.view(-1, c, h, w))
    y = y.view(bs, nrot).mean(1)  # average the logits over the rotations
    labels = y > 0                # threshold the averaged logit into a hard 0/1 prediction
    all_labels.append(labels)

all_labels = torch.cat(all_labels)
print("Positive:", all_labels.sum().item())
print("Negative:", len(all_labels) - all_labels.sum().item())

# Ground-truth labels for the preliminary test set. The file is assumed to follow the
# phase2.csv layout (Patient_ID,new_names,labels), i.e. the numeric label is in the last column.
true_labels = []
with open(r'/home/feoktistovar67431/data/resources/phase2_labels.csv', 'r') as file_w:
    reader = csv.reader(file_w, delimiter=',')
    for row in reader:
        if row and row[-1].isdigit():  # skip the header row if present
            true_labels.append(int(row[-1]))

print(f'AUC: {roc_auc_score(true_labels, all_labels.cpu())}')  # area under the ROC curve

# print("Accuracy", metrics.accuracy_score(true_labels, all_labels.cpu()))
# Optional: plot the ROC curve and integrate it manually
# fpr, tpr, _ = roc_curve(true_labels, all_labels.cpu())
# plt.plot(fpr, tpr)
# plt.show()
# auc = np.trapz(tpr, fpr)

csv_path = join(args.modelroot, 'submission.csv')
zip_path = join(args.modelroot, 'submission.zip')
np.savetxt(csv_path, all_labels.cpu().numpy(), '%d')
with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zipf:
    zipf.write(csv_path, 'isbi_valid.predict')
|
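
`submission.py` computes the AUC from the thresholded 0/1 predictions, even though the model already produces a continuous logit per image (the rotation-averaged `y`). Running the ROC analysis on those scores directly generally gives a more informative AUC. The snippet below is only a sketch of that variant, reusing the `model`, `dataloader` and `true_labels` objects defined in `submission.py`.

```python
# Sketch: collect the rotation-averaged logits instead of hard labels and score them directly.
import torch
from sklearn.metrics import roc_auc_score


def predict_scores(model, dataloader, device="cuda:0"):
    """Return one averaged logit per image (higher = more confident ALL prediction)."""
    scores = []
    model.eval()
    with torch.no_grad():
        for x in dataloader:
            x = x.to(device)
            bs, nrot, c, h, w = x.size()
            y = model(x.view(-1, c, h, w)).view(bs, nrot).mean(1)
            scores.append(y.cpu())
    return torch.cat(scores)


# scores = predict_scores(model, dataloader)
# print("AUC on raw scores:", roc_auc_score(true_labels, scores))
# print("AUC on hard labels:", roc_auc_score(true_labels, (scores > 0).int()))
```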
58
utils.py
Normal file
58
utils.py
Normal file
@ -0,0 +1,58 @@
|
||||
import pickle
import random
import string
from datetime import datetime

import torch
import torch.nn as nn


class IncrementalAverage:
    def __init__(self):
        self.value = 0
        self.counter = 0

    def update(self, x):
        self.counter += 1
        self.value += (x - self.value) / self.counter


class Flatten(nn.Module):
    def forward(self, x):
        return x.view(x.size(0), -1)


class SizePrinter(nn.Module):
    def forward(self, x):
        print(x.size())
        return x


def count_parameters(model, grad_only=True):
    return sum(p.numel() for p in model.parameters() if not grad_only or p.requires_grad)


def to_device(device, *tensors):
    return tuple(x.to(device) for x in tensors)


def loop_iter(iter):
    while True:
        for item in iter:
            yield item


def unique_string():
    return '{}.{}'.format(datetime.now().strftime('%Y%m%dT%H%M%SZ'),
                          ''.join(random.choice(string.ascii_uppercase) for _ in range(4)))


def set_seeds(seed):
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)


def pickle_dump(obj, file):
    with open(file, 'wb') as f:
        pickle.dump(obj, f)
|
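
`IncrementalAverage` keeps a running mean without storing the history: after `n` updates, `value` equals the mean of the first `n` inputs, because `mean_n = mean_{n-1} + (x_n - mean_{n-1}) / n`. A short usage check (a sketch, not part of the repository):

```python
# Quick check of the incremental-mean update used by IncrementalAverage.
from utils import IncrementalAverage

avg = IncrementalAverage()
xs = [2.0, 4.0, 9.0]
for x in xs:
    avg.update(x)

assert abs(avg.value - sum(xs) / len(xs)) < 1e-9  # running mean equals the batch mean (5.0)
print(avg.value)
```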