Commit 69e9d6cc authored by hannandarryl

eval fixes for mlp models

parent 5a7d27b8
@@ -61,24 +61,63 @@ class ONSDClassifier(keras.layers.Layer):
         return class_pred, width_pred

+class ONSDConv(keras.layers.Layer):
+    def __init__(self, do_regression):
+        super(ONSDConv, self).__init__()
+
+        # self.ff_dropout = keras.layers.Dropout(0.1)
+        self.conv_1 = keras.layers.Conv2D(8, kernel_size=(1, 4), strides=1, activation='relu', padding='valid')
+        # self.max_pool = keras.layers.MaxPooling2D()
+        self.conv_2 = keras.layers.Conv2D(8, kernel_size=(1, 4), strides=1, activation='relu', padding='valid')
+        self.flatten = keras.layers.Flatten()
+        # self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
+        # self.ff_2 = keras.layers.Dense(500, activation='relu', use_bias=True)
+        self.ff_2 = keras.layers.Dense(10, activation='relu', use_bias=True)
+        # self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
+        if do_regression:
+            self.ff_final_1 = keras.layers.Dense(1)
+        else:
+            self.ff_final_1 = keras.layers.Dense(1, activation='sigmoid')
+
+        self.do_dropout = True
+
+    # @tf.function
+    def call(self, activations):
+        # print(activations.shape)  # debug, disabled
+        # raise Exception           # debug, disabled
+        x = self.conv_1(activations)
+        x = self.flatten(x)
+        x = self.ff_2(x)
+        # x = self.ff_dropout(x, self.do_dropout)
+        # x = self.ff_3(x)
+        class_pred = self.ff_final_1(x)
+        return class_pred
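# [annotation] Input sketch, not part of the commit: with --activations_2d the
# eval scripts feed ONSDConv activations of shape
# (batch, (image_height - kernel_height) // stride + 1, num_kernels, 1), so
# conv_1's (1, 4) kernel slides along the kernel axis before the dense head;
# conv_2 is defined but is not applied in call() as written.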
 class ONSDMLP(keras.layers.Layer):
-    def __init__(self):
+    def __init__(self, do_regression):
         super(ONSDMLP, self).__init__()

-        self.ff_dropout = keras.layers.Dropout(0.1)
+        # self.ff_dropout = keras.layers.Dropout(0.1)
         # self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
         # self.ff_2 = keras.layers.Dense(500, activation='relu', use_bias=True)
-        self.ff_2 = keras.layers.Dense(16, activation='relu', use_bias=True)
-        self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
+        self.ff_2 = keras.layers.Dense(8, activation='relu', use_bias=True)
+        # self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
+        if do_regression:
+            self.ff_final_1 = keras.layers.Dense(1)
+        else:
+            self.ff_final_1 = keras.layers.Dense(1, activation='sigmoid')

         self.do_dropout = True

     # @tf.function
     def call(self, activations):
         x = self.ff_2(activations)
-        x = self.ff_dropout(x, self.do_dropout)
-        x = self.ff_3(x)
+        # x = self.ff_dropout(x, self.do_dropout)
+        # x = self.ff_3(x)
         class_pred = self.ff_final_1(x)
         return class_pred
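# [annotation] Usage sketch, illustrative only: the MLP takes per-kernel
# activation features of shape (batch, num_kernels) and emits one value per
# frame, e.g.
#   mlp = ONSDMLP(do_regression=False)
#   prob = mlp(tf.zeros((4, 16)))   # -> (4, 1) sigmoid probability
# With do_regression=True the final layer is linear and predicts a raw width.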
......
@@ -9,13 +9,12 @@ from typing import Sequence, Iterator
 import csv
 from sklearn.model_selection import train_test_split, GroupShuffleSplit, LeaveOneGroupOut, LeaveOneOut, StratifiedGroupKFold, StratifiedKFold, KFold, ShuffleSplit

-def load_onsd_videos(batch_size, input_size, crop_size, yolo_model=None, mode=None, n_splits=None):
+def load_onsd_videos(batch_size, crop_size, yolo_model=None, mode=None, n_splits=None, do_regression=False):
     video_path = "/shared_data/bamc_onsd_data/revised_extended_onsd_data"

     transforms = torchvision.transforms.Compose(
         [torchvision.transforms.Grayscale(1),
-         MinMaxScaler(0, 255),
-         torchvision.transforms.Resize(input_size[:2])
+         MinMaxScaler(0, 255)
         ])
     # augment_transforms = torchvision.transforms.Compose(
     #     [torchvision.transforms.RandomRotation(45),
@@ -23,6 +22,9 @@ def load_onsd_videos(batch_size, input_size, crop_size, yolo_model=None, mode=None, n_splits=None):
     #      torchvision.transforms.RandomAdjustSharpness(0.05)
     #     ])

+    if do_regression:
+        dataset = ONSDGoodFramesLoader(video_path, crop_size[1], crop_size[0], transform=transforms, yolo_model=yolo_model)
+    else:
+        dataset = ONSDAllFramesLoader(video_path, crop_size[1], crop_size[0], transform=transforms, yolo_model=yolo_model)
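    # [annotation] Regression trains on the annotated "good" frames, whose clips
    # carry width measurements; classification uses the all-frames loader with
    # binary video labels only.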
     targets = dataset.get_labels()
......
@@ -10,7 +10,7 @@ import os
 from sparse_coding_torch.onsd.load_data import load_onsd_videos
 from sparse_coding_torch.utils import SubsetWeightedRandomSampler, get_sample_weights
 from sparse_coding_torch.sparse_model import SparseCode, ReconSparse, normalize_weights, normalize_weights_3d
-from sparse_coding_torch.onsd.classifier_model import ONSDClassifier
+from sparse_coding_torch.onsd.classifier_model import ONSDMLP
 from sparse_coding_torch.onsd.video_loader import get_yolo_region_onsd
 import time
 import numpy as np
@@ -30,6 +30,8 @@ from sklearn.neural_network import MLPClassifier
 from sklearn import metrics
 from sklearn.preprocessing import normalize
+from scikeras.wrappers import KerasClassifier, KerasRegressor
+
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

 if __name__ == "__main__":
@@ -54,6 +56,8 @@ if __name__ == "__main__":
     parser.add_argument('--scale_factor', type=int, default=2)
     parser.add_argument('--clip_depth', type=int, default=1)
     parser.add_argument('--frames_to_skip', type=int, default=1)
+    parser.add_argument('--flatten', action='store_true')
+    parser.add_argument('--regression', action='store_true')

     args = parser.parse_args()
@@ -91,8 +95,7 @@ if __name__ == "__main__":
     sparse_model = keras.Model(inputs=(inputs, filter_inputs), outputs=output)
     recon_model = keras.models.load_model(args.sparse_checkpoint)

-    splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width), crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits)
+    splits, dataset = load_onsd_videos(args.batch_size, crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits, do_regression=args.regression)
     positive_class = 'Positives'

     # difficult_vids = split_difficult_vids(dataset.get_difficult_vids(), args.n_splits)
@@ -134,49 +137,91 @@ if __name__ == "__main__":
         # clf = LogisticRegression(max_iter=1000)
         # clf = RidgeClassifier(alpha=3.0)
-        clf = MLPClassifier(hidden_layer_sizes=(16,))
+        # clf = MLPClassifier(hidden_layer_sizes=(16,))
+        if args.flatten:
+            classifier_inputs = keras.Input(shape=(args.num_kernels * ((image_height - args.kernel_height) // args.stride + 1)))
+        else:
+            classifier_inputs = keras.Input(shape=(args.num_kernels))
+        classifier_outputs = ONSDMLP(args.regression)(classifier_inputs)
+        classifier_model = keras.Model(inputs=classifier_inputs, outputs=classifier_outputs)
+
+        if args.regression:
+            clf = KerasRegressor(classifier_model, loss='mean_squared_error', optimizer='adam', epochs=200, verbose=False)
+        else:
+            clf = KerasClassifier(classifier_model, loss='binary_crossentropy', optimizer='adam', epochs=200, verbose=False)
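        # [annotation] scikeras wraps the Keras model in sklearn's estimator API,
        # so clf.fit/clf.predict below work like the MLPClassifier they replace;
        # loss, optimizer, and epochs are forwarded to compile()/fit().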
-        train_filter_activations = [[] for _ in range(args.num_kernels)]
+        # train_filter_activations = [[] for _ in range(args.num_kernels)]
+        train_filter_activations = []
         for images, labels, width in tqdm(train_tf.shuffle(len(train_tf)).batch(batch_size)):
             images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
             activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))])).numpy()

-            for b_idx in range(activations.shape[0]):
-                acts = np.squeeze(activations[b_idx])
+            activations = tf.squeeze(activations, axis=1)
+            activations = tf.squeeze(activations, axis=2)
+            if args.flatten:
+                activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
+            else:
+                activations = tf.math.reduce_sum(activations, axis=1)
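            # [annotation] Two feature layouts: --flatten keeps the whole
            # (positions x kernels) activation map per frame; otherwise the map
            # is summed over the spatial axis, leaving one total per kernel.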
+            for b_idx, act in enumerate(activations):
+                if args.regression:
+                    train_filter_activations.append((act, width[b_idx]))
+                else:
+                    train_filter_activations.append((act, labels[b_idx]))

-                for i in range(args.num_kernels):
-                    acts_for_filter = acts[:, i]
-                    act_sum = np.sum(acts_for_filter)
-                    train_filter_activations[i].append((act_sum, float(labels[b_idx])))
+            # for b_idx in range(activations.shape[0]):
+            #     acts = np.squeeze(activations[b_idx])
+            #     for i in range(args.num_kernels):
+            #         acts_for_filter = acts[:, i]
+            #         act_sum = np.sum(acts_for_filter)
+            #         train_filter_activations[i].append((act_sum, float(labels[b_idx])))

-        test_filter_activations = [[] for _ in range(args.num_kernels)]
+        # test_filter_activations = [[] for _ in range(args.num_kernels)]
+        test_filter_activations = []
         for images, labels, width in tqdm(test_tf.batch(args.batch_size)):
             images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
             activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))])).numpy()

-            for b_idx in range(activations.shape[0]):
-                acts = np.squeeze(activations[b_idx])
+            activations = tf.squeeze(activations, axis=1)
+            activations = tf.squeeze(activations, axis=2)
+            if args.flatten:
+                activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
+            else:
+                activations = tf.math.reduce_sum(activations, axis=1)
+
+            for b_idx, act in enumerate(activations):
+                if args.regression:
+                    test_filter_activations.append((act, width[b_idx]))
+                else:
+                    test_filter_activations.append((act, labels[b_idx]))

-                for i in range(args.num_kernels):
-                    acts_for_filter = acts[:, i]
-                    act_sum = np.sum(acts_for_filter)
-                    test_filter_activations[i].append((act_sum, float(labels[b_idx])))
+            # for b_idx in range(activations.shape[0]):
+            #     acts = np.squeeze(activations[b_idx])
+            #     for i in range(args.num_kernels):
+            #         acts_for_filter = acts[:, i]
+            #         act_sum = np.sum(acts_for_filter)
+            #         test_filter_activations[i].append((act_sum, float(labels[b_idx])))
         train_X = []
         train_y = []

-        for i in range(len(train_filter_activations[0])):
-            x = np.array([train_filter_activations[j][i][0] for j in range(args.num_kernels)])
-            label = train_filter_activations[0][i][1]
+        # for i in range(len(train_filter_activations[0])):
+        #     x = np.array([train_filter_activations[j][i][0] for j in range(args.num_kernels)])
+        #     label = train_filter_activations[0][i][1]
+        #     train_X.append(x)
+        #     train_y.append(label)
+        for x, label in train_filter_activations:
             train_X.append(x)
             train_y.append(label)
@@ -186,10 +231,14 @@ if __name__ == "__main__":
         test_X = []
         test_y = []

-        for i in range(len(test_filter_activations[0])):
-            x = np.array([test_filter_activations[j][i][0] for j in range(args.num_kernels)])
-            label = test_filter_activations[0][i][1]
+        # for i in range(len(test_filter_activations[0])):
+        #     x = np.array([test_filter_activations[j][i][0] for j in range(args.num_kernels)])
+        #     label = test_filter_activations[0][i][1]
+        #     test_X.append(x)
+        #     test_y.append(label)
+        for x, label in test_filter_activations:
             test_X.append(x)
             test_y.append(label)
@@ -217,6 +266,19 @@ if __name__ == "__main__":
         test_gt_all = np.concatenate([test_gt_all, test_y])

         if args.splits == 'leave_one_out':
+            if args.regression:
+                video_gt = np.average(test_y)
+                if video_gt >= 100 / dataset.max_width:
+                    video_gt = np.array([1])
+                else:
+                    video_gt = np.array([0])
+
+                video_pred = np.array([np.average(test_pred)])
+                if video_pred >= 100 / dataset.max_width:
+                    video_pred = np.array([1])
+                else:
+                    video_pred = np.array([0])
+            else:
+                video_gt = np.array([test_y[0]])
+                video_pred = np.array([np.round(np.average(test_pred))])
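            # [annotation] Assuming widths were scaled by dataset.max_width at
            # load time, 100 / max_width re-expresses a 100-unit ONSD width
            # cutoff for the video-level positive/negative decision.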
@@ -238,6 +300,10 @@ if __name__ == "__main__":
         frame_pred_all = np.concatenate([frame_pred_all, frame_pred])
         frame_gt_all = np.concatenate([frame_gt_all, frame_gt])

+        if args.regression:
+            train_acc = metrics.mean_absolute_error(train_y, train_pred)
+            test_acc = metrics.mean_absolute_error(test_y, test_pred)
+        else:
+            train_acc = metrics.accuracy_score(train_y, train_pred)
+            test_acc = metrics.accuracy_score(test_y, test_pred)
@@ -245,11 +311,18 @@ if __name__ == "__main__":
     print('Final Predictions!')

+    if args.regression:
+        train_accuracy = metrics.mean_absolute_error(train_gt_all, train_pred_all)
+        test_accuracy = metrics.mean_absolute_error(test_gt_all, test_pred_all)
+        frame_accuracy = metrics.mean_absolute_error(frame_gt_all, frame_pred_all)
+    else:
+        train_accuracy = metrics.accuracy_score(train_gt_all, train_pred_all)
+        test_accuracy = metrics.accuracy_score(test_gt_all, test_pred_all)
+        frame_accuracy = metrics.accuracy_score(frame_gt_all, frame_pred_all)

     if args.splits == 'leave_one_out':
         print(video_pred_all)
         print(video_gt_all)
         video_accuracy = metrics.accuracy_score(video_gt_all, video_pred_all)

     print('train_acc={:.2f}, test_acc={:.2f}, frame_acc={:.2f}, video_acc={:.2f}'.format(train_accuracy, test_accuracy, frame_accuracy, video_accuracy))
......
@@ -10,11 +10,11 @@ import os
 from sparse_coding_torch.onsd.load_data import load_onsd_videos
 from sparse_coding_torch.utils import SubsetWeightedRandomSampler, get_sample_weights
 from sparse_coding_torch.sparse_model import SparseCode, ReconSparse, normalize_weights, normalize_weights_3d
-from sparse_coding_torch.onsd.classifier_model import ONSDMLP
-from sparse_coding_torch.onsd.video_loader import get_yolo_region_onsd
+from sparse_coding_torch.onsd.classifier_model import ONSDMLP, ONSDConv
+from sparse_coding_torch.onsd.video_loader import get_yolo_region_onsd, get_participants
 import time
 import numpy as np
-from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
+from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, mean_absolute_error
 import random
 import pickle
 # from sparse_coding_torch.onsd.train_sparse_model import sparse_loss
@@ -25,10 +25,77 @@ import glob
 import cv2
 import copy
 import matplotlib.pyplot as plt
+import itertools
+import csv

+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+import absl.logging
+absl.logging.set_verbosity(absl.logging.ERROR)

-def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height, max_width):
+def calculate_onsd_scores_measured(input_videos, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height):
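    # [annotation] Scores one annotated frame per video: assumes paths like
    # .../<label>/<participant>/<frame>.png and evaluates the first .png found
    # next to each video file.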
+    frame_path = 'sparse_coding_torch/onsd/onsd_good_for_eval'
+
+    all_preds = []
+    all_gt = []
+
+    fp = []
+    fn = []
+
+    for vid_f in tqdm(input_videos):
+        split_path = vid_f.split('/')
+        frame_path = '/'.join(split_path[:-1])
+        label = split_path[-3]
+        f = [png_file for png_file in os.listdir(frame_path) if png_file.endswith('.png')][0]
+        # for f in tqdm(os.listdir(os.path.join(frame_path, label))):
+        #     if not f.endswith('.png'):
+        #         continue
+        # print(split_path)
+        # print(frame_path)
+        # print(label)
+        # print(f)
+        # raise Exception
+
+        frame = torch.tensor(cv2.imread(os.path.join(frame_path, f))).swapaxes(2, 1).swapaxes(1, 0)
+        # print(frame.size())
+        frame = get_yolo_region_onsd(yolo_model, frame, crop_width, crop_height, False)
+        if not frame:
+            continue
+        # print(frame)
+        frame = frame[0]
+        # print(frame)
+        frame = transform(frame).to(torch.float32).unsqueeze(3).unsqueeze(1).numpy()
+
+        activations = tf.stop_gradient(sparse_model([frame, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))]))
+        activations = tf.squeeze(activations, axis=1)
+        activations = tf.squeeze(activations, axis=2)
+        activations = tf.math.reduce_sum(activations, axis=1)
+
+        pred = classifier_model.predict(activations)
+        pred = tf.math.round(pred)
+        final_pred = float(pred)
+
+        all_preds.append(final_pred)
+        if label == 'Positives':
+            all_gt.append(1.0)
+            if final_pred == 0.0:
+                fn.append(f)
+        elif label == 'Negatives':
+            all_gt.append(0.0)
+            if final_pred == 1.0:
+                fp.append(f)
+
+    return np.array(all_preds), np.array(all_gt), fn, fp
+
+def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height, max_width, flatten, do_regression, activations_2d, use_valid, valid_vids):
     all_predictions = []

     numerical_labels = []
@@ -42,6 +109,9 @@ def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height, max_width, flatten, do_regression, activations_2d, use_valid, valid_vids):
     fp_ids = []
     fn_ids = []

     for v_idx, f in tqdm(enumerate(input_videos)):
+        if use_valid and not get_participants([f])[0] in valid_vids:
+            continue
+
         vc = torchvision.io.read_video(f)[0].permute(3, 0, 1, 2)

         all_classes = []
@@ -51,7 +121,9 @@ def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height, max_width, flatten, do_regression, activations_2d, use_valid, valid_vids):
         all_yolo = [get_yolo_region_onsd(yolo_model, frame, crop_width, crop_height, False) for frame in all_frames]

-        all_yolo = [yolo[0] for yolo in all_yolo if yolo is not None]
+        all_yolo = list(itertools.chain.from_iterable([y for y in all_yolo if y is not None]))
+        # all_yolo = [yolo[0] for yolo in all_yolo if yolo is not None]

         for i in range(0, len(all_yolo), 32):
             batch = torch.stack(all_yolo[i:i+32])
@@ -62,15 +134,34 @@ def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height, max_width, flatten, do_regression, activations_2d, use_valid, valid_vids):
             activations = tf.squeeze(activations, axis=1)
             activations = tf.squeeze(activations, axis=2)

+            if flatten:
+                activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
+            elif activations_2d:
+                activations = tf.expand_dims(activations, axis=3)
+            else:
+                activations = tf.math.reduce_sum(activations, axis=1)

-            pred = classifier_model(activations)
-            pred = tf.math.round(tf.math.sigmoid(pred))
+            pred = classifier_model.predict(activations)
+            # if not do_regression:
+            #     pred = tf.math.round(pred)
             # width_pred = tf.math.round(width_pred * max_width)

             all_classes.append(pred)

+        if do_regression:
+            final_pred = np.average(np.concatenate(all_classes))
+            # raise Exception
+            # print(all_classes)
+            # print(final_pred)
+            # print(max_width)
+            # print(100/max_width)
+            # raise Exception
+            if final_pred >= 100:
+                final_pred = np.array([1])
+            else:
+                final_pred = np.array([0])
+        else:
+            final_pred = np.round(np.average(np.concatenate(all_classes)))
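        # [annotation] Regression path: the mean predicted width is compared
        # against 100, consistent with get_widths() now returning unnormalized
        # (pixel) widths; classification rounds the mean sigmoid output instead.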
# print(all_widths)
# average_width = np.average(np.array(all_widths))
@@ -92,7 +183,7 @@
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--batch_size', default=128, type=int)
+    parser.add_argument('--batch_size', default=200, type=int)
     parser.add_argument('--kernel_width', default=150, type=int)
     parser.add_argument('--kernel_height', default=10, type=int)
     parser.add_argument('--kernel_depth', default=1, type=int)
@@ -100,8 +191,8 @@ if __name__ == "__main__":
     parser.add_argument('--stride', default=1, type=int)
     parser.add_argument('--max_activation_iter', default=300, type=int)
     parser.add_argument('--activation_lr', default=1e-2, type=float)
-    parser.add_argument('--lr', default=5e-2, type=float)
-    parser.add_argument('--epochs', default=15, type=int)
+    parser.add_argument('--lr', default=0.001, type=float)
+    parser.add_argument('--epochs', default=200, type=int)
     parser.add_argument('--lam', default=0.05, type=float)
     parser.add_argument('--output_dir', default='./output', type=str)
     parser.add_argument('--sparse_checkpoint', default=None, type=str)
@@ -118,6 +209,10 @@ if __name__ == "__main__":
     parser.add_argument('--scale_factor', type=int, default=2)
     parser.add_argument('--clip_depth', type=int, default=1)
    parser.add_argument('--frames_to_skip', type=int, default=1)
+    parser.add_argument('--do_regression', action='store_true')
+    parser.add_argument('--flatten', action='store_true')
+    parser.add_argument('--activations_2d', action='store_true')
+    parser.add_argument('--valid_vids', action='store_true')

     args = parser.parse_args()
@@ -142,6 +237,16 @@ if __name__ == "__main__":
     with open(os.path.join(output_dir, 'arguments.txt'), 'w+') as out_f:
         out_f.write(str(args))

+    valid_vids = set()
+    with open('sparse_coding_torch/onsd/good_frames_onsd.csv', 'r') as valid_in:
+        reader = csv.DictReader(valid_in)
+        for row in reader:
+            vid = row['video'].strip()
+            good_frames = row['good_frames'].strip()
+            if good_frames:
+                valid_vids.add(vid)
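    # [annotation] Assumes good_frames_onsd.csv provides 'video' and
    # 'good_frames' columns; only videos with a non-empty good-frames
    # annotation survive the --valid_vids filter.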
yolo_model = YoloModel(args.dataset)
all_errors = []
@@ -163,13 +268,19 @@ if __name__ == "__main__":
         ])

-    splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width), crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits)
+    splits, dataset = load_onsd_videos(args.batch_size, crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits, do_regression=args.do_regression)
     positive_class = 'Positives'

+    all_video_labels = [f.split('/')[-3] for f in dataset.get_all_videos()]
+    print('{} videos with positive labels.'.format(len([lbl for lbl in all_video_labels if lbl == 'Positives'])))
+    print('{} videos with negative labels.'.format(len([lbl for lbl in all_video_labels if lbl == 'Negatives'])))

     # difficult_vids = split_difficult_vids(dataset.get_difficult_vids(), args.n_splits)

     print('Processing frames...')
+    sparse_codes = []
+    total_acts = 0
+    total_non_zero = 0
     frames = dataset.get_frames()
     for i in tqdm(range(0, len(frames), 32)):
         frame = tf.stack(frames[i:i+32])
@@ -177,14 +288,24 @@ if __name__ == "__main__":
         activations = tf.stop_gradient(sparse_model([frame, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))])).numpy()

+        total_non_zero += float(tf.math.count_nonzero(activations))
+        total_acts += float(tf.math.reduce_prod(tf.shape(activations)))

         activations = tf.squeeze(activations, axis=1)
         activations = tf.squeeze(activations, axis=2)

+        if args.flatten:
+            activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
+        elif args.activations_2d:
+            activations = tf.expand_dims(activations, axis=3)
+        else:
+            activations = tf.math.reduce_sum(activations, axis=1)

         for act in activations:
             sparse_codes.append(act)

     assert len(sparse_codes) == len(frames)

+    print('Average sparsity is: {}'.format(total_non_zero / total_acts))
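    # [annotation] This ratio is the fraction of non-zero activations, so larger
    # values mean denser (less sparse) codes.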
video_true = []
video_pred = []
@@ -215,16 +336,26 @@ if __name__ == "__main__":
         train_sparse_codes = [sc for i, sc in enumerate(sparse_codes) if i in train_idx]
         test_sparse_codes = [sc for i, sc in enumerate(sparse_codes) if i in test_idx]

-        train_tf = tf.data.Dataset.from_tensor_slices((train_sparse_codes, train_loader.get_labels(), train_loader.get_widths()))
-        test_tf = tf.data.Dataset.from_tensor_slices((test_sparse_codes, test_loader.get_labels(), test_loader.get_widths()))
+        if args.do_regression:
+            train_x = tf.stack(train_sparse_codes)
+            test_x = tf.stack(test_sparse_codes)
+            train_y = tf.stack(train_loader.get_widths())
+            test_y = tf.stack(test_loader.get_widths())
+        else:
+            train_x = tf.stack(train_sparse_codes)
+            test_x = tf.stack(test_sparse_codes)
+            train_y = tf.stack(train_loader.get_labels())
+            test_y = tf.stack(test_loader.get_labels())

-        print('{} train videos.'.format(len(train_tf)))
-        print('{} positive videos.'.format(len(list(train_tf.filter(lambda features, label, width: label==1)))))
-        print('{} negative videos.'.format(len(list(train_tf.filter(lambda features, label, width: label==0)))))
-        print('-----------------')
-        print('{} test videos.'.format(len(test_tf)))
-        print('{} positive videos.'.format(len(list(test_tf.filter(lambda features, label, width: label==1)))))
-        print('{} negative videos.'.format(len(list(test_tf.filter(lambda features, label, width: label==0)))))
+        # print('{} train frames.'.format(len(train_x)))
+        # print('{} positive frames.'.format(len(list(train_y.filter(lambda features, label, width: label==1)))))
+        # print('{} negative frames.'.format(len(list(train_y.filter(lambda features, label, width: label==0)))))
+        # print('-----------------')
+        # print('{} test frames.'.format(len(test_tf)))
+        # print('{} positive frames.'.format(len(list(test_tf.filter(lambda features, label, width: label==1)))))
+        # print('{} negative frames.'.format(len(list(test_tf.filter(lambda features, label, width: label==0)))))

         # negative_ds = (
@@ -241,111 +372,64 @@ if __name__ == "__main__":
         if args.checkpoint:
             classifier_model = keras.models.load_model(args.checkpoint)
         else:
+            if args.flatten:
+                classifier_inputs = keras.Input(shape=(args.num_kernels * ((image_height - args.kernel_height) // args.stride + 1)))
+            elif args.activations_2d:
+                classifier_inputs = keras.Input(shape=(((image_height - args.kernel_height) // args.stride + 1), args.num_kernels, 1))
+            else:
-            classifier_inputs = keras.Input(shape=(args.num_kernels))
-            classifier_outputs = ONSDMLP()(classifier_inputs)
-            classifier_model = keras.Model(inputs=classifier_inputs, outputs=classifier_outputs)
+                classifier_inputs = keras.Input(shape=(args.num_kernels))
+
+            if args.activations_2d:
+                classifier_outputs = ONSDConv(args.do_regression)(classifier_inputs)
+            else:
+                classifier_outputs = ONSDMLP(args.do_regression)(classifier_inputs)
+
+            classifier_model = keras.Model(inputs=classifier_inputs, outputs=classifier_outputs)

-        prediction_optimizer = keras.optimizers.Adam(learning_rate=args.lr)
-        best_so_far = float('inf')
-
-        class_criterion = keras.losses.BinaryCrossentropy(from_logits=True, reduction=keras.losses.Reduction.SUM)
         # width_criterion = keras.losses.MeanSquaredError(reduction=keras.losses.Reduction.SUM)

+        if not args.do_regression:
+            criterion = keras.losses.BinaryCrossentropy()
+        else:
+            criterion = keras.losses.MeanSquaredError()

-        train_losses = []
-        test_losses = []
-        train_accuracies = []
-        test_accuracies = []
+        classifier_model.compile(optimizer=keras.optimizers.Adam(learning_rate=args.lr), loss=criterion)
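        # [annotation] The new heads already apply sigmoid, so plain
        # BinaryCrossentropy() (from_logits=False) replaces the earlier
        # from_logits=True criterion that was used with raw logits.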
        # train_mse = []
        # test_mse = []

        if args.train:
-            for epoch in range(args.epochs):
-                epoch_loss = 0
-                t1 = time.perf_counter()
-
-                # for images, labels, width in tqdm(balanced_ds.shuffle(len(train_tf)).batch(args.batch_size)):
-                # for images, labels, width in tqdm(balanced_ds.take(len(train_tf)).shuffle(len(train_tf)).batch(args.batch_size)):
-                classifier_model.do_dropout = True
-                for activations, labels, width in train_tf.shuffle(len(train_tf)).batch(args.batch_size):
-                    with tf.GradientTape() as tape:
-                        class_pred = classifier_model(activations)
-                        class_loss = class_criterion(labels, class_pred)
-                        # width_loss = width_criterion(width, width_pred * width_mask)
-                        loss = class_loss
-
-                    epoch_loss += loss * activations.shape[0]
-
-                    gradients = tape.gradient(loss, classifier_model.trainable_weights)
-                    prediction_optimizer.apply_gradients(zip(gradients, classifier_model.trainable_weights))
-
-                t2 = time.perf_counter()
-
-                if epoch_loss < best_so_far:
-                    print("found better model")
-                    # Save model parameters
-                    classifier_model.save(os.path.join(output_dir, "best_classifier_{}.pt".format(i_fold)))
-                    # recon_model.save(os.path.join(output_dir, "best_sparse_model_{}.pt".format(i_fold)))
-                    # pickle.dump(prediction_optimizer.get_weights(), open(os.path.join(output_dir, 'optimizer_{}.pt'.format(i_fold)), 'wb+'))
-                    best_so_far = epoch_loss
-
-            classifier_model = keras.models.load_model(os.path.join(output_dir, "best_classifier_{}.pt".format(i_fold)))
-
-            y_true_train = None
-            y_pred_train = None
-            y_true_test = None
-            y_pred_test = None
+            classifier_model.fit(train_x, train_y, batch_size=args.batch_size, epochs=args.epochs, verbose=False)
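            # [annotation] Keras fit() replaces the manual GradientTape loop; the
            # per-epoch best-loss checkpointing the old loop performed is dropped
            # along with it.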
-            classifier_model.do_dropout = False
-            for activations, labels, width in train_tf.batch(args.batch_size):
-                pred = classifier_model(activations)
-                if y_true_train is None:
-                    y_true_train = labels
-                    y_pred_train = tf.math.round(tf.math.sigmoid(pred))
-                else:
-                    y_true_train = tf.concat((y_true_train, labels), axis=0)
-                    y_pred_train = tf.concat((y_pred_train, tf.math.round(tf.math.sigmoid(pred))), axis=0)
-
-            for activations, labels, width in test_tf.batch(args.batch_size):
-                pred = classifier_model(activations)
-                if y_true_test is None:
-                    y_true_test = labels
-                    y_pred_test = tf.math.round(tf.math.sigmoid(pred))
-                else:
-                    y_true_test = tf.concat((y_true_test, labels), axis=0)
-                    y_pred_test = tf.concat((y_pred_test, tf.math.round(tf.math.sigmoid(pred))), axis=0)
-
-            t2 = time.perf_counter()
-
-            y_true_test = tf.cast(y_true_test, tf.int32)
-            y_pred_test = tf.cast(y_pred_test, tf.int32)
-            y_true_train = tf.cast(y_true_train, tf.int32)
-            y_pred_train = tf.cast(y_pred_train, tf.int32)
+            y_true_train = train_y
+            if args.do_regression:
+                y_pred_train = classifier_model.predict(train_x)
+            else:
+                y_pred_train = np.round(classifier_model.predict(train_x))

             train_frame_true.append(y_true_train)
             train_frame_pred.append(y_pred_train)

+            y_true_test = test_y
+            if args.do_regression:
+                y_pred_test = classifier_model.predict(test_x)
+            else:
+                y_pred_test = np.round(classifier_model.predict(test_x))

             test_frame_true.append(y_true_test)
             test_frame_pred.append(y_pred_test)

+            t2 = time.perf_counter()
+
+            if args.do_regression:
+                f1 = 0.0
+                accuracy = mean_absolute_error(y_true_test, y_pred_test)
+                train_accuracy = mean_absolute_error(y_true_train, y_pred_train)
+            else:
+                f1 = f1_score(y_true_test, y_pred_test, average='macro')
+                accuracy = accuracy_score(y_true_test, y_pred_test)
+                train_accuracy = accuracy_score(y_true_train, y_pred_train)

             # test_mae = keras.losses.MeanAbsoluteError()(width_gt, width_p)
             test_mae = 0.0

-            train_accuracies.append(train_accuracy)
-            test_accuracies.append(accuracy)
+            # train_accuracies.append(train_accuracy)
+            # test_accuracies.append(accuracy)

             pred_dict = {}
             gt_dict = {}
@@ -362,17 +446,23 @@ if __name__ == "__main__":
         test_labels = [vid_f.split('/')[-3] for vid_f in test_videos]

-        classifier_model.do_dropout = False
-        y_pred, y_true, fn, fp = calculate_onsd_scores(test_videos, test_labels, yolo_model, classifier_model, sparse_model, recon_model, transform, image_width, image_height, 0)
+        max_width = 0
+        if args.do_regression:
+            max_width = dataset.max_width
+
+        y_pred, y_true, fn, fp = calculate_onsd_scores(test_videos, test_labels, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height, max_width, args.flatten, args.do_regression, args.activations_2d, args.valid_vids, valid_vids)
+        # y_pred, y_true, fn, fp = calculate_onsd_scores_measured(test_videos, yolo_model, classifier_model, sparse_model, recon_model, transform, image_width, image_height)

         t2 = time.perf_counter()

         print('i_fold={}, time={:.2f}'.format(i_fold, t2-t1))

         if np.size(y_pred):
             y_true = tf.cast(y_true, tf.int32)
             y_pred = tf.cast(y_pred, tf.int32)

             f1 = f1_score(y_true, y_pred, average='macro')
-            accuracy = accuracy_score(y_true, y_pred)
+            vid_accuracy = accuracy_score(y_true, y_pred)

             video_fn.extend(fn)
             video_fp.extend(fp)
@@ -380,7 +470,7 @@ if __name__ == "__main__":
             video_true.extend(y_true)
             video_pred.extend(y_pred)

-            print("Test f1={:.2f}, vid_acc={:.2f}".format(f1, accuracy))
+            print("Test f1={:.2f}, vid acc={:.2f}, train acc={:.2f}, test acc={:.2f}".format(f1, vid_accuracy, train_accuracy, accuracy))

             print(confusion_matrix(y_true, y_pred))
@@ -418,6 +508,10 @@ if __name__ == "__main__":
     test_frame_true = np.concatenate(test_frame_true)
     test_frame_pred = np.concatenate(test_frame_pred)

+    if args.do_regression:
+        train_frame_acc = mean_absolute_error(train_frame_true, train_frame_pred)
+        test_frame_acc = mean_absolute_error(test_frame_true, test_frame_pred)
+    else:
+        train_frame_acc = accuracy_score(train_frame_true, train_frame_pred)
+        test_frame_acc = accuracy_score(test_frame_true, test_frame_pred)
......
@@ -16,6 +16,8 @@ from sparse_coding_torch.utils import plot_filters
 from yolov4.get_bounding_boxes import YoloModel
 import copy

+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

 def sparse_loss(images, recon, activations, batch_size, lam, stride):
     loss = 0.5 * (1/batch_size) * tf.math.reduce_sum(tf.math.pow(images - recon, 2))
     loss += lam * tf.reduce_mean(tf.math.reduce_sum(tf.math.abs(tf.reshape(activations, (batch_size, -1))), axis=1))
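    # [annotation] i.e. loss = (1 / 2B) * sum((x - x_hat)^2)
    #                        + lam * mean_b(sum_i |a_{b,i}|):
    # a batch-averaged reconstruction error plus an L1 sparsity penalty on the
    # per-sample activation vectors.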
@@ -24,24 +26,25 @@ def sparse_loss(images, recon, activations, batch_size, lam, stride):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('--batch_size', default=32, type=int)
-    parser.add_argument('--kernel_width', default=150, type=int)
-    parser.add_argument('--kernel_height', default=10, type=int)
+    parser.add_argument('--kernel_width', default=60, type=int)
+    parser.add_argument('--kernel_height', default=30, type=int)
     parser.add_argument('--kernel_depth', default=1, type=int)
-    parser.add_argument('--num_kernels', default=10, type=int)
+    parser.add_argument('--num_kernels', default=16, type=int)
     parser.add_argument('--stride', default=1, type=int)
     parser.add_argument('--max_activation_iter', default=300, type=int)
     parser.add_argument('--activation_lr', default=1e-2, type=float)
     parser.add_argument('--lr', default=0.003, type=float)
-    parser.add_argument('--epochs', default=150, type=int)
-    parser.add_argument('--lam', default=0.05, type=float)
+    parser.add_argument('--epochs', default=200, type=int)
+    parser.add_argument('--lam', default=0.1, type=float)
     parser.add_argument('--output_dir', default='./output', type=str)
     parser.add_argument('--seed', default=42, type=int)
     parser.add_argument('--run_2d', action='store_true')
     parser.add_argument('--save_filters', action='store_true')
     parser.add_argument('--optimizer', default='sgd', type=str)
-    parser.add_argument('--crop_height', type=int, default=100)
+    parser.add_argument('--crop_height', type=int, default=30)
     parser.add_argument('--crop_width', type=int, default=300)
     parser.add_argument('--scale_factor', type=int, default=2)
+    parser.add_argument('--image_height', type=int, default=30)
+    parser.add_argument('--image_width', type=int, default=250)
     parser.add_argument('--clip_depth', type=int, default=1)
     parser.add_argument('--frames_to_skip', type=int, default=1)
@@ -55,8 +58,8 @@ if __name__ == "__main__":
     crop_height = args.crop_height
     crop_width = args.crop_width

-    image_height = int(crop_height / args.scale_factor)
-    image_width = int(crop_width / args.scale_factor)
+    image_height = args.image_height
+    image_width = args.image_width
     clip_depth = args.clip_depth

     yolo_model = YoloModel('onsd')
@@ -71,7 +74,7 @@ if __name__ == "__main__":
         out_f.write(str(args))

     # splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width, clip_depth), mode='all_train')
-    splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width), crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode='all_train', n_splits=1)
+    splits, dataset = load_onsd_videos(args.batch_size, crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode='all_train', n_splits=1)
     train_idx, test_idx = list(splits)[0]

     train_loader = copy.deepcopy(dataset)
@@ -109,6 +112,15 @@ if __name__ == "__main__":
     else:
         filter_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

+    crop_amount = (crop_width - image_width)
+    assert crop_amount % 2 == 0
+    crop_amount = crop_amount // 2
+
+    data_augmentation = keras.Sequential([
+        keras.layers.RandomTranslation(0, 0.08),
+        keras.layers.Cropping2D((0, crop_amount))
+    ])
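    # [annotation] RandomTranslation(0, 0.08) jitters frames horizontally by up
    # to 8% of their width (no vertical shift); Cropping2D((0, crop_amount)) then
    # trims crop_amount columns from each side, reducing crop_width to image_width.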
loss_log = []
best_so_far = float('inf')
@@ -119,11 +131,15 @@ if __name__ == "__main__":
         num_iters = 0

+        average_activations = []

         for images, labels, width in tqdm(train_tf.shuffle(len(train_tf)).batch(args.batch_size)):
-            images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
+            images = tf.expand_dims(data_augmentation(tf.transpose(images, [0, 2, 3, 1])), axis=1)

             activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))]))

+            average_activations.append(float(tf.math.count_nonzero(activations)) / float(tf.math.reduce_prod(tf.shape(activations))))

             with tf.GradientTape() as tape:
                 recon = recon_model(activations)
                 loss = sparse_loss(images, recon, activations, images.shape[0], args.lam, args.stride)
@@ -159,7 +175,9 @@ if __name__ == "__main__":
                 best_so_far = epoch_loss

         loss_log.append(epoch_loss)
-        print('epoch={}, epoch_loss={:.2f}, time={:.2f}'.format(epoch, epoch_loss, epoch_end - epoch_start))
+
+        sparsity = np.average(np.array(average_activations))
+        print('epoch={}, epoch_loss={:.2f}, time={:.2f}, average sparsity={:.2f}'.format(epoch, epoch_loss, epoch_end - epoch_start, sparsity))

     plt.plot(loss_log)
......
@@ -183,7 +183,7 @@ class ONSDGoodFramesLoader:
                 for start_range, end_range in ranges:
                     for j in range(start_range, end_range, 5):
-                        if j == vc.size(1):
+                        if j >= vc.size(1):
                             break

                         frame = vc[:, j, :, :]
@@ -192,7 +192,7 @@ class ONSDGoodFramesLoader:
                         width_key = txt_label + '/' + width_key
                         width_key = width_key + '/' + str(j) + '.png'

                         if width_key not in onsd_widths:
-                            width = 0
+                            continue
                         else:
                             width = onsd_widths[width_key]
@@ -213,7 +213,7 @@ class ONSDGoodFramesLoader:
                 elif label == 'Negatives':
                     label = np.array(0.0)

-                width = np.round(width / 30)
+                # width = np.round(width / 30)

                 for frm in all_frames:
                     self.clips.append((label, frm.numpy(), self.videos[vid_idx][2], width))
@@ -264,7 +264,7 @@ class ONSDGoodFramesLoader:
         return [frame for _, frame, _, _ in self.clips]

     def get_widths(self):
-        return [width / self.max_width for _, _, _, width in self.clips]
+        return [width for _, _, _, width in self.clips]
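    # [annotation] get_widths() now returns raw (unnormalized) widths; the
    # evaluation script's fixed threshold of 100 in calculate_onsd_scores
    # depends on this.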
def __next__(self):
if self.count < len(self.clips):
......