Commit 69e9d6cc authored by hannandarryl

eval fixes for mlp models

parent 5a7d27b8
@@ -61,24 +61,63 @@ class ONSDClassifier(keras.layers.Layer):
return class_pred, width_pred
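# New ONSDConv head: slides 1x4 conv filters across each frame's
# sparse-activation map before the dense layers. Note conv_2 is defined
# but not yet used in call().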
class ONSDConv(keras.layers.Layer):
def __init__(self, do_regression):
super(ONSDConv, self).__init__()
# self.ff_dropout = keras.layers.Dropout(0.1)
self.conv_1 = keras.layers.Conv2D(8, kernel_size=(1, 4), strides=1, activation='relu', padding='valid')
# self.max_pool = keras.layers.MaxPooling2D(
self.conv_2 = keras.layers.Conv2D(8, kernel_size=(1, 4), strides=1, activation='relu', padding='valid')
self.flatten = keras.layers.Flatten()
# self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
# self.ff_2 = keras.layers.Dense(500, activation='relu', use_bias=True)
self.ff_2 = keras.layers.Dense(10, activation='relu', use_bias=True)
# self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
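# Width regression uses a linear output unit; binary classification
# squashes the single logit through a sigmoid.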
if do_regression:
self.ff_final_1 = keras.layers.Dense(1)
else:
self.ff_final_1 = keras.layers.Dense(1, activation='sigmoid')
self.do_dropout = True
# @tf.function
def call(self, activations):
x = self.conv_1(activations)
x = self.flatten(x)
x = self.ff_2(x)
# x = self.ff_dropout(x, self.do_dropout)
# x = self.ff_3(x)
class_pred = self.ff_final_1(x)
return class_pred
class ONSDMLP(keras.layers.Layer):
def __init__(self):
def __init__(self, do_regression):
super(ONSDMLP, self).__init__()
self.ff_dropout = keras.layers.Dropout(0.1)
# self.ff_dropout = keras.layers.Dropout(0.1)
# self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
# self.ff_2 = keras.layers.Dense(500, activation='relu', use_bias=True)
self.ff_2 = keras.layers.Dense(16, activation='relu', use_bias=True)
self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
self.ff_2 = keras.layers.Dense(8, activation='relu', use_bias=True)
# self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
if do_regression:
self.ff_final_1 = keras.layers.Dense(1)
else:
self.ff_final_1 = keras.layers.Dense(1, activation='sigmoid')
self.do_dropout = True
# @tf.function
def call(self, activations):
x = self.ff_2(activations)
x = self.ff_dropout(x, self.do_dropout)
x = self.ff_3(x)
# x = self.ff_dropout(x, self.do_dropout)
# x = self.ff_3(x)
class_pred = self.ff_final_1(x)
return class_pred
@@ -9,13 +9,12 @@ from typing import Sequence, Iterator
import csv
from sklearn.model_selection import train_test_split, GroupShuffleSplit, LeaveOneGroupOut, LeaveOneOut, StratifiedGroupKFold, StratifiedKFold, KFold, ShuffleSplit
def load_onsd_videos(batch_size, input_size, crop_size, yolo_model=None, mode=None, n_splits=None):
def load_onsd_videos(batch_size, crop_size, yolo_model=None, mode=None, n_splits=None, do_regression=False):
video_path = "/shared_data/bamc_onsd_data/revised_extended_onsd_data"
transforms = torchvision.transforms.Compose(
[torchvision.transforms.Grayscale(1),
MinMaxScaler(0, 255),
torchvision.transforms.Resize(input_size[:2])
MinMaxScaler(0, 255)
])
# augment_transforms = torchvision.transforms.Compose(
# [torchvision.transforms.RandomRotation(45),
@@ -23,6 +22,9 @@ def load_onsd_videos(batch_size, input_size, crop_size, yolo_model=None, mode=None, n_splits=None):
# torchvision.transforms.RandomAdjustSharpness(0.05)
# ])
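# Regression uses the "good frames" loader, which keeps only frames that
# carry a width annotation; classification trains on all frames.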
if do_regression:
dataset = ONSDGoodFramesLoader(video_path, crop_size[1], crop_size[0], transform=transforms, yolo_model=yolo_model)
else:
dataset = ONSDAllFramesLoader(video_path, crop_size[1], crop_size[0], transform=transforms, yolo_model=yolo_model)
targets = dataset.get_labels()
@@ -10,7 +10,7 @@ import os
from sparse_coding_torch.onsd.load_data import load_onsd_videos
from sparse_coding_torch.utils import SubsetWeightedRandomSampler, get_sample_weights
from sparse_coding_torch.sparse_model import SparseCode, ReconSparse, normalize_weights, normalize_weights_3d
from sparse_coding_torch.onsd.classifier_model import ONSDClassifier
from sparse_coding_torch.onsd.classifier_model import ONSDMLP
from sparse_coding_torch.onsd.video_loader import get_yolo_region_onsd
import time
import numpy as np
@@ -30,6 +30,8 @@ from sklearn.neural_network import MLPClassifier
from sklearn import metrics
from sklearn.preprocessing import normalize
from scikeras.wrappers import KerasClassifier, KerasRegressor
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
if __name__ == "__main__":
@@ -54,6 +56,8 @@ if __name__ == "__main__":
parser.add_argument('--scale_factor', type=int, default=2)
parser.add_argument('--clip_depth', type=int, default=1)
parser.add_argument('--frames_to_skip', type=int, default=1)
parser.add_argument('--flatten', action='store_true')
parser.add_argument('--regression', action='store_true')
args = parser.parse_args()
@@ -91,8 +95,7 @@ if __name__ == "__main__":
sparse_model = keras.Model(inputs=(inputs, filter_inputs), outputs=output)
recon_model = keras.models.load_model(args.sparse_checkpoint)
splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width), crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits)
splits, dataset = load_onsd_videos(args.batch_size, crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits, do_regression=args.regression)
positive_class = 'Positives'
# difficult_vids = split_difficult_vids(dataset.get_difficult_vids(), args.n_splits)
@@ -134,49 +137,91 @@ if __name__ == "__main__":
# clf = LogisticRegression(max_iter=1000)
# clf = RidgeClassifier(alpha=3.0)
clf = MLPClassifier(hidden_layer_sizes=(16,))
# clf = MLPClassifier(hidden_layer_sizes=(16,))
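# Feature size: with --flatten, every spatial position of every kernel is
# kept (num_kernels * number of valid filter positions along the height);
# otherwise activations are summed over space, leaving num_kernels features.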
if args.flatten:
classifier_inputs = keras.Input(shape=(args.num_kernels * ((image_height - args.kernel_height) // args.stride + 1)))
else:
classifier_inputs = keras.Input(shape=(args.num_kernels))
classifier_outputs = ONSDMLP(args.regression)(classifier_inputs)
classifier_model = keras.Model(inputs=classifier_inputs, outputs=classifier_outputs)
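# scikeras wraps the Keras model in the sklearn fit/predict API, so it can
# drop in where the sklearn MLPClassifier was used above.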
if args.regression:
clf = KerasRegressor(classifier_model, loss='mean_squared_error', optimizer='adam', epochs=200, verbose=False)
else:
clf = KerasClassifier(classifier_model, loss='binary_crossentropy', optimizer='adam', epochs=200, verbose=False)
train_filter_activations = [[] for _ in range(args.num_kernels)]
# train_filter_activations = [[] for _ in range(args.num_kernels)]
train_filter_activations = []
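# Each frame is encoded once by the frozen sparse model; the activation map
# is either flattened or summed over spatial positions to form the MLP's
# input features, paired with a width (regression) or label (classification).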
for images, labels, width in tqdm(train_tf.shuffle(len(train_tf)).batch(batch_size)):
images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))])).numpy()
for b_idx in range(activations.shape[0]):
acts = np.squeeze(activations[b_idx])
for i in range(args.num_kernels):
acts_for_filter = acts[:, i]
act_sum = np.sum(acts_for_filter)
train_filter_activations[i].append((act_sum, float(labels[b_idx])))
activations = tf.squeeze(activations, axis=1)
activations = tf.squeeze(activations, axis=2)
if args.flatten:
activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
else:
activations = tf.math.reduce_sum(activations, axis=1)
for b_idx, act in enumerate(activations):
if args.regression:
train_filter_activations.append((act, width[b_idx]))
else:
train_filter_activations.append((act, labels[b_idx]))
# for b_idx in range(activations.shape[0]):
# acts = np.squeeze(activations[b_idx])
# for i in range(args.num_kernels):
# acts_for_filter = acts[:, i]
# act_sum = np.sum(acts_for_filter)
# train_filter_activations[i].append((act_sum, float(labels[b_idx])))
test_filter_activations = [[] for _ in range(args.num_kernels)]
# test_filter_activations = [[] for _ in range(args.num_kernels)]
test_filter_activations = []
for images, labels, width in tqdm(test_tf.batch(args.batch_size)):
images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))])).numpy()
for b_idx in range(activations.shape[0]):
acts = np.squeeze(activations[b_idx])
for i in range(args.num_kernels):
acts_for_filter = acts[:, i]
act_sum = np.sum(acts_for_filter)
test_filter_activations[i].append((act_sum, float(labels[b_idx])))
activations = tf.squeeze(activations, axis=1)
activations = tf.squeeze(activations, axis=2)
if args.flatten:
activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
else:
activations = tf.math.reduce_sum(activations, axis=1)
for b_idx, act in enumerate(activations):
if args.regression:
test_filter_activations.append((act, width[b_idx]))
else:
test_filter_activations.append((act, labels[b_idx]))
# for b_idx in range(activations.shape[0]):
# acts = np.squeeze(activations[b_idx])
# for i in range(args.num_kernels):
# acts_for_filter = acts[:, i]
# act_sum = np.sum(acts_for_filter)
# test_filter_activations[i].append((act_sum, float(labels[b_idx])))
train_X = []
train_y = []
for i in range(len(train_filter_activations[0])):
x = np.array([train_filter_activations[j][i][0] for j in range(args.num_kernels)])
label = train_filter_activations[0][i][1]
# for i in range(len(train_filter_activations[0])):
# x = np.array([train_filter_activations[j][i][0] for j in range(args.num_kernels)])
# label = train_filter_activations[0][i][1]
# train_X.append(x)
# train_y.append(label)
for x, label in train_filter_activations:
train_X.append(x)
train_y.append(label)
@@ -186,10 +231,14 @@ if __name__ == "__main__":
test_X = []
test_y = []
for i in range(len(test_filter_activations[0])):
x = np.array([test_filter_activations[j][i][0] for j in range(args.num_kernels)])
label = test_filter_activations[0][i][1]
# for i in range(len(test_filter_activations[0])):
# x = np.array([test_filter_activations[j][i][0] for j in range(args.num_kernels)])
# label = test_filter_activations[0][i][1]
# test_X.append(x)
# test_y.append(label)
for x, label in test_filter_activations:
test_X.append(x)
test_y.append(label)
@@ -217,6 +266,19 @@ if __name__ == "__main__":
test_gt_all = np.concatenate([test_gt_all, test_y])
if args.splits == 'leave_one_out':
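# Video-level decision: average the per-frame widths and threshold. 100
# appears to be the pixel-width cutoff between negative and positive ONSD,
# divided by max_width to match the loader's normalized units.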
if args.regression:
video_gt = np.average(test_y)
if video_gt >= 100 / dataset.max_width:
video_gt = np.array([1])
else:
video_gt = np.array([0])
video_pred = np.array([np.average(test_pred)])
if video_pred >= 100 / dataset.max_width:
video_pred = np.array([1])
else:
video_pred = np.array([0])
else:
video_gt = np.array([test_y[0]])
video_pred = np.array([np.round(np.average(test_pred))])
@@ -238,6 +300,10 @@ if __name__ == "__main__":
frame_pred_all = np.concatenate([frame_pred_all, frame_pred])
frame_gt_all = np.concatenate([frame_gt_all, frame_gt])
if args.regression:
train_acc = metrics.mean_absolute_error(train_pred, train_y)
test_acc = metrics.mean_absolute_error(test_pred, test_y)
else:
train_acc = metrics.accuracy_score(train_pred, train_y)
test_acc = metrics.accuracy_score(test_pred, test_y)
@@ -245,11 +311,18 @@ if __name__ == "__main__":
print('Final Predictions!')
if args.regression:
train_accuracy = metrics.mean_absolute_error(train_pred_all, train_gt_all)
test_accuracy = metrics.mean_absolute_error(test_pred_all, test_gt_all)
frame_accuracy = metrics.mean_absolute_error(frame_pred_all, frame_gt_all)
else:
train_accuracy = metrics.accuracy_score(train_pred_all, train_gt_all)
test_accuracy = metrics.accuracy_score(test_pred_all, test_gt_all)
frame_accuracy = metrics.accuracy_score(frame_pred_all, frame_gt_all)
if args.splits == 'leave_one_out':
print(video_pred_all)
print(video_gt_all)
video_accuracy = metrics.accuracy_score(video_pred_all, video_gt_all)
print('train_acc={:.2f}, test_acc={:.2f}, frame_acc={:.2f}, video_acc={:.2f}'.format(train_accuracy, test_accuracy, frame_accuracy, video_accuracy))
(The diff for one file is collapsed and not shown.)
@@ -16,6 +16,8 @@ from sparse_coding_torch.utils import plot_filters
from yolov4.get_bounding_boxes import YoloModel
import copy
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
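# Standard sparse-coding objective: 0.5 * squared reconstruction error,
# averaged over the batch, plus an L1 penalty (weight lam) on the activations.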
def sparse_loss(images, recon, activations, batch_size, lam, stride):
loss = 0.5 * (1/batch_size) * tf.math.reduce_sum(tf.math.pow(images - recon, 2))
loss += lam * tf.reduce_mean(tf.math.reduce_sum(tf.math.abs(tf.reshape(activations, (batch_size, -1))), axis=1))
@@ -24,24 +26,25 @@ def sparse_loss(images, recon, activations, batch_size, lam, stride):
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('--batch_size', default=32, type=int)
parser.add_argument('--kernel_width', default=150, type=int)
parser.add_argument('--kernel_height', default=10, type=int)
parser.add_argument('--kernel_width', default=60, type=int)
parser.add_argument('--kernel_height', default=30, type=int)
parser.add_argument('--kernel_depth', default=1, type=int)
parser.add_argument('--num_kernels', default=10, type=int)
parser.add_argument('--num_kernels', default=16, type=int)
parser.add_argument('--stride', default=1, type=int)
parser.add_argument('--max_activation_iter', default=300, type=int)
parser.add_argument('--activation_lr', default=1e-2, type=float)
parser.add_argument('--lr', default=0.003, type=float)
parser.add_argument('--epochs', default=150, type=int)
parser.add_argument('--lam', default=0.05, type=float)
parser.add_argument('--epochs', default=200, type=int)
parser.add_argument('--lam', default=0.1, type=float)
parser.add_argument('--output_dir', default='./output', type=str)
parser.add_argument('--seed', default=42, type=int)
parser.add_argument('--run_2d', action='store_true')
parser.add_argument('--save_filters', action='store_true')
parser.add_argument('--optimizer', default='sgd', type=str)
parser.add_argument('--crop_height', type=int, default=100)
parser.add_argument('--crop_height', type=int, default=30)
parser.add_argument('--crop_width', type=int, default=300)
parser.add_argument('--scale_factor', type=int, default=2)
parser.add_argument('--image_height', type=int, default=30)
parser.add_argument('--image_width', type=int, default=250)
parser.add_argument('--clip_depth', type=int, default=1)
parser.add_argument('--frames_to_skip', type=int, default=1)
@@ -55,8 +58,8 @@ if __name__ == "__main__":
crop_height = args.crop_height
crop_width = args.crop_width
image_height = int(crop_height / args.scale_factor)
image_width = int(crop_width / args.scale_factor)
image_height = args.image_height
image_width = args.image_width
clip_depth = args.clip_depth
yolo_model = YoloModel('onsd')
@@ -71,7 +74,7 @@ if __name__ == "__main__":
out_f.write(str(args))
# splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width, clip_depth), mode='all_train')
splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width), crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode='all_train', n_splits=1)
splits, dataset = load_onsd_videos(args.batch_size, crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode='all_train', n_splits=1)
train_idx, test_idx = list(splits)[0]
train_loader = copy.deepcopy(dataset)
@@ -109,6 +112,15 @@ if __name__ == "__main__":
else:
filter_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
crop_amount = (crop_width - image_width)
assert crop_amount % 2 == 0
crop_amount = crop_amount // 2
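# Augmentation: jitter each strip horizontally by up to 8% of its width,
# then center-crop from crop_width down to image_width.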
data_augmentation = keras.Sequential([
keras.layers.RandomTranslation(0, 0.08),
keras.layers.Cropping2D((0, crop_amount))
])
loss_log = []
best_so_far = float('inf')
@@ -119,11 +131,15 @@ if __name__ == "__main__":
num_iters = 0
average_activations = []
for images, labels, width in tqdm(train_tf.shuffle(len(train_tf)).batch(args.batch_size)):
images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
images = tf.expand_dims(data_augmentation(tf.transpose(images, [0, 2, 3, 1])), axis=1)
activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))]))
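# Track the fraction of non-zero activation entries per batch
# (reported below as average sparsity).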
average_activations.append(float(tf.math.count_nonzero(activations)) / float(tf.math.reduce_prod(tf.shape(activations))))
with tf.GradientTape() as tape:
recon = recon_model(activations)
loss = sparse_loss(images, recon, activations, images.shape[0], args.lam, args.stride)
@@ -159,7 +175,9 @@ if __name__ == "__main__":
best_so_far = epoch_loss
loss_log.append(epoch_loss)
print('epoch={}, epoch_loss={:.2f}, time={:.2f}'.format(epoch, epoch_loss, epoch_end - epoch_start))
sparsity = np.average(np.array(average_activations))
print('epoch={}, epoch_loss={:.2f}, time={:.2f}, average sparsity={:.2f}'.format(epoch, epoch_loss, epoch_end - epoch_start, sparsity))
plt.plot(loss_log)
@@ -183,7 +183,7 @@ class ONSDGoodFramesLoader:
for start_range, end_range in ranges:
for j in range(start_range, end_range, 5):
if j == vc.size(1):
if j >= vc.size(1):
break
frame = vc[:, j, :, :]
@@ -192,7 +192,7 @@ class ONSDGoodFramesLoader:
width_key = txt_label + '/' + width_key
width_key = width_key + '/' + str(j) + '.png'
if width_key not in onsd_widths:
width = 0
continue
else:
width = onsd_widths[width_key]
@@ -213,7 +213,7 @@ class ONSDGoodFramesLoader:
elif label == 'Negatives':
label = np.array(0.0)
width = np.round(width / 30)
# width = np.round(width / 30)
for frm in all_frames:
self.clips.append((label, frm.numpy(), self.videos[vid_idx][2], width))
@@ -264,7 +264,7 @@ class ONSDGoodFramesLoader:
return [frame for _, frame, _, _ in self.clips]
def get_widths(self):
return [width / self.max_width for _, _, _, width in self.clips]
return [width for _, _, _, width in self.clips]
def __next__(self):
if self.count < len(self.clips):