Commit 69e9d6cc authored by hannandarryl

eval fixes for mlp models

parent 5a7d27b8
@@ -61,24 +61,63 @@ class ONSDClassifier(keras.layers.Layer):
         return class_pred, width_pred
+class ONSDConv(keras.layers.Layer):
+    def __init__(self, do_regression):
+        super(ONSDConv, self).__init__()
+        # self.ff_dropout = keras.layers.Dropout(0.1)
+        self.conv_1 = keras.layers.Conv2D(8, kernel_size=(1, 4), strides=1, activation='relu', padding='valid')
+        # self.max_pool = keras.layers.MaxPooling2D(
+        self.conv_2 = keras.layers.Conv2D(8, kernel_size=(1, 4), strides=1, activation='relu', padding='valid')
+        self.flatten = keras.layers.Flatten()
+        # self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
+        # self.ff_2 = keras.layers.Dense(500, activation='relu', use_bias=True)
+        self.ff_2 = keras.layers.Dense(10, activation='relu', use_bias=True)
+        # self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
+        if do_regression:
+            self.ff_final_1 = keras.layers.Dense(1)
+        else:
+            self.ff_final_1 = keras.layers.Dense(1, activation='sigmoid')
+        self.do_dropout = True
+
+    # @tf.function
+    def call(self, activations):
+        # print(activations.shape)  # leftover debug output, disabled
+        # raise Exception           # leftover debug guard, disabled so call() can run
+        x = self.conv_1(activations)
+        x = self.flatten(x)
+        x = self.ff_2(x)
+        # x = self.ff_dropout(x, self.do_dropout)
+        # x = self.ff_3(x)
+        class_pred = self.ff_final_1(x)
+        return class_pred
 class ONSDMLP(keras.layers.Layer):
-    def __init__(self):
+    def __init__(self, do_regression):
         super(ONSDMLP, self).__init__()
-        self.ff_dropout = keras.layers.Dropout(0.1)
+        # self.ff_dropout = keras.layers.Dropout(0.1)
         # self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
         # self.ff_2 = keras.layers.Dense(500, activation='relu', use_bias=True)
-        self.ff_2 = keras.layers.Dense(16, activation='relu', use_bias=True)
-        self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
-        self.ff_final_1 = keras.layers.Dense(1)
+        self.ff_2 = keras.layers.Dense(8, activation='relu', use_bias=True)
+        # self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
+        if do_regression:
+            self.ff_final_1 = keras.layers.Dense(1)
+        else:
+            self.ff_final_1 = keras.layers.Dense(1, activation='sigmoid')
         self.do_dropout = True

     # @tf.function
     def call(self, activations):
         x = self.ff_2(activations)
-        x = self.ff_dropout(x, self.do_dropout)
-        x = self.ff_3(x)
+        # x = self.ff_dropout(x, self.do_dropout)
+        # x = self.ff_3(x)
         class_pred = self.ff_final_1(x)
         return class_pred
...
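A minimal usage sketch of the revised ONSDMLP head, assuming TensorFlow 2.x and that ONSDMLP is importable from sparse_coding_torch.onsd.classifier_model; the batch and feature sizes here are illustrative, not taken from the commit:

import tensorflow as tf
from sparse_coding_torch.onsd.classifier_model import ONSDMLP

acts = tf.random.uniform((4, 16))         # stand-in for summed sparse activations

reg_head = ONSDMLP(do_regression=True)    # linear Dense(1): unbounded width estimate
clf_head = ONSDMLP(do_regression=False)   # Dense(1, sigmoid): probability of a positive frame

print(reg_head(acts).shape)  # (4, 1)
print(clf_head(acts))        # values squashed into (0, 1)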
@@ -9,13 +9,12 @@ from typing import Sequence, Iterator
 import csv
 from sklearn.model_selection import train_test_split, GroupShuffleSplit, LeaveOneGroupOut, LeaveOneOut, StratifiedGroupKFold, StratifiedKFold, KFold, ShuffleSplit

-def load_onsd_videos(batch_size, input_size, crop_size, yolo_model=None, mode=None, n_splits=None):
+def load_onsd_videos(batch_size, crop_size, yolo_model=None, mode=None, n_splits=None, do_regression=False):
     video_path = "/shared_data/bamc_onsd_data/revised_extended_onsd_data"

     transforms = torchvision.transforms.Compose(
     [torchvision.transforms.Grayscale(1),
-     MinMaxScaler(0, 255),
-     torchvision.transforms.Resize(input_size[:2])
+     MinMaxScaler(0, 255)
     ])
     # augment_transforms = torchvision.transforms.Compose(
     # [torchvision.transforms.RandomRotation(45),
@@ -23,6 +22,9 @@ def load_onsd_videos(batch_size, input_size, crop_size, yolo_model=None, mode=None, n_splits=None):
     # torchvision.transforms.RandomAdjustSharpness(0.05)
     # ])

-    dataset = ONSDAllFramesLoader(video_path, crop_size[1], crop_size[0], transform=transforms, yolo_model=yolo_model)
+    if do_regression:
+        dataset = ONSDGoodFramesLoader(video_path, crop_size[1], crop_size[0], transform=transforms, yolo_model=yolo_model)
+    else:
+        dataset = ONSDAllFramesLoader(video_path, crop_size[1], crop_size[0], transform=transforms, yolo_model=yolo_model)

     targets = dataset.get_labels()
...
@@ -10,7 +10,7 @@ import os
 from sparse_coding_torch.onsd.load_data import load_onsd_videos
 from sparse_coding_torch.utils import SubsetWeightedRandomSampler, get_sample_weights
 from sparse_coding_torch.sparse_model import SparseCode, ReconSparse, normalize_weights, normalize_weights_3d
-from sparse_coding_torch.onsd.classifier_model import ONSDClassifier
+from sparse_coding_torch.onsd.classifier_model import ONSDMLP
 from sparse_coding_torch.onsd.video_loader import get_yolo_region_onsd
 import time
 import numpy as np
@@ -30,6 +30,8 @@ from sklearn.neural_network import MLPClassifier
 from sklearn import metrics
 from sklearn.preprocessing import normalize
+from scikeras.wrappers import KerasClassifier, KerasRegressor

 tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

 if __name__ == "__main__":
@@ -54,6 +56,8 @@ if __name__ == "__main__":
     parser.add_argument('--scale_factor', type=int, default=2)
     parser.add_argument('--clip_depth', type=int, default=1)
     parser.add_argument('--frames_to_skip', type=int, default=1)
+    parser.add_argument('--flatten', action='store_true')
+    parser.add_argument('--regression', action='store_true')

     args = parser.parse_args()
@@ -91,8 +95,7 @@ if __name__ == "__main__":
     sparse_model = keras.Model(inputs=(inputs, filter_inputs), outputs=output)
     recon_model = keras.models.load_model(args.sparse_checkpoint)

-    splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width), crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits)
+    splits, dataset = load_onsd_videos(args.batch_size, crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits, do_regression=args.regression)
     positive_class = 'Positives'

     # difficult_vids = split_difficult_vids(dataset.get_difficult_vids(), args.n_splits)
@@ -134,49 +137,91 @@ if __name__ == "__main__":
         # clf = LogisticRegression(max_iter=1000)
         # clf = RidgeClassifier(alpha=3.0)
-        clf = MLPClassifier(hidden_layer_sizes=(16,))
+        # clf = MLPClassifier(hidden_layer_sizes=(16,))
+        if args.flatten:
+            classifier_inputs = keras.Input(shape=(args.num_kernels * ((image_height - args.kernel_height) // args.stride + 1)))
+        else:
+            classifier_inputs = keras.Input(shape=(args.num_kernels))
+        classifier_outputs = ONSDMLP(args.regression)(classifier_inputs)
+        classifier_model = keras.Model(inputs=classifier_inputs, outputs=classifier_outputs)
+
+        if args.regression:
+            clf = KerasRegressor(classifier_model, loss='mean_squared_error', optimizer='adam', epochs=200, verbose=False)
+        else:
+            clf = KerasClassifier(classifier_model, loss='binary_crossentropy', optimizer='adam', epochs=200, verbose=False)
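scikeras exposes the Keras model through the scikit-learn estimator API, so the downstream clf.fit / clf.predict calls work unchanged in both modes. A small sketch with synthetic features standing in for the pooled activations, assuming classifier_model was built as above with a 16-wide input (no --flatten, 16 kernels):

import numpy as np
from scikeras.wrappers import KerasClassifier

X = np.random.rand(32, 16).astype(np.float32)   # synthetic activation features
y = np.random.randint(0, 2, size=32)            # synthetic frame labels

clf = KerasClassifier(classifier_model, loss='binary_crossentropy',
                      optimizer='adam', epochs=5, verbose=False)
clf.fit(X, y)              # sklearn-style training loop over the Keras model
print(clf.predict(X[:4]))  # hard 0/1 class predictions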
-        train_filter_activations = [[] for _ in range(args.num_kernels)]
+        # train_filter_activations = [[] for _ in range(args.num_kernels)]
+        train_filter_activations = []
         for images, labels, width in tqdm(train_tf.shuffle(len(train_tf)).batch(batch_size)):
             images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
             activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))])).numpy()
-            for b_idx in range(activations.shape[0]):
-                acts = np.squeeze(activations[b_idx])
-
-                for i in range(args.num_kernels):
-                    acts_for_filter = acts[:, i]
-                    act_sum = np.sum(acts_for_filter)
-                    train_filter_activations[i].append((act_sum, float(labels[b_idx])))
+            activations = tf.squeeze(activations, axis=1)
+            activations = tf.squeeze(activations, axis=2)
+            if args.flatten:
+                activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
+            else:
+                activations = tf.math.reduce_sum(activations, axis=1)
+            for b_idx, act in enumerate(activations):
+                if args.regression:
+                    train_filter_activations.append((act, width[b_idx]))
+                else:
+                    train_filter_activations.append((act, labels[b_idx]))
+            # for b_idx in range(activations.shape[0]):
+            #     acts = np.squeeze(activations[b_idx])
+            #     for i in range(args.num_kernels):
+            #         acts_for_filter = acts[:, i]
+            #         act_sum = np.sum(acts_for_filter)
+            #         train_filter_activations[i].append((act_sum, float(labels[b_idx])))

-        test_filter_activations = [[] for _ in range(args.num_kernels)]
+        # test_filter_activations = [[] for _ in range(args.num_kernels)]
+        test_filter_activations = []
         for images, labels, width in tqdm(test_tf.batch(args.batch_size)):
             images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
             activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))])).numpy()
-            for b_idx in range(activations.shape[0]):
-                acts = np.squeeze(activations[b_idx])
-
-                for i in range(args.num_kernels):
-                    acts_for_filter = acts[:, i]
-                    act_sum = np.sum(acts_for_filter)
-                    test_filter_activations[i].append((act_sum, float(labels[b_idx])))
+            activations = tf.squeeze(activations, axis=1)
+            activations = tf.squeeze(activations, axis=2)
+            if args.flatten:
+                activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
+            else:
+                activations = tf.math.reduce_sum(activations, axis=1)
+            for b_idx, act in enumerate(activations):
+                if args.regression:
+                    test_filter_activations.append((act, width[b_idx]))
+                else:
+                    test_filter_activations.append((act, labels[b_idx]))
+            # for b_idx in range(activations.shape[0]):
+            #     acts = np.squeeze(activations[b_idx])
+            #     for i in range(args.num_kernels):
+            #         acts_for_filter = acts[:, i]
+            #         act_sum = np.sum(acts_for_filter)
+            #         test_filter_activations[i].append((act_sum, float(labels[b_idx])))
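Both loops above reduce each frame's sparse activation map to a fixed-length feature vector in one of two ways: --flatten keeps every spatial position, while the default sums activations per kernel. A standalone sketch of the two transforms; the (batch, positions, kernels) shape of the squeezed activations is an assumption:

import tensorflow as tf

acts = tf.random.uniform((8, 21, 16))  # (batch, spatial positions, num_kernels), assumed

flat = tf.reshape(acts, (-1, acts.shape[1] * acts.shape[2]))  # (8, 336): keeps location information
pooled = tf.math.reduce_sum(acts, axis=1)                     # (8, 16): one total per kernel

print(flat.shape, pooled.shape)

The flattened width is exactly what the keras.Input shape above computes from num_kernels, image_height, kernel_height, and stride.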
         train_X = []
         train_y = []

-        for i in range(len(train_filter_activations[0])):
-            x = np.array([train_filter_activations[j][i][0] for j in range(args.num_kernels)])
-            label = train_filter_activations[0][i][1]
+        # for i in range(len(train_filter_activations[0])):
+        #     x = np.array([train_filter_activations[j][i][0] for j in range(args.num_kernels)])
+        #     label = train_filter_activations[0][i][1]
+        #     train_X.append(x)
+        #     train_y.append(label)
+        for x, label in train_filter_activations:
             train_X.append(x)
             train_y.append(label)
@@ -186,10 +231,14 @@ if __name__ == "__main__":
         test_X = []
         test_y = []

-        for i in range(len(test_filter_activations[0])):
-            x = np.array([test_filter_activations[j][i][0] for j in range(args.num_kernels)])
-            label = test_filter_activations[0][i][1]
+        # for i in range(len(test_filter_activations[0])):
+        #     x = np.array([test_filter_activations[j][i][0] for j in range(args.num_kernels)])
+        #     label = test_filter_activations[0][i][1]
+        #     test_X.append(x)
+        #     test_y.append(label)
+        for x, label in test_filter_activations:
             test_X.append(x)
             test_y.append(label)
@@ -217,6 +266,19 @@ if __name__ == "__main__":
         test_gt_all = np.concatenate([test_gt_all, test_y])

         if args.splits == 'leave_one_out':
-            video_gt = np.array([test_y[0]])
-            video_pred = np.array([np.round(np.average(test_pred))])
+            if args.regression:
+                video_gt = np.average(test_y)
+                if video_gt >= 100 / dataset.max_width:
+                    video_gt = np.array([1])
+                else:
+                    video_gt = np.array([0])
+
+                video_pred = np.array([np.average(test_pred)])
+                if video_pred >= 100 / dataset.max_width:
+                    video_pred = np.array([1])
+                else:
+                    video_pred = np.array([0])
+            else:
+                video_gt = np.array([test_y[0]])
+                video_pred = np.array([np.round(np.average(test_pred))])
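In regression mode the leave-one-out video decision averages the per-frame width predictions and thresholds at 100 / dataset.max_width, i.e. a 100-unit cutoff on the normalized width scale. A sketch of that rule with invented numbers (max_width here is hypothetical, not taken from the dataset):

import numpy as np

max_width = 400.0              # hypothetical dataset.max_width
threshold = 100.0 / max_width  # 0.25 on the normalized scale

frame_preds = np.array([0.22, 0.27, 0.31])             # per-frame width predictions
video_pred = int(np.average(frame_preds) >= threshold)
print(video_pred)                                      # 1: video called positive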
@@ -238,6 +300,10 @@ if __name__ == "__main__":
             frame_pred_all = np.concatenate([frame_pred_all, frame_pred])
             frame_gt_all = np.concatenate([frame_gt_all, frame_gt])

-        train_acc = metrics.accuracy_score(train_pred, train_y)
-        test_acc = metrics.accuracy_score(test_pred, test_y)
+        if args.regression:
+            train_acc = metrics.mean_absolute_error(train_pred, train_y)
+            test_acc = metrics.mean_absolute_error(test_pred, test_y)
+        else:
+            train_acc = metrics.accuracy_score(train_pred, train_y)
+            test_acc = metrics.accuracy_score(test_pred, test_y)
@@ -245,11 +311,18 @@ if __name__ == "__main__":

     print('Final Predictions!')
-    train_accuracy = metrics.accuracy_score(train_pred_all, train_gt_all)
-    test_accuracy = metrics.accuracy_score(test_pred_all, test_gt_all)
-    frame_accuracy = metrics.accuracy_score(frame_pred_all, frame_gt_all)
+    if args.regression:
+        train_accuracy = metrics.mean_absolute_error(train_pred_all, train_gt_all)
+        test_accuracy = metrics.mean_absolute_error(test_pred_all, test_gt_all)
+        frame_accuracy = metrics.mean_absolute_error(frame_pred_all, frame_gt_all)
+    else:
+        train_accuracy = metrics.accuracy_score(train_pred_all, train_gt_all)
+        test_accuracy = metrics.accuracy_score(test_pred_all, test_gt_all)
+        frame_accuracy = metrics.accuracy_score(frame_pred_all, frame_gt_all)

     if args.splits == 'leave_one_out':
+        print(video_pred_all)
+        print(video_gt_all)
         video_accuracy = metrics.accuracy_score(video_pred_all, video_gt_all)
         print('train_acc={:.2f}, test_acc={:.2f}, frame_acc={:.2f}, video_acc={:.2f}'.format(train_accuracy, test_accuracy, frame_accuracy, video_accuracy))
...
This diff is collapsed.
@@ -16,6 +16,8 @@ from sparse_coding_torch.utils import plot_filters
 from yolov4.get_bounding_boxes import YoloModel
 import copy

+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
 def sparse_loss(images, recon, activations, batch_size, lam, stride):
     loss = 0.5 * (1/batch_size) * tf.math.reduce_sum(tf.math.pow(images - recon, 2))
     loss += lam * tf.reduce_mean(tf.math.reduce_sum(tf.math.abs(tf.reshape(activations, (batch_size, -1))), axis=1))
@@ -24,24 +26,25 @@ def sparse_loss(images, recon, activations, batch_size, lam, stride):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('--batch_size', default=32, type=int)
-    parser.add_argument('--kernel_width', default=150, type=int)
-    parser.add_argument('--kernel_height', default=10, type=int)
+    parser.add_argument('--kernel_width', default=60, type=int)
+    parser.add_argument('--kernel_height', default=30, type=int)
     parser.add_argument('--kernel_depth', default=1, type=int)
-    parser.add_argument('--num_kernels', default=10, type=int)
+    parser.add_argument('--num_kernels', default=16, type=int)
     parser.add_argument('--stride', default=1, type=int)
     parser.add_argument('--max_activation_iter', default=300, type=int)
     parser.add_argument('--activation_lr', default=1e-2, type=float)
     parser.add_argument('--lr', default=0.003, type=float)
-    parser.add_argument('--epochs', default=150, type=int)
-    parser.add_argument('--lam', default=0.05, type=float)
+    parser.add_argument('--epochs', default=200, type=int)
+    parser.add_argument('--lam', default=0.1, type=float)
     parser.add_argument('--output_dir', default='./output', type=str)
     parser.add_argument('--seed', default=42, type=int)
     parser.add_argument('--run_2d', action='store_true')
     parser.add_argument('--save_filters', action='store_true')
     parser.add_argument('--optimizer', default='sgd', type=str)
-    parser.add_argument('--crop_height', type=int, default=100)
+    parser.add_argument('--crop_height', type=int, default=30)
     parser.add_argument('--crop_width', type=int, default=300)
-    parser.add_argument('--scale_factor', type=int, default=2)
+    parser.add_argument('--image_height', type=int, default=30)
+    parser.add_argument('--image_width', type=int, default=250)
     parser.add_argument('--clip_depth', type=int, default=1)
     parser.add_argument('--frames_to_skip', type=int, default=1)
@@ -55,8 +58,8 @@ if __name__ == "__main__":
     crop_height = args.crop_height
     crop_width = args.crop_width

-    image_height = int(crop_height / args.scale_factor)
-    image_width = int(crop_width / args.scale_factor)
+    image_height = args.image_height
+    image_width = args.image_width
     clip_depth = args.clip_depth

     yolo_model = YoloModel('onsd')
@@ -71,7 +74,7 @@ if __name__ == "__main__":
         out_f.write(str(args))

     # splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width, clip_depth), mode='all_train')
-    splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width), crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode='all_train', n_splits=1)
+    splits, dataset = load_onsd_videos(args.batch_size, crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode='all_train', n_splits=1)
     train_idx, test_idx = list(splits)[0]

     train_loader = copy.deepcopy(dataset)
@@ -109,6 +112,15 @@ if __name__ == "__main__":
     else:
         filter_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

+    crop_amount = (crop_width - image_width)
+    assert crop_amount % 2 == 0
+    crop_amount = crop_amount // 2
+
+    data_augmentation = keras.Sequential([
+        keras.layers.RandomTranslation(0, 0.08),
+        keras.layers.Cropping2D((0, crop_amount))
+    ])
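The new augmentation jitters each frame horizontally by up to 8% of its width, then crops (crop_width - image_width) / 2 pixels from each side so every batch comes out at the training width; the assert guarantees the crop splits evenly. A sketch with the new default sizes:

import tensorflow as tf
from tensorflow import keras

crop_height, crop_width, image_width = 30, 300, 250  # new defaults above
crop_amount = (crop_width - image_width) // 2        # 25 px off each side

aug = keras.Sequential([
    keras.layers.RandomTranslation(0, 0.08),   # height_factor=0, width_factor=0.08
    keras.layers.Cropping2D((0, crop_amount)), # crop width only, keep full height
])

frames = tf.random.uniform((4, crop_height, crop_width, 1))
print(aug(frames, training=True).shape)  # (4, 30, 250, 1)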
     loss_log = []
     best_so_far = float('inf')
@@ -119,11 +131,15 @@ if __name__ == "__main__":
         num_iters = 0

+        average_activations = []
+
         for images, labels, width in tqdm(train_tf.shuffle(len(train_tf)).batch(args.batch_size)):
-            images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
+            images = tf.expand_dims(data_augmentation(tf.transpose(images, [0, 2, 3, 1])), axis=1)
             activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))]))
+            average_activations.append(float(tf.math.count_nonzero(activations)) / float(tf.math.reduce_prod(tf.shape(activations))))

             with tf.GradientTape() as tape:
                 recon = recon_model(activations)
                 loss = sparse_loss(images, recon, activations, images.shape[0], args.lam, args.stride)
@@ -159,7 +175,9 @@ if __name__ == "__main__":
             best_so_far = epoch_loss

         loss_log.append(epoch_loss)
-        print('epoch={}, epoch_loss={:.2f}, time={:.2f}'.format(epoch, epoch_loss, epoch_end - epoch_start))
+        sparsity = np.average(np.array(average_activations))
+        print('epoch={}, epoch_loss={:.2f}, time={:.2f}, average sparsity={:.2f}'.format(epoch, epoch_loss, epoch_end - epoch_start, sparsity))
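The logged "average sparsity" is the mean, over batches, of the fraction of nonzero activation coefficients. The same measure in isolation; the activation tensor shape here is an assumption:

import tensorflow as tf

activations = tf.random.uniform((8, 1, 1, 21, 16))           # assumed activation shape
activations = tf.where(activations > 0.9, activations, 0.0)  # zero out most entries

sparsity = float(tf.math.count_nonzero(activations)) / float(tf.math.reduce_prod(tf.shape(activations)))
print(sparsity)  # ~0.10: share of coefficients that fired; lower = sparser code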
     plt.plot(loss_log)
...
@@ -183,7 +183,7 @@ class ONSDGoodFramesLoader:
             for start_range, end_range in ranges:
                 for j in range(start_range, end_range, 5):
-                    if j == vc.size(1):
+                    if j >= vc.size(1):
                         break

                     frame = vc[:, j, :, :]
@@ -192,7 +192,7 @@ class ONSDGoodFramesLoader:
                     width_key = txt_label + '/' + width_key
                     width_key = width_key + '/' + str(j) + '.png'

                     if width_key not in onsd_widths:
-                        width = 0
+                        continue
                     else:
                         width = onsd_widths[width_key]
@@ -213,7 +213,7 @@ class ONSDGoodFramesLoader:
             elif label == 'Negatives':
                 label = np.array(0.0)

-            width = np.round(width / 30)
+            # width = np.round(width / 30)

             for frm in all_frames:
                 self.clips.append((label, frm.numpy(), self.videos[vid_idx][2], width))
@@ -264,7 +264,7 @@ class ONSDGoodFramesLoader:
         return [frame for _, frame, _, _ in self.clips]

     def get_widths(self):
-        return [width / self.max_width for _, _, _, width in self.clips]
+        return [width for _, _, _, width in self.clips]

     def __next__(self):
         if self.count < len(self.clips):
...