From 69e9d6ccb355dd2ee64766f4655a414533fc123b Mon Sep 17 00:00:00 2001
From: hannandarryl <hannandarryl@gmail.com>
Date: Fri, 9 Dec 2022 15:42:16 +0000
Subject: [PATCH] Eval fixes for MLP models
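
Make the MLP evaluation path work for the ONSD models:

* classifier_model.py: ONSDMLP takes a do_regression flag (linear output
  head for width regression, sigmoid for classification); add an ONSDConv
  head that convolves the 2-D sparse-activation map.
* load_data.py: drop the unused input_size/resize step and select
  ONSDGoodFramesLoader when training the width regressor.
* logistic_regression.py: replace sklearn's MLPClassifier with scikeras
  KerasClassifier/KerasRegressor wrappers around ONSDMLP, with optional
  flattened activations and regression targets/metrics.
* train_MLP.py: train via compile()/fit(); add regression, flatten,
  2-D-activation, and valid-video options; report average code sparsity.
* train_sparse_model.py: new kernel/crop defaults, random-translation plus
  crop augmentation, and per-epoch sparsity logging.
* video_loader.py: guard frame indexing, skip frames without width
  annotations, and return unnormalized widths.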

---
 sparse_coding_torch/onsd/classifier_model.py  |  53 ++-
 sparse_coding_torch/onsd/load_data.py         |  10 +-
 .../onsd/logistic_regression.py               | 137 +++++--
 sparse_coding_torch/onsd/train_MLP.py         | 346 +++++++++++-------
 .../onsd/train_sparse_model.py                |  42 ++-
 sparse_coding_torch/onsd/video_loader.py      |   8 +-
 6 files changed, 411 insertions(+), 185 deletions(-)

diff --git a/sparse_coding_torch/onsd/classifier_model.py b/sparse_coding_torch/onsd/classifier_model.py
index a1c72fc..70dbe71 100644
--- a/sparse_coding_torch/onsd/classifier_model.py
+++ b/sparse_coding_torch/onsd/classifier_model.py
@@ -61,24 +61,63 @@ class ONSDClassifier(keras.layers.Layer):
 
         return class_pred, width_pred
     
+class ONSDConv(keras.layers.Layer):
+    def __init__(self, do_regression):
+        super(ONSDConv, self).__init__()
+        
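+        # Convolutional head over the 2-D sparse-activation map; note that
+        # only conv_1 is currently applied in call().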
+#         self.ff_dropout = keras.layers.Dropout(0.1)
+        self.conv_1 = keras.layers.Conv2D(8, kernel_size=(1, 4), strides=1, activation='relu', padding='valid')
+#         self.max_pool = keras.layers.MaxPooling2D(
+        self.conv_2 = keras.layers.Conv2D(8, kernel_size=(1, 4), strides=1, activation='relu', padding='valid')
+
+        self.flatten = keras.layers.Flatten()
+
+#         self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
+#         self.ff_2 = keras.layers.Dense(500, activation='relu', use_bias=True)
+        self.ff_2 = keras.layers.Dense(10, activation='relu', use_bias=True)
+#         self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
+        if do_regression:
+            self.ff_final_1 = keras.layers.Dense(1)
+        else:
+            self.ff_final_1 = keras.layers.Dense(1, activation='sigmoid')
+        self.do_dropout = True
+
+#     @tf.function
+    def call(self, activations):
+        x = self.conv_1(activations)
+        
+        x = self.flatten(x)
+        
+        x = self.ff_2(x)
+#         x = self.ff_dropout(x, self.do_dropout)
+#         x = self.ff_3(x)
+        class_pred = self.ff_final_1(x)
+
+        return class_pred
+    
 class ONSDMLP(keras.layers.Layer):
-    def __init__(self):
+    def __init__(self, do_regression):
         super(ONSDMLP, self).__init__()
         
-        self.ff_dropout = keras.layers.Dropout(0.1)
+#         self.ff_dropout = keras.layers.Dropout(0.1)
 
 #         self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
 #         self.ff_2 = keras.layers.Dense(500, activation='relu', use_bias=True)
-        self.ff_2 = keras.layers.Dense(16, activation='relu', use_bias=True)
-        self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
-        self.ff_final_1 = keras.layers.Dense(1)
+        self.ff_2 = keras.layers.Dense(8, activation='relu', use_bias=True)
+#         self.ff_3 = keras.layers.Dense(8, activation='relu', use_bias=True)
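+        # Linear output head for width regression; sigmoid head for binary
+        # classification.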
+        if do_regression:
+            self.ff_final_1 = keras.layers.Dense(1)
+        else:
+            self.ff_final_1 = keras.layers.Dense(1, activation='sigmoid')
         self.do_dropout = True
 
 #     @tf.function
     def call(self, activations):
         x = self.ff_2(activations)
-        x = self.ff_dropout(x, self.do_dropout)
-        x = self.ff_3(x)
+#         x = self.ff_dropout(x, self.do_dropout)
+#         x = self.ff_3(x)
         class_pred = self.ff_final_1(x)
 
         return class_pred
diff --git a/sparse_coding_torch/onsd/load_data.py b/sparse_coding_torch/onsd/load_data.py
index b422d63..451966e 100644
--- a/sparse_coding_torch/onsd/load_data.py
+++ b/sparse_coding_torch/onsd/load_data.py
@@ -9,13 +9,12 @@ from typing import Sequence, Iterator
 import csv
 from sklearn.model_selection import train_test_split, GroupShuffleSplit, LeaveOneGroupOut, LeaveOneOut, StratifiedGroupKFold, StratifiedKFold, KFold, ShuffleSplit
     
-def load_onsd_videos(batch_size, input_size, crop_size, yolo_model=None, mode=None, n_splits=None):   
+def load_onsd_videos(batch_size, crop_size, yolo_model=None, mode=None, n_splits=None, do_regression=False):   
     video_path = "/shared_data/bamc_onsd_data/revised_extended_onsd_data"
     
     transforms = torchvision.transforms.Compose(
     [torchvision.transforms.Grayscale(1),
-     MinMaxScaler(0, 255),
-     torchvision.transforms.Resize(input_size[:2])
+     MinMaxScaler(0, 255)
     ])
 #     augment_transforms = torchvision.transforms.Compose(
 #     [torchvision.transforms.RandomRotation(45),
@@ -23,7 +22,10 @@ def load_onsd_videos(batch_size, input_size, crop_size, yolo_model=None, mode=No
 #      torchvision.transforms.RandomAdjustSharpness(0.05)
      
 #     ])
-    dataset = ONSDAllFramesLoader(video_path, crop_size[1], crop_size[0], transform=transforms, yolo_model=yolo_model)
+    if do_regression:
+        dataset = ONSDGoodFramesLoader(video_path, crop_size[1], crop_size[0], transform=transforms, yolo_model=yolo_model)
+    else:
+        dataset = ONSDAllFramesLoader(video_path, crop_size[1], crop_size[0], transform=transforms, yolo_model=yolo_model)
     
     targets = dataset.get_labels()
     
diff --git a/sparse_coding_torch/onsd/logistic_regression.py b/sparse_coding_torch/onsd/logistic_regression.py
index 0dcc679..e04314f 100644
--- a/sparse_coding_torch/onsd/logistic_regression.py
+++ b/sparse_coding_torch/onsd/logistic_regression.py
@@ -10,7 +10,7 @@ import os
 from sparse_coding_torch.onsd.load_data import load_onsd_videos
 from sparse_coding_torch.utils import SubsetWeightedRandomSampler, get_sample_weights
 from sparse_coding_torch.sparse_model import SparseCode, ReconSparse, normalize_weights, normalize_weights_3d
-from sparse_coding_torch.onsd.classifier_model import ONSDClassifier
+from sparse_coding_torch.onsd.classifier_model import ONSDMLP
 from sparse_coding_torch.onsd.video_loader import get_yolo_region_onsd
 import time
 import numpy as np
@@ -30,6 +30,8 @@ from sklearn.neural_network import MLPClassifier
 from sklearn import metrics
 from sklearn.preprocessing import normalize
 
+from scikeras.wrappers import KerasClassifier, KerasRegressor
+
 tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
 
 if __name__ == "__main__":
@@ -54,6 +56,8 @@ if __name__ == "__main__":
     parser.add_argument('--scale_factor', type=int, default=2)
     parser.add_argument('--clip_depth', type=int, default=1)
     parser.add_argument('--frames_to_skip', type=int, default=1)
+    parser.add_argument('--flatten', action='store_true')
+    parser.add_argument('--regression', action='store_true')
     
     args = parser.parse_args()
     
@@ -89,10 +93,9 @@ if __name__ == "__main__":
     output = SparseCode(batch_size=args.batch_size, image_height=image_height, image_width=image_width, clip_depth=clip_depth, in_channels=1, out_channels=args.num_kernels, kernel_height=args.kernel_height, kernel_width=args.kernel_width, kernel_depth=args.kernel_depth, stride=args.stride, lam=args.lam, activation_lr=args.activation_lr, max_activation_iter=args.max_activation_iter, run_2d=False)(inputs, filter_inputs)
 
     sparse_model = keras.Model(inputs=(inputs, filter_inputs), outputs=output)
-    recon_model = keras.models.load_model(args.sparse_checkpoint)
-        
+    recon_model = keras.models.load_model(args.sparse_checkpoint)
     
-    splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width), crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits)
+    splits, dataset = load_onsd_videos(args.batch_size, crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits, do_regression=args.regression)
     positive_class = 'Positives'
     
 #     difficult_vids = split_difficult_vids(dataset.get_difficult_vids(), args.n_splits)
@@ -134,49 +137,91 @@ if __name__ == "__main__":
         
 #         clf = LogisticRegression(max_iter=1000)
 #         clf = RidgeClassifier(alpha=3.0)
-        clf = MLPClassifier(hidden_layer_sizes=(16,))
+#         clf = MLPClassifier(hidden_layer_sizes=(16,))
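+        # Wrap the Keras MLP with scikeras so it slots into the sklearn-style
+        # fit/predict flow used below.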
+        if args.flatten:
+            classifier_inputs = keras.Input(shape=(args.num_kernels * ((image_height - args.kernel_height) // args.stride + 1),))
+        else:
+            classifier_inputs = keras.Input(shape=(args.num_kernels,))
+        classifier_outputs = ONSDMLP(args.regression)(classifier_inputs)
+
+        classifier_model = keras.Model(inputs=classifier_inputs, outputs=classifier_outputs)
+        if args.regression:
+            clf = KerasRegressor(classifier_model, loss='mean_squared_error', optimizer='adam', epochs=200, verbose=False)
+        else:
+            clf = KerasClassifier(classifier_model, loss='binary_crossentropy', optimizer='adam', epochs=200, verbose=False)
         
-        train_filter_activations = [[] for _ in range(args.num_kernels)]
+#         train_filter_activations = [[] for _ in range(args.num_kernels)]
+        train_filter_activations = []
 
         for images, labels, width in tqdm(train_tf.shuffle(len(train_tf)).batch(batch_size)):
             images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
 
             activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))])).numpy()
             
-            for b_idx in range(activations.shape[0]):
-                acts = np.squeeze(activations[b_idx])
+            activations = tf.squeeze(activations, axis=1)
+            activations = tf.squeeze(activations, axis=2)
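+            # Either keep every per-position activation (flatten) or sum over
+            # the spatial axis to get one total activation per sparse filter.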
+            if args.flatten:
+                activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
+            else:
+                activations = tf.math.reduce_sum(activations, axis=1)
+            
+            for b_idx, act in enumerate(activations):
+                if args.regression:
+                    train_filter_activations.append((act, width[b_idx]))
+                else:
+                    train_filter_activations.append((act, labels[b_idx]))
+            
+#             for b_idx in range(activations.shape[0]):
+#                 acts = np.squeeze(activations[b_idx])
 
-                for i in range(args.num_kernels):
-                    acts_for_filter = acts[:, i]
+#                 for i in range(args.num_kernels):
+#                     acts_for_filter = acts[:, i]
 
-                    act_sum = np.sum(acts_for_filter)
+#                     act_sum = np.sum(acts_for_filter)
 
-                    train_filter_activations[i].append((act_sum, float(labels[b_idx])))
+#                     train_filter_activations[i].append((act_sum, float(labels[b_idx])))
                 
-        test_filter_activations = [[] for _ in range(args.num_kernels)]
+#         test_filter_activations = [[] for _ in range(args.num_kernels)]
+        test_filter_activations = []
                 
         for images, labels, width in tqdm(test_tf.batch(args.batch_size)):
             images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
 
             activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))])).numpy()
             
-            for b_idx in range(activations.shape[0]):
-                acts = np.squeeze(activations[b_idx])
+            activations = tf.squeeze(activations, axis=1)
+            activations = tf.squeeze(activations, axis=2)
+            if args.flatten:
+                activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
+            else:
+                activations = tf.math.reduce_sum(activations, axis=1)
+            
+            for b_idx, act in enumerate(activations):
+                if args.regression:
+                    test_filter_activations.append((act, width[b_idx]))
+                else:
+                    test_filter_activations.append((act, labels[b_idx]))
+            
+#             for b_idx in range(activations.shape[0]):
+#                 acts = np.squeeze(activations[b_idx])
 
-                for i in range(args.num_kernels):
-                    acts_for_filter = acts[:, i]
+#                 for i in range(args.num_kernels):
+#                     acts_for_filter = acts[:, i]
 
-                    act_sum = np.sum(acts_for_filter)
+#                     act_sum = np.sum(acts_for_filter)
 
-                    test_filter_activations[i].append((act_sum, float(labels[b_idx])))
+#                     test_filter_activations[i].append((act_sum, float(labels[b_idx])))
                 
         train_X = []
         train_y = []
 
-        for i in range(len(train_filter_activations[0])):
-            x = np.array([train_filter_activations[j][i][0] for j in range(args.num_kernels)])
-            label = train_filter_activations[0][i][1]
+#         for i in range(len(train_filter_activations[0])):
+#             x = np.array([train_filter_activations[j][i][0] for j in range(args.num_kernels)])
+#             label = train_filter_activations[0][i][1]
             
+#             train_X.append(x)
+#             train_y.append(label)
+        for x, label in train_filter_activations:
             train_X.append(x)
             train_y.append(label)
 
@@ -186,10 +231,14 @@ if __name__ == "__main__":
         test_X = []
         test_y = []
 
-        for i in range(len(test_filter_activations[0])):
-            x = np.array([test_filter_activations[j][i][0] for j in range(args.num_kernels)])
-            label = test_filter_activations[0][i][1]
+#         for i in range(len(test_filter_activations[0])):
+#             x = np.array([test_filter_activations[j][i][0] for j in range(args.num_kernels)])
+#             label = test_filter_activations[0][i][1]
+            
+#             test_X.append(x)
+#             test_y.append(label)
             
+        for x, label in test_filter_activations:
             test_X.append(x)
             test_y.append(label)
 
@@ -217,8 +266,21 @@ if __name__ == "__main__":
             test_gt_all = np.concatenate([test_gt_all, test_y])
             
         if args.splits == 'leave_one_out':
-            video_gt = np.array([test_y[0]])
-            video_pred = np.array([np.round(np.average(test_pred))])
+            if args.regression:
+                video_gt = np.average(test_y)
+                if video_gt >= 100:
+                    video_gt = np.array([1])
+                else:
+                    video_gt = np.array([0])
+
+                video_pred = np.array([np.average(test_pred)])
+                if video_pred >= 100:
+                    video_pred = np.array([1])
+                else:
+                    video_pred = np.array([0])
+            else:
+                video_gt = np.array([test_y[0]])
+                video_pred = np.array([np.round(np.average(test_pred))])
             
             if video_pred_all is None:
                 video_pred_all = video_pred
@@ -238,18 +300,29 @@ if __name__ == "__main__":
             frame_pred_all = np.concatenate([frame_pred_all, frame_pred])
             frame_gt_all = np.concatenate([frame_gt_all, frame_gt])
 
-        train_acc = metrics.accuracy_score(train_pred, train_y)
-        test_acc = metrics.accuracy_score(test_pred, test_y)
+        if args.regression:
+            train_acc = metrics.mean_absolute_error(train_pred, train_y)
+            test_acc = metrics.mean_absolute_error(test_pred, test_y)
+        else:
+            train_acc = metrics.accuracy_score(train_pred, train_y)
+            test_acc = metrics.accuracy_score(test_pred, test_y)
 
         print('i_fold={}, train_acc={:.2f}, test_acc={:.2f}'.format(i_fold, train_acc, test_acc))
         
     print('Final Predictions!')
     
-    train_accuracy = metrics.accuracy_score(train_pred_all, train_gt_all)
-    test_accuracy = metrics.accuracy_score(test_pred_all, test_gt_all)
-    frame_accuracy = metrics.accuracy_score(frame_pred_all, frame_gt_all)
+    if args.regression:
+        train_accuracy = metrics.mean_absolute_error(train_pred_all, train_gt_all)
+        test_accuracy = metrics.mean_absolute_error(test_pred_all, test_gt_all)
+        frame_accuracy = metrics.mean_absolute_error(frame_pred_all, frame_gt_all)
+    else:
+        train_accuracy = metrics.accuracy_score(train_pred_all, train_gt_all)
+        test_accuracy = metrics.accuracy_score(test_pred_all, test_gt_all)
+        frame_accuracy = metrics.accuracy_score(frame_pred_all, frame_gt_all)
     
     if args.splits == 'leave_one_out':
+        print(video_pred_all)
+        print(video_gt_all)
         video_accuracy = metrics.accuracy_score(video_pred_all, video_gt_all)
         
         print('train_acc={:.2f}, test_acc={:.2f}, frame_acc={:.2f}, video_acc={:.2f}'.format(train_accuracy, test_accuracy, frame_accuracy, video_accuracy))
diff --git a/sparse_coding_torch/onsd/train_MLP.py b/sparse_coding_torch/onsd/train_MLP.py
index 1ad78c4..afdc46f 100644
--- a/sparse_coding_torch/onsd/train_MLP.py
+++ b/sparse_coding_torch/onsd/train_MLP.py
@@ -10,11 +10,11 @@ import os
 from sparse_coding_torch.onsd.load_data import load_onsd_videos
 from sparse_coding_torch.utils import SubsetWeightedRandomSampler, get_sample_weights
 from sparse_coding_torch.sparse_model import SparseCode, ReconSparse, normalize_weights, normalize_weights_3d
-from sparse_coding_torch.onsd.classifier_model import ONSDMLP
-from sparse_coding_torch.onsd.video_loader import get_yolo_region_onsd
+from sparse_coding_torch.onsd.classifier_model import ONSDMLP, ONSDConv
+from sparse_coding_torch.onsd.video_loader import get_yolo_region_onsd, get_participants
 import time
 import numpy as np
-from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
+from sklearn.metrics import f1_score, accuracy_score, confusion_matrix, mean_absolute_error
 import random
 import pickle
 # from sparse_coding_torch.onsd.train_sparse_model import sparse_loss
@@ -25,10 +25,77 @@ import glob
 import cv2
 import copy
 import matplotlib.pyplot as plt
+import itertools
+import csv
 
 tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+import absl.logging
+absl.logging.set_verbosity(absl.logging.ERROR)
 
-def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height, max_width):
+def calculate_onsd_scores_measured(input_videos, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height):
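+    # Evaluate each video from a single stored frame image (the first .png in
+    # the video's directory) rather than from the full clip.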
+    
+    all_preds = []
+    all_gt = []
+    fp = []
+    fn = []
+
+    for vid_f in tqdm(input_videos):
+        split_path = vid_f.split('/')
+        frame_path = '/'.join(split_path[:-1])
+        label = split_path[-3]
+        f = [png_file for png_file in os.listdir(frame_path) if png_file.endswith('.png')][0]
+
+        frame = torch.tensor(cv2.imread(os.path.join(frame_path, f))).swapaxes(2, 1).swapaxes(1, 0)
+
+        frame = get_yolo_region_onsd(yolo_model, frame, crop_width, crop_height, False)
+        if not frame:
+            continue
+
+        frame = frame[0]
+
+        frame = transform(frame).to(torch.float32).unsqueeze(3).unsqueeze(1).numpy()
+
+        activations = tf.stop_gradient(sparse_model([frame, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))]))
+            
+        activations = tf.squeeze(activations, axis=1)
+        activations = tf.squeeze(activations, axis=2)
+        activations = tf.math.reduce_sum(activations, axis=1)
+
+        pred = classifier_model.predict(activations)
+
+        pred = tf.math.round(pred)
+
+        final_pred = float(pred)
+
+        all_preds.append(final_pred)
+
+        if label == 'Positives':
+            all_gt.append(1.0)
+            if final_pred == 0.0:
+                fn.append(f)
+        elif label == 'Negatives':
+            all_gt.append(0.0)
+            if final_pred == 1.0:
+                fp.append(f)
+            
+    return np.array(all_preds), np.array(all_gt), fn, fp
+
+def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height, max_width, flatten, do_regression, activations_2d, use_valid, valid_vids):
     all_predictions = []
     
     numerical_labels = []
@@ -42,6 +109,9 @@ def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sp
     fp_ids = []
     fn_ids = []
     for v_idx, f in tqdm(enumerate(input_videos)):
+        if use_valid and get_participants([f])[0] not in valid_vids:
+            continue
+        
         vc = torchvision.io.read_video(f)[0].permute(3, 0, 1, 2)
         
         all_classes = []
@@ -51,7 +121,9 @@ def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sp
         
         all_yolo = [get_yolo_region_onsd(yolo_model, frame, crop_width, crop_height, False) for frame in all_frames]
         
-        all_yolo = [yolo[0] for yolo in all_yolo if yolo is not None]
+        all_yolo = list(itertools.chain.from_iterable([y for y in all_yolo if y is not None]))
+        
+#         all_yolo = [yolo[0] for yolo in all_yolo if yolo is not None]
         
         for i in range(0, len(all_yolo), 32):
             batch = torch.stack(all_yolo[i:i+32])
@@ -62,16 +134,35 @@ def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sp
             
             activations = tf.squeeze(activations, axis=1)
             activations = tf.squeeze(activations, axis=2)
-            activations = tf.math.reduce_sum(activations, axis=1)
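+            # Reshape the activations to match the feature layout the
+            # classifier was trained on (flattened, 2-D map, or filter sums).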
+            if flatten:
+                activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
+            elif activations_2d:
+                activations = tf.expand_dims(activations, axis=3)
+            else:
+                activations = tf.math.reduce_sum(activations, axis=1)
             
-            pred = classifier_model(activations)
+            pred = classifier_model.predict(activations)
 
-            pred = tf.math.round(tf.math.sigmoid(pred))
+#             if not do_regression:
+#                 pred = tf.math.round(pred)
 #             width_pred = tf.math.round(width_pred * max_width)
             
             all_classes.append(pred)
             
-        final_pred = np.round(np.average(np.concatenate(all_classes)))
+        if do_regression:
+            final_pred = np.average(np.concatenate(all_classes))
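+            # Score the video positive when its mean predicted width reaches
+            # the decision threshold.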
+            if final_pred >= 100:
+                final_pred = np.array([1])
+            else:
+                final_pred = np.array([0])
+        else:
+            final_pred = np.round(np.average(np.concatenate(all_classes)))
 #         print(all_widths)
 #         average_width = np.average(np.array(all_widths))
 #         print(average_width)
@@ -92,7 +183,7 @@ def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, sp
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--batch_size', default=128, type=int)
+    parser.add_argument('--batch_size', default=200, type=int)
     parser.add_argument('--kernel_width', default=150, type=int)
     parser.add_argument('--kernel_height', default=10, type=int)
     parser.add_argument('--kernel_depth', default=1, type=int)
@@ -100,8 +191,8 @@ if __name__ == "__main__":
     parser.add_argument('--stride', default=1, type=int)
     parser.add_argument('--max_activation_iter', default=300, type=int)
     parser.add_argument('--activation_lr', default=1e-2, type=float)
-    parser.add_argument('--lr', default=5e-2, type=float)
-    parser.add_argument('--epochs', default=15, type=int)
+    parser.add_argument('--lr', default=0.001, type=float)
+    parser.add_argument('--epochs', default=200, type=int)
     parser.add_argument('--lam', default=0.05, type=float)
     parser.add_argument('--output_dir', default='./output', type=str)
     parser.add_argument('--sparse_checkpoint', default=None, type=str)
@@ -118,6 +209,10 @@ if __name__ == "__main__":
     parser.add_argument('--scale_factor', type=int, default=2)
     parser.add_argument('--clip_depth', type=int, default=1)
     parser.add_argument('--frames_to_skip', type=int, default=1)
+    parser.add_argument('--do_regression', action='store_true')
+    parser.add_argument('--flatten', action='store_true')
+    parser.add_argument('--activations_2d', action='store_true')
+    parser.add_argument('--valid_vids', action='store_true')
     
     args = parser.parse_args()
     
@@ -141,6 +236,16 @@ if __name__ == "__main__":
         
     with open(os.path.join(output_dir, 'arguments.txt'), 'w+') as out_f:
         out_f.write(str(args))
+        
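+    # Collect videos that have at least one annotated good frame; used to
+    # filter evaluation videos when --valid_vids is set.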
+    valid_vids = set()
+    with open('sparse_coding_torch/onsd/good_frames_onsd.csv', 'r') as valid_in:
+        reader = csv.DictReader(valid_in)
+        for row in reader:
+            vid = row['video'].strip()
+            good_frames = row['good_frames'].strip()
+            
+            if good_frames:
+                valid_vids.add(vid)
     
     yolo_model = YoloModel(args.dataset)
 
@@ -163,28 +268,44 @@ if __name__ == "__main__":
     ])
         
     
-    splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width), crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits)
+    splits, dataset = load_onsd_videos(args.batch_size, crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode=args.splits, n_splits=args.n_splits, do_regression=args.do_regression)
     positive_class = 'Positives'
     
+    all_video_labels = [f.split('/')[-3] for f in dataset.get_all_videos()]
+    print('{} videos with positive labels.'.format(len([lbl for lbl in all_video_labels if lbl == 'Positives'])))
+    print('{} videos with negative labels.'.format(len([lbl for lbl in all_video_labels if lbl == 'Negatives'])))
+    
 #     difficult_vids = split_difficult_vids(dataset.get_difficult_vids(), args.n_splits)
 
     print('Processing frames...')
     sparse_codes = []
+    total_acts = 0
+    total_non_zero = 0
     frames = dataset.get_frames()
     for i in tqdm(range(0, len(frames), 32)):
         frame = tf.stack(frames[i:i+32])
         frame = tf.expand_dims(data_augmentation(tf.transpose(frame, [0, 2, 3, 1])), axis=1)
 
         activations = tf.stop_gradient(sparse_model([frame, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))])).numpy()
+        
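+        # Track the fraction of non-zero activations to report average code
+        # sparsity after processing.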
+        total_non_zero += float(tf.math.count_nonzero(activations))
+        total_acts += float(tf.math.reduce_prod(tf.shape(activations)))
 
         activations = tf.squeeze(activations, axis=1)
         activations = tf.squeeze(activations, axis=2)
-        activations = tf.math.reduce_sum(activations, axis=1)
+
+        if args.flatten:
+            activations = tf.reshape(activations, (-1, activations.shape[1] * activations.shape[2]))
+        elif args.activations_2d:
+            activations = tf.expand_dims(activations, axis=3)
+        else:
+            activations = tf.math.reduce_sum(activations, axis=1)
         
         for act in activations:
             sparse_codes.append(act)
 
     assert len(sparse_codes) == len(frames)
+    print('Average sparsity is: {}'.format(total_non_zero / total_acts))
 
     video_true = []
     video_pred = []
@@ -214,17 +335,27 @@ if __name__ == "__main__":
             
         train_sparse_codes = [sc for i, sc in enumerate(sparse_codes) if i in train_idx]
         test_sparse_codes = [sc for i, sc in enumerate(sparse_codes) if i in test_idx]
-
-        train_tf = tf.data.Dataset.from_tensor_slices((train_sparse_codes, train_loader.get_labels(), train_loader.get_widths()))
-        test_tf = tf.data.Dataset.from_tensor_slices((test_sparse_codes, test_loader.get_labels(), test_loader.get_widths()))
         
-        print('{} train videos.'.format(len(train_tf)))
-        print('{} positive videos.'.format(len(list(train_tf.filter(lambda features, label, width: label==1)))))
-        print('{} negative videos.'.format(len(list(train_tf.filter(lambda features, label, width: label==0)))))
-        print('-----------------')
-        print('{} test videos.'.format(len(test_tf)))
-        print('{} positive videos.'.format(len(list(test_tf.filter(lambda features, label, width: label==1)))))
-        print('{} negative videos.'.format(len(list(test_tf.filter(lambda features, label, width: label==0)))))
+        if args.do_regression:
+            train_x = tf.stack(train_sparse_codes)
+            test_x = tf.stack(test_sparse_codes)
+            
+            train_y = tf.stack(train_loader.get_widths())
+            test_y = tf.stack(test_loader.get_widths())
+        else:
+            train_x = tf.stack(train_sparse_codes)
+            test_x = tf.stack(test_sparse_codes)
+            
+            train_y = tf.stack(train_loader.get_labels())
+            test_y = tf.stack(test_loader.get_labels())
+        
+#         print('{} train frames.'.format(len(train_x)))
+#         print('{} positive frames.'.format(len(list(train_y.filter(lambda features, label, width: label==1)))))
+#         print('{} negative frames.'.format(len(list(train_y.filter(lambda features, label, width: label==0)))))
+#         print('-----------------')
+#         print('{} test frames.'.format(len(test_tf)))
+#         print('{} positive frames.'.format(len(list(test_tf.filter(lambda features, label, width: label==1)))))
+#         print('{} negative frames.'.format(len(list(test_tf.filter(lambda features, label, width: label==0)))))
         
 
 #         negative_ds = (
@@ -242,110 +373,63 @@ if __name__ == "__main__":
         if args.checkpoint:
             classifier_model = keras.models.load_model(args.checkpoint)
         else:
-            classifier_inputs = keras.Input(shape=(args.num_kernels))
-            classifier_outputs = ONSDMLP()(classifier_inputs)
+            if args.flatten:
+                classifier_inputs = keras.Input(shape=(args.num_kernels * ((image_height - args.kernel_height) // args.stride + 1),))
+            elif args.activations_2d:
+                classifier_inputs = keras.Input(shape=(((image_height - args.kernel_height) // args.stride + 1), args.num_kernels, 1))
+            else:
+                classifier_inputs = keras.Input(shape=(args.num_kernels,))
+                
+            if args.activations_2d:
+                classifier_outputs = ONSDConv(args.do_regression)(classifier_inputs)
+            else:
+                classifier_outputs = ONSDMLP(args.do_regression)(classifier_inputs)
 
             classifier_model = keras.Model(inputs=classifier_inputs, outputs=classifier_outputs)
-
-        prediction_optimizer = keras.optimizers.Adam(learning_rate=args.lr)
-
-        best_so_far = float('inf')
-
-        class_criterion = keras.losses.BinaryCrossentropy(from_logits=True, reduction=keras.losses.Reduction.SUM)
-#         width_criterion = keras.losses.MeanSquaredError(reduction=keras.losses.Reduction.SUM)
-
-        train_losses = []
-        test_losses = []
-        
-        train_accuracies = []
-        test_accuracies = []
+            
+        if not args.do_regression:
+            criterion = keras.losses.BinaryCrossentropy()
+        else:
+            criterion = keras.losses.MeanSquaredError()
+
+        classifier_model.compile(optimizer=keras.optimizers.Adam(learning_rate=args.lr), loss=criterion)
         
-#         train_mse = []
-#         test_mse = []
         if args.train:
-            for epoch in range(args.epochs):
-                epoch_loss = 0
-                t1 = time.perf_counter()
-
-#                 for images, labels, width in tqdm(balanced_ds.shuffle(len(train_tf)).batch(args.batch_size)):
-#                 for images, labels, width in tqdm(balanced_ds.take(len(train_tf)).shuffle(len(train_tf)).batch(args.batch_size)):
-                classifier_model.do_dropout = True
-                for activations, labels, width in train_tf.shuffle(len(train_tf)).batch(args.batch_size):
-                    with tf.GradientTape() as tape:
-                        class_pred = classifier_model(activations)
-                        class_loss = class_criterion(labels, class_pred)
-#                         width_loss = width_criterion(width, width_pred * width_mask)
-                        loss = class_loss
-
-                    epoch_loss += loss * activations.shape[0]
-
-                    gradients = tape.gradient(loss, classifier_model.trainable_weights)
-
-                    prediction_optimizer.apply_gradients(zip(gradients, classifier_model.trainable_weights))
-
-                t2 = time.perf_counter()
-                
-                if epoch_loss < best_so_far:
-                    print("found better model")
-                    # Save model parameters
-                    classifier_model.save(os.path.join(output_dir, "best_classifier_{}.pt".format(i_fold)))
-#                     recon_model.save(os.path.join(output_dir, "best_sparse_model_{}.pt".format(i_fold)))
-#                     pickle.dump(prediction_optimizer.get_weights(), open(os.path.join(output_dir, 'optimizer_{}.pt'.format(i_fold)), 'wb+'))
-                    best_so_far = epoch_loss
-    
-            classifier_model = keras.models.load_model(os.path.join(output_dir, "best_classifier_{}.pt".format(i_fold)))
-        
-        y_true_train = None
-        y_pred_train = None
-        
-        y_true_test = None
-        y_pred_test = None
-
-        classifier_model.do_dropout = False
-        for activations, labels, width in train_tf.batch(args.batch_size):
-            pred = classifier_model(activations)
-
-            if y_true_train is None:
-                y_true_train = labels
-                y_pred_train = tf.math.round(tf.math.sigmoid(pred))
-            else:
-                y_true_train = tf.concat((y_true_train, labels), axis=0)
-                y_pred_train = tf.concat((y_pred_train, tf.math.round(tf.math.sigmoid(pred))), axis=0)
-
-        for activations, labels, width in test_tf.batch(args.batch_size):
-            pred = classifier_model(activations)
-
-            if y_true_test is None:
-                y_true_test = labels
-                y_pred_test = tf.math.round(tf.math.sigmoid(pred))
-            else:
-                y_true_test = tf.concat((y_true_test, labels), axis=0)
-                y_pred_test = tf.concat((y_pred_test, tf.math.round(tf.math.sigmoid(pred))), axis=0)
+            classifier_model.fit(train_x, train_y, batch_size=args.batch_size, epochs=args.epochs, verbose=False)
 
-        t2 = time.perf_counter()
-
-        y_true_test = tf.cast(y_true_test, tf.int32)
-        y_pred_test = tf.cast(y_pred_test, tf.int32)
-
-        y_true_train = tf.cast(y_true_train, tf.int32)
-        y_pred_train = tf.cast(y_pred_train, tf.int32)
+        y_true_train = train_y
+        if args.do_regression:
+            y_pred_train = classifier_model.predict(train_x)
+        else:
+            y_pred_train = np.round(classifier_model.predict(train_x))
         
         train_frame_true.append(y_true_train)
         train_frame_pred.append(y_pred_train)
         
+        y_true_test = test_y
+        if args.do_regression:
+            y_pred_test = classifier_model.predict(test_x)
+        else:
+            y_pred_test = np.round(classifier_model.predict(test_x))
+        
         test_frame_true.append(y_true_test)
         test_frame_pred.append(y_pred_test)
 
-        f1 = f1_score(y_true_test, y_pred_test, average='macro')
-        accuracy = accuracy_score(y_true_test, y_pred_test)
+        t2 = time.perf_counter()
 
-        train_accuracy = accuracy_score(y_true_train, y_pred_train)
+        if args.do_regression:
+            f1 = 0.0
+            accuracy = mean_absolute_error(y_true_test, y_pred_test)
+            train_accuracy = mean_absolute_error(y_true_train, y_pred_train)
+        else:
+            f1 = f1_score(y_true_test, y_pred_test, average='macro')
+            accuracy = accuracy_score(y_true_test, y_pred_test)
 
-#                 test_mae = keras.losses.MeanAbsoluteError()(width_gt, width_p)
-        test_mae = 0.0
+            train_accuracy = accuracy_score(y_true_train, y_pred_train)
 
-        train_accuracies.append(train_accuracy)
-        test_accuracies.append(accuracy)
+#         train_accuracies.append(train_accuracy)
+#         test_accuracies.append(accuracy)
 
         pred_dict = {}
         gt_dict = {}
@@ -362,27 +446,33 @@ if __name__ == "__main__":
         test_labels = [vid_f.split('/')[-3] for vid_f in test_videos]
 
         classifier_model.do_dropout = False
-        y_pred, y_true, fn, fp = calculate_onsd_scores(test_videos, test_labels, yolo_model, classifier_model, sparse_model, recon_model, transform, image_width, image_height, 0)
+        max_width = 0
+        if args.do_regression:
+            max_width = dataset.max_width
+        y_pred, y_true, fn, fp = calculate_onsd_scores(test_videos, test_labels, yolo_model, classifier_model, sparse_model, recon_model, transform, crop_width, crop_height, max_width, args.flatten, args.do_regression, args.activations_2d, args.valid_vids, valid_vids)
+#         y_pred, y_true, fn, fp = calculate_onsd_scores_measured(test_videos, yolo_model, classifier_model, sparse_model, recon_model, transform, image_width, image_height)
             
         t2 = time.perf_counter()
 
         print('i_fold={}, time={:.2f}'.format(i_fold, t2-t1))
+        
+        if np.size(y_pred):
 
-        y_true = tf.cast(y_true, tf.int32)
-        y_pred = tf.cast(y_pred, tf.int32)
+            y_true = tf.cast(y_true, tf.int32)
+            y_pred = tf.cast(y_pred, tf.int32)
 
-        f1 = f1_score(y_true, y_pred, average='macro')
-        accuracy = accuracy_score(y_true, y_pred)
+            f1 = f1_score(y_true, y_pred, average='macro')
+            vid_accuracy = accuracy_score(y_true, y_pred)
 
-        video_fn.extend(fn)
-        video_fp.extend(fp)
+            video_fn.extend(fn)
+            video_fp.extend(fp)
 
-        video_true.extend(y_true)
-        video_pred.extend(y_pred)
+            video_true.extend(y_true)
+            video_pred.extend(y_pred)
 
-        print("Test f1={:.2f}, vid_acc={:.2f}".format(f1, accuracy))
+            print("Test f1={:.2f}, vid acc={:.2f}, train acc={:.2f}, test acc={:.2f}".format(f1, vid_accuracy, train_accuracy, accuracy))
 
-        print(confusion_matrix(y_true, y_pred))
+            print(confusion_matrix(y_true, y_pred))
         
 #         plt.clf()
 #         plt.figure()
@@ -418,8 +508,12 @@ if __name__ == "__main__":
     test_frame_true = np.concatenate(test_frame_true)
     test_frame_pred = np.concatenate(test_frame_pred)
     
-    train_frame_acc = accuracy_score(train_frame_true, train_frame_pred)
-    test_frame_acc = accuracy_score(test_frame_true, test_frame_pred)
+    if args.do_regression:
+        train_frame_acc = mean_absolute_error(train_frame_true, train_frame_pred)
+        test_frame_acc = mean_absolute_error(test_frame_true, test_frame_pred)
+    else:
+        train_frame_acc = accuracy_score(train_frame_true, train_frame_pred)
+        test_frame_acc = accuracy_score(test_frame_true, test_frame_pred)
             
     print("Final video accuracy={:.2f}, video f1={:.2f}, frame train accuracy={:.2f}, frame test accuracy={:.2f}".format(final_acc, final_f1, train_frame_acc, test_frame_acc))
     print(final_conf)
diff --git a/sparse_coding_torch/onsd/train_sparse_model.py b/sparse_coding_torch/onsd/train_sparse_model.py
index b3452ab..68f8bd7 100644
--- a/sparse_coding_torch/onsd/train_sparse_model.py
+++ b/sparse_coding_torch/onsd/train_sparse_model.py
@@ -16,6 +16,8 @@ from sparse_coding_torch.utils import plot_filters
 from yolov4.get_bounding_boxes import YoloModel
 import copy
 
+tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
+
 def sparse_loss(images, recon, activations, batch_size, lam, stride):
     loss = 0.5 * (1/batch_size) * tf.math.reduce_sum(tf.math.pow(images - recon, 2))
     loss += lam * tf.reduce_mean(tf.math.reduce_sum(tf.math.abs(tf.reshape(activations, (batch_size, -1))), axis=1))
@@ -24,24 +26,25 @@ def sparse_loss(images, recon, activations, batch_size, lam, stride):
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
     parser.add_argument('--batch_size', default=32, type=int)
-    parser.add_argument('--kernel_width', default=150, type=int)
-    parser.add_argument('--kernel_height', default=10, type=int)
+    parser.add_argument('--kernel_width', default=60, type=int)
+    parser.add_argument('--kernel_height', default=30, type=int)
     parser.add_argument('--kernel_depth', default=1, type=int)
-    parser.add_argument('--num_kernels', default=10, type=int)
+    parser.add_argument('--num_kernels', default=16, type=int)
     parser.add_argument('--stride', default=1, type=int)
     parser.add_argument('--max_activation_iter', default=300, type=int)
     parser.add_argument('--activation_lr', default=1e-2, type=float)
     parser.add_argument('--lr', default=0.003, type=float)
-    parser.add_argument('--epochs', default=150, type=int)
-    parser.add_argument('--lam', default=0.05, type=float)
+    parser.add_argument('--epochs', default=200, type=int)
+    parser.add_argument('--lam', default=0.1, type=float)
     parser.add_argument('--output_dir', default='./output', type=str)
     parser.add_argument('--seed', default=42, type=int)
     parser.add_argument('--run_2d', action='store_true')
     parser.add_argument('--save_filters', action='store_true')
     parser.add_argument('--optimizer', default='sgd', type=str)
-    parser.add_argument('--crop_height', type=int, default=100)
+    parser.add_argument('--crop_height', type=int, default=30)
     parser.add_argument('--crop_width', type=int, default=300)
-    parser.add_argument('--scale_factor', type=int, default=2)
+    parser.add_argument('--image_height', type=int, default=30)
+    parser.add_argument('--image_width', type=int, default=250)
     parser.add_argument('--clip_depth', type=int, default=1)
     parser.add_argument('--frames_to_skip', type=int, default=1)
     
@@ -55,8 +58,8 @@ if __name__ == "__main__":
     crop_height = args.crop_height
     crop_width = args.crop_width
 
-    image_height = int(crop_height / args.scale_factor)
-    image_width = int(crop_width / args.scale_factor)
+    image_height = args.image_height
+    image_width = args.image_width
     clip_depth = args.clip_depth
     
     yolo_model = YoloModel('onsd')
@@ -71,7 +74,7 @@ if __name__ == "__main__":
         out_f.write(str(args))
 
 #     splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width, clip_depth), mode='all_train')
-    splits, dataset = load_onsd_videos(args.batch_size, input_size=(image_height, image_width), crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode='all_train', n_splits=1)
+    splits, dataset = load_onsd_videos(args.batch_size, crop_size=(crop_height, crop_width), yolo_model=yolo_model, mode='all_train', n_splits=1)
     train_idx, test_idx = list(splits)[0]
     
     train_loader = copy.deepcopy(dataset)
@@ -108,6 +111,15 @@ if __name__ == "__main__":
         filter_optimizer = tf.keras.optimizers.SGD(learning_rate=learning_rate)
     else:
         filter_optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
+        
+    crop_amount = (crop_width - image_width)
+    assert crop_amount % 2 == 0
+    crop_amount = crop_amount // 2
+        
+    data_augmentation = keras.Sequential([
+        keras.layers.RandomTranslation(0, 0.08),
+        keras.layers.Cropping2D((0, crop_amount))
+    ])
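+    # Horizontal jitter followed by a width crop gives the sparse model
+    # shifted views of each frame at the training input width.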
 
     loss_log = []
     best_so_far = float('inf')
@@ -118,12 +130,16 @@ if __name__ == "__main__":
         epoch_start = time.perf_counter()
         
         num_iters = 0
+        
+        average_activations = []
 
         for images, labels, width in tqdm(train_tf.shuffle(len(train_tf)).batch(args.batch_size)):
-            images = tf.expand_dims(tf.transpose(images, [0, 2, 3, 1]), axis=1)
+            images = tf.expand_dims(data_augmentation(tf.transpose(images, [0, 2, 3, 1])), axis=1)
                 
             activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.trainable_weights[0], axis=0))]))
             
+            average_activations.append(float(tf.math.count_nonzero(activations)) / float(tf.math.reduce_prod(tf.shape(activations))))
+            
             with tf.GradientTape() as tape:
                 recon = recon_model(activations)
                 loss = sparse_loss(images, recon, activations, images.shape[0], args.lam, args.stride)
@@ -159,7 +175,9 @@ if __name__ == "__main__":
             best_so_far = epoch_loss
 
         loss_log.append(epoch_loss)
-        print('epoch={}, epoch_loss={:.2f}, time={:.2f}'.format(epoch, epoch_loss, epoch_end - epoch_start))
+        
+        sparsity = np.average(np.array(average_activations))
+        print('epoch={}, epoch_loss={:.2f}, time={:.2f}, average sparsity={:.2f}'.format(epoch, epoch_loss, epoch_end - epoch_start, sparsity))
 
     plt.plot(loss_log)
 
diff --git a/sparse_coding_torch/onsd/video_loader.py b/sparse_coding_torch/onsd/video_loader.py
index 877fe29..74d5d97 100644
--- a/sparse_coding_torch/onsd/video_loader.py
+++ b/sparse_coding_torch/onsd/video_loader.py
@@ -183,7 +183,7 @@ class ONSDGoodFramesLoader:
                     
                     for start_range, end_range in ranges:
                         for j in range(start_range, end_range, 5):
-                            if j == vc.size(1):
+                            if j >= vc.size(1):
                                 break
                             frame = vc[:, j, :, :]
 
@@ -192,7 +192,7 @@ class ONSDGoodFramesLoader:
                             width_key = txt_label + '/' + width_key
                             width_key = width_key + '/' + str(j) + '.png'
                             if width_key not in onsd_widths:
-                                width = 0
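+                                # No measured width for this frame; skip it
+                                # rather than defaulting to zero.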
+                                continue
                             else:
                                 width = onsd_widths[width_key]
 
@@ -213,7 +213,7 @@ class ONSDGoodFramesLoader:
                             elif label == 'Negatives':
                                 label = np.array(0.0)
 
-                            width = np.round(width / 30)
+#                             width = np.round(width / 30)
 
                             for frm in all_frames:
                                 self.clips.append((label, frm.numpy(), self.videos[vid_idx][2], width))
@@ -264,7 +264,7 @@ class ONSDGoodFramesLoader:
         return [frame for _, frame, _, _ in self.clips]
     
     def get_widths(self):
-        return [width / self.max_width for _, _, _, width in self.clips]
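+        # Widths are returned in raw measurement units (no max-width scaling).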
+        return [width for _, _, _, width in self.clips]
     
     def __next__(self):
         if self.count < len(self.clips):
-- 
GitLab