Commit 5fa412af authored by hannandarryl

merge conflicts resolved

parents 90a6f48a aca5c0b7
File added
@@ -8,49 +8,56 @@ import torch
import torch.nn as nn
from sparse_coding_torch.video_loader import VideoGrayScaler, MinMaxScaler
from sparse_coding_torch.conv_sparse_model import ConvSparseLayer
from sparse_coding_torch.small_data_classifier import SmallDataClassifierConv3d
from keras_model import MobileModel
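# Build a Keras graph around MobileModel with a fixed-shape video input so the PyTorch
# classifier weights can be copied in below and the model exported to TFLite.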
inputs = keras.Input(shape=(100, 200, 5))
inputs = keras.Input(shape=(5, 100, 200, 3))
outputs = MobileModel(sparse_checkpoint='../sparse.pt', batch_size=1, in_channels=1, out_channels=64, kernel_size=15, stride=1, lam=0.05, activation_lr=1e-2, max_activation_iter=40, run_2d=True)(inputs)
outputs = MobileModel(sparse_checkpoint='../sparse.pt', batch_size=1, in_channels=1, out_channels=64, kernel_size=15, stride=2, lam=0.05, activation_lr=1e-1, max_activation_iter=100, run_2d=True)(inputs)
# outputs = tf.math.add(inputs, 1)
model = keras.Model(inputs=inputs, outputs=outputs)
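# Copy the classifier weights from the PyTorch checkpoint into the matching Keras layers.
# PyTorch stores Conv2d kernels as (out, in, kH, kW) and Linear weights as (out, in), while
# Keras expects (kH, kW, in, out) and (in, out), hence the axis reordering below.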
pytorch_checkpoint = torch.load('../output/final_model_75_iter/model-best_fold_0.pt', map_location='cpu')['model_state_dict']
conv_weights = [pytorch_checkpoint['module.compress_activations_conv_1.weight'].view(8, 8, 64, 24).numpy(), pytorch_checkpoint['module.compress_activations_conv_1.bias'].numpy()]
pytorch_checkpoint = torch.load('../stride_2_100_iter.pt', map_location='cpu')['model_state_dict']
conv_weights = [pytorch_checkpoint['module.compress_activations_conv_1.weight'].squeeze(2).swapaxes(0, 2).swapaxes(1, 3).swapaxes(2, 3).numpy(), pytorch_checkpoint['module.compress_activations_conv_1.bias'].numpy()]
model.get_layer('mobile_model').classifier.conv.set_weights(conv_weights)
ff_1_weights = [pytorch_checkpoint['module.fc1.weight'].permute(1,0).numpy(), pytorch_checkpoint['module.fc1.bias'].numpy()]
model.get_layer('mobile_model').classifier.ff_1.set_weights(ff_1_weights)
ff_2_weights = [pytorch_checkpoint['module.fc2.weight'].permute(1,0).numpy(), pytorch_checkpoint['module.fc2.bias'].numpy()]
model.get_layer('mobile_model').classifier.ff_2.set_weights(ff_2_weights)
ff_3_weights = [pytorch_checkpoint['module.fc3.weight'].permute(1,0).numpy(), pytorch_checkpoint['module.fc3.bias'].numpy()]
# # ff_1_weights = [pytorch_checkpoint['module.fc1.weight'].swapaxes(1,0).numpy(), pytorch_checkpoint['module.fc1.bias'].numpy()]
# # model.get_layer('mobile_model').classifier.ff_1.set_weights(ff_1_weights)
# # ff_2_weights = [pytorch_checkpoint['module.fc2.weight'].swapaxes(1,0).numpy(), pytorch_checkpoint['module.fc2.bias'].numpy()]
# # model.get_layer('mobile_model').classifier.ff_2.set_weights(ff_2_weights)
ff_3_weights = [pytorch_checkpoint['module.fc3.weight'].swapaxes(1,0).numpy(), pytorch_checkpoint['module.fc3.bias'].numpy()]
model.get_layer('mobile_model').classifier.ff_3.set_weights(ff_3_weights)
ff_4_weights = [pytorch_checkpoint['module.fc4.weight'].permute(1,0).numpy(), pytorch_checkpoint['module.fc4.bias'].numpy()]
ff_4_weights = [pytorch_checkpoint['module.fc4.weight'].swapaxes(1,0).numpy(), pytorch_checkpoint['module.fc4.bias'].numpy()]
model.get_layer('mobile_model').classifier.ff_4.set_weights(ff_4_weights)
# frozen_sparse = ConvSparseLayer(in_channels=1,
# out_channels=64,
# kernel_size=(5, 15, 15),
# stride=1,
# padding=(0, 7, 7),
# stride=2,
# padding=0,
# convo_dim=3,
# rectifier=True,
# lam=0.05,
# max_activation_iter=10,
# activation_lr=1e-2)
# max_activation_iter=100,
# activation_lr=1e-1)
#
# sparse_param = torch.load('../sparse.pt', map_location='cpu')
# frozen_sparse.load_state_dict(sparse_param['model_state_dict'])
#
# # pytorch_filter = frozen_sparse.filters[30, :, 0, :, :].squeeze(0).unsqueeze(2).detach().numpy()
# # keras_filter = model.get_layer('sparse_code').filter[0,:,:,:,30].numpy()
# #
# # cv2.imwrite('pytorch_filter.png', pytorch_filter / np.max(pytorch_filter) * 255.)
# # cv2.imwrite('keras_filter.png', keras_filter / np.max(keras_filter) * 255.)
# # raise Exception
# predictive_model = SmallDataClassifierConv3d()
# classifier_param = {k.replace('module.', ''): v for k,v in torch.load('../stride_2_100_iter.pt', map_location='cpu')['model_state_dict'].items()}
# predictive_model.load_state_dict(classifier_param)
#
# predictive_model.eval()
# #
# # # pytorch_filter = frozen_sparse.filters[30, :, 0, :, :].squeeze(0).unsqueeze(2).detach().numpy()
# # # keras_filter = model.get_layer('sparse_code').filter[0,:,:,:,30].numpy()
# # #
# # # cv2.imwrite('pytorch_filter.png', pytorch_filter / np.max(pytorch_filter) * 255.)
# # # cv2.imwrite('keras_filter.png', keras_filter / np.max(keras_filter) * 255.)
# # # raise Exception
# #
# img = tv.io.read_video('../clips/No_Sliding/Image_262499828648_clean1050.mp4')[0].permute(3, 0, 1, 2)
# transform = tv.transforms.Compose(
# [VideoGrayScaler(),
@@ -59,12 +66,13 @@ model.get_layer('mobile_model').classifier.ff_4.set_weights(ff_4_weights)
# tv.transforms.CenterCrop((100, 200))
# ])
# img = transform(img)
#
# with torch.no_grad():
# activations = frozen_sparse(img.unsqueeze(0))
# activations, _ = predictive_model(frozen_sparse(img.unsqueeze(0)).squeeze(2))
# activations = torch.nn.Sigmoid()(activations)
#
# output = model(img.swapaxes(1, 3).swapaxes(1,2).numpy())
#
# print(activations.size())
# print(output.shape)
# print(torch.sum(activations))
@@ -72,7 +80,7 @@ model.get_layer('mobile_model').classifier.ff_4.set_weights(ff_4_weights)
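# Pin a static input shape on the Keras model so the TFLite converter sees fully defined dimensions.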
input_name = model.input_names[0]
index = model.input_names.index(input_name)
model.inputs[index].set_shape([1, 100, 200, 5])
model.inputs[index].set_shape([1, 5, 100, 200, 3])
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# converter.experimental_new_converter = True
......
@@ -40,7 +40,7 @@ def conv_error(filters, e, stride):
return g
# @tf.function
@tf.function
def conv_error_3d(filters, e, stride):
# e = tf.pad(e, paddings=[[0,0], [0, 0], [7, 7], [7, 7], [0, 0]])
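# Correlate the reconstruction error with the filters to get the gradient w.r.t. the activations.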
g = tf.nn.conv3d(e, filters, strides=[1, 1, stride, stride, 1], padding='VALID')
@@ -59,7 +59,7 @@ def normalize_weights(filters, out_channels):
return adjusted
# @tf.function
@tf.function
def normalize_weights_3d(filters, out_channels):
#for f in filters:
# print('filters 3d shape', f.shape)
@@ -82,6 +82,7 @@ class SparseCode(keras.layers.Layer):
self.out_channels = out_channels
self.in_channels = in_channels
self.kernel_size = kernel_size
self.stride = stride
self.lam = lam
self.activation_lr = activation_lr
@@ -125,14 +126,10 @@ class SparseCode(keras.layers.Layer):
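# Bias-corrected Adam moments and the resulting update step applied to the activations u.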
mh = m / (1 - tf.math.pow(b1, (1+i)))
vh = v / (1 - tf.math.pow(b2, (1+i)))
du = self.activation_lr * mh / (tf.math.sqrt(vh) + eps)
u += du
# i += 1
# return images, filters, u, m, v, b1, b2, eps, i
return u, m, v
# @tf.function
@@ -146,16 +143,11 @@ class SparseCode(keras.layers.Layer):
u = tf.zeros(shape=output_shape)
m = tf.zeros(shape=output_shape)
v = tf.zeros(shape=output_shape)
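# Start each call from zero activations and zero Adam moment buffers; b1, b2 and eps below are the standard Adam constants.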
# tf.print('activations before:', tf.reduce_sum(u))
b1 = tf.constant(0.9, dtype='float32')
b2 = tf.constant(0.99, dtype='float32')
eps = tf.constant(1e-8, dtype='float32')
# print(u)
# i = tf.constant(0, dtype='float32')
# c = lambda images, filters, u, m, v, b1, b2, eps, i: tf.less(i, self.max_activation_iter)
# images, filters, u, m, v, b1, b2, eps, i = tf.while_loop(c, self.do_update, [images, filters, u, m, v, b1, b2, eps, i])
@@ -213,14 +205,14 @@ class Classifier(keras.layers.Layer):
super(Classifier, self).__init__()
self.max_pool = keras.layers.MaxPooling2D(pool_size=4, strides=4)
self.conv = keras.layers.Conv2D(24, kernel_size=8, strides=4, activation='relu', padding='SAME')
self.conv = keras.layers.Conv2D(24, kernel_size=8, strides=4, activation='relu', padding='valid')
self.flatten = keras.layers.Flatten()
self.dropout = keras.layers.Dropout(0.5)
self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
self.ff_2 = keras.layers.Dense(100, activation='relu', use_bias=True)
# self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
# self.ff_2 = keras.layers.Dense(100, activation='relu', use_bias=True)
self.ff_3 = keras.layers.Dense(20, activation='relu', use_bias=True)
self.ff_4 = keras.layers.Dense(1, activation='sigmoid')
@@ -230,10 +222,10 @@ class Classifier(keras.layers.Layer):
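# Forward pass of the classifier head: pooled activations -> strided conv -> flatten -> dense layers -> sigmoid probability.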
x = self.max_pool(activations)
x = self.conv(x)
x = self.flatten(x)
x = self.ff_1(x)
x = self.dropout(x)
x = self.ff_2(x)
x = self.dropout(x)
# # x = self.ff_1(x)
# # x = self.dropout(x)
# # x = self.ff_2(x)
# # x = self.dropout(x)
x = self.ff_3(x)
x = self.dropout(x)
x = self.ff_4(x)
@@ -269,6 +261,11 @@ class MobileModel(keras.Model):
@tf.function
def call(self, images):
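# Preprocess the clip: convert RGB frames to grayscale, move the 5 frames onto the channel axis,
# scale to [0, 1], then normalize with the mean/std (0.2592, 0.1251) also used in the PyTorch transforms.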
images = tf.squeeze(tf.image.rgb_to_grayscale(images), axis=-1)
images = tf.transpose(images, perm=[0, 2, 3, 1])
images = images / 255
images = (images - 0.2592) / 0.1251
if self.run_2d:
activations = self.sparse_code(images, [tf.stop_gradient(self.filters_1), tf.stop_gradient(self.filters_2), tf.stop_gradient(self.filters_3), tf.stop_gradient(self.filters_4), tf.stop_gradient(self.filters_5)])
else:
......
import torch
import os
import time
import numpy as np
import torchvision
from sparse_coding_torch.video_loader import VideoGrayScaler, MinMaxScaler
from torchvision.datasets.video_utils import VideoClips
import csv
from datetime import datetime
from yolov4.get_bounding_boxes import YoloModel
import argparse
import tensorflow as tf
import scipy.stats
import cv2
if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Classify sliding vs. no sliding for each video in input_videos.')
parser.add_argument('--fast', action='store_true',
help='optimized for runtime')
parser.add_argument('--accurate', action='store_true',
help='optimized for accuracy')
parser.add_argument('--verbose', action='store_true',
help='output verbose')
args = parser.parse_args()
#print(args.accumulate(args.integers))
device = 'cpu'
batch_size = 1
interpreter = tf.lite.Interpreter("keras/mobile_output/tf_lite_model.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
yolo_model = YoloModel()
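# The TFLite interpreter runs the exported classifier; the YOLO model supplies bounding boxes used to crop each clip.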
transform = torchvision.transforms.Compose(
[VideoGrayScaler(),
# MinMaxScaler(0, 255),
# torchvision.transforms.Normalize((0.2592,), (0.1251,)),
torchvision.transforms.CenterCrop((100, 200))
])
all_predictions = []
all_files = list(os.listdir('input_videos'))
for f in all_files:
print('Processing', f)
#start_time = time.time()
clipstride = 15
if args.fast:
clipstride = 20
if args.accurate:
clipstride = 10
vc = VideoClips([os.path.join('input_videos', f)],
clip_length_in_frames=5,
frame_rate=20,
frames_between_clips=clipstride)
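# Split the video into 5-frame clips sampled at 20 fps; clipstride sets how many frames apart consecutive clips start (larger stride = fewer clips = faster).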
### START time after loading video ###
start_time = time.time()
clip_predictions = []
i = 0
cliplist = []
countclips = 0
for i in range(vc.num_clips()):
clip, _, _, _ = vc.get_clip(i)
clip = clip.swapaxes(1, 3).swapaxes(0, 1).swapaxes(2, 3).numpy()
bounding_boxes = yolo_model.get_bounding_boxes(clip[:, 2, :, :].swapaxes(0, 2).swapaxes(0, 1)).squeeze(0)
# for bb in bounding_boxes:
# print(bb[1])
if bounding_boxes.size == 0:
continue
#widths = []
countclips = countclips + len(bounding_boxes)
widths = [(bounding_boxes[i][3] - bounding_boxes[i][1]) for i in range(len(bounding_boxes))]
#for i in range(len(bounding_boxes)):
# widths.append(bounding_boxes[i][3] - bounding_boxes[i][1])
ind = np.argmax(np.array(widths))
#for bb in bounding_boxes:
bb = bounding_boxes[ind]
center_x = (bb[3] + bb[1]) / 2 * 1920
center_y = (bb[2] + bb[0]) / 2 * 1080
width=400
height=400
lower_y = round(center_y - height / 2)
upper_y = round(center_y + height / 2)
lower_x = round(center_x - width / 2)
upper_x = round(center_x + width / 2)
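# Crop a fixed 400x400 window centered on the widest YOLO detection (box coordinates are normalized, hence the 1920x1080 scaling above).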
trimmed_clip = clip[:, :, lower_y:upper_y, lower_x:upper_x]
trimmed_clip = torch.tensor(trimmed_clip).to(torch.float)
trimmed_clip = transform(trimmed_clip)
# tensor_to_write = trimmed_clip.swapaxes(0, 1).swapaxes(1, 2).swapaxes(2, 3)
# tensor_to_write[0][0][0][0] = 100
# tensor_to_write[0][0][0][1] = 100
# tensor_to_write[0][0][0][2] = 100
# torchvision.io.write_video('clips_to_test_swift/' + str(countclips) + '.mp4', tensor_to_write, fps=20)
# countclips += 1
# trimmed_clip.pin_memory()
cliplist.append(trimmed_clip)
if len(cliplist) > 0:
with torch.no_grad():
for trimmed_clip in cliplist:
interpreter.set_tensor(input_details[0]['index'], trimmed_clip)
interpreter.invoke()
output_array = np.array(interpreter.get_tensor(output_details[0]['index']))
pred = output_array[0][0]
print(pred)
clip_predictions.append(pred.round())
if args.verbose:
print(clip_predictions)
print("num of clips: ", countclips)
final_pred = scipy.stats.mode(clip_predictions)[0][0]
# if len(clip_predictions) % 2 == 0 and torch.sum(clip_predictions).item() == len(clip_predictions)//2:
# #print("I'm here")
# final_pred = (torch.nn.Sigmoid()(pred)).mean().round().detach().cpu().to(torch.long).item()
if final_pred == 1:
str_pred = 'No Sliding'
else:
str_pred = 'Sliding'
else:
str_pred = "No Sliding"
end_time = time.time()
print(str_pred)
all_predictions.append({'FileName': f, 'Prediction': str_pred, 'TotalTimeSec': end_time - start_time})
with open('output_' + datetime.now().strftime("%Y%m%d-%H%M%S") + '.csv', 'w+', newline='') as csv_out:
writer = csv.DictWriter(csv_out, fieldnames=all_predictions[0].keys())
writer.writeheader()
writer.writerows(all_predictions)
File added