diff --git a/setup.cfg b/setup.cfg
index 80007652319751997e8eb94212c7615763321224..5cf9a85a7c6c830d70d0c6528d0cf2f68726e80a 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -2,7 +2,7 @@
 name = sparse_coding_torch
 author = Christopher J. MacLellan
 author-email = christopher.maclellan@drexel.edu
-summary = A library for doing sparse coding using PyTorch
+summary = A library for doing sparse coding using Keras
 description-file = README.rst
 description-content-type = text/x-rst; charset=UTF-8
 home-page = https://gitlab.cci.drexel.edu/teachable-ai-lab/sparse_coding_torch
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..f7c9db6eb67d60fc2be839926eb713a9d5fbeae0
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,6 @@
+from setuptools import setup
+
+setup(
+    setup_requires=['pbr'],
+    pbr=True
+)
\ No newline at end of file
diff --git a/keras/generate_tflite.py b/sparse_coding_torch/generate_tflite.py
similarity index 93%
rename from keras/generate_tflite.py
rename to sparse_coding_torch/generate_tflite.py
index ded7a3ec005f5e7bc6843c24e6894f9db615247b..53a69a09016f52b46fd1618b65252c25ddc0ab2e 100644
--- a/keras/generate_tflite.py
+++ b/sparse_coding_torch/generate_tflite.py
@@ -6,8 +6,8 @@ import cv2
 import torchvision as tv
 import torch
 import torch.nn as nn
-from video_loader import VideoGrayScaler, MinMaxScaler
-from keras_model import MobileModel
+from sparse_coding_torch.video_loader import VideoGrayScaler, MinMaxScaler
+from sparse_coding_torch.keras_model import MobileModel
 
 
 inputs = keras.Input(shape=(5, 100, 200, 3))
diff --git a/keras/keras_model.py b/sparse_coding_torch/keras_model.py
similarity index 99%
rename from keras/keras_model.py
rename to sparse_coding_torch/keras_model.py
index 6244314f2737e80f93044e31b37d3911f76d817b..d932cfe612800529a7d19b460832399b09b84bcc 100644
--- a/keras/keras_model.py
+++ b/sparse_coding_torch/keras_model.py
@@ -6,7 +6,7 @@ import cv2
 import torchvision as tv
 import torch
 import torch.nn as nn
-from video_loader import VideoGrayScaler, MinMaxScaler
+from sparse_coding_torch.video_loader import VideoGrayScaler, MinMaxScaler
 
 def load_pytorch_weights(file_path):
     pytorch_checkpoint = torch.load(file_path, map_location='cpu')
diff --git a/keras/load_data.py b/sparse_coding_torch/load_data.py
similarity index 89%
rename from keras/load_data.py
rename to sparse_coding_torch/load_data.py
index c9206f0b5d4629f978cbe08264cb103f49ded44c..c13c71ac563fec912b2d5e1864b0cbbd90d43ba5 100644
--- a/keras/load_data.py
+++ b/sparse_coding_torch/load_data.py
@@ -2,9 +2,9 @@ import numpy as np
 import torchvision
 import torch
 from sklearn.model_selection import train_test_split
-from video_loader import MinMaxScaler
-from video_loader import YoloClipLoader, get_video_participants, PNBLoader
-from video_loader import VideoGrayScaler
+from sparse_coding_torch.video_loader import MinMaxScaler
+from sparse_coding_torch.video_loader import YoloClipLoader, get_video_participants, PNBLoader
+from sparse_coding_torch.video_loader import VideoGrayScaler
 import csv
 from sklearn.model_selection import train_test_split, GroupShuffleSplit, LeaveOneGroupOut, LeaveOneOut, StratifiedGroupKFold, StratifiedKFold, KFold
 
@@ -54,7 +54,7 @@ def load_yolo_clips(batch_size, mode, num_clips=1, num_positives=100, device=Non
 
     return None
 
-def load_pnb_videos(batch_size, mode, device=None, n_splits=None, sparse_model=None):
+def load_pnb_videos(batch_size, mode, classify_mode=False, device=None, n_splits=None, sparse_model=None):
"/shared_data/bamc_pnb_data/full_training_data" transforms = torchvision.transforms.Compose( @@ -70,7 +70,7 @@ def load_pnb_videos(batch_size, mode, device=None, n_splits=None, sparse_model=N torchvision.transforms.RandomAffine(degrees=0, translate=(0.05, 0)) # torchvision.transforms.CenterCrop((100, 200)) ]) - dataset = PNBLoader(video_path, num_frames=5, frame_rate=20, transform=transforms) + dataset = PNBLoader(video_path, classify_mode, num_frames=5, frame_rate=20, transform=transforms, augmentation=augment_transforms) targets = dataset.get_labels() diff --git a/keras/train_classifier.py b/sparse_coding_torch/train_classifier.py similarity index 79% rename from keras/train_classifier.py rename to sparse_coding_torch/train_classifier.py index 028c13f1669969bb71c9a2a8cb2de2549659c91c..1c6d15a03401ecff9aa89574d9cc7ab0b6ee5cdf 100644 --- a/keras/train_classifier.py +++ b/sparse_coding_torch/train_classifier.py @@ -4,8 +4,8 @@ import torch.nn.functional as F from tqdm import tqdm import argparse import os -from load_data import load_yolo_clips, load_pnb_videos -from keras_model import SparseCode, Classifier, ReconSparse +from sparse_coding_torch.load_data import load_yolo_clips, load_pnb_videos +from sparse_coding_torch.keras_model import SparseCode, Classifier, ReconSparse import time import numpy as np from sklearn.metrics import f1_score, accuracy_score, confusion_matrix @@ -64,8 +64,13 @@ if __name__ == "__main__": if args.sparse_checkpoint: recon_model = keras.models.load_model(args.sparse_checkpoint) - splits, dataset = load_pnb_videos(args.batch_size, mode='k_fold', device=None, n_splits=args.n_splits, sparse_model=None) + splits, dataset = load_pnb_videos(args.batch_size, classify_mode=True, mode='k_fold', device=None, n_splits=args.n_splits, sparse_model=None) i_fold = 0 + + overall_true = [] + overall_pred = [] + fn_ids = [] + fp_ids = [] for train_idx, test_idx in splits: @@ -87,11 +92,6 @@ if __name__ == "__main__": classifier_model = keras.Model(inputs=classifier_inputs, outputs=classifier_outputs) - overall_true = [] - overall_pred = [] - fn_ids = [] - fp_ids = [] - best_so_far = float('inf') criterion = keras.losses.BinaryCrossentropy(from_logits=False) @@ -224,15 +224,17 @@ if __name__ == "__main__": epoch_loss += loss * local_batch.size(0) for i, v_f in enumerate(vid_f): - if v_f not in pred_dict: - pred_dict[v_f] = tf.math.round(tf.math.sigmoid(pred[i])) - else: - pred_dict[v_f] = tf.concat((pred_dict[v_f], tf.math.round(tf.math.sigmoid(pred[i]))), axis=0) - - if v_f not in gt_dict: - gt_dict[v_f] = tf.constant(torch_labels[i]) - else: - gt_dict[v_f] = tf.concat((gt_dict[v_f], torch_labels[i]), axis=0) + final_pred = tf.math.round(pred[i])[0] + gt = torch_labels[i] + + overall_true.append(gt) + overall_pred.append(final_pred) + + if final_pred != gt: + if final_pred == 0: + fn_ids.append(v_f) + else: + fp_ids.append(v_f) if y_true is None: y_true = torch_labels @@ -243,55 +245,6 @@ if __name__ == "__main__": t2 = time.perf_counter() - vid_acc = [] - for k in pred_dict.keys(): - print(k) - print(pred_dict[k]) - print(gt_dict[k]) - gt_mode = torch.mode(torch.tensor(gt_dict[k]))[0].item() - perm = torch.randperm(torch.tensor(pred_dict[k]).size(0)) - cutoff = int(torch.tensor(pred_dict[k]).size(0)/4) - if cutoff < 3: - cutoff = 3 - idx = perm[:cutoff] - samples = pred_dict[k][idx] - pred_mode = torch.mode(torch.tensor(samples))[0].item() - overall_true.append(gt_mode) - overall_pred.append(pred_mode) - if pred_mode == gt_mode: - vid_acc.append(1) - else: - vid_acc.append(0) - 
-
-                if pred_mode == 0:
-                    fn_ids.append(k)
-                else:
-                    fp_ids.append(k)
-
-        vid_acc = np.array(vid_acc)
-
-        print('----------------------------------------------------------------------------')
-        for k in pred_dict.keys():
-            print(k)
-            print('Predictions:')
-            print(pred_dict[k])
-            print('Ground Truth:')
-            print(gt_dict[k])
-            print('Overall Prediction:')
-            # pred_mode = 1
-            # contiguous_zeros = 0
-            # best_num = 0
-            # for val in pred_dict[k]:
-            #     if val.item() == 0:
-            #         contiguous_zeros += 1
-            #     else:
-            #         if contiguous_zeros > best_num:
-            #             best_num = contiguous_zeros
-            #         contiguous_zeros = 0
-            # if best_num >= 4 or contiguous_zeros >= 4:
-            #     pred_mode = 0
-            print(torch.mode(pred_dict[k])[0].item())
-        print('----------------------------------------------------------------------------')
-
         print('fold={}, loss={:.2f}, time={:.2f}'.format(i_fold, loss, t2-t1))
 
         y_true = tf.cast(y_true, tf.int32)
@@ -299,9 +252,8 @@ if __name__ == "__main__":
 
         f1 = f1_score(y_true, y_pred, average='macro')
         accuracy = accuracy_score(y_true, y_pred)
-        all_errors.append(np.sum(vid_acc) / len(vid_acc))
 
-        print("Test f1={:.2f}, clip_acc={:.2f}, vid_acc={:.2f} fold={}".format(f1, accuracy, np.sum(vid_acc) / len(vid_acc), i_fold))
+        print("Test f1={:.2f}, clip_acc={:.2f}, fold={}".format(f1, accuracy, i_fold))
 
         print(confusion_matrix(y_true, y_pred))
 
diff --git a/keras/train_sparse_model.py b/sparse_coding_torch/train_sparse_model.py
similarity index 96%
rename from keras/train_sparse_model.py
rename to sparse_coding_torch/train_sparse_model.py
index 9f9ec53a2c62b25827805bf7d9ff9a88f071e813..76226db631272bc98e9b13aa1c04663c40522e5b 100644
--- a/keras/train_sparse_model.py
+++ b/sparse_coding_torch/train_sparse_model.py
@@ -7,10 +7,10 @@ from matplotlib.animation import FuncAnimation
 from tqdm import tqdm
 import argparse
 import os
-from load_data import load_yolo_clips, load_pnb_videos
+from sparse_coding_torch.load_data import load_yolo_clips, load_pnb_videos
 import tensorflow.keras as keras
 import tensorflow as tf
-from keras_model import normalize_weights_3d, normalize_weights, SparseCode, load_pytorch_weights, ReconSparse
+from sparse_coding_torch.keras_model import normalize_weights_3d, normalize_weights, SparseCode, load_pytorch_weights, ReconSparse
 import random
 
 def plot_video(video):
@@ -134,7 +134,7 @@ if __name__ == "__main__":
         out_f.write(str(args))
 
     if args.dataset == 'pnb':
-        train_loader, _ = load_pnb_videos(args.batch_size, mode='all_train', device=device, n_splits=1, sparse_model=None)
+        train_loader, _ = load_pnb_videos(args.batch_size, classify_mode=False, mode='all_train', device=device, n_splits=1, sparse_model=None)
     elif args.dataset == 'ptx':
         train_loader, _ = load_yolo_clips(args.batch_size, num_clips=1, num_positives=15, mode='all_train', device=device, n_splits=1, sparse_model=None, whole_video=False, positive_videos='../positive_videos.json')
     else:
diff --git a/keras/video_loader.py b/sparse_coding_torch/video_loader.py
similarity index 92%
rename from keras/video_loader.py
rename to sparse_coding_torch/video_loader.py
index 3e9b2541a13c5bf38f8f9b6612c7ba130b1d4a87..96ff478fa5320f5d62b1e0b9bf3e6554eaa57fdf 100644
--- a/keras/video_loader.py
+++ b/sparse_coding_torch/video_loader.py
@@ -61,19 +61,15 @@ class VideoGrayScaler(nn.Module):
 
 
 class PNBLoader(Dataset):
 
-    def __init__(self, video_path, num_frames=5, frame_rate=20, frames_between_clips=None, transform=None):
+    def __init__(self, video_path, classify_mode=False, num_frames=5, frame_rate=20, frames_between_clips=None, transform=None, augmentation=None):
         self.transform = transform
+        self.augmentation = augmentation
 
         self.labels = [name for name in listdir(video_path) if isdir(join(video_path, name))]
         self.videos = []
         for label in self.labels:
             self.videos.extend([(label, abspath(join(video_path, label, f)), f) for f in glob.glob(join(video_path, label, '*', '*.mp4'))])
 
-        #for v in self.videos:
-        #    video, _, info = read_video(v[1])
-        #    print(video.shape)
-        #    print(info)
-
         if not frames_between_clips:
             frames_between_clips = num_frames
@@ -82,19 +78,25 @@ class PNBLoader(Dataset):
         self.video_idx = []
         vid_idx = 0
-        for _, path, _ in self.videos:
+        for _, path, _ in tqdm(self.videos):
             vc = tv.io.read_video(path)[0].permute(3, 0, 1, 2)
-#            for j in range(vc.size(1), vc.size(1) - 10, -5):
-            for j in range(0, vc.size(1) - 5, 5):
-#                if j-5 < 0:
-#                    continue
-#                vc_sub = vc_1 = vc[:, j-5:j, :, :]
-                vc_sub = vc[:, j:j+5, :, :]
+            if classify_mode:
+                if vc.size(1) < 5:
+                    continue
+                vc_sub = vc[:, -5:, :, :]
                 if self.transform:
                     vc_sub = self.transform(vc_sub)
 
                 self.clips.append((self.videos[vid_idx][0], vc_sub, self.videos[vid_idx][2]))
                 self.video_idx.append(vid_idx)
+            else:
+                for j in range(0, vc.size(1) - 5, 5):
+                    vc_sub = vc[:, j:j+5, :, :]
+                    if self.transform:
+                        vc_sub = self.transform(vc_sub)
+
+                    self.clips.append((self.videos[vid_idx][0], vc_sub, self.videos[vid_idx][2]))
+                    self.video_idx.append(vid_idx)
 
             vid_idx += 1
 
     def get_filenames(self):
@@ -107,7 +109,12 @@ class PNBLoader(Dataset):
         return [self.clips[i][0] for i in range(len(self.clips))]
 
     def __getitem__(self, index):
-        return self.clips[index]
+        label, clip, vid_f = self.clips[index]
+        if self.augmentation:
+            clip = clip.swapaxes(0, 1)
+            clip = self.augmentation(clip)
+            clip = clip.swapaxes(0, 1)
+        return (label, clip, vid_f)
 
     def __len__(self):
         return len(self.clips)
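
Note: the hunks above assume the package is importable as sparse_coding_torch; with the new setup.py, an editable install (pip install -e ., with pbr reading the metadata from setup.cfg) should make those imports resolve. Below is a small, hypothetical usage sketch of the reworked PNBLoader based on this diff; the data path and the RandomAffine augmentation are illustrative assumptions, not values taken from the repository.

    import torchvision
    from sparse_coding_torch.video_loader import PNBLoader

    # Illustrative path and augmentation; substitute your own.
    video_path = '/path/to/pnb_videos'
    augment = torchvision.transforms.RandomAffine(degrees=0, translate=(0.05, 0))

    # classify_mode=False: slide over each video in 5-frame steps
    # (the behavior used for sparse-coding pretraining in train_sparse_model.py).
    recon_dataset = PNBLoader(video_path, classify_mode=False, num_frames=5, frame_rate=20)

    # classify_mode=True: keep only the last 5 frames of each video
    # (the behavior used in train_classifier.py); augmentation, when given,
    # is applied per sample in __getitem__ with the frame and channel axes swapped around it.
    clf_dataset = PNBLoader(video_path, classify_mode=True, num_frames=5, frame_rate=20,
                            augmentation=augment)

    # Each item is a (label, clip, filename) tuple.
    label, clip, filename = clf_dataset[0]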