diff --git a/sparse_coding_torch/onsd/train_classifier.py b/sparse_coding_torch/onsd/train_classifier.py
index a879bf032944b5bec5f8a1b33a1bc907a5a888d3..210da9b885b8591a82f26b347a69707c87d3f08e 100644
--- a/sparse_coding_torch/onsd/train_classifier.py
+++ b/sparse_coding_torch/onsd/train_classifier.py
@@ -8,6 +8,7 @@ from sparse_coding_torch.onsd.load_data import load_onsd_videos
 from sparse_coding_torch.utils import SubsetWeightedRandomSampler, get_sample_weights
 from sparse_coding_torch.sparse_model import SparseCode, ReconSparse, normalize_weights, normalize_weights_3d
 from sparse_coding_torch.onsd.classifier_model import ONSDClassifier
+from sparse_coding_torch.onsd.video_loader import get_yolo_region_onsd
 import time
 import numpy as np
 from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
@@ -28,6 +29,52 @@ configproto.gpu_options.allow_growth = True
 sess = tf.compat.v1.Session(config=configproto)
 tf.compat.v1.keras.backend.set_session(sess)
 
+def calculate_onsd_scores(input_videos, labels, yolo_model, classifier_model, transform):
+    all_predictions = []
+
+    numerical_labels = []
+    for label in labels:
+        if label == 'Positives':
+            numerical_labels.append(1.0)
+        else:
+            numerical_labels.append(0.0)
+
+    final_list = []
+    fp_ids = []
+    fn_ids = []
+    for v_idx, f in tqdm(enumerate(input_videos)):
+        vc = torchvision.io.read_video(f)[0].permute(3, 0, 1, 2)
+
+        all_preds = []
+        for j in range(0, vc.size(1), 20):
+
+            vc_sub = vc[:, j, :, :]
+
+            frame = get_yolo_region_onsd(yolo_model, vc_sub)
+
+            if frame is None:
+                continue
+
+            frame = transform(frame).to(torch.float32).unsqueeze(3)
+
+            pred = tf.math.round(tf.math.sigmoid(classifier_model(frame)))
+
+            all_preds.append(pred)
+
+        if all_preds:
+            final_pred = np.round(np.mean(np.array(all_preds)))
+        else:
+            final_pred = 1.0
+
+        if final_pred != numerical_labels[v_idx]:
+            if final_pred == 0:
+                fn_ids.append(f)
+            else:
+                fp_ids.append(f)
+
+        final_list.append(final_pred)
+
+    return np.array(final_list), np.array(numerical_labels), fn_ids, fp_ids
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -256,7 +303,19 @@ if __name__ == "__main__":
 
             t1 = time.perf_counter()
 
 #             test_videos = [vid_f for labels, local_batch, vid_f in batch for batch in test_loader]
-            raise Exception('Not yet implemented')
+            transform = torchvision.transforms.Compose(
+            [torchvision.transforms.Grayscale(1),
+             MinMaxScaler(0, 255),
+             torchvision.transforms.Resize((image_height, image_width))
+            ])
+
+            test_videos = set()
+            for labels, local_batch, vid_f in test_loader:
+                test_videos.update(vid_f)
+
+            test_labels = [vid_f.split('/')[-3] for vid_f in test_videos]
+
+            y_pred, y_true, fn, fp = calculate_onsd_scores(test_videos, test_labels, yolo_model, classifier_model, transform)
 
             t2 = time.perf_counter()
diff --git a/sparse_coding_torch/pnb/classifier_model.py b/sparse_coding_torch/pnb/classifier_model.py
index 8f363e6e623f719dc27e005f77238ae6d1daae67..8d6f11fd2200ca104f8a8915a29d4d40dcd22028 100644
--- a/sparse_coding_torch/pnb/classifier_model.py
+++ b/sparse_coding_torch/pnb/classifier_model.py
@@ -55,8 +55,9 @@ class PNBClassifier(keras.layers.Layer):
 class PNBTemporalClassifier(keras.layers.Layer):
     def __init__(self):
         super(PNBTemporalClassifier, self).__init__()
-        self.conv_1 = keras.layers.Conv3D(24, kernel_size=(5, 200, 50), strides=(1, 1, 10), activation='relu', padding='valid')
-        self.conv_2 = keras.layers.Conv1D(48, kernel_size=8, strides=4, activation='relu', padding='valid')
+        self.conv_1 = keras.layers.Conv3D(24, kernel_size=(1, 200, 50), strides=(1, 1, 10), activation='relu', padding='valid')
+        self.conv_2 = keras.layers.Conv2D(36, kernel_size=(5, 10), strides=(1, 5), activation='relu', padding='valid')
+        self.conv_3 = keras.layers.Conv1D(48, kernel_size=2, strides=2, activation='relu', padding='valid')
 
         self.ff_1 = keras.layers.Dense(100, activation='relu', use_bias=True)
 
@@ -77,9 +78,10 @@ class PNBTemporalClassifier(keras.layers.Layer):
 #         x = tf.reshape(clip, (-1, height, width, 1))
 
         x = self.conv_1(x)
-        x = tf.squeeze(x, axis=1)
-        x = tf.squeeze(x, axis=1)
+        x = tf.squeeze(x, axis=2)
+        x = tf.reshape(x, (-1, 5, x.shape[2], x.shape[3]))
         x = self.conv_2(x)
+        x = self.conv_3(x)
 
         x = self.flatten(x)
         x = self.ff_1(x)
diff --git a/sparse_coding_torch/pnb/train_classifier.py b/sparse_coding_torch/pnb/train_classifier.py
index 556bd5c991b52eda1930384f61a24454860ff420..4255b170aaf654972b5d15ac27ec966d8836d590 100644
--- a/sparse_coding_torch/pnb/train_classifier.py
+++ b/sparse_coding_torch/pnb/train_classifier.py
@@ -8,6 +8,7 @@ from sparse_coding_torch.pnb.load_data import load_pnb_videos
 from sparse_coding_torch.utils import SubsetWeightedRandomSampler, get_sample_weights
 from sparse_coding_torch.sparse_model import SparseCode, ReconSparse, normalize_weights, normalize_weights_3d
 from sparse_coding_torch.pnb.classifier_model import PNBClassifier, PNBTemporalClassifier
+from sparse_coding_torch.pnb.video_loader import classify_nerve_is_right, get_needle_bb, get_yolo_regions
 import time
 import numpy as np
 from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
@@ -42,7 +43,7 @@ def calculate_pnb_scores(input_videos, labels, yolo_model, sparse_model, recon_m
     fp_ids = []
     fn_ids = []
     for v_idx, f in tqdm(enumerate(input_videos)):
-        vc = tv.io.read_video(f)[0].permute(3, 0, 1, 2)
+        vc = torchvision.io.read_video(f)[0].permute(3, 0, 1, 2)
 
         is_right = classify_nerve_is_right(yolo_model, vc)
         needle_bb = get_needle_bb(yolo_model, vc)
@@ -103,7 +104,7 @@ def calculate_pnb_scores_skipped_frames(input_videos, labels, yolo_model, sparse
     fp_ids = []
     fn_ids = []
     for v_idx, f in tqdm(enumerate(input_videos)):
-        vc = tv.io.read_video(f)[0].permute(3, 0, 1, 2)
+        vc = torchvision.io.read_video(f)[0].permute(3, 0, 1, 2)
 
         is_right = classify_nerve_is_right(yolo_model, vc)
         needle_bb = get_needle_bb(yolo_model, vc)
@@ -139,6 +140,8 @@ def calculate_pnb_scores_skipped_frames(input_videos, labels, yolo_model, sparse
             else:
                 fp_ids.append(f)
 
+        print(float(pred[0]))
+        raise Exception
         final_list.append(pred)
 
     return np.array(final_list), np.array(numerical_labels), fn_ids, fp_ids
@@ -407,6 +410,9 @@ if __name__ == "__main__":
 
             y_true = tf.cast(y_true, tf.int32)
             y_pred = tf.cast(y_pred, tf.int32)
+
+            print(y_true)
+            print(y_pred)
 
             f1 = f1_score(y_true, y_pred, average='macro')
             accuracy = accuracy_score(y_true, y_pred)
diff --git a/sparse_coding_torch/ptx/classifier_model.py b/sparse_coding_torch/ptx/classifier_model.py
index fefc2fcc6c63e72d14517cec5d0fe3cefefa8769..9abe6cbb41ca5dec247822017814537241477348 100644
--- a/sparse_coding_torch/ptx/classifier_model.py
+++ b/sparse_coding_torch/ptx/classifier_model.py
@@ -14,8 +14,8 @@ class PTXClassifier(keras.layers.Layer):
         super(PTXClassifier, self).__init__()
 
         self.max_pool = keras.layers.MaxPooling2D(pool_size=4, strides=4)
-        self.conv_1 = keras.layers.Conv2D(24, kernel_size=8, strides=4, activation='relu', padding='valid')
-#         self.conv_2 = keras.layers.Conv2D(24, kernel_size=4, strides=2, activation='relu', padding='valid')
+        self.conv_1 = keras.layers.Conv2D(48, kernel_size=8, strides=4, activation='relu', padding='valid')
+        self.conv_2 = keras.layers.Conv2D(24, kernel_size=4, strides=2, activation='relu', padding='valid')
 
         self.flatten = keras.layers.Flatten()
 
@@ -44,21 +44,29 @@ class PTXClassifier(keras.layers.Layer):
 
         return x
 
-class BaselinePTX(keras.layers.Layer):
+class Sampling(keras.layers.Layer):
+    """Uses (z_mean, z_log_var) to sample z, the vector encoding a digit."""
+
+    def call(self, inputs):
+        z_mean, z_log_var = inputs
+        batch = tf.shape(z_mean)[0]
+        dim = tf.shape(z_mean)[1]
+        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
+        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
+
+class VAEEncoderPTX(keras.layers.Layer):
     def __init__(self):
-        super(BaselinePTX, self).__init__()
+        super(VAEEncoderPTX, self).__init__()
 
         self.conv_1 = keras.layers.Conv3D(64, kernel_size=(5, 8, 8), strides=(1, 4, 4), activation='relu', padding='valid')
         self.conv_2 = keras.layers.Conv2D(24, kernel_size=4, strides=2, activation='relu', padding='valid')
 
         self.flatten = keras.layers.Flatten()
 
-        self.dropout = keras.layers.Dropout(0.5)
-
-#         self.ff_1 = keras.layers.Dense(1000, activation='relu', use_bias=True)
-#         self.ff_2 = keras.layers.Dense(500, activation='relu', use_bias=True)
-        self.ff_3 = keras.layers.Dense(20, activation='relu', use_bias=True)
-        self.ff_4 = keras.layers.Dense(1)
+        self.ff_mean = keras.layers.Dense(100, activation='relu', use_bias=True)
+        self.ff_var = keras.layers.Dense(100, activation='relu', use_bias=True)
+
+        self.sample = Sampling()
 
 #     @tf.function
     def call(self, images):
@@ -66,24 +74,43 @@ class BaselinePTX(keras.layers.Layer):
         x = tf.squeeze(x, axis=1)
         x = self.conv_2(x)
         x = self.flatten(x)
-#         x = self.ff_1(x)
-#         x = self.dropout(x)
-#         x = self.ff_2(x)
-#         x = self.dropout(x)
-        x = self.ff_3(x)
-        x = self.dropout(x)
-        x = self.ff_4(x)
+        z_mean = self.ff_mean(x)
+        z_var = self.ff_var(x)
+        z = self.sample([z_mean, z_var])
+        return z
+
+class VAEDecoderPTX(keras.layers.Layer):
+    def __init__(self):
+        super(VAEDecoderPTX, self).__init__()
 
-        return x
+        self.conv_1 = keras.layers.Conv3D(64, kernel_size=(5, 8, 8), strides=(1, 4, 4), activation='relu', padding='valid')
+        self.conv_2 = keras.layers.Conv2D(24, kernel_size=4, strides=2, activation='relu', padding='valid')
+
+        self.flatten = keras.layers.Flatten()
+
+        self.ff_mean = keras.layers.Dense(100, activation='relu', use_bias=True)
+        self.ff_var = keras.layers.Dense(100, activation='relu', use_bias=True)
+
+        self.sample = Sampling()
+
+#     @tf.function
+    def call(self, images):
+        x = self.conv_1(images)
+        x = tf.squeeze(x, axis=1)
+        x = self.conv_2(x)
+        x = self.flatten(x)
+        z_mean = self.ff_mean(x)
+        z_var = self.ff_var(x)
+        z = self.sample([z_mean, z_var])
+        return z
 
 class MobileModelPTX(keras.Model):
-    def __init__(self, sparse_checkpoint, batch_size, in_channels, out_channels, kernel_size, stride, lam, activation_lr, max_activation_iter, run_2d):
+    def __init__(self, sparse_weights, classifier_model, batch_size, image_height, image_width, clip_depth, out_channels, kernel_size, kernel_depth, stride, lam, activation_lr, max_activation_iter, run_2d):
         super().__init__()
-        self.sparse_code = SparseCode(batch_size, in_channels, out_channels, kernel_size, stride, lam, activation_lr, max_activation_iter, run_2d)
-        self.classifier = Classifier()
+        self.sparse_code = SparseCode(batch_size=batch_size, image_height=image_height, image_width=image_width, clip_depth=clip_depth, in_channels=1, out_channels=out_channels, kernel_size=kernel_size, kernel_depth=kernel_depth, stride=stride, lam=lam, activation_lr=activation_lr, max_activation_iter=max_activation_iter, run_2d=run_2d, padding='VALID')
+        self.classifier = classifier_model
 
         self.out_channels = out_channels
-        self.in_channels = in_channels
         self.stride = stride
         self.lam = lam
         self.activation_lr = activation_lr
@@ -91,21 +118,19 @@ class MobileModelPTX(keras.Model):
         self.batch_size = batch_size
         self.run_2d = run_2d
 
-        pytorch_weights = load_pytorch_weights(sparse_checkpoint)
-
         if run_2d:
-            weight_list = np.split(pytorch_weights, 5, axis=0)
+            weight_list = np.split(sparse_weights, 5, axis=0)
             self.filters_1 = tf.Variable(initial_value=weight_list[0].squeeze(0), dtype='float32', trainable=False)
             self.filters_2 = tf.Variable(initial_value=weight_list[1].squeeze(0), dtype='float32', trainable=False)
             self.filters_3 = tf.Variable(initial_value=weight_list[2].squeeze(0), dtype='float32', trainable=False)
             self.filters_4 = tf.Variable(initial_value=weight_list[3].squeeze(0), dtype='float32', trainable=False)
             self.filters_5 = tf.Variable(initial_value=weight_list[4].squeeze(0), dtype='float32', trainable=False)
         else:
-            self.filters = tf.Variable(initial_value=pytorch_weights, dtype='float32', trainable=False)
+            self.filters = tf.Variable(initial_value=sparse_weights, dtype='float32', trainable=False)
 
     @tf.function
     def call(self, images):
-        images = tf.squeeze(tf.image.rgb_to_grayscale(images), axis=-1)
+#         images = tf.squeeze(tf.image.rgb_to_grayscale(images), axis=-1)
         images = tf.transpose(images, perm=[0, 2, 3, 1])
         images = images / 255
         images = (images - 0.2592) / 0.1251
@@ -115,6 +140,6 @@
         else:
             activations = self.sparse_code(images, tf.stop_gradient(self.filters))
 
-        pred = self.classifier(activations)
+        pred = tf.math.sigmoid(self.classifier(tf.expand_dims(activations, axis=1)))
 
         return pred
\ No newline at end of file
diff --git a/sparse_coding_torch/ptx/generate_tflite.py b/sparse_coding_torch/ptx/generate_tflite.py
index 5240156eb831b845761a1a2815380dbe8ca3f215..3866a17dee6b91dd5b23241ce17cdf4a1ac63b68 100644
--- a/sparse_coding_torch/ptx/generate_tflite.py
+++ b/sparse_coding_torch/ptx/generate_tflite.py
@@ -12,20 +12,20 @@ import argparse
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--input_dir', default='/shared_data/bamc_pnb_data/revised_training_data', type=str)
+    parser.add_argument('--input_dir', default='/shared_data/bamc_ph1_test_data', type=str)
     parser.add_argument('--kernel_size', default=15, type=int)
     parser.add_argument('--kernel_depth', default=5, type=int)
-    parser.add_argument('--num_kernels', default=32, type=int)
-    parser.add_argument('--stride', default=4, type=int)
+    parser.add_argument('--num_kernels', default=48, type=int)
+    parser.add_argument('--stride', default=2, type=int)
    parser.add_argument('--max_activation_iter', default=150, type=int)
     parser.add_argument('--activation_lr', default=1e-2, type=float)
     parser.add_argument('--lam', default=0.05, type=float)
-    parser.add_argument('--sparse_checkpoint', default='sparse_coding_torch/output/sparse_pnb_32_long_train/sparse_conv3d_model-best.pt/', type=str)
-    parser.add_argument('--checkpoint', default='sparse_coding_torch/classifier_outputs/32_filters_no_aug_3/best_classifier.pt/', type=str)
+    parser.add_argument('--sparse_checkpoint', default='sparse_coding_torch/output/48_ptx/best_sparse.pt/', type=str)
+    parser.add_argument('--checkpoint', default='sparse_coding_torch/classifier_outputs/ptx_48_all_train_3/best_classifier_0.pt/', type=str)
     parser.add_argument('--run_2d', action='store_true')
-    parser.add_argument('--batch_size', default=1, type=int)
-    parser.add_argument('--image_height', type=int, default=285)
-    parser.add_argument('--image_width', type=int, default=400)
+    parser.add_argument('--batch_size', default=4, type=int)
+    parser.add_argument('--image_height', type=int, default=100)
+    parser.add_argument('--image_width', type=int, default=200)
     parser.add_argument('--clip_depth', type=int, default=5)
 
     args = parser.parse_args()
@@ -48,7 +48,7 @@ if __name__ == "__main__":
     input_name = model.input_names[0]
     index = model.input_names.index(input_name)
 
-    model.inputs[index].set_shape([1, 5, image_height, image_width])
+    model.inputs[index].set_shape([args.batch_size, 5, image_height, image_width])
 
     converter = tf.lite.TFLiteConverter.from_keras_model(model)
     converter.optimizations = [tf.lite.Optimize.DEFAULT]
diff --git a/sparse_coding_torch/ptx/train_classifier.py b/sparse_coding_torch/ptx/train_classifier.py
index 7ed890767f4a77cb505932e4f28fb4d4e8386d38..8e3c5c59d1db86c6f0b77e42ae5a21c72d39bcca 100644
--- a/sparse_coding_torch/ptx/train_classifier.py
+++ b/sparse_coding_torch/ptx/train_classifier.py
@@ -18,6 +18,7 @@ from sparse_coding_torch.utils import VideoGrayScaler, MinMaxScaler
 from yolov4.get_bounding_boxes import YoloModel
 import torchvision
 import glob
+from torchvision.datasets.video_utils import VideoClips
 import cv2
 
 configproto = tf.compat.v1.ConfigProto()
@@ -37,6 +38,7 @@ def calculate_ptx_scores(input_videos, labels, yolo_model, sparse_model, recon_m
             numerical_labels.append(0.0)
 
     final_list = []
+    clip_correct = []
     fp_ids = []
     fn_ids = []
     for v_idx, f in tqdm(enumerate(input_videos)):
@@ -55,7 +57,7 @@ def calculate_ptx_scores(input_videos, labels, yolo_model, sparse_model, recon_m
             clip, _, _, _ = vc.get_clip(i)
             clip = clip.swapaxes(1, 3).swapaxes(0, 1).swapaxes(2, 3).numpy()
 
-            bounding_boxes, classes = yolo_model.get_bounding_boxes(clip[:, 2, :, :].swapaxes(0, 2).swapaxes(0, 1))
+            bounding_boxes, classes, scores = yolo_model.get_bounding_boxes(clip[:, 2, :, :].swapaxes(0, 2).swapaxes(0, 1))
             bounding_boxes = bounding_boxes.squeeze(0)
             if bounding_boxes.size == 0:
                 continue
@@ -63,12 +65,9 @@ def calculate_ptx_scores(input_videos, labels, yolo_model, sparse_model, recon_m
             countclips = countclips + len(bounding_boxes)
 
             widths = [(bounding_boxes[i][3] - bounding_boxes[i][1]) for i in range(len(bounding_boxes))]
-
-            #for i in range(len(bounding_boxes)):
-            #    widths.append(bounding_boxes[i][3] - bounding_boxes[i][1])
 
             ind = np.argmax(np.array(widths))
-            #for bb in bounding_boxes:
+
             bb = bounding_boxes[ind]
             center_x = (bb[3] + bb[1]) / 2 * 1920
             center_y = (bb[2] + bb[0]) / 2 * 1080
@@ -96,7 +95,7 @@ def calculate_ptx_scores(input_videos, labels, yolo_model, sparse_model, recon_m
                 activations = tf.stop_gradient(sparse_model([images, tf.stop_gradient(tf.expand_dims(recon_model.weights[0], axis=0))]))
 
                 pred = classifier_model(activations)
-                #print(torch.nn.Sigmoid()(pred))
+
             clip_predictions = tf.math.round(tf.math.sigmoid(pred))
 
             final_pred = torch.mode(torch.tensor(clip_predictions.numpy()).view(-1))[0].item()
@@ -114,7 +113,9 @@ def calculate_ptx_scores(input_videos, labels, yolo_model, sparse_model, recon_m
 
         final_list.append(final_pred)
 
-    return np.array(final_list), np.array(numerical_labels), fn_ids, fp_ids
+        clip_correct.extend([1 if clip_pred == numerical_labels[v_idx] else 0 for clip_pred in clip_predictions])
+
+    return np.array(final_list), np.array(numerical_labels), fn_ids, fp_ids, sum(clip_correct) / len(clip_correct)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
@@ -156,9 +157,9 @@ if __name__ == "__main__":
 
     batch_size = args.batch_size
 
-    random.seed(args.seed)
-    np.random.seed(args.seed)
-    torch.manual_seed(args.seed)
+#     random.seed(args.seed)
+#     np.random.seed(args.seed)
+#     torch.manual_seed(args.seed)
 
     output_dir = args.output_dir
     if not os.path.exists(output_dir):
@@ -362,7 +363,7 @@ if __name__ == "__main__":
             test_videos = glob.glob(os.path.join(test_dir, '*', '*.*'))
             test_labels = [vid_f.split('/')[-2] for vid_f in test_videos]
 
-            y_pred, y_true, fn, fp = calculate_ptx_scores(test_videos, test_labels, yolo_model, sparse_model, recon_model, classifier_model, image_width, image_height, transform)
+            y_pred, y_true, fn, fp, clip_acc = calculate_ptx_scores(test_videos, test_labels, yolo_model, sparse_model, recon_model, classifier_model, image_width, image_height, transform)
 
             t2 = time.perf_counter()
 
@@ -380,7 +381,7 @@ if __name__ == "__main__":
             overall_true.extend(y_true)
             overall_pred.extend(y_pred)
 
-            print("Test f1={:.2f}, vid_acc={:.2f}".format(f1, accuracy))
+            print("Test f1={:.2f}, vid_acc={:.2f}, clip_acc={:.2f}".format(f1, accuracy, clip_acc))
 
             print(confusion_matrix(y_true, y_pred))
diff --git a/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/onsd_prelim.names b/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/onsd_prelim.names
new file mode 100644
index 0000000000000000000000000000000000000000..d7c8020eb46548bdf2ca4654fb711bc5585afa18
--- /dev/null
+++ b/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/onsd_prelim.names
@@ -0,0 +1 @@
+nerve
\ No newline at end of file
diff --git a/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/res215.mp4 b/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/res215.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..d0e54aa517992e5545ec66a7a018029bf2b3c327
Binary files /dev/null and b/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/res215.mp4 differ
diff --git a/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/res239.mp4 b/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/res239.mp4
new file mode 100644
index 0000000000000000000000000000000000000000..43c134027da1680bf6183ea78583641e87ad4fca
Binary files /dev/null and b/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/res239.mp4 differ
diff --git a/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/yolov4-416.tflite b/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/yolov4-416.tflite
new file mode 100644
index 0000000000000000000000000000000000000000..8c32909989f178997aa2aaf7f4ba6cb2fafeda64
Binary files /dev/null and b/yolov4/Pleural_Line_TensorFlow/onsd_prelim_yolo/yolov4-416.tflite differ