From 054bdaca2e524cedac6a0f0b9f2a71917f1533c8 Mon Sep 17 00:00:00 2001
From: Philip Monaco <philmonaco34@gmail.com>
Date: Thu, 3 Mar 2022 10:43:49 -0500
Subject: [PATCH] Add additional processing for model creation

---
 Project_Notebook.ipynb | 473 +++++++++++++++++++++++++++++++++++++----
 data_processing.py | 6 +-
 2 files changed, 431 insertions(+), 48 deletions(-)

diff --git a/Project_Notebook.ipynb b/Project_Notebook.ipynb
index 3fc1bd6..8a90228 100644
--- a/Project_Notebook.ipynb
+++ b/Project_Notebook.ipynb
@@ -2,7 +2,7 @@
 "cells": [
 {
 "cell_type": "code",
- "execution_count": 1,
+ "execution_count": 2,
 "id": "d7e56e0e-7eec-429d-940b-c3337db4b4dc",
 "metadata": {},
 "outputs": [],
@@ -17,6 +17,7 @@
 "from tqdm import tqdm\n",
 "from data_processing import load_sort_data, transform\n",
 "from EDA import find_mean_img, eigenimages, plot_pca\n",
+ "from tensorflow.keras.preprocessing import image_dataset_from_directory\n",
 "import matplotlib.pyplot as plt\n",
 "%matplotlib inline\n"
 ]
@@ -90,7 +91,41 @@
 },
 {
 "cell_type": "code",
- "execution_count": 2,
+ "execution_count": 3,
+ "id": "52d44e91",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "train_meta = pd.read_csv('./data/ISIC2018_Task3_Training_GroundTruth.csv')\n",
+ "test_meta = pd.read_csv('./data/ISIC2018_Task3_Validation_GroundTruth.csv')\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "b25db3d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Process metadata and decision labels\n",
+ "lab = ['MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC']\n",
+ "\n",
+ "# the original metadata is one-hot encoded, so reverse it into a single label column\n",
+ "train_labels = train_meta[lab].idxmax(axis=1)\n",
+ "test_labels = test_meta[lab].idxmax(axis=1)\n",
+ "\n",
+ "# combine the reversed one-hot labels with the image file names\n",
+ "ptrainmeta = pd.concat([train_meta['image'], train_labels], keys=['image_id', 'dx'], axis=1)\n",
+ "ptestmeta = pd.concat([test_meta['image'], test_labels], keys=['image_id', 'dx'], axis=1)\n",
+ "\n",
+ "# save .csv files that contain the decision labels and image names\n",
+ "ptrainmeta.to_csv('./data/train_metadata.csv')\n",
+ "ptestmeta.to_csv('./data/test_metadata.csv')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
 "id": "b8c4f292",
 "metadata": {},
 "outputs": [
@@ -98,35 +133,60 @@
 "name": "stderr",
 "output_type": "stream",
 "text": [
- "100%|██████████| 1099/1099 [00:22<00:00, 48.04it/s]\n",
- "100%|██████████| 6705/6705 [02:20<00:00, 47.65it/s]\n",
- "100%|██████████| 115/115 [00:02<00:00, 38.72it/s]\n",
- "100%|██████████| 1113/1113 [00:23<00:00, 48.19it/s]\n",
- "100%|██████████| 142/142 [00:03<00:00, 46.04it/s]\n",
- "100%|██████████| 514/514 [00:09<00:00, 51.89it/s]\n",
- "100%|██████████| 327/327 [00:06<00:00, 54.49it/s]\n",
- "100%|██████████| 7/7 [03:32<00:00, 30.35s/it]\n"
+ "100%|██████████| 6705/6705 [01:09<00:00, 97.14it/s]\n",
+ "100%|██████████| 1113/1113 [00:12<00:00, 92.28it/s]\n",
+ "100%|██████████| 1099/1099 [00:10<00:00, 100.47it/s]\n",
+ "100%|██████████| 115/115 [00:01<00:00, 99.05it/s]\n",
+ "100%|██████████| 327/327 [00:02<00:00, 110.73it/s]\n",
+ "100%|██████████| 514/514 [00:04<00:00, 112.84it/s]\n",
+ "100%|██████████| 142/142 [00:01<00:00, 106.61it/s]\n",
+ "100%|██████████| 7/7 [01:46<00:00, 15.18s/it]\n"
 ]
 }
 ],
 "source": [
 "# function takes 3 parameters: metadata filename, the folder of the raw images, and the desired name of the destination directory. 
\n", - "metadata, dest_dir = load_sort_data('HAM10000_metadata', 'ISIC2018_Task3_Training_Input', 'Training_Images_')" + "metadata, dest_dir = load_sort_data('train_metadata.csv', 'ISIC2018_Task3_Training_Input', 'training/')" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 20, + "id": "87405f6d", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "100%|██████████| 123/123 [00:01<00:00, 86.44it/s]\n", + "100%|██████████| 15/15 [00:00<00:00, 94.34it/s]\n", + "100%|██████████| 22/22 [00:00<00:00, 92.05it/s]\n", + "100%|██████████| 21/21 [00:00<00:00, 114.75it/s]\n", + "100%|██████████| 8/8 [00:00<00:00, 142.86it/s]\n", + "100%|██████████| 1/1 [00:00<00:00, 100.00it/s]\n", + "100%|██████████| 3/3 [00:00<00:00, 85.72it/s]\n", + "100%|██████████| 7/7 [00:02<00:00, 3.11it/s]\n" + ] + } + ], + "source": [ + "metadata, dest_dir = load_sort_data('test_metadata.csv', 'ISIC2018_Task3_Validation_Input', 'test/')" + ] + }, + { + "cell_type": "code", + "execution_count": 25, "id": "7e9702c3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'h:\\\\School\\\\Winter 2022\\\\DS Projects\\\\2018\\\\hvm-image-clf/data/Training_Images_'" + "'h:\\\\School\\\\Winter 2022\\\\DS Projects\\\\2018\\\\hvm-image-clf/data/training/Training_Images_'" ] }, - "execution_count": 3, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -148,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 26, "id": "0ba9148a", "metadata": {}, "outputs": [ @@ -188,7 +248,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 27, "id": "e6d378d5", "metadata": {}, "outputs": [ @@ -220,7 +280,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "<ipython-input-5-f268d13f0828>:5: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.\n", + "<ipython-input-27-f268d13f0828>:5: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.\n", " metadata.drop('dx',1).isna().groupby(\n" ] } @@ -240,7 +300,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 28, "id": "91aa284b", "metadata": {}, "outputs": [ @@ -286,7 +346,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 29, "id": "05398a91", "metadata": {}, "outputs": [ @@ -294,7 +354,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 327/327 [00:02<00:00, 139.03it/s]\n" + "100%|██████████| 327/327 [00:03<00:00, 92.87it/s] \n" ] } ], @@ -305,7 +365,7 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 21, "id": "e8642d8d", "metadata": {}, "outputs": [ @@ -313,7 +373,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 514/514 [00:06<00:00, 76.53it/s] \n" + "100%|██████████| 15/15 [00:00<00:00, 93.75it/s]\n" ] } ], @@ -323,7 +383,34 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": 22, + "id": "2060e363", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[179., 185., 183., ..., 197., 192., 190.],\n", + " [163., 167., 168., ..., 135., 128., 119.],\n", + " [170., 168., 167., ..., 179., 178., 182.],\n", + " ...,\n", + " [198., 199., 195., ..., 175., 176., 176.],\n", + " [153., 150., 153., ..., 123., 115., 106.],\n", + " [179., 182., 184., ..., 178., 178., 180.]], dtype=float32)" + ] + }, + "execution_count": 22, + 
"metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bcc_images" + ] + }, + { + "cell_type": "code", + "execution_count": 31, "id": "5312b5de", "metadata": {}, "outputs": [ @@ -331,7 +418,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1099/1099 [00:21<00:00, 51.53it/s]\n" + "100%|██████████| 1099/1099 [00:24<00:00, 45.15it/s]\n" ] } ], @@ -341,7 +428,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 32, "id": "49338970", "metadata": {}, "outputs": [ @@ -349,7 +436,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 115/115 [00:01<00:00, 111.11it/s]\n" + "100%|██████████| 115/115 [00:01<00:00, 107.58it/s]\n" ] } ], @@ -359,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 33, "id": "784d69cd", "metadata": {}, "outputs": [ @@ -367,7 +454,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 1113/1113 [00:21<00:00, 50.65it/s]\n" + "100%|██████████| 1113/1113 [00:24<00:00, 46.00it/s]\n" ] } ], @@ -377,7 +464,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 34, "id": "6cd167a7", "metadata": {}, "outputs": [ @@ -385,7 +472,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 6705/6705 [08:49<00:00, 12.65it/s]\n" + "100%|██████████| 6705/6705 [09:36<00:00, 11.63it/s]\n" ] } ], @@ -396,7 +483,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 35, "id": "4de5cec3", "metadata": {}, "outputs": [ @@ -404,7 +491,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "100%|██████████| 142/142 [00:01<00:00, 103.95it/s]\n" + "100%|██████████| 142/142 [00:01<00:00, 92.27it/s]\n" ] } ], @@ -414,23 +501,23 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 36, "id": "d92158fa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[168., 164., 163., ..., 141., 140., 139.],\n", - " [139., 145., 146., ..., 160., 160., 160.],\n", - " [171., 174., 172., ..., 184., 188., 189.],\n", + "array([[171., 170., 166., ..., 163., 170., 171.],\n", + " [158., 163., 163., ..., 189., 187., 174.],\n", + " [173., 177., 176., ..., 184., 185., 183.],\n", " ...,\n", - " [104., 101., 98., ..., 34., 31., 29.],\n", - " [ 84., 84., 88., ..., 10., 8., 9.],\n", - " [157., 157., 154., ..., 156., 157., 155.]], dtype=float32)" + " [113., 112., 111., ..., 99., 104., 103.],\n", + " [118., 120., 128., ..., 162., 148., 149.],\n", + " [157., 153., 153., ..., 172., 175., 175.]], dtype=float32)" ] }, - "execution_count": 37, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } @@ -1182,17 +1269,313 @@ "id": "3157d03a", "metadata": {}, "source": [ - "# 4. Data Processing for Model Ingestion" + "# 5. Model Creation" ] }, { - "cell_type": "markdown", - "id": "64adf033", + "cell_type": "code", + "execution_count": 5, + "id": "5f0f70da", "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 10015 files belonging to 7 classes.\n" + ] + } + ], "source": [ - "# 5. 
Model Creation" + "#make test set\n", + "seed = 12345\n", + "training_data = image_dataset_from_directory(\n", + " directory = './data/training/',\n", + " labels = 'inferred',\n", + " label_mode = 'categorical',\n", + " class_names = lab,\n", + " batch_size = 32,\n", + " image_size = (600,450),\n", + " shuffle = True,\n", + " seed = seed,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "b8213cb9", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 193 files belonging to 7 classes.\n" + ] + } + ], + "source": [ + "test_data = image_dataset_from_directory(\n", + " directory = './data/test/',\n", + " labels = 'inferred',\n", + " label_mode = 'categorical',\n", + " class_names = lab,\n", + " batch_size = 32,\n", + " image_size = (600,450),\n", + " shuffle = True,\n", + " seed = seed,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "f1be00cd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "<BatchDataset element_spec=(TensorSpec(shape=(None, 600, 450, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None, 7), dtype=tf.float32, name=None))>" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_data" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f8524747", + "metadata": {}, + "outputs": [], + "source": [ + "from tensorflow.keras import datasets,layers, models" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "25f8bceb", + "metadata": {}, + "outputs": [], + "source": [ + "model = models.Sequential()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "17a95bfb", + "metadata": {}, + "outputs": [], + "source": [ + "model.add(layers.Conv2D(32, (3,3), activation='relu', input_shape=(600,450,3)))" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "2c9a3491", + "metadata": {}, + "outputs": [], + "source": [ + "model.add(layers.AveragePooling2D(2,2))\n", + "model.add(layers.Conv2D(64, (3, 3), activation='relu'))\n", + "model.add(layers.AveragePooling2D((2, 2)))\n", + "model.add(layers.Conv2D(64, (3, 3), activation='relu'))" ] }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d668b5de", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " conv2d (Conv2D) (None, 598, 448, 32) 896 \n", + " \n", + " average_pooling2d (AverageP (None, 299, 224, 32) 0 \n", + " ooling2D) \n", + " \n", + " conv2d_1 (Conv2D) (None, 297, 222, 64) 18496 \n", + " \n", + " average_pooling2d_1 (Averag (None, 148, 111, 64) 0 \n", + " ePooling2D) \n", + " \n", + " conv2d_2 (Conv2D) (None, 146, 109, 64) 36928 \n", + " \n", + "=================================================================\n", + "Total params: 56,320\n", + "Trainable params: 56,320\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "9fef7a20", + "metadata": {}, + "outputs": [], + "source": [ + "model.add(layers.Flatten())\n", + "model.add(layers.Dense(64, activation='relu'))\n", + "model.add(layers.Dense(10))" + ] + }, + { + "cell_type": "code", + 
"execution_count": 15, + "id": "b2c2de4d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " conv2d (Conv2D) (None, 598, 448, 32) 896 \n", + " \n", + " average_pooling2d (AverageP (None, 299, 224, 32) 0 \n", + " ooling2D) \n", + " \n", + " conv2d_1 (Conv2D) (None, 297, 222, 64) 18496 \n", + " \n", + " average_pooling2d_1 (Averag (None, 148, 111, 64) 0 \n", + " ePooling2D) \n", + " \n", + " conv2d_2 (Conv2D) (None, 146, 109, 64) 36928 \n", + " \n", + " flatten (Flatten) (None, 1018496) 0 \n", + " \n", + " dense (Dense) (None, 64) 65183808 \n", + " \n", + " dense_1 (Dense) (None, 10) 650 \n", + " \n", + "=================================================================\n", + "Total params: 65,240,778\n", + "Trainable params: 65,240,778\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "25c98f80", + "metadata": {}, + "outputs": [], + "source": [ + "model.compile(optimizer='adam', \n", + " loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),\n", + " metrics=['accuracy'])" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "acd6773b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: \"sequential\"\n", + "_________________________________________________________________\n", + " Layer (type) Output Shape Param # \n", + "=================================================================\n", + " conv2d (Conv2D) (None, 598, 448, 32) 896 \n", + " \n", + " average_pooling2d (AverageP (None, 299, 224, 32) 0 \n", + " ooling2D) \n", + " \n", + " conv2d_1 (Conv2D) (None, 297, 222, 64) 18496 \n", + " \n", + " average_pooling2d_1 (Averag (None, 148, 111, 64) 0 \n", + " ePooling2D) \n", + " \n", + " conv2d_2 (Conv2D) (None, 146, 109, 64) 36928 \n", + " \n", + " flatten (Flatten) (None, 1018496) 0 \n", + " \n", + " dense (Dense) (None, 64) 65183808 \n", + " \n", + " dense_1 (Dense) (None, 10) 650 \n", + " \n", + "=================================================================\n", + "Total params: 65,240,778\n", + "Trainable params: 65,240,778\n", + "Non-trainable params: 0\n", + "_________________________________________________________________\n" + ] + } + ], + "source": [ + "model.summary()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "ffe382a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n" + ] + } + ], + "source": [ + "history = model.fit(training_data, epochs=10, validation_data = (test_data))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1fc861d9", + "metadata": {}, + "outputs": [], + "source": [ + "history" + ] + }, + { + "cell_type": "markdown", + "id": "64adf033", + "metadata": {}, + "source": [] + }, { "cell_type": "markdown", "id": "c3114115", @@ -1229,7 +1612,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.2" + "version": "3.9.0" } }, "nbformat": 4, diff --git a/data_processing.py b/data_processing.py index 37f6849..cd831cd 100644 --- a/data_processing.py +++ b/data_processing.py @@ -9,19 +9,19 @@ 
import numpy as np
 from tqdm import tqdm
 
-def load_sort_data(meta_filename = str, image_folder = str, output_folder = str):
+def load_sort_data(meta_filename = str, image_folder = str, sub_dir = str):
     """[summary]
 
     Args:
         meta_filename ([type], optional): [description]. Defaults to str.
         image_folder ([type], optional): [description]. Defaults to str.
-        output_folder ([type], optional): [description]. Defaults to str.
+        sub_dir (str, optional): Parent directory of the output folders.
 
     Returns:
         [type]: [description]
     """
     data_dir = os.getcwd() + "/data/"
-    dest_dir = data_dir + output_folder
+    dest_dir = data_dir + sub_dir
     metadata = pd.read_csv(data_dir + '/' + meta_filename)
     labels = metadata['dx'].unique()
     label_images = []
-- 
GitLab
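
For reference, the end-to-end flow this patch assembles (class-sorted image folders read with image_dataset_from_directory, one-hot labels via label_mode='categorical', and a small CNN over 600x450 RGB inputs) condenses to the sketch below. This is a minimal illustrative sketch mirroring the notebook cells above, not the committed code; the paths, class list, and hyperparameters are taken from the notebook.

    import tensorflow as tf
    from tensorflow.keras import layers, models
    from tensorflow.keras.preprocessing import image_dataset_from_directory

    lab = ['MEL', 'NV', 'BCC', 'AKIEC', 'BKL', 'DF', 'VASC']  # the 7 lesion classes

    # directories produced by load_sort_data(); one sub-folder per class label
    training_data = image_dataset_from_directory(
        './data/training/', labels='inferred', label_mode='categorical',
        class_names=lab, batch_size=32, image_size=(600, 450),
        shuffle=True, seed=12345)
    test_data = image_dataset_from_directory(
        './data/test/', labels='inferred', label_mode='categorical',
        class_names=lab, batch_size=32, image_size=(600, 450),
        shuffle=True, seed=12345)

    model = models.Sequential([
        layers.Conv2D(32, (3, 3), activation='relu', input_shape=(600, 450, 3)),
        layers.AveragePooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.AveragePooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(len(lab)),  # raw logits, one per class
    ])

    # one-hot labels pair with CategoricalCrossentropy; from_logits=True because
    # the final Dense layer applies no softmax
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])

    history = model.fit(training_data, epochs=10, validation_data=test_data)

Note that image_size is (height, width), so (600, 450) matches the Conv2D input_shape of (600, 450, 3), and with from_logits=True the softmax is folded into the loss computation, which is numerically more stable than adding a softmax activation on the final layer.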