diff --git a/image_preprocessing.ipynb b/image_preprocessing.ipynb
index c2737bfbc0a742b0ef6fc282b7c15f0eda81b6f6..08f64d7e153c9bbb678cb19f479c9802bbcce841 100644
--- a/image_preprocessing.ipynb
+++ b/image_preprocessing.ipynb
@@ -13,19 +13,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 3,
    "id": "d7e56e0e-7eec-429d-940b-c3337db4b4dc",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "The autoreload extension is already loaded. To reload it, use:\n",
-      "  %reload_ext autoreload\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "%load_ext autoreload\n",
     "%autoreload 2\n",
@@ -109,7 +100,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 4,
    "id": "b8c4f292",
    "metadata": {},
    "outputs": [],
@@ -120,17 +111,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 5,
    "id": "7e9702c3",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "'h:\\\\School\\\\Winter 2022\\\\DS Projects\\\\2018\\\\hvm-image-clf/data/Training_Images_'"
+       "'c:\\\\Users\\\\Bennett Nolan\\\\Desktop\\\\info442\\\\hvm-image-clf/data/Training_Images_'"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 5,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -152,7 +143,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "id": "0ba9148a",
    "metadata": {},
    "outputs": [
@@ -192,7 +183,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 7,
    "id": "e6d378d5",
    "metadata": {},
    "outputs": [
@@ -224,7 +215,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "<ipython-input-5-4c0ddd092066>:5: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.\n",
+      "C:\\Users\\Bennett Nolan\\AppData\\Local\\Temp\\ipykernel_10700\\2944415004.py:5: FutureWarning: In a future version of pandas all arguments of DataFrame.drop except for the argument 'labels' will be keyword-only.\n",
       "  metadata.drop('dx',1).isna().groupby(\n"
      ]
     }
@@ -290,7 +281,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 9,
    "id": "05398a91",
    "metadata": {},
    "outputs": [],
@@ -301,7 +292,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 10,
    "id": "e8642d8d",
    "metadata": {},
    "outputs": [],
@@ -311,7 +302,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 11,
    "id": "5312b5de",
    "metadata": {},
    "outputs": [],
@@ -321,7 +312,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 12,
    "id": "49338970",
    "metadata": {},
    "outputs": [],
@@ -331,7 +322,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 13,
    "id": "784d69cd",
    "metadata": {},
    "outputs": [],
@@ -341,23 +332,10 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 14,
    "id": "6cd167a7",
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "\u001b[1;32m<ipython-input-35-c19654a1d8a1>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mnv_images\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mtransform\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdest_dir\u001b[0m \u001b[1;33m+\u001b[0m \u001b[1;34m'nv'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0msize\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;36m200\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;36m150\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
-      "\u001b[1;32mh:\\School\\Winter 2022\\DS Projects\\2018\\hvm-image-clf\\data_processing.py\u001b[0m in \u001b[0;36mtransform\u001b[1;34m(path, size)\u001b[0m\n\u001b[0;32m     50\u001b[0m         \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     51\u001b[0m             \u001b[1;31m# concatenate different images\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 52\u001b[1;33m             \u001b[0mfull_mat\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mconcatenate\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mfull_mat\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mimg_ts\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     53\u001b[0m         \u001b[1;32mexcept\u001b[0m \u001b[0mUnboundLocalError\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     54\u001b[0m             \u001b[1;31m# if not assigned yet, assign one\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
-      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "#This takes a really long time to run even when cutting down the images size.\n",
     "nv_images = transform(dest_dir + 'nv', size=(200, 150))"
@@ -365,7 +343,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 15,
    "id": "4de5cec3",
    "metadata": {},
    "outputs": [],
@@ -375,7 +353,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 16,
    "id": "d92158fa",
    "metadata": {},
    "outputs": [
@@ -385,7 +363,7 @@
        "(1113, 67500)"
       ]
      },
-     "execution_count": 36,
+     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -426,11 +404,200 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
+   "id": "5d475aed",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>image_id</th>\n",
+       "      <th>dx</th>\n",
+       "      <th>age</th>\n",
+       "      <th>sex</th>\n",
+       "      <th>localization</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>ISIC_0027419</td>\n",
+       "      <td>bkl</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>male</td>\n",
+       "      <td>scalp</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>ISIC_0025030</td>\n",
+       "      <td>bkl</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>male</td>\n",
+       "      <td>scalp</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>ISIC_0026769</td>\n",
+       "      <td>bkl</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>male</td>\n",
+       "      <td>scalp</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>ISIC_0025661</td>\n",
+       "      <td>bkl</td>\n",
+       "      <td>80.0</td>\n",
+       "      <td>male</td>\n",
+       "      <td>scalp</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>ISIC_0031633</td>\n",
+       "      <td>bkl</td>\n",
+       "      <td>75.0</td>\n",
+       "      <td>male</td>\n",
+       "      <td>ear</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       image_id   dx   age   sex localization\n",
+       "0  ISIC_0027419  bkl  80.0  male        scalp\n",
+       "1  ISIC_0025030  bkl  80.0  male        scalp\n",
+       "2  ISIC_0026769  bkl  80.0  male        scalp\n",
+       "3  ISIC_0025661  bkl  80.0  male        scalp\n",
+       "4  ISIC_0031633  bkl  75.0  male          ear"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "metadata.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
    "id": "6e579e93",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>age</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>0.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>85.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>median</th>\n",
+       "      <td>50.000000</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>51.863828</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>skew</th>\n",
+       "      <td>-0.166802</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "              age\n",
+       "min      0.000000\n",
+       "max     85.000000\n",
+       "median  50.000000\n",
+       "mean    51.863828\n",
+       "skew    -0.166802"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "metadata.agg({\n",
+    "    \"age\":[\"min\", \"max\", \"median\", \"mean\", \"skew\"]    \n",
+    "})"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "ea361300",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "sex\n",
+       "female     4552\n",
+       "male       5406\n",
+       "unknown      57\n",
+       "Name: sex, dtype: int64"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "metadata.groupby(\"sex\")[\"sex\"].count()"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -441,12 +608,92 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
+   "cell_type": "markdown",
    "id": "bf51add8",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "Distributions for metadata including Age, Localization, Sex, and Diagnosis"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "6681e88c",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Text(0.5, 1.0, 'Localization')"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 1080x1080 with 2 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "fig =plt.figure(figsize=(15,15))\n",
+    "ax1 = fig.add_subplot(221)\n",
+    "metadata['sex'].value_counts().plot(kind='bar', ax=ax1)\n",
+    "ax1.set_title('Sex')\n",
+    "\n",
+    "ax2=fig.add_subplot(222)\n",
+    "metadata['localization'].value_counts().plot(kind='bar', ax=ax2)\n",
+    "ax2.set_title('Localization')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "f6596829",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Text(0.5, 1.0, 'Diagnosis')"
+      ]
+     },
+     "execution_count": 15,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "data": {
+      "image/png": "",
+      "text/plain": [
+       "<Figure size 1080x1080 with 2 Axes>"
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "fig =plt.figure(figsize=(15,15))\n",
+    "ax1 = fig.add_subplot(221)\n",
+    "metadata['age'].value_counts().plot(kind='bar', ax=ax1)\n",
+    "ax1.set_title('Age')\n",
+    "\n",
+    "ax2=fig.add_subplot(222)\n",
+    "metadata['dx'].value_counts().plot(kind='bar', ax=ax2)\n",
+    "ax2.set_title('Diagnosis')"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -456,13 +703,122 @@
     "Correlation"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "id": "fbc246dd",
+   "metadata": {},
+   "source": [
+    "Cross-tabulation of age and sex against dx (skin lesion type), using these diagnosis codes:\n",
+    "- nv = Melanocytic nevi\n",
+    "- mel = Melanoma\n",
+    "- bkl = Benign keratosis-like lesions\n",
+    "- bcc = Basal cell carcinoma\n",
+    "- akiec = Actinic keratosis\n",
+    "- vasc = Vascular lesions\n",
+    "- df = Dermatofibroma"
+   ]
+  },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 34,
    "id": "a0602660",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "dx    akiec  bcc  bkl  df  mel    nv  vasc\n",
+      "age                                       \n",
+      "0.0       0    0    5   0    0    30     4\n",
+      "5.0       0    0    1   0    1    81     3\n",
+      "10.0      0    0    0   0    0    39     2\n",
+      "15.0      0    0    0   0    0    73     4\n",
+      "20.0      0    1    0   0    6   158     4\n",
+      "25.0      0    3    0   2   16   221     5\n",
+      "30.0      1    4    6   4   34   410     5\n",
+      "35.0      0    5   24  12   36   668     8\n",
+      "40.0      9   23   46   9   49   846     3\n",
+      "45.0     10   26   59  14   74  1100    16\n",
+      "50.0     19   27   87  18   96   928    12\n",
+      "55.0     27   25   95  13  142   686    21\n",
+      "60.0     58   35  131   9  106   454    10\n",
+      "65.0     38   79  108  18  133   351     4\n",
+      "70.0     56   85  183   4  166   248    14\n",
+      "75.0     47   76  153   9   91   231    11\n",
+      "80.0     37   73   98   3   85    97    11\n",
+      "85.0     25   52   93   0   76    39     5\n"
+     ]
+    }
+   ],
+   "source": [
+    "ct = pd.crosstab(index=metadata['age'], columns=metadata['dx'])\n",
+    "print(ct)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
+   "id": "0a82f299",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "dx       akiec  bcc  bkl  df  mel    nv  vasc\n",
+      "sex                                          \n",
+      "female     106  197  463  52  424  3237    73\n",
+      "male       221  317  626  63  689  3421    69\n",
+      "unknown      0    0   10   0    0    47     0\n"
+     ]
+    }
+   ],
+   "source": [
+    "ct2 = pd.crosstab(index=metadata['sex'], columns=metadata['dx'])\n",
+    "print(ct2)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "ad813357",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PValue =  2.4464388098587195e-17\n"
+     ]
+    }
+   ],
+   "source": [
+    "from scipy.stats import chi2_contingency\n",
+    "#Sex & DX\n",
+    "chi2= chi2_contingency(ct2)\n",
+    "print(\"PValue = \" , chi2[1])\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "20474e6b",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "PValue =  0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "#Age & DX\n",
+    "chi2_2= chi2_contingency(ct)\n",
+    "print(\"PValue = \" , chi2_2[1])"
+   ]
   },
   {
    "cell_type": "markdown",
@@ -633,7 +989,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.0"
+   "version": "3.10.2"
   }
  },
  "nbformat": 4,