Skip to content
Snippets Groups Projects
Commit c9f75671 authored by pjm363 (Philip Monaco)'s avatar pjm363 (Philip Monaco)
Browse files

Merge branch '11-installation-instructions' into 'main'

Fix EDA Errors

Closes #11

See merge request !8
parents 392ed00e 497a8fa7
No related branches found
No related tags found
1 merge request!8Fix EDA Errors
......@@ -2,60 +2,74 @@ from sklearn.decomposition import PCA
from math import ceil
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
def find_mean_img(full_mat, title):
"""[summary]
def find_mean_img(full_mat):
"""Calculates and plots the mean of each pixel in an image matrix.
Args:
full_mat ([type]): [description]
title ([type]): [description]
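full_mat (dict): Mapping from disease category name to that category's vectorized image array (the function indexes it by key and uses each key as a subplot title).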
title (String): Name of the title for the plot.
Returns:
[type]: [description]
matplotlib.plt: A plot of the mean pixels for each disease category.
"""
cols = 4
rows = len(full_mat)//cols + 1
fig = plt.figure(figsize = (12,6))
for i, mat in zip(range(0,len(full_mat)),full_mat):
# calculate the average
mean_img = np.mean(full_mat, axis = 0)
mean_img = np.mean(full_mat[mat], axis = 0)
# reshape it back to a matrix
mean_img = mean_img.reshape((300,225))
plt.imshow(mean_img, vmin=0, vmax=255, cmap='Greys_r')
plt.title(f'Average {title}')
mean_img = mean_img.reshape((200, 150))
ax = fig.add_subplot(rows, cols,i+1)
ax.imshow(mean_img, vmin=0, vmax=255, cmap='Greys_r')
ax.set_title('Average ' + mat)
plt.axis('off')
plt.show()
return mean_img
def eigenimages(full_mat, title, n_comp = 0.7, size = (300,225)):
"""[summary]
plt.tight_layout()
Args:
full_mat ([type]): [description]
title ([type]): [description]
n_comp (float, optional): [description]. Defaults to 0.7.
size (tuple, optional): [description]. Defaults to (300,225).
Returns:
[type]: [description]
"""
# fit PCA to describe n_comp * variability in the class
pca = PCA(n_components = n_comp, whiten = True)
pca.fit(full_mat)
print('Number of PC: ', pca.n_components_)
return pca
def plot_pca(pca, size = (300,225)):
"""[summary]
def plot_pca(pca, title, size = (200, 150)):
"""Plots each decomposed PCA image and labels the amount of variability for each image.
Args:
pca ([type]): [description]
size (tuple, optional): [description]. Defaults to (300,225).
pca (sklearn PCA object): A fitted PCA object.
title (String): Title of the plot.
size (tuple, optional): Shape of the image matrix. Defaults to (200, 150).
"""
# plot eigen images in a grid
n = pca.n_components_
print('Number of PC in ' + title + ':', n)
fig = plt.figure(figsize=(8, 8))
fig.suptitle('PCA Components of ' + title)
r = int(n**.5)
c = ceil(n/ r)
for i in range(n):
ax = fig.add_subplot(r, c, i + 1, xticks = [], yticks = [])
ax = fig.add_subplot(r, c, i + 1)
ax.imshow(pca.components_[i].reshape(size),
cmap='Greys_r')
ax.set_title("Variance " + "{0:.2f}%".format(pca.explained_variance_ratio_[i] * 100) )
plt.axis('off')
plt.tight_layout()
plt.show()
def eigenimages(full_mat, n_comp=0.7, size=(200, 150)):
    """Create a whitened scikit-learn PCA estimator and fit it to the images.

    Args:
        full_mat (np.ndarray): A vectorized array of images.
        n_comp (float, optional): Percentage of desired variability, passed
            to PCA as ``n_components``. Defaults to 0.7.
        size (tuple, optional): Shape of the image matrix. Not used by this
            function. Defaults to (200, 150).

    Returns:
        sklearn PCA object: Fitted PCA model.
    """
    # PCA.fit returns the estimator itself, so the fitted model can be
    # returned straight from the call.
    estimator = PCA(n_components=n_comp, whiten=True)
    return estimator.fit(full_mat)
This diff is collapsed.
......@@ -4,6 +4,7 @@ import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing import image
import numpy as np
from tqdm import tqdm
def load_sort_data(meta_filename = str, image_folder = str, output_folder = str):
......@@ -23,13 +24,13 @@ def load_sort_data(meta_filename = str, image_folder = str, output_folder = str)
labels = metadata['dx'].unique()
label_images = []
for i in labels:
for i in tqdm(labels):
if os.path.exists(dest_dir + str(i) + '/'):
shutil.rmtree(dest_dir + str(i) + '/')
os.mkdir(dest_dir + str(i) + '/')
sample = metadata[metadata['dx'] == i]['image_id']
label_images.extend(sample)
for id in label_images:
for id in tqdm(label_images):
shutil.copyfile((data_dir + image_folder + '/' + id + '.jpg'), (dest_dir + i + '/' + id + '.jpg'))
label_images = []
......@@ -39,7 +40,7 @@ def transform(path, size = (300, 225)):
# create a list of images
img_list = [fn for fn in os.listdir(path) if fn.endswith('.jpg')]
#iterating over each .jpg
for fn in img_list:
for fn in tqdm(img_list):
fp = path + '/' + fn
current_image = image.load_img(fp, target_size = size,
color_mode = 'grayscale')
......
......@@ -2,3 +2,4 @@ numpy>=1.21.5
pandas>=1.3.5
tensorflow>=2.8.0
matplotlib>=3.3.2
tqdm>=4.0
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment