Skip to content
Snippets Groups Projects

Fix EDA Errors

4 files
+ 1202
42
Compare changes
  • Side-by-side
  • Inline

Files

+ 52
38
@@ -2,60 +2,74 @@ from sklearn.decomposition import PCA
from math import ceil
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
def find_mean_img(full_mat, title):
"""[summary]
def find_mean_img(full_mat):
"""Calculates and plots the mean of each pixel in an image matrix.
Args:
full_mat ([type]): [description]
title ([type]): [description]
full_mat (np.ndarray): Vectorized array of the image matrix.
title (String): Name of the title for the plot.
Returns:
[type]: [description]
matplotlib.plt: A plot of the the mean pixels for each disease category.
"""
cols = 4
rows = len(full_mat)//cols + 1
fig = plt.figure(figsize = (12,6))
for i, mat in zip(range(0,len(full_mat)),full_mat):
# calculate the average
mean_img = np.mean(full_mat, axis = 0)
mean_img = np.mean(full_mat[mat], axis = 0)
# reshape it back to a matrix
mean_img = mean_img.reshape((300,225))
plt.imshow(mean_img, vmin=0, vmax=255, cmap='Greys_r')
plt.title(f'Average {title}')
plt.axis('off')
mean_img = mean_img.reshape((200, 150))
ax = fig.add_subplot(rows, cols,i+1)
ax.imshow(mean_img, vmin=0, vmax=255, cmap='Greys_r')
ax.set_title('Average ' + mat)
plt.axis('off')
plt.tight_layout()
def plot_pca(pca, title, size = (200, 150)):
"""Plots each decomposed PCA image and labels the amount of variability for each image.
Args:
pca (sklearn PCA object): A fitted PCA object.
title (String): Title of the plot.
size (tuple, optional): Shape of the image matrix. Defaults to (300,225).
"""
# plot eigen images in a grid
n = pca.n_components_
print('Number of PC in ' + title + ':', n)
fig = plt.figure(figsize=(8, 8))
fig.suptitle('PCA Components of ' + title)
r = int(n**.5)
c = ceil(n/ r)
for i in range(n):
ax = fig.add_subplot(r, c, i + 1)
ax.imshow(pca.components_[i].reshape(size),
cmap='Greys_r')
ax.set_title("Variance " + "{0:.2f}%".format(pca.explained_variance_ratio_[i] * 100) )
plt.axis('off')
plt.tight_layout()
plt.show()
return mean_img
def eigenimages(full_mat, title, n_comp = 0.7, size = (300,225)):
"""[summary]
def eigenimages(full_mat,n_comp = 0.7, size = (200, 150)):
"""Creates creates and fits a PCA estimator from sklearn.
Args:
full_mat ([type]): [description]
title ([type]): [description]
n_comp (float, optional): [description]. Defaults to 0.7.
size (tuple, optional): [description]. Defaults to (300,225).
full_mat (np.ndarray): A vectorized array of images.
n_comp (float, optional): Percentage of desired variability. Defaults to 0.7.
size (tuple, optional): Shape of the image matrix. Defaults to (300,225).
Returns:
[type]: [description]
sklearn PCA object: Fitted PCA model.
"""
# fit PCA to describe n_comp * variability in the class
pca = PCA(n_components = n_comp, whiten = True)
pca.fit(full_mat)
print('Number of PC: ', pca.n_components_)
return pca
def plot_pca(pca, size = (300,225)):
"""[summary]
Args:
pca ([type]): [description]
size (tuple, optional): [description]. Defaults to (300,225).
"""
# plot eigenimages in a grid
n = pca.n_components_
fig = plt.figure(figsize=(8, 8))
r = int(n**.5)
c = ceil(n/ r)
for i in range(n):
ax = fig.add_subplot(r, c, i + 1, xticks = [], yticks = [])
ax.imshow(pca.components_[i].reshape(size),
cmap='Greys_r')
plt.axis('off')
plt.show()
\ No newline at end of file
return pca
Loading