from sklearn.decomposition import PCA
from math import ceil
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
def find_mean_img(full_mat, size=(200, 150)):
    """Plot the pixel-wise mean image of every category in a grid.

    Args:
        full_mat (Mapping): Maps a category label to an array of vectorized
            images. The mean is taken over axis 0, so presumably each row
            is one flattened image.
        size (tuple, optional): Shape each mean vector is reshaped to
            before it is displayed. Defaults to (200, 150).

    Returns:
        None. The mean images are drawn on a new matplotlib figure, four
        per row, on a fixed 0-255 grey scale.

    Raises:
        ValueError: If a mean vector cannot be reshaped to ``size``.
    """
    cols = 4
    # Average and reshape first so that a shape mismatch fails before any
    # figure has been created.
    mean_imgs = [
        (label, np.mean(full_mat[label], axis=0).reshape(size))
        for label in full_mat
    ]
    # Exactly as many rows as needed; ``n // cols + 1`` would add an empty
    # row whenever the number of categories is a multiple of ``cols``.
    rows = ceil(len(mean_imgs) / cols)
    fig = plt.figure(figsize=(12, 6))
    for position, (label, mean_img) in enumerate(mean_imgs, start=1):
        ax = fig.add_subplot(rows, cols, position)
        ax.imshow(mean_img, vmin=0, vmax=255, cmap='Greys_r')
        # ``format`` keeps the title working for non-string labels too.
        ax.set_title('Average {}'.format(label))
        ax.axis('off')
    # The layout only needs to be computed once, after all subplots exist.
    plt.tight_layout()
def plot_pca(pca, title, size=(200, 150)):
    """Plot every principal component of a fitted PCA as an image.

    Each subplot is titled with the percentage of variance explained by
    that component. The number of components is also printed.

    Args:
        pca (sklearn PCA object): A fitted PCA object; ``n_components_``,
            ``components_`` and ``explained_variance_ratio_`` are read.
        title (String): Name used in the printed message and the figure
            title.
        size (tuple, optional): Shape each component vector is reshaped to
            before display. Defaults to (200, 150).

    Returns:
        None. The figure is displayed with ``plt.show()``.

    Raises:
        ValueError: If a component cannot be reshaped to ``size``.
    """
    n = pca.n_components_
    print('Number of PC in ' + title + ':', n)
    # Reshape every component up front so that a ``size`` mismatch raises
    # before an empty figure has been created.
    eigen_imgs = [pca.components_[i].reshape(size) for i in range(n)]

    fig = plt.figure(figsize=(8, 8))
    fig.suptitle('PCA Components of ' + title)
    # Near-square grid: floor(sqrt(n)) rows and enough columns to hold all
    # n images. At least one row, so n == 0 does not divide by zero.
    r = max(1, int(n ** .5))
    c = ceil(n / r)
    for i, eigen_img in enumerate(eigen_imgs):
        ax = fig.add_subplot(r, c, i + 1)
        ax.imshow(eigen_img, cmap='Greys_r')
        ax.set_title("Variance " + "{0:.2f}%".format(pca.explained_variance_ratio_[i] * 100))
        ax.axis('off')
    plt.tight_layout()
    plt.show()
def eigenimages(full_mat, n_comp=0.7, size=(200, 150)):
    """Create and fit a whitening PCA estimator from sklearn.

    Args:
        full_mat (np.ndarray): A vectorized array of images, passed
            directly to ``PCA.fit``.
        n_comp (float, optional): Forwarded to ``PCA`` as ``n_components``,
            i.e. the desired share of variability. Defaults to 0.7.
        size (tuple, optional): Shape of the image matrix. Accepted for
            interface compatibility but not used by this function.
            Defaults to (200, 150).

    Returns:
        sklearn PCA object: Fitted PCA model.
    """
    # ``fit`` returns the estimator itself, so the fitted model can be
    # returned straight from the call chain.
    return PCA(whiten=True, n_components=n_comp).fit(full_mat)