# Select Git revision
# -
# Vanshika Mohan Bongade authored
# EDA.py 1.65 KiB
from sklearn.decomposition import PCA
from math import ceil
import numpy as np
import matplotlib.pyplot as plt
def find_mean_img(full_mat, title, size=(300, 225)):
    """Compute, display, and return the element-wise mean image of a set.

    Args:
        full_mat: Array of images that is averaged along axis 0.
            Presumably one flattened image per row (verify against the
            caller); the averaged result must hold exactly
            ``size[0] * size[1]`` values or the reshape raises
            ``ValueError``.
        title: Label interpolated into the plot title as
            ``Average {title}``.
        size (tuple, optional): Shape the averaged vector is reshaped to
            before plotting. Defaults to (300, 225), matching the default
            used by the other helpers in this file.

    Returns:
        numpy.ndarray: The mean image with shape ``size``.
    """
    # Collapse axis 0 so one averaged value remains per pixel position.
    mean_img = np.mean(full_mat, axis=0)
    # Fold the averaged values back into a 2-D image.
    mean_img = mean_img.reshape(size)
    # A fixed 0-255 display range keeps brightness comparable between
    # calls (assumes 8-bit pixel intensities -- TODO confirm).
    plt.imshow(mean_img, vmin=0, vmax=255, cmap='Greys_r')
    plt.title(f'Average {title}')
    plt.axis('off')
    plt.show()
    return mean_img
def eigenimages(full_mat, title, n_comp=0.7, size=(300, 225)):
    """Fit a whitened PCA to an image matrix and report its component count.

    Args:
        full_mat: Data matrix handed directly to ``PCA.fit``.
        title: Accepted for interface compatibility; not used here.
        n_comp (float, optional): Forwarded unchanged as PCA's
            ``n_components``. With the default fraction the intent is to
            keep enough components to describe that share of the
            variability in the class. Defaults to 0.7.
        size (tuple, optional): Accepted for interface compatibility; not
            used here. Defaults to (300, 225).

    Returns:
        PCA: The fitted estimator.
    """
    # ``fit`` hands back the estimator itself, so construction and
    # fitting can be chained into a single expression.
    fitted = PCA(whiten=True, n_components=n_comp).fit(full_mat)
    print('Number of PC: ', fitted.n_components_)
    return fitted
def plot_pca(pca, size=(300, 225)):
    """Display every component of a fitted PCA as an image in one grid.

    Args:
        pca: Fitted object exposing ``n_components_`` and ``components_``
            (for example the estimator returned by ``eigenimages``).
        size (tuple, optional): Shape each component vector is reshaped to
            before it is drawn. Defaults to (300, 225).

    Raises:
        ValueError: If ``pca`` reports fewer than one component, since no
            grid can be laid out.
    """
    n = pca.n_components_
    # Fail early with a clear message instead of the ZeroDivisionError the
    # grid arithmetic below would raise for n == 0.
    if n < 1:
        raise ValueError('pca has no components to plot')

    # Near-square grid: floor(sqrt(n)) rows and enough columns to fit all
    # n panels (rows * cols >= n).
    rows = int(n ** 0.5)
    cols = ceil(n / rows)

    fig = plt.figure(figsize=(8, 8))
    for i in range(n):
        ax = fig.add_subplot(rows, cols, i + 1, xticks=[], yticks=[])
        ax.imshow(pca.components_[i].reshape(size), cmap='Greys_r')
        # Hide the frame on every panel; a single pyplot-level
        # ``plt.axis('off')`` only affects the current (last) axes.
        ax.axis('off')
    plt.show()