Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
H
HVM Image Clf
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
pjm363 (Philip Monaco)
HVM Image Clf
Merge requests
!8
Fix EDA Errors
Code
Review changes
Check out branch
Download
Patches
Plain diff
Expand sidebar
Merged
Fix EDA Errors
11-installation-instructions
into
main
Overview
0
Commits
1
Pipelines
0
Changes
4
Merged
Fix EDA Errors
pjm363 (Philip Monaco)
requested to merge
11-installation-instructions
into
main
Feb 17, 2022
Overview
0
Commits
1
Pipelines
0
Changes
4
Closes
#11 (closed)
0
0
Merge request reports
Compare
main
main (base)
and
latest version
latest version
497a8fa7
1 commit,
Feb 17, 2022
4 files
+
1202
−
42
Inline
Compare changes
Side-by-side
Inline
Show whitespace changes
Show one file at a time
Files
4
EDA.py
+
52
−
38
View file @ 497a8fa7
Edit in single-file editor
Open in Web IDE
Show full file
@@ -2,60 +2,74 @@ from sklearn.decomposition import PCA
from
math
import
ceil
import
numpy
as
np
import
matplotlib.pyplot
as
plt
from
tqdm
import
tqdm
def
find_mean_img
(
full_mat
,
title
):
"""
[summary]
def
find_mean_img
(
full_mat
):
"""
Calculates and plots the mean of each pixel in an image matrix.
Args:
full_mat (
[type]): [description]
title (
[type]): [description]
full_mat (
np.ndarray): Vectorized array of the image matrix.
title (
String): Name of the title for the plot.
Returns:
[type]: [description]
matplotlib.plt: A plot of the mean pixels for each disease category.
"""
cols
=
4
rows
=
len
(
full_mat
)
//
cols
+
1
fig
=
plt
.
figure
(
figsize
=
(
12
,
6
))
for
i
,
mat
in
zip
(
range
(
0
,
len
(
full_mat
)),
full_mat
):
# calculate the average
mean_img
=
np
.
mean
(
full_mat
,
axis
=
0
)
mean_img
=
np
.
mean
(
full_mat
[
mat
]
,
axis
=
0
)
# reshape it back to a matrix
mean_img
=
mean_img
.
reshape
((
300
,
225
))
plt
.
imshow
(
mean_img
,
vmin
=
0
,
vmax
=
255
,
cmap
=
'
Greys_r
'
)
plt
.
title
(
f
'
Average
{
title
}
'
)
plt
.
axis
(
'
off
'
)
mean_img
=
mean_img
.
reshape
((
200
,
150
))
ax
=
fig
.
add_subplot
(
rows
,
cols
,
i
+
1
)
ax
.
imshow
(
mean_img
,
vmin
=
0
,
vmax
=
255
,
cmap
=
'
Greys_r
'
)
ax
.
set_title
(
'
Average
'
+
mat
)
plt
.
axis
(
'
off
'
)
plt
.
tight_layout
()
def
plot_pca
(
pca
,
title
,
size
=
(
200
,
150
)):
"""
Plots each decomposed PCA image and labels the amount of variability for each image.
Args:
pca (sklearn PCA object): A fitted PCA object.
title (String): Title of the plot.
size (tuple, optional): Shape of the image matrix. Defaults to (200,150).
"""
# plot eigen images in a grid
n
=
pca
.
n_components_
print
(
'
Number of PC in
'
+
title
+
'
:
'
,
n
)
fig
=
plt
.
figure
(
figsize
=
(
8
,
8
))
fig
.
suptitle
(
'
PCA Components of
'
+
title
)
r
=
int
(
n
**
.
5
)
c
=
ceil
(
n
/
r
)
for
i
in
range
(
n
):
ax
=
fig
.
add_subplot
(
r
,
c
,
i
+
1
)
ax
.
imshow
(
pca
.
components_
[
i
].
reshape
(
size
),
cmap
=
'
Greys_r
'
)
ax
.
set_title
(
"
Variance
"
+
"
{0:.2f}%
"
.
format
(
pca
.
explained_variance_ratio_
[
i
]
*
100
)
)
plt
.
axis
(
'
off
'
)
plt
.
tight_layout
()
plt
.
show
()
return
mean_img
def
eigenimages
(
full_mat
,
title
,
n_comp
=
0.7
,
size
=
(
300
,
225
)):
"""
[summary]
def
eigenimages
(
full_mat
,
n_comp
=
0.7
,
size
=
(
200
,
150
)):
"""
Creates and fits a PCA estimator from sklearn.
Args:
full_mat ([type]): [description]
title ([type]): [description]
n_comp (float, optional): [description]. Defaults to 0.7.
size (tuple, optional): [description]. Defaults to (300,225).
full_mat (np.ndarray): A vectorized array of images.
n_comp (float, optional): Percentage of desired variability. Defaults to 0.7.
size (tuple, optional): Shape of the image matrix. Defaults to (200,150).
Returns:
[type]: [description]
sklearn PCA object: Fitted PCA model.
"""
# fit PCA to describe n_comp * variability in the class
pca
=
PCA
(
n_components
=
n_comp
,
whiten
=
True
)
pca
.
fit
(
full_mat
)
print
(
'
Number of PC:
'
,
pca
.
n_components_
)
return
pca
def
plot_pca
(
pca
,
size
=
(
300
,
225
)):
"""
[summary]
Args:
pca ([type]): [description]
size (tuple, optional): [description]. Defaults to (300,225).
"""
# plot eigenimages in a grid
n
=
pca
.
n_components_
fig
=
plt
.
figure
(
figsize
=
(
8
,
8
))
r
=
int
(
n
**
.
5
)
c
=
ceil
(
n
/
r
)
for
i
in
range
(
n
):
ax
=
fig
.
add_subplot
(
r
,
c
,
i
+
1
,
xticks
=
[],
yticks
=
[])
ax
.
imshow
(
pca
.
components_
[
i
].
reshape
(
size
),
cmap
=
'
Greys_r
'
)
plt
.
axis
(
'
off
'
)
plt
.
show
()
\ No newline at end of file
return
pca
Loading