Commit 2db31def authored by pjm363 (Philip Monaco)

Merge branch '10-add-faster-processing-of-transformation-of-dataset' into 'main'

Resolve "Add Faster Processing of Transformation of Dataset"

Closes #10

See merge request !5
parents f5a1060e 58eaffa3
EDA.py 0 → 100644
from sklearn.decomposition import PCA
from math import ceil
import numpy as np
import matplotlib.pyplot as plt
def find_mean_img(full_mat, title):
    """Plot and return the pixel-wise mean image of a class.

    Args:
        full_mat (np.ndarray): 2D array with one flattened grayscale image per row.
        title (str): Class name used in the plot title.

    Returns:
        np.ndarray: The mean image reshaped to (300, 225).
    """
    # calculate the average of every pixel across all images
    mean_img = np.mean(full_mat, axis=0)
    # reshape the flat vector back into an image matrix
    mean_img = mean_img.reshape((300, 225))
    plt.imshow(mean_img, vmin=0, vmax=255, cmap='Greys_r')
    plt.title(f'Average {title}')
    plt.axis('off')
    plt.show()
    return mean_img
def eigenimages(full_mat, title, n_comp=0.7, size=(300, 225)):
    """Fit PCA on a class of flattened images.

    Args:
        full_mat (np.ndarray): 2D array with one flattened grayscale image per row.
        title (str): Class name (currently unused in the fit).
        n_comp (float, optional): Fraction of variance the kept components must explain. Defaults to 0.7.
        size (tuple, optional): Image shape, kept for symmetry with plot_pca. Defaults to (300, 225).

    Returns:
        sklearn.decomposition.PCA: The fitted PCA object.
    """
    # fit PCA so that the kept components describe n_comp of the variability in the class
    pca = PCA(n_components=n_comp, whiten=True)
    pca.fit(full_mat)
    print('Number of PC: ', pca.n_components_)
    return pca
def plot_pca(pca, size=(300, 225)):
    """Plot the eigenimages (principal components) of a fitted PCA in a grid.

    Args:
        pca (sklearn.decomposition.PCA): Fitted PCA object.
        size (tuple, optional): Shape to reshape each component into. Defaults to (300, 225).
    """
    # arrange the eigenimages in an r x c grid
    n = pca.n_components_
    fig = plt.figure(figsize=(8, 8))
    r = int(n ** .5)
    c = ceil(n / r)
    for i in range(n):
        ax = fig.add_subplot(r, c, i + 1, xticks=[], yticks=[])
        ax.imshow(pca.components_[i].reshape(size), cmap='Greys_r')
    plt.axis('off')
    plt.show()
\ No newline at end of file
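A minimal usage sketch for the helpers above, assuming `full_mat` is a 2D array with one flattened 300x225 grayscale image per row (for example the output of the `transform` helper in the preprocessing module below); the class name 'nv' and the folder path are placeholders, not taken from the repository:

# Hypothetical example: explore one class of images with the EDA helpers.
# full_mat = transform('./data/nv')        # placeholder path; builds the image matrix
mean_img = find_mean_img(full_mat, 'nv')   # plot and return the average image of the class
pca = eigenimages(full_mat, 'nv')          # fit PCA keeping ~70% of the variance
plot_pca(pca)                              # show the eigenimages in a grid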
import os
import cv2  # vision task package (opencv-python)
import shutil
import pandas as pd
import glob
import tensorflow as tf
from tensorflow.keras.preprocessing import image
import numpy as np
def load_transform_images(folder):
    """Load every .jpg in ./data/<folder> as a grayscale image.

    Args:
        folder (str): Sub-folder of ./data/ to read images from.

    Returns:
        list: Grayscale images as NumPy arrays.
    """
    images = [cv2.imread(file, flags=cv2.IMREAD_GRAYSCALE)
              for file in glob.glob("./data/" + folder + "/*.jpg")]
    return images

def load_sort_data(meta_filename=str, image_folder=str, output_folder=str):
    """Sort the raw images into one sub-folder per diagnosis label ('dx').

    Args:
        meta_filename (str, optional): Name of the metadata CSV inside ./data/. Defaults to str.
        image_folder (str, optional): Folder holding the raw .jpg files. Defaults to str.
        output_folder (str, optional): Destination folder for the sorted copies. Defaults to str.

    Returns:
        tuple: The metadata DataFrame and the destination directory path.
    """
    data_dir = os.getcwd() + "/data/"
    dest_dir = data_dir + output_folder
    metadata = pd.read_csv(data_dir + '/' + meta_filename)
    labels = metadata['dx'].unique()
    label_images = []
    for i in labels:
        # recreate an empty folder for each label
        if os.path.exists(dest_dir + str(i) + '/'):
            shutil.rmtree(dest_dir + str(i) + '/')
        os.mkdir(dest_dir + str(i) + '/')
        # copy every image with this label into its folder
        sample = metadata[metadata['dx'] == i]['image_id']
        label_images.extend(sample)
        for id in label_images:
            shutil.copyfile((data_dir + image_folder + '/' + id + '.jpg'),
                            (dest_dir + i + '/' + id + '.jpg'))
        label_images = []
    return metadata, dest_dir

def transform(data):
    """Scale a list of image arrays to [-1, 1] and flatten each into a row of a DataFrame."""
    df = pd.DataFrame()
    for i, img in enumerate(data):
        scale = (img.astype(np.float32) - 127.5) / 127.5
        scale = scale.reshape(1, -1)
        # (DataFrame.append is deprecated in recent pandas versions)
        df = df.append(pd.Series(scale[0]), ignore_index=True)
    return df
def transform(path, size=(300, 225)):
    """Load every .jpg under `path`, resize to `size`, and stack the flattened
    grayscale images into a single 2D NumPy array (one row per image)."""
    # create a list of image file names
    img_list = [fn for fn in os.listdir(path) if fn.endswith('.jpg')]
    # iterate over each .jpg
    for fn in img_list:
        fp = path + '/' + fn
        current_image = image.load_img(fp, target_size=size,
                                       color_mode='grayscale')
        # convert the image to a matrix
        img_ts = image.img_to_array(current_image)
        # turn that into a vector / 1D array
        img_ts = [img_ts.ravel()]
        try:
            # concatenate onto the matrix built so far
            full_mat = np.concatenate((full_mat, img_ts))
        except UnboundLocalError:
            # if not assigned yet, start the matrix with the first image
            full_mat = img_ts
    return full_mat
# def batch_data(data):
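A minimal end-to-end sketch of the preprocessing flow these functions imply; the metadata filename, folder names, and the class 'nv' below are illustrative placeholders, not names confirmed by the repository:

# Hypothetical example: sort images by label, then build the flattened matrix for EDA.
metadata, dest_dir = load_sort_data('metadata.csv', 'images', 'sorted/')
full_mat = transform(dest_dir + 'nv', size=(300, 225))  # one class folder at a time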