Skip to content
Snippets Groups Projects
Commit 93601374 authored by ibe23's avatar ibe23
Browse files

75% Finished. Need to tweak classes and add comments.

parent 5059977d
No related branches found
No related tags found
No related merge requests found
Pipeline #1409 passed
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
class Load:
    """Read tabular data files into pandas DataFrames.

    The loaders are static methods, so they can be called on the class
    (``Load.loadCVS(path)``) as well as on an instance.
    """

    def __init__(self, fileName):
        """Store the path this loader was created for.

        Args:
            fileName: Path of the data file; kept as ``self.fileName``.
        """
        self.fileName = fileName

    @staticmethod
    def loadCVS(filename):
        """Load a CSV file into a DataFrame.

        The method keeps its original spelling ("CVS") so existing callers
        continue to work.

        Args:
            filename: Path of the CSV file to read.

        Returns:
            The DataFrame returned by ``pandas.read_csv``.
        """
        # Read the path supplied by the caller rather than a fixed
        # placeholder file name.
        return pd.read_csv(filename)

    @staticmethod
    def loadJSON(filename):
        """Load a JSON file into a DataFrame.

        Args:
            filename: Path of the JSON file to read.

        Returns:
            The DataFrame returned by ``pandas.read_json``.
        """
        # Read the path supplied by the caller rather than a fixed
        # placeholder file name.
        return pd.read_json(filename)
class Split:
    """Helpers for dividing a DataFrame into training and test subsets.

    The helpers are static methods, so they can be called on the class or
    on an instance.
    """

    def __init__(self, fileName):
        """Store the path of the data file this splitter is associated with.

        Args:
            fileName: Path of the data file; kept as ``self.fileName``.
        """
        self.fileName = fileName

    @staticmethod
    def splitIntoTrainandTestData(df, test_size=0.05, random_state=0):
        """Split a DataFrame into train/test features and targets.

        Every column except the last is used as the feature matrix and the
        last column is used as the target.

        Args:
            df: DataFrame whose last column is the target.
            test_size: Forwarded to ``train_test_split``; defaults to the
                previously hard-coded 0.05.
            random_state: Forwarded to ``train_test_split``; defaults to the
                previously hard-coded 0.

        Returns:
            The tuple ``(X_train, X_test, y_train, y_test)`` produced by
            ``train_test_split``.
        """
        features = df.iloc[:, :-1]
        target = df.iloc[:, -1]
        return train_test_split(
            features,
            target,
            test_size=test_size,
            random_state=random_state,
        )

    @staticmethod
    def allowUserToSelectSetsOfData(df):
        """Return ``df`` unchanged.

        No selection logic is implemented yet; the input is passed through.

        Args:
            df: The data to (eventually) select from.

        Returns:
            The same object that was passed in.
        """
        return df
class Validation:
    """Sanity checks for input files and the data loaded from them.

    The checks are static methods, so they can be called on the class or on
    an instance. The boolean checks return ``True`` when the input passes and
    ``False`` when a problem is found.
    """

    # File extensions accepted by checkFileType (compared in lower case).
    _SUPPORTED_EXTENSIONS = (".csv", ".json")

    def __init__(self, fileName):
        """Store the path of the file being validated.

        Args:
            fileName: Path of the data file; kept as ``self.fileName``.
        """
        self.fileName = fileName

    @staticmethod
    def checkFileType(fileName):
        """Report whether ``fileName`` has a supported extension.

        Args:
            fileName: Path or file name to inspect.

        Returns:
            ``True`` for ``.csv`` or ``.json`` (case-insensitive), otherwise
            ``False``.
        """
        extension = os.path.splitext(fileName)[1]
        return extension.lower() in Validation._SUPPORTED_EXTENSIONS

    @staticmethod
    def _isNullCell(cell):
        """Return True when a single cell counts as a missing value."""
        if isinstance(cell, str):
            # Empty or whitespace-only text (including " ") is treated as
            # missing.
            return not cell.strip()
        # Only scalars are passed to pd.isna so the result is a single bool.
        return bool(pd.api.types.is_scalar(cell) and pd.isna(cell))

    @staticmethod
    def checkNullValues(data):
        """Report whether ``data`` is free of missing values.

        A cell is considered missing when it is ``None``/NaN/NA or a string
        that is empty or contains only whitespace.

        Args:
            data: A DataFrame, or an iterable of rows where each row is an
                iterable of cells. Rows may have different lengths.

        Returns:
            ``False`` if any cell is missing, otherwise ``True``.
        """
        if isinstance(data, pd.DataFrame):
            rows = data.itertuples(index=False, name=None)
        else:
            rows = data
        # Each row is walked over its own length, so non-square input is
        # checked completely.
        for row in rows:
            for cell in row:
                if Validation._isNullCell(cell):
                    return False
        return True

    @staticmethod
    def checkFeatureNames(df):
        """Print the name of every column in ``df``, one per line.

        Args:
            df: DataFrame whose column names are printed.
        """
        for col_name in df.columns:
            print(col_name)

    @staticmethod
    def checkInfValues(df):
        """Report whether ``df`` is free of infinite values.

        Only numeric columns are inspected; non-numeric columns cannot hold
        infinities and would make ``np.isinf`` raise.

        Args:
            df: DataFrame to inspect.

        Returns:
            ``False`` if any numeric cell is ``inf`` or ``-inf``, otherwise
            ``True``.
        """
        numeric = df.select_dtypes(include=[np.number])
        values = numeric.to_numpy(dtype=float, na_value=np.nan)
        return not np.isinf(values).any()
class DataIngestion:
    """Bundle the load, split and validation collaborators in one object."""

    def __init__(self, load, split, validation):
        """Keep references to the three pipeline components.

        Args:
            load: Object stored as ``self.load``.
            split: Object stored as ``self.split``.
            validation: Object stored as ``self.validation``.
        """
        self.load, self.split, self.validation = load, split, validation
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment