diff --git a/DataIngestion.py b/DataIngestion.py
new file mode 100644
index 0000000000000000000000000000000000000000..7898617fc58d9af637f304fed1f8abaf47af6b2d
--- /dev/null
+++ b/DataIngestion.py
@@ -0,0 +1,150 @@
+"""Data ingestion helpers: load a file, validate its contents, split it."""
+
+import os
+
+import numpy as np
+import pandas as pd
+from sklearn.model_selection import train_test_split
+
+
+def _resolve_source(target):
+    """Return what a ``Load`` loader should read for ``target``.
+
+    The loaders take no ``self``, so ``Load(path).loadCVS()`` passes the
+    instance itself as the argument; its ``fileName`` is then the source.
+    Any other value is returned unchanged.
+    """
+    return getattr(target, "fileName", target)
+
+
+class Load:
+    """Read a CSV or JSON file into a pandas DataFrame.
+
+    Both ``Load.loadCVS(path)`` and ``Load(path).loadCVS()`` work, and the
+    same holds for ``loadJSON``.
+    """
+
+    def __init__(self, fileName):
+        # Source read when a loader is called on the instance.
+        self.fileName = fileName
+
+    def loadCVS(filename):
+        """Read CSV data from ``filename``.
+
+        Args:
+            filename: A path or buffer accepted by ``pandas.read_csv``, or a
+                ``Load`` instance, whose ``fileName`` is read.
+
+        Returns:
+            The parsed ``pandas.DataFrame``.
+        """
+        return pd.read_csv(_resolve_source(filename))
+
+    def loadJSON(filename):
+        """Read JSON data from ``filename``.
+
+        Args:
+            filename: A path or buffer accepted by ``pandas.read_json``, or a
+                ``Load`` instance, whose ``fileName`` is read.
+
+        Returns:
+            The parsed ``pandas.DataFrame``.
+        """
+        return pd.read_json(_resolve_source(filename))
+
+
+class Split:
+    """Split a DataFrame into training and test subsets."""
+
+    def __init__(self, fileName):
+        self.fileName = fileName
+
+    @staticmethod
+    def splitIntoTrainandTestData(df, test_size=0.05, random_state=0):
+        """Split ``df`` into train/test features and targets.
+
+        Every column except the last is a feature; the last is the target.
+
+        Args:
+            df: DataFrame to split.
+            test_size: Forwarded to ``train_test_split``; defaults to 0.05.
+            random_state: Forwarded to ``train_test_split``; defaults to 0.
+
+        Returns:
+            Tuple ``(X_train, X_test, y_train, y_test)``.
+        """
+        features = df.iloc[:, :-1]
+        target = df.iloc[:, -1]
+        X_train, X_test, y_train, y_test = train_test_split(
+            features, target, test_size=test_size, random_state=random_state
+        )
+        return X_train, X_test, y_train, y_test
+
+    @staticmethod
+    def allowUserToSelectSetsOfData(df):
+        """Return ``df`` unchanged; no selection logic is implemented."""
+        return df
+
+
+class Validation:
+    """Checks for an input file name and for the data loaded from it.
+
+    The checks are static methods, callable on the class or on an instance.
+    """
+
+    def __init__(self, fileName):
+        self.fileName = fileName
+
+    @staticmethod
+    def checkFileType(fileName):
+        """Return True when ``fileName`` has a ``.csv`` or ``.json`` extension.
+
+        The comparison ignores case.
+        """
+        extension = os.path.splitext(fileName)[1].lower()
+        return extension in (".csv", ".json")
+
+    @staticmethod
+    def checkNullValues(data):
+        """Return True when ``data`` has no missing values.
+
+        A cell is missing when pandas reports it as null (``None``, ``NaN``,
+        ``NaT``) or when it is an empty or whitespace-only string.
+
+        Args:
+            data: A DataFrame, or anything ``pandas.DataFrame`` can be built
+                from, such as a list of rows.
+        """
+        frame = pd.DataFrame(data)
+        if frame.isna().to_numpy().any():
+            return False
+        cells = frame.to_numpy(dtype=object).ravel()
+        return not any(isinstance(c, str) and not c.strip() for c in cells)
+
+    @staticmethod
+    def checkFeatureNames(df):
+        """Print each column name of ``df`` on its own line."""
+        for col_name in df.columns:
+            print(col_name)
+
+    @staticmethod
+    def checkInfValues(df):
+        """Return True when no numeric column of ``df`` contains infinity.
+
+        Non-numeric columns are skipped: ``numpy.isinf`` raises ``TypeError``
+        on object (e.g. string) data.
+        """
+        numeric = df.select_dtypes(include="number")
+        return not np.isinf(numeric).to_numpy().any()
+
+
+class DataIngestion:
+    """Hold the load, split and validation components together.
+
+    The constructor only stores the three objects it is given.
+    """
+
+    def __init__(self, load, split, validation):
+        self.load = load
+        self.split = split
+        self.validation = validation