pjm363 (Philip Monaco) / WHY Senior Project / Commits / 56f9810a

Commit 56f9810a, authored 3 years ago by Abdullah Shah

final commit

Parent: 18e9dbdc
No related branches, tags, or merge requests found.
Showing 3 changed files with 8 additions and 211 deletions:

src/config.py     +2, -3
src/data_vis.py   +0, -65
src/modelling.py  +6, -143
src/config.py (+2, -3)
@@ -3,10 +3,9 @@ This is a docstring for config.
"""
from bokeh.models import Select, Slider, Row, Column, Dropdown, Paragraph
from bokeh.layouts import column, row
# from src.data_vis import *
# x = 0
# y = 0
X_train = 0
X_test = 0
y_train = 0
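Note on the hunk above: the zero-valued placeholders make config a shared-state module; modelling.py later reads config.X_train and friends after other code rebinds them. A minimal single-file sketch of that pattern, where types.SimpleNamespace stands in for the config module and load_data is a hypothetical helper, not code from this repo:

import types
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# placeholders, rebound at runtime just like config.X_train = 0 above
config = types.SimpleNamespace(X_train=0, X_test=0, y_train=0, y_test=0)

def load_data(n_samples=200):
    # generate a toy binary dataset and publish the split through config
    X, y = make_moons(n_samples=n_samples, noise=0.2, random_state=7)
    (config.X_train, config.X_test,
     config.y_train, config.y_test) = train_test_split(
        X, y, test_size=0.30, random_state=7)

load_data()
print(config.X_train.shape, config.X_test.shape)  # (140, 2) (60, 2)

Every importer of the config module sees the rebinding, which is why the callbacks below can pass config.X_train straight into the models.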
src/data_vis.py (+0, -65)
@@ -103,8 +103,6 @@ def decisionTreemodel(X_train, X_test, Y_train, Y_test, record_value):
    # grabbing unique class names
    class_names = model_logreg.classes_
    # grabbing specific row for model to use to make prediction
    #ex_specie = X_test[3, ]
    # lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
    # LimeTabularExplainer is a function that explains predictions of tabular (matrix) data.
@@ -135,66 +133,3 @@ def decisionTreemodel(X_train, X_test, Y_train, Y_test, record_value):
    p.vbar(x=Labels1, top=featureNums, width=0.5, color="#fc8d59")
    return p
'''
def decisionTreemodel():
    """
    This is a docstring for decisionTreeVisuals
    Returns:
        _type_: _description_
    """
    # loading data
    df = pd.read_csv("src/Iris.csv")
    # data split to features matrix and target vector
    # df stands for the dataframe
    # feature matrix
    X = df.iloc[:, 0:-1]
    # target vector
    Y = df.iloc[:, -1:]
    # splitting data into training and test sets
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=7)
    # instantiating model
    model_logreg = DecisionTreeClassifier(max_depth=8, random_state=0)
    # fit model on training set
    model_logreg.fit(X_train, Y_train)
    # grabbing unique class names
    class_names = model_logreg.classes_
    # grabbing specific row for model to use to make prediction
    ex_specie = np.array(X_test.iloc[3]).reshape(1, -1)
    # lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
    # LimeTabularExplainer is a function that explains predictions of tabular (matrix) data.
    explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names=X_train.columns,
                                                       class_names=class_names, discretize_continuous=True)
    # grab count of columns from feature matrix
    featureCount = len(X.columns)
    # explain_instance is a function that generates explanations for a prediction after using LimeTabularExplainer.
    exp = explainer.explain_instance(X_test.iloc[3], model_logreg.predict_proba, num_features=featureCount, top_labels=1)
    # converting explanations to a list
    tupleTest = exp.as_list()
    # converting tuples to lists
    NewList = [list(x) for x in tupleTest]
    # converting all elements in list to strings
    doubleStrList = [[str(s) for s in sublist] for sublist in NewList]
    # print(doubleStrList[0][0])
    return doubleStrList[0][0]
#df = pd.read_csv("src/Iris.csv")
decisionTreemodel()
'''
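Context for the block deleted above: it walked the standard LIME tabular flow (fit a DecisionTreeClassifier, explain one held-out row, turn exp.as_list() into condition/weight pairs, chart them with bokeh). A self-contained sketch of that flow on the bundled iris data; this is an illustrative reconstruction under assumed imports, not the project's exact code:

import lime.lime_tabular
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from bokeh.plotting import figure, show

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.30, random_state=7)

# fit the classifier being explained
model = DecisionTreeClassifier(max_depth=8, random_state=0)
model.fit(X_train, y_train)

# LimeTabularExplainer perturbs rows of tabular data around one instance
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
    discretize_continuous=True)

# explain the prediction for one held-out row
exp = explainer.explain_instance(
    X_test[3], model.predict_proba,
    num_features=X_train.shape[1], top_labels=1)

# as_list() yields (condition, weight) pairs, e.g. ('petal width (cm) > 1.80', 0.4)
pairs = exp.as_list(label=exp.available_labels()[0])
labels = [condition for condition, weight in pairs]
weights = [weight for condition, weight in pairs]

# bar chart of per-feature LIME weights, as the deleted code did with vbar
p = figure(x_range=labels, height=400, title="Feature Importance Scores")
p.vbar(x=labels, top=weights, width=0.5, color="#fc8d59")
show(p)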
src/modelling.py (+6, -143)
@@ -5,10 +5,7 @@ import config as config
import pandas as pd
from data_vis import vis_synthetic
from synthetic import *
# import numpy as np
# import math
# import matplotlib.pyplot as plt
# import seaborn as sns
# sklearn ML libraries/modules
# from sklearn import preprocessing
@@ -21,13 +18,6 @@ from bokeh.models import Select, Slider, Row, Column
from data_vis import decisionTreemodel, logisticRegressionmodel
from bokeh.io import curdoc
# bokeh libraries/modules
# from bokeh.io import output_file, show
# from bokeh.layouts import widgetbox
# from bokeh.models.Column import widgetbox
# from bokeh.models.widgets import Div
# from bokeh.models.widgets import Paragraph
# from bokeh.models.widgets import PreText
# lime modules
from lime import submodular_pick
@@ -58,10 +48,6 @@ class Models:
        return DecisionTreeClassifier()
    # def fit_to_model(self):
    #     here in the fit method you'll need to use the config.X_train, y_test

def model_callback(attrname, old, new):
    """
    Callback function that updates models with appropriate data and updates on
    slider/selector changes.
@@ -71,13 +57,10 @@ def model_callback(attrname, old, new):
        old (_type_): _description_
        new (_type_): _description_
    """
    #controls parameters when solver updates, add more elif statements as needed.
    # if config.models_select.value == "Logistic Regression":
    if config.models_select.value == "Logistic Regression" and (config.solver_select.value == "newton-cg" or config.solver_select.value == "sag" or config.solver_select.value == "lbfgs") and (config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
        # solver = "l2"
        # config.normalization_select.value = "l2"
        inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
        s = int(config.index_slider.value)
        testModel = logisticRegressionmodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
@@ -88,14 +71,11 @@ def model_callback(attrname, old, new):
        curdoc().clear()
        curdoc().add_root(Row(inputs, b, testModel))
    # multilabel classification not working
-    elif config.models_select.value == "Decision Tree" and (config.dataset_select.value == "Make Classification" or config.dataset_select.value == "Blobs" or config.dataset_select.value == "Make Classification" or config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
+    elif config.models_select.value == "Decision Tree" and (config.dataset_select.value == "Make Classification" or config.dataset_select.value == "Blobs" or config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
        s = int(config.index_slider.value)
        inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
        testModel = decisionTreemodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
        #normalization_selected = "l1", "l2"
        #config.normalization_select.value = list(normalization_selected.values)
        b = vis_synthetic()
        curdoc().clear()
@@ -105,37 +85,13 @@ def model_callback(attrname, old, new):
        s = int(config.index_slider.value)
        inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select_liblinear, config.index_slider)
        testModel = logisticRegressionmodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
        #normalization_selected = "l1", "l2"
        #config.normalization_select.value = list(normalization_selected.values)
        b = vis_synthetic()
        curdoc().clear()
        curdoc().add_root(Row(inputs, b, testModel))
#if config.models_select.value == "Logistic Regression":
'''
if config.model_names.value == 'Decision Tree':
    inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
    s = int(config.index_slider.value)
    testModel = decisionTreemodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
    b = vis_synthetic()
    curdoc().clear()
    curdoc().add_root(Row(inputs, b, testModel))
'''
#control statements for selecting the appropriate dataset needs to:
# 1. update the dataset (this will automatically update the layout)
#have to figure out how to update the dataset from here.
#probably need to update the selector to only show binary datasets. So make 2 separate dataset selectors: binary and multiclass
# add more call backs for each selector
#config.models_select.on_change('value', model_callback)
config.models_select.on_change('value', model_callback)
config.dataset_select.on_change('value', model_callback)
@@ -146,96 +102,3 @@ config.normalization_select.on_change('value', model_callback)
config.index_slider.on_change('value', model_callback)
#config.index_slider.on_change('value', model_callback)
'''
def decisionTreemodel(X, Y, indexValue):
    """
    This is a docstring for decisionTreeVisuals
    Returns:
        _type_: _description_
    """
    # loading data
    #df = pd.read_csv("Iris.csv")
    # data split to features matrix and target vector
    # df stands for the dataframe
    # feature matrix
    #X = df.iloc[:,0:-1]
    # target vector
    #Y = df.iloc[:,-1:]
    # splitting data into training and test sets
    #not needed
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=7)
    # instantiating model
    model_logreg = DecisionTreeClassifier(max_depth=8, random_state=0)
    # fit model on training set
    model_logreg.fit(X_train, Y_train)
    # grabbing unique class names
    class_names = model_logreg.classes_
    # grabbing specific row for model to use to make prediction
    #ex_specie = X_test[3, ]
    # lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
    # LimeTabularExplainer is a function that explains predictions of tabular (matrix) data.
    explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=Y,
                                                       class_names=class_names, discretize_continuous=True)
    # grab count of columns from feature matrix
    featureCount = len(X)
    # explain_instance is a function that generates explanations for a prediction after using LimeTabularExplainer.
    exp = explainer.explain_instance(X_test[indexValue], model_logreg.predict_proba, num_features=featureCount)
    # converting explanations to a list
    tupleTest = exp.as_list()
    # converting tuples to lists
    NewList = [list(x) for x in tupleTest]
    # converting all elements in list to strings
    doubleStrList = [[str(s) for s in sublist] for sublist in NewList]
    doublestring = ",\n".join([''.join([str(c) for c in lst]) for lst in NewList])
    # separating comparisons from feature scores
    Labels1 = [item[0] for item in NewList]
    # outlst = "".join([''.join([str(c) for c in lst]) for lst in doubleStrList])
    # separating feature scores from comparisons
    featureNums = [item[1] for item in NewList]
    # grabbing count of number of features to determine number of x axis ticks in the chart
    count = 0
    newList = []
    for i in featureNums:
        count += 1
        newList.append(count)
    #p = figure(width=400, height=400)
    #fig = plt.bar(newList, featureNums, align='center')
    #plt.xticks(newList, Labels1)
    #plt.xticks(rotation=60, ha='right')
    #plt.title("Feature Importance graph")
    #plt.show()
    #print(doubleStrList[0][0])
    #return Labels1#, doubleStrList[0][1], doubleStrList[0][2], doubleStrList[0][3]
    #test = ',\n'.join([i for i in Labels1[0:]])
    dfBokehChart = pd.DataFrame(list(zip(Labels1, featureNums)), columns=['Features', 'FeatureNumbers'])
    p = figure(x_range=Labels1, plot_height=400, title="Feature Importance Scores")
    p.vbar(x=Labels1, top=featureNums, width=0.5, color="#fc8d59")
    #return (Labels1, featureNums)
    #return dfBokehChart
    return p
'''
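The on_change registrations above follow the usual bokeh-server pattern: every widget funnels into one callback that reads the current widget values, rebuilds the layout, and swaps the document root via curdoc().clear()/add_root(). A stripped-down sketch of that pattern; the widget names and the toy make_plot stand-in for vis_synthetic() are assumptions, not the project's code (run with `bokeh serve --show app.py`):

import numpy as np
from bokeh.io import curdoc
from bokeh.layouts import column, row
from bokeh.models import Select, Slider
from bokeh.plotting import figure

dataset_select = Select(title="Dataset", value="Noisy Moons",
                        options=["Noisy Moons", "Noisy Circles"])
samples_slider = Slider(title="Samples", start=50, end=500, step=50, value=200)

def make_plot():
    # toy stand-in for vis_synthetic(): scatter some random points
    rng = np.random.default_rng(7)
    n = int(samples_slider.value)
    p = figure(height=400, title=dataset_select.value)
    p.scatter(rng.normal(size=n), rng.normal(size=n), size=4)
    return p

def model_callback(attrname, old, new):
    # rebuild the whole layout and replace the document root,
    # mirroring the curdoc().clear() / add_root(Row(...)) calls above
    inputs = column(dataset_select, samples_slider)
    curdoc().clear()
    curdoc().add_root(row(inputs, make_plot()))

# every widget funnels into the same callback
dataset_select.on_change('value', model_callback)
samples_slider.on_change('value', model_callback)

curdoc().add_root(row(column(dataset_select, samples_slider), make_plot()))

Rebuilding the root on every change is simple but heavy-handed; updating a ColumnDataSource in place is the lighter alternative when only the data, not the layout, changes.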