diff --git a/src/config.py b/src/config.py
index e9fc1f7926bfc52ddf8b50eb1d67b6c721fe63a7..03afb9e9b7ffea2f48a6a2727940b8c6d8babf52 100644
--- a/src/config.py
+++ b/src/config.py
@@ -3,10 +3,9 @@ This is a docstring for config.
"""
from bokeh.models import Select, Slider, Row, Column, Dropdown, Paragraph
from bokeh.layouts import column, row
-#from src.data_vis import *t
-# x = 0
-# y = 0
+
+
X_train = 0
X_test = 0
y_train = 0
diff --git a/src/data_vis.py b/src/data_vis.py
index 373ef3d793d6918df664ebfa295d18a6c8174122..9f49c3dcf99ea0ee836fb5ca411e0d5ea19d768a 100644
--- a/src/data_vis.py
+++ b/src/data_vis.py
@@ -103,8 +103,6 @@ def decisionTreemodel(X_train, X_test, Y_train, Y_test, record_value):
# grabbing unique class names
class_names=model_logreg.classes_
- # grabbing specific row for model to use to make prediction
- #ex_specie = X_test[3, ]
# lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
# LimeTabularExplainer is a function that explains predictions of tabular (matrix) data.
@@ -135,66 +133,3 @@ def decisionTreemodel(X_train, X_test, Y_train, Y_test, record_value):
p.vbar(x = Labels1, top = featureNums, width = 0.5, color = "#fc8d59")
return p
-
-'''
-def decisionTreemodel():
- """This is a docstring for decisionTreeVisuals
-
- Returns:
- _type_: _description_
- """
- # loading data
- df = pd.read_csv("src/Iris.csv")
-
- # data split to features matrix and target vector
- # df stands for the dataframe
- # feature matrix
- X = df.iloc[:,0:-1]
- # target vector
- Y = df.iloc[:,-1:]
-
- # splitting data into training and test sets
- X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=7)
-
- # instantiating model
- model_logreg = DecisionTreeClassifier(max_depth=8, random_state=0)
-
- # fit model on training set
- model_logreg.fit(X_train, Y_train)
-
- # grabbing unique class names
- class_names=model_logreg.classes_
-
- # grabbing specific row for model to use to make prediction
- ex_specie = np.array(X_test.iloc[3]).reshape(1,-1)
-
- # lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
- # LimeTabularExplainer is a function that explains predictions of tabular (matrix) data.
-
- explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names=X_train.columns,
- class_names=class_names, discretize_continuous=True)
-
- # grab count of columns from feature matrix
- featureCount = len(X.columns)
-
- # explain_instance is a function that generates explanations for a prediction after using LimeTabularExplainer.
- exp = explainer.explain_instance(X_test.iloc[3],model_logreg.predict_proba,num_features=featureCount,top_labels=1)
-
- # converting explainations as list
- tupleTest = exp.as_list()
-
- # converting tuples to list
- NewList = [list(x) for x in tupleTest]
- # converting all elements in list to strings
- doubleStrList = [[str(s) for s in sublist] for sublist in NewList]
-
-
- # print(doubleStrList[0][0])
- return doubleStrList[0][0]
-
-
-
-#df = pd.read_csv("src/Iris.csv")
-decisionTreemodel()
-
-'''
diff --git a/src/modelling.py b/src/modelling.py
index c5cb2f712266e05db67431540aeda58f42b337e5..72f999ac1aeb41c12c9c6ab8f19a7867c26663aa 100644
--- a/src/modelling.py
+++ b/src/modelling.py
@@ -5,10 +5,7 @@ import config as config
import pandas as pd
from data_vis import vis_synthetic
from synthetic import *
-# import numpy as np
-# import math
-# import matplotlib.pyplot as plt
-# import seaborn as sns
+
# sklearn ML libraries/modules
# from sklearn import preprocessing
@@ -21,13 +18,6 @@ from bokeh.models import Select, Slider, Row, Column
from data_vis import decisionTreemodel, logisticRegressionmodel
from bokeh.io import curdoc
-# bokeh libraries/modules
-# from bokeh.io import output_file, show
-# from bokeh.layouts import widgetbox
-#from bokeh.models.Column import widgetbox
-# from bokeh.models.widgets import Div
-# from bokeh.models.widgets import Paragraph
-# from bokeh.models.widgets import PreText
# lime modules
from lime import submodular_pick
@@ -57,10 +47,6 @@ class Models:
elif self.model == "Decision Tree":
return DecisionTreeClassifier()
-
- # def fit_to_model(self):
- #here in the fit method you'll need to use the config.X_train, y_test
-
def model_callback(attrname, old, new):
"""Callback function that updates models with appropriate data and updates on
@@ -71,13 +57,10 @@ def model_callback(attrname, old, new):
old (_type_): _description_
new (_type_): _description_
"""
- #controls parameters when solver updates, add more elif statements as needed.
-
- # if config.models_select.value == "Logistic Regression":
+
if config.models_select.value == "Logistic Regression" and (config.solver_select.value == "newton-cg" or config.solver_select.value == "sag" or config.solver_select.value == "lbfgs") and (config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
- # solver = "l2"
- # config.normalization_select.value = "l2"
+
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
s = int(config.index_slider.value)
testModel = logisticRegressionmodel( config.X_train, config.X_test, config.y_train, config.y_test, s)
@@ -88,14 +71,11 @@ def model_callback(attrname, old, new):
curdoc().clear()
curdoc().add_root(Row(inputs, b, testModel))
-# multilabel classification not working
- elif config.models_select.value == "Decision Tree" and (config.dataset_select.value == "Make Classification" or config.dataset_select.value == "Blobs" or config.dataset_select.value == "Make Classification" or config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
+ elif config.models_select.value == "Decision Tree" and (config.dataset_select.value == "Make Classification" or config.dataset_select.value == "Blobs" or config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
s = int(config.index_slider.value)
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
testModel = decisionTreemodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
- #normalization_selected = "l1", "l2"
- #config.normalization_select.value = list(normalization_selected.values)
b = vis_synthetic()
curdoc().clear()
@@ -105,37 +85,13 @@ def model_callback(attrname, old, new):
s = int(config.index_slider.value)
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select_liblinear, config.index_slider)
testModel = logisticRegressionmodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
- #normalization_selected = "l1", "l2"
- #config.normalization_select.value = list(normalization_selected.values)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b, testModel))
- #if config.models_select.value == "Logistic Regression":
-
- '''
-
- if config.model_names.value == 'Decision Tree':
- inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
- s = int(config.index_slider.value)
- testModel = decisionTreemodel( config.X_train, config.X_test, config.y_train, config.y_test, s)
- b = vis_synthetic()
-
- curdoc().clear()
- curdoc().add_root(Row(inputs, b, testModel))
- '''
-
-
-
-
- #control statements for selecting the appropriate dataset needs to:
- # 1. update the dataset ( this will automatically update the layout)
- #have to figure out how to update the dataset from here.
- #probably need to update the selector to only show binary datasets. So make 2 separate dataset selectors: binary and multiclass
-
-# add more call backs for each selector
-#config.models_select.on_change('value', model_callback)
+
+config.models_select.on_change('value', model_callback)
config.dataset_select.on_change('value', model_callback)
@@ -146,96 +102,3 @@ config.normalization_select.on_change('value', model_callback)
config.index_slider.on_change('value', model_callback)
-
-#config.index_slider.on_change('value', model_callback)
-
-'''
-def decisionTreemodel(X, Y, indexValue):
- """This is a docstring for decisionTreeVisuals
-
- Returns:
- _type_: _description_
- """
- # loading data
- #df = pd.read_csv("Iris.csv")
-
- # data split to features matrix and target vector
- # df stands for the dataframe
- # feature matrix
- #X = df.iloc[:,0:-1]
- # target vector
- #Y = df.iloc[:,-1:]
-
- # splitting data into training and test sets
- #not needed
- X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=7)
-
- # instantiating model
- model_logreg = DecisionTreeClassifier(max_depth=8, random_state=0)
-
- # fit model on training set
- model_logreg.fit(X_train, Y_train)
-
- # grabbing unique class names
- class_names=model_logreg.classes_
-
- # grabbing specific row for model to use to make prediction
- #ex_specie = X_test[3, ]
-
- # lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
- # LimeTabularExplainer is a function that explains predictions of tabular (matrix) data.
-
- explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=Y,
- class_names=class_names, discretize_continuous=True)
-
- # grab count of columns from feature matrix
- featureCount = len(X)
-
- # explain_instance is a function that generates explanations for a prediction after using LimeTabularExplainer.
- exp = explainer.explain_instance(X_test[indexValue],model_logreg.predict_proba,num_features=featureCount)
-
- # converting explainations as list
- tupleTest = exp.as_list()
-
- # converting tuples to list
- NewList = [list(x) for x in tupleTest]
- # converting all elements in list to strings
- doubleStrList = [[str(s) for s in sublist] for sublist in NewList]
-
- doublestring = ",\n ".join([' '.join([str(c) for c in lst]) for lst in NewList])
-
- # separating comparisons from feature scores
- Labels1 = [item[0] for item in NewList]
- # outlst = " ".join([' '.join([str(c) for c in lst]) for lst in doubleStrList])
-
- # separating feature scores from comparisons
- featureNums = [item[1] for item in NewList]
-
- # grabbing count of number of features to determing number of x axis ticks in the chart
- count = 0
- newList = []
- for i in featureNums:
- count += 1
- newList.append(count)
- #p = figure(width=400, height=400)
-
-
- #fig = plt.bar(newList, featureNums, align='center')
- #plt.xticks(newList, Labels1)
- #plt.xticks(rotation=60, ha='right')
- #plt.title("Feature Importance graph")
- #plt.show()
-
- #print(doubleStrList[0][0])
- #return Labels1#, doubleStrList[0][1], doubleStrList[0][2], doubleStrList[0][3]
- #test = ', \n'.join([i for i in Labels1[0:]])
-
- dfBokehChart = pd.DataFrame(list(zip(Labels1, featureNums)), columns =['Features', 'FeatureNumbers'])
- p = figure(x_range = Labels1, plot_height = 400, title = "Feature Importance Scores")
- p.vbar(x = Labels1, top = featureNums, width = 0.5, color = "#fc8d59")
-
- #return (Labels1, featureNums)
- #return dfBokehChart
- return p
-'''
-