diff --git a/examples/decision_tree/main.py b/examples/decision_tree/main.py
index 03f935f1f915c5a39f1477433fda73f68de59d29..d7922e682c4f0c510cc14d463c8ca522d855a878 100644
--- a/examples/decision_tree/main.py
+++ b/examples/decision_tree/main.py
@@ -2,10 +2,11 @@ import numpy as np
# import pandas as pd
from synthetic import *
-from data_vis import *
-#from data_vis import decisionTreemodel
+from data_vis import vis_synthetic
+from data_vis import decisionTreemodel
+from data_vis import logisticRegressionmodel
import config as config
-# from modelling import *
+from modelling import *
from bokeh.io import curdoc
from bokeh.models import ColumnDataSource, Row
@@ -18,6 +19,12 @@ data = SyntheticData(shuffle=False)
config.X_train, config.X_test, config.y_train, config.y_test = data.generator()
#instantiate model here.
+#finalModel = Models()
+
+
+testModel = decisionTreemodel(config.X_train, config.X_test, config.y_train, config.y_test, 9)
+
+#testModel1 = logisticRegressionModel(config.X_train, config.X_test, config.y_train, config.y_test, 9)
#fit model using fit method on the model class
@@ -30,5 +37,5 @@ config.source = ColumnDataSource(dict(x=config.X_train[:,0], y=config.X_train[:,
b = vis_synthetic()
-curdoc().add_root(Row(config.inputs, b)) #, text_input_widget, stext_output2))
+curdoc().add_root(Row(config.inputs, b, testModel)) #, text_input_widget, stext_output2))
curdoc().title = "Decision Tree"
\ No newline at end of file
diff --git a/src/config.py b/src/config.py
index da76ac122e9c10cadfca90ab60363cfff572ae72..e9fc1f7926bfc52ddf8b50eb1d67b6c721fe63a7 100644
--- a/src/config.py
+++ b/src/config.py
@@ -35,10 +35,15 @@ regularization_names = [
"elasticnet" #both l1 and l2 combined
]
+regularization_names_liblinear = [
+ "l2",
+ "l1"
+]
+
solver_names = [
"newton-cg", #dependent on l2 or none regularization
"lbfgs", #dependent on l1 or none regularization
- "libliniear", #dependent on l1 or l2 regularization
+ "liblinear", #dependent on l1 or l2 regularization
"sag", #dependent on l2 or none regularization
"saga" #dependent on elasticnet, l1, l2, or none regularization
]
@@ -95,6 +100,11 @@ normalization_select = Select(value='none',
width=200,
options=regularization_names)
+# liblinear only offers l1/l2, so default to a listed option instead of 'none'
+normalization_select_liblinear = Select(value=regularization_names_liblinear[0],
+                                        title='Select Regularization', width=200,
+                                        options=regularization_names_liblinear)
+
solver_select = Select(value = "newton-cg",
title='Select Solver',
width=200,
@@ -116,5 +126,5 @@ inputs = Column(selects,
classes_slider,
inf_slider,
features_slider,
- data_split_slider)
+ data_split_slider, models_select, solver_select, normalization_select, normalization_select_liblinear, index_slider)
\ No newline at end of file
diff --git a/src/data_vis.py b/src/data_vis.py
index 4cfe9dacfdc5a3ded9f9ded9fbcff414a7cb4284..373ef3d793d6918df664ebfa295d18a6c8174122 100644
--- a/src/data_vis.py
+++ b/src/data_vis.py
@@ -3,9 +3,10 @@ This is a docstrings for datavis
"""
from bokeh.models import Scatter
from bokeh.plotting import figure
-import config as config
-
-# import pandas as pd
+import config as config
+import pandas as pd
+from sklearn.tree import DecisionTreeClassifier
+from sklearn.linear_model import LogisticRegression
# import numpy as np
# import math
# import matplotlib.pyplot as plt
@@ -16,9 +17,8 @@ import config as config
# from sklearn.model_selection import train_test_split
# lime modules
-# from lime import submodular_pick
-# import lime
-# from lime.lime_tabular import LimeTabularExplainer
+import lime
+from lime.lime_tabular import LimeTabularExplainer
# from lime import submodular_pick
def vis_synthetic():
@@ -36,6 +36,106 @@ def vis_synthetic():
return b
+
+def logisticRegressionmodel(X_train, X_test, Y_train, Y_test, record_value):
+    """Fit a logistic regression and chart LIME weights for one test record.
+
+    Args:
+        X_train: 2-D training feature matrix (rows are samples).
+        X_test: 2-D test feature matrix with the same columns as X_train.
+        Y_train: Training labels, one per row of X_train.
+        Y_test: Test labels; not used here, kept so callers need not change.
+        record_value: Row index into X_test of the record to explain.
+
+    Returns:
+        Bokeh figure with one bar per term of the LIME explanation.
+    """
+
+    # instantiating model (fixed random_state for reproducible fits)
+    model_logreg = LogisticRegression(random_state=7)
+
+    # fit model on training set
+    model_logreg.fit(X_train, Y_train)
+
+    # grabbing unique class names
+    class_names = model_logreg.classes_
+
+    # count of columns in the feature matrix; len(X_train) would count rows
+    featureCount = X_train.shape[1]
+
+    # LIME needs one name per feature column. The labels in Y_train are not
+    # feature names, so positional names are generated instead.
+    feature_names = ["feature_{}".format(i) for i in range(featureCount)]
+
+    # LimeTabularExplainer explains predictions made on tabular (matrix) data.
+    explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=feature_names,
+                                                       class_names=class_names, discretize_continuous=True)
+
+    # explain_instance generates the explanation for the selected test record.
+    exp = explainer.explain_instance(X_test[record_value], model_logreg.predict_proba,
+                                     num_features=featureCount)
+
+    # as_list() yields (condition, weight) pairs; split them into two columns
+    explanation = exp.as_list()
+    Labels1 = [condition for condition, _ in explanation]
+    featureNums = [weight for _, weight in explanation]
+
+    # categorical x axis: one bar per explanation term, height = its weight
+    p = figure(x_range = Labels1, plot_height = 400, title = "Feature Importance Scores")
+    p.vbar(x = Labels1, top = featureNums, width = 0.5, color = "#fc8d59")
+
+    return p
+
+
+def decisionTreemodel(X_train, X_test, Y_train, Y_test, record_value):
+    """Fit a decision tree and chart LIME weights for one test record.
+
+    Args:
+        X_train: 2-D training feature matrix (rows are samples).
+        X_test: 2-D test feature matrix with the same columns as X_train.
+        Y_train: Training labels, one per row of X_train.
+        Y_test: Test labels; not used here, kept so callers need not change.
+        record_value: Row index into X_test of the record to explain.
+
+    Returns:
+        Bokeh figure with one bar per term of the LIME explanation.
+    """
+
+    # instantiating model (depth capped at 8, fixed random_state)
+    model_tree = DecisionTreeClassifier(max_depth=8, random_state=0)
+
+    # fit model on training set
+    model_tree.fit(X_train, Y_train)
+
+    # grabbing unique class names
+    class_names = model_tree.classes_
+
+    # count of columns in the feature matrix; len(X_train) would count rows
+    featureCount = X_train.shape[1]
+
+    # LIME needs one name per feature column. The labels in Y_train are not
+    # feature names, so positional names are generated instead.
+    feature_names = ["feature_{}".format(i) for i in range(featureCount)]
+
+    # LimeTabularExplainer explains predictions made on tabular (matrix) data.
+    explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=feature_names,
+                                                       class_names=class_names, discretize_continuous=True)
+
+    # explain_instance generates the explanation for the selected test record.
+    exp = explainer.explain_instance(X_test[record_value], model_tree.predict_proba,
+                                     num_features=featureCount)
+
+    # as_list() yields (condition, weight) pairs; split them into two columns
+    explanation = exp.as_list()
+    Labels1 = [condition for condition, _ in explanation]
+    featureNums = [weight for _, weight in explanation]
+
+    # categorical x axis: one bar per explanation term, height = its weight
+    p = figure(x_range = Labels1, plot_height = 400, title = "Feature Importance Scores")
+    p.vbar(x = Labels1, top = featureNums, width = 0.5, color = "#fc8d59")
+
+    return p
+
'''
def decisionTreemodel():
"""This is a docstring for decisionTreeVisuals
diff --git a/src/modelling.py b/src/modelling.py
index 3771f038bcb1422b416ac811de8ca4d205d23452..c5cb2f712266e05db67431540aeda58f42b337e5 100644
--- a/src/modelling.py
+++ b/src/modelling.py
@@ -3,6 +3,8 @@ This is a docstrings for decisionTreeVisuals
"""
import config as config
import pandas as pd
+from data_vis import vis_synthetic
+from synthetic import *
# import numpy as np
# import math
# import matplotlib.pyplot as plt
@@ -15,6 +17,9 @@ from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from synthetic import update_samples_or_dataset
+from bokeh.models import Select, Slider, Row, Column
+from data_vis import decisionTreemodel, logisticRegressionmodel
+from bokeh.io import curdoc
# bokeh libraries/modules
# from bokeh.io import output_file, show
@@ -67,15 +72,84 @@ def model_callback(attrname, old, new):
new (_type_): _description_
"""
#controls parameters when solver updates, add more elif statements as needed.
- if config.solver_select.value == "newton-cg" or config.solver_select.value == "sag", or config.solver_select.value == "lbfgs":
- solver = "l2"
+
+ # if config.models_select.value == "Logistic Regression":
+
+ if config.models_select.value == "Logistic Regression" and (config.solver_select.value == "newton-cg" or config.solver_select.value == "sag" or config.solver_select.value == "lbfgs") and (config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
+ # solver = "l2"
+ # config.normalization_select.value = "l2"
+ inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
+ s = int(config.index_slider.value)
+ testModel = logisticRegressionmodel( config.X_train, config.X_test, config.y_train, config.y_test, s)
+ normalization_selected = "l2"
+ config.normalization_select.value = normalization_selected
+ b = vis_synthetic()
+
+ curdoc().clear()
+ curdoc().add_root(Row(inputs, b, testModel))
+
+# multilabel classification not working
+
+ elif config.models_select.value == "Decision Tree" and (config.dataset_select.value == "Make Classification" or config.dataset_select.value == "Blobs" or config.dataset_select.value == "Make Classification" or config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
+ s = int(config.index_slider.value)
+ inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
+ testModel = decisionTreemodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
+ #normalization_selected = "l1", "l2"
+ #config.normalization_select.value = list(normalization_selected.values)
+ b = vis_synthetic()
+
+ curdoc().clear()
+ curdoc().add_root(Row(inputs, b, testModel))
+
+ elif config.models_select.value == "Logistic Regression" and (config.solver_select.value == "liblinear") and (config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
+ s = int(config.index_slider.value)
+ inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select_liblinear, config.index_slider)
+ testModel = logisticRegressionmodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
+ #normalization_selected = "l1", "l2"
+ #config.normalization_select.value = list(normalization_selected.values)
+ b = vis_synthetic()
+
+ curdoc().clear()
+ curdoc().add_root(Row(inputs, b, testModel))
+ #if config.models_select.value == "Logistic Regression":
+
+
+ '''
+
+ if config.model_names.value == 'Decision Tree':
+ inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
+ s = int(config.index_slider.value)
+ testModel = decisionTreemodel( config.X_train, config.X_test, config.y_train, config.y_test, s)
+ b = vis_synthetic()
+
+ curdoc().clear()
+ curdoc().add_root(Row(inputs, b, testModel))
+ '''
+
+
+
#control statements for selecting the appropriate dataset needs to:
# 1. update the dataset ( this will automatically update the layout)
#have to figure out how to update the dataset from here.
#probably need to update the selector to only show binary datasets. So make 2 separate dataset selectors: binary and multiclass
+# add more callbacks for each selector
+#config.models_select.on_change('value', model_callback)
+config.dataset_select.on_change('value', model_callback)
+
+config.solver_select.on_change('value', model_callback)
+
+config.normalization_select.on_change('value', model_callback)
+
+config.index_slider.on_change('value', model_callback)
+
+
+
+#config.index_slider.on_change('value', model_callback)
+
+'''
def decisionTreemodel(X, Y, indexValue):
"""This is a docstring for decisionTreeVisuals
@@ -163,6 +237,5 @@ def decisionTreemodel(X, Y, indexValue):
#return (Labels1, featureNums)
#return dfBokehChart
return p
+'''
-# add more call backs for each selector
-config.models_select.on_change('value', model_callback)
diff --git a/src/synthetic.py b/src/synthetic.py
index 414a4714fe3492e501af386bed4c2274cce2f62c..f2a57ce73822731f085f0e47d65df5b04620bc37 100644
--- a/src/synthetic.py
+++ b/src/synthetic.py
@@ -213,21 +213,28 @@ def update_layout(attrname, old, new):
new (_type_): _description_
"""
if config.dataset_select.value == 'Blobs' or config.dataset_select.value == 'Multilabel Classification':
- inputs = Column(config.selects, config.samples_slider, config.classes_slider, config.features_slider, config.data_split_slider)
+ inputs = Column(config.selects, config.samples_slider, config.classes_slider, config.features_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b))
elif config.dataset_select.value == 'Make Classification':
- inputs = Column(config.selects, config.samples_slider, config.classes_slider, config.features_slider, config.inf_slider, config.data_split_slider)
+ inputs = Column(config.selects, config.samples_slider, config.classes_slider, config.features_slider, config.inf_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b))
elif config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons':
- inputs = Column(config.selects, config.samples_slider, config.data_split_slider)
+ inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
+ b = vis_synthetic()
+
+ curdoc().clear()
+ curdoc().add_root(Row(inputs,b))
+
+ elif (config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons') and (config.solver_select.value == "liblinear"):
+ inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select_liblinear, config.index_slider)
b = vis_synthetic()
curdoc().clear()