Skip to content
Snippets Groups Projects
Commit 845b690e authored by Abdullah Shah's avatar Abdullah Shah
Browse files

Made edits and modifications for visuals to update for both logit and...

Made edits and modifications so the visuals update for both logit and decision trees; this is not finalized.
parent 546a0cfb
Branches
No related tags found
No related merge requests found
Pipeline #1806 passed
......@@ -2,10 +2,11 @@ import numpy as np
# import pandas as pd
from synthetic import *
from data_vis import *
#from data_vis import decisionTreemodel
from data_vis import vis_synthetic
from data_vis import decisionTreemodel
from data_vis import logisticRegressionmodel
import config as config
# from modelling import *
from modelling import *
from bokeh.io import curdoc
from bokeh.models import ColumnDataSource, Row
......@@ -18,6 +19,12 @@ data = SyntheticData(shuffle=False)
config.X_train, config.X_test, config.y_train, config.y_test = data.generator()
#instantiate model here.
#finalModel = Models()
testModel = decisionTreemodel(config.X_train, config.X_test, config.y_train, config.y_test, 9)
#testModel1 = logisticRegressionModel(config.X_train, config.X_test, config.y_train, config.y_test, 9)
#fit model using fit method on the model class
......@@ -30,5 +37,5 @@ config.source = ColumnDataSource(dict(x=config.X_train[:,0], y=config.X_train[:,
b = vis_synthetic()
curdoc().add_root(Row(config.inputs, b)) #, text_input_widget, stext_output2))
curdoc().add_root(Row(config.inputs, b, testModel)) #, text_input_widget, stext_output2))
curdoc().title = "Decision Tree"
\ No newline at end of file
......@@ -35,10 +35,15 @@ regularization_names = [
"elasticnet" #both l1 and l2 combined
]
regularization_names_liblinear = [
"l2",
"l1"
]
solver_names = [
"newton-cg", #dependent on l2 or none regularization
"lbfgs", #dependent on l1 or none regularization
"libliniear", #dependent on l1 or l2 regularization
"liblinear", #dependent on l1 or l2 regularization
"sag", #dependent on l2 or none regularization
"saga" #dependent on elasticnet, l1, l2, or none regularization
]
......@@ -95,6 +100,11 @@ normalization_select = Select(value='none',
width=200,
options=regularization_names)
# Regularization selector used in layouts when the "liblinear" solver is
# chosen. Its options are limited to regularization_names_liblinear
# ("l2", "l1"), so the initial value must be one of those; the previous
# default of 'none' was not a listed option and left the widget's model
# value out of sync with what the dropdown displays.
normalization_select_liblinear = Select(value='l2',
                                        title='Select Regularization',
                                        width=200,
                                        options=regularization_names_liblinear)
solver_select = Select(value = "newton-cg",
title='Select Solver',
width=200,
......@@ -116,5 +126,5 @@ inputs = Column(selects,
classes_slider,
inf_slider,
features_slider,
data_split_slider)
data_split_slider, models_select, solver_select, normalization_select, normalization_select_liblinear, index_slider)
\ No newline at end of file
......@@ -4,8 +4,9 @@ This is a docstrings for datavis
from bokeh.models import Scatter
from bokeh.plotting import figure
import config as config
# import pandas as pd
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
# import numpy as np
# import math
# import matplotlib.pyplot as plt
......@@ -16,9 +17,8 @@ import config as config
# from sklearn.model_selection import train_test_split
# lime modules
# from lime import submodular_pick
# import lime
# from lime.lime_tabular import LimeTabularExplainer
import lime
from lime.lime_tabular import LimeTabularExplainer
# from lime import submodular_pick
def vis_synthetic():
......@@ -36,6 +36,106 @@ def vis_synthetic():
return b
def _lime_feature_importance_plot(model, X_train, X_test, Y_train, record_value):
    """Fit ``model``, explain one test record with LIME, and plot the weights.

    Args:
        model: Unfitted scikit-learn classifier exposing ``fit``,
            ``classes_`` and ``predict_proba``.
        X_train: 2-D training feature matrix (rows are samples, columns are
            features).
        X_test: 2-D test feature matrix with the same columns as ``X_train``.
        Y_train: Training labels aligned with the rows of ``X_train``.
        record_value: Row index into ``X_test`` of the record to explain.

    Returns:
        A Bokeh figure with one vertical bar per LIME explanation entry.

    Raises:
        IndexError: If ``record_value`` is out of range for ``X_test``.
    """
    model.fit(X_train, Y_train)

    # The number of features is the number of COLUMNS; len(X_train) would
    # return the number of training rows instead.
    feature_count = X_train.shape[1]

    # LIME expects one name per feature column. The training labels are not
    # feature names, so generate positional names instead.
    feature_names = [f"feature_{i}" for i in range(feature_count)]

    explainer = lime.lime_tabular.LimeTabularExplainer(
        X_train,
        feature_names=feature_names,
        class_names=model.classes_,
        discretize_continuous=True,
    )
    explanation = explainer.explain_instance(
        X_test[record_value],
        model.predict_proba,
        num_features=feature_count,
    )

    # as_list() yields (condition_text, weight) pairs; split them into the
    # categorical axis labels and the bar heights.
    pairs = explanation.as_list()
    labels = [condition for condition, _ in pairs]
    weights = [weight for _, weight in pairs]

    # NOTE(review): `plot_height` was replaced by `height` in newer Bokeh
    # releases - confirm against the Bokeh version pinned for this project.
    p = figure(x_range=labels, plot_height=400, title="Feature Importance Scores")
    p.vbar(x=labels, top=weights, width=0.5, color="#fc8d59")
    return p


def logisticRegressionmodel(X_train, X_test, Y_train, Y_test, record_value):
    """Build a LIME feature-importance bar chart for logistic regression.

    Fits ``LogisticRegression(random_state=7)`` on the training data and
    explains the prediction for a single test record.

    Args:
        X_train: 2-D training feature matrix.
        X_test: 2-D test feature matrix.
        Y_train: Training labels.
        Y_test: Test labels. Currently unused; kept so existing callers
            keep working.
        record_value: Row index into ``X_test`` of the record to explain.

    Returns:
        A Bokeh figure showing the LIME weight of each feature condition.
    """
    model_logreg = LogisticRegression(random_state=7)
    return _lime_feature_importance_plot(
        model_logreg, X_train, X_test, Y_train, record_value
    )


def decisionTreemodel(X_train, X_test, Y_train, Y_test, record_value):
    """Build a LIME feature-importance bar chart for a decision tree.

    Fits ``DecisionTreeClassifier(max_depth=8, random_state=0)`` on the
    training data and explains the prediction for a single test record.

    Args:
        X_train: 2-D training feature matrix.
        X_test: 2-D test feature matrix.
        Y_train: Training labels.
        Y_test: Test labels. Currently unused; kept so existing callers
            keep working.
        record_value: Row index into ``X_test`` of the record to explain.

    Returns:
        A Bokeh figure showing the LIME weight of each feature condition.
    """
    model_tree = DecisionTreeClassifier(max_depth=8, random_state=0)
    return _lime_feature_importance_plot(
        model_tree, X_train, X_test, Y_train, record_value
    )
'''
def decisionTreemodel():
"""This is a docstring for decisionTreeVisuals
......
......@@ -3,6 +3,8 @@ This is a docstrings for decisionTreeVisuals
"""
import config as config
import pandas as pd
from data_vis import vis_synthetic
from synthetic import *
# import numpy as np
# import math
# import matplotlib.pyplot as plt
......@@ -15,6 +17,9 @@ from sklearn.linear_model import LogisticRegression
# from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from synthetic import update_samples_or_dataset
from bokeh.models import Select, Slider, Row, Column
from data_vis import decisionTreemodel, logisticRegressionmodel
from bokeh.io import curdoc
# bokeh libraries/modules
# from bokeh.io import output_file, show
......@@ -67,15 +72,84 @@ def model_callback(attrname, old, new):
new (_type_): _description_
"""
#controls parameters when solver updates, add more elif statements as needed.
if config.solver_select.value == "newton-cg" or config.solver_select.value == "sag", or config.solver_select.value == "lbfgs":
solver = "l2"
# if config.models_select.value == "Logistic Regression":
if config.models_select.value == "Logistic Regression" and (config.solver_select.value == "newton-cg" or config.solver_select.value == "sag" or config.solver_select.value == "lbfgs") and (config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
# solver = "l2"
# config.normalization_select.value = "l2"
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
s = int(config.index_slider.value)
testModel = logisticRegressionmodel( config.X_train, config.X_test, config.y_train, config.y_test, s)
normalization_selected = "l2"
config.normalization_select.value = normalization_selected
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b, testModel))
# multilabel classification not working
elif config.models_select.value == "Decision Tree" and (config.dataset_select.value == "Make Classification" or config.dataset_select.value == "Blobs" or config.dataset_select.value == "Make Classification" or config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
s = int(config.index_slider.value)
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
testModel = decisionTreemodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
#normalization_selected = "l1", "l2"
#config.normalization_select.value = list(normalization_selected.values)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b, testModel))
elif config.models_select.value == "Logistic Regression" and (config.solver_select.value == "liblinear") and (config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
s = int(config.index_slider.value)
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select_liblinear, config.index_slider)
testModel = logisticRegressionmodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
#normalization_selected = "l1", "l2"
#config.normalization_select.value = list(normalization_selected.values)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b, testModel))
#if config.models_select.value == "Logistic Regression":
'''
if config.model_names.value == 'Decision Tree':
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
s = int(config.index_slider.value)
testModel = decisionTreemodel( config.X_train, config.X_test, config.y_train, config.y_test, s)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b, testModel))
'''
#control statements for selecting the appropriate dataset needs to:
# 1. update the dataset ( this will automatically update the layout)
#have to figure out how to update the dataset from here.
#probably need to update the selector to only show binary datasets. So make 2 separate dataset selectors: binary and multiclass
# add more call backs for each selector
#config.models_select.on_change('value', model_callback)
config.dataset_select.on_change('value', model_callback)
config.solver_select.on_change('value', model_callback)
config.normalization_select.on_change('value', model_callback)
config.index_slider.on_change('value', model_callback)
#config.index_slider.on_change('value', model_callback)
'''
def decisionTreemodel(X, Y, indexValue):
"""This is a docstring for decisionTreeVisuals
......@@ -163,6 +237,5 @@ def decisionTreemodel(X, Y, indexValue):
#return (Labels1, featureNums)
#return dfBokehChart
return p
'''
# add more call backs for each selector
config.models_select.on_change('value', model_callback)
......@@ -213,21 +213,28 @@ def update_layout(attrname, old, new):
new (_type_): _description_
"""
if config.dataset_select.value == 'Blobs' or config.dataset_select.value == 'Multilabel Classification':
inputs = Column(config.selects, config.samples_slider, config.classes_slider, config.features_slider, config.data_split_slider)
inputs = Column(config.selects, config.samples_slider, config.classes_slider, config.features_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b))
elif config.dataset_select.value == 'Make Classification':
inputs = Column(config.selects, config.samples_slider, config.classes_slider, config.features_slider, config.inf_slider, config.data_split_slider)
inputs = Column(config.selects, config.samples_slider, config.classes_slider, config.features_slider, config.inf_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b))
elif config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons':
inputs = Column(config.selects, config.samples_slider, config.data_split_slider)
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs,b))
elif (config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons') and (config.solver_select.value == "liblinear"):
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select_liblinear, config.index_slider)
b = vis_synthetic()
curdoc().clear()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment