Skip to content
Snippets Groups Projects
Commit 56f9810a authored by Abdullah Shah's avatar Abdullah Shah
Browse files

final commit

parent 18e9dbdc
No related branches found
No related tags found
No related merge requests found
......@@ -3,10 +3,9 @@ This is a docstring for config.
"""
from bokeh.models import Select, Slider, Row, Column, Dropdown, Paragraph
from bokeh.layouts import column, row
#from src.data_vis import *t
# x = 0
# y = 0
X_train = 0
X_test = 0
y_train = 0
......
......@@ -103,8 +103,6 @@ def decisionTreemodel(X_train, X_test, Y_train, Y_test, record_value):
# grabbing unique class names
class_names=model_logreg.classes_
# grabbing specific row for model to use to make prediction
#ex_specie = X_test[3, ]
# lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
# LimeTabularExplainer is a class that explains predictions of models trained on tabular (matrix) data.
......@@ -135,66 +133,3 @@ def decisionTreemodel(X_train, X_test, Y_train, Y_test, record_value):
p.vbar(x = Labels1, top = featureNums, width = 0.5, color = "#fc8d59")
return p
'''
def decisionTreemodel():
"""This is a docstring for decisionTreeVisuals
Returns:
_type_: _description_
"""
# loading data
df = pd.read_csv("src/Iris.csv")
# data split to features matrix and target vector
# df stands for the dataframe
# feature matrix
X = df.iloc[:,0:-1]
# target vector
Y = df.iloc[:,-1:]
# splitting data into training and test sets
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=7)
# instantiating model
model_logreg = DecisionTreeClassifier(max_depth=8, random_state=0)
# fit model on training set
model_logreg.fit(X_train, Y_train)
# grabbing unique class names
class_names=model_logreg.classes_
# grabbing specific row for model to use to make prediction
ex_specie = np.array(X_test.iloc[3]).reshape(1,-1)
# lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
# LimeTabularExplainer is a class that explains predictions of models trained on tabular (matrix) data.
explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names=X_train.columns,
class_names=class_names, discretize_continuous=True)
# grab count of columns from feature matrix
featureCount = len(X.columns)
# explain_instance is a function that generates explanations for a prediction after using LimeTabularExplainer.
exp = explainer.explain_instance(X_test.iloc[3],model_logreg.predict_proba,num_features=featureCount,top_labels=1)
# converting explanations to a list
tupleTest = exp.as_list()
# converting tuples to list
NewList = [list(x) for x in tupleTest]
# converting all elements in list to strings
doubleStrList = [[str(s) for s in sublist] for sublist in NewList]
# print(doubleStrList[0][0])
return doubleStrList[0][0]
#df = pd.read_csv("src/Iris.csv")
decisionTreemodel()
'''
......@@ -5,10 +5,7 @@ import config as config
import pandas as pd
from data_vis import vis_synthetic
from synthetic import *
# import numpy as np
# import math
# import matplotlib.pyplot as plt
# import seaborn as sns
# sklearn ML libraries/modules
# from sklearn import preprocessing
......@@ -21,13 +18,6 @@ from bokeh.models import Select, Slider, Row, Column
from data_vis import decisionTreemodel, logisticRegressionmodel
from bokeh.io import curdoc
# bokeh libraries/modules
# from bokeh.io import output_file, show
# from bokeh.layouts import widgetbox
#from bokeh.models.Column import widgetbox
# from bokeh.models.widgets import Div
# from bokeh.models.widgets import Paragraph
# from bokeh.models.widgets import PreText
# lime modules
from lime import submodular_pick
......@@ -58,10 +48,6 @@ class Models:
return DecisionTreeClassifier()
# def fit_to_model(self):
#here in the fit method you'll need to use the config.X_train, y_test
def model_callback(attrname, old, new):
"""Callback function that updates models with appropriate data and updates on
slider/selector changes.
......@@ -71,13 +57,10 @@ def model_callback(attrname, old, new):
old (_type_): _description_
new (_type_): _description_
"""
#controls parameters when solver updates, add more elif statements as needed.
# if config.models_select.value == "Logistic Regression":
if config.models_select.value == "Logistic Regression" and (config.solver_select.value == "newton-cg" or config.solver_select.value == "sag" or config.solver_select.value == "lbfgs") and (config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
# solver = "l2"
# config.normalization_select.value = "l2"
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
s = int(config.index_slider.value)
testModel = logisticRegressionmodel( config.X_train, config.X_test, config.y_train, config.y_test, s)
......@@ -88,14 +71,11 @@ def model_callback(attrname, old, new):
curdoc().clear()
curdoc().add_root(Row(inputs, b, testModel))
# multilabel classification not working
elif config.models_select.value == "Decision Tree" and (config.dataset_select.value == "Make Classification" or config.dataset_select.value == "Blobs" or config.dataset_select.value == "Make Classification" or config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
elif config.models_select.value == "Decision Tree" and (config.dataset_select.value == "Make Classification" or config.dataset_select.value == "Blobs" or config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
s = int(config.index_slider.value)
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
testModel = decisionTreemodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
#normalization_selected = "l1", "l2"
#config.normalization_select.value = list(normalization_selected.values)
b = vis_synthetic()
curdoc().clear()
......@@ -105,37 +85,13 @@ def model_callback(attrname, old, new):
s = int(config.index_slider.value)
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select_liblinear, config.index_slider)
testModel = logisticRegressionmodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
#normalization_selected = "l1", "l2"
#config.normalization_select.value = list(normalization_selected.values)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b, testModel))
#if config.models_select.value == "Logistic Regression":
'''
if config.model_names.value == 'Decision Tree':
inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
s = int(config.index_slider.value)
testModel = decisionTreemodel( config.X_train, config.X_test, config.y_train, config.y_test, s)
b = vis_synthetic()
curdoc().clear()
curdoc().add_root(Row(inputs, b, testModel))
'''
#control statements for selecting the appropriate dataset need to:
# 1. update the dataset (this will automatically update the layout)
#have to figure out how to update the dataset from here.
#probably need to update the selector to only show binary datasets. So make 2 separate dataset selectors: binary and multiclass
# add more callbacks for each selector
#config.models_select.on_change('value', model_callback)
config.models_select.on_change('value', model_callback)
config.dataset_select.on_change('value', model_callback)
......@@ -146,96 +102,3 @@ config.normalization_select.on_change('value', model_callback)
config.index_slider.on_change('value', model_callback)
#config.index_slider.on_change('value', model_callback)
'''
def decisionTreemodel(X, Y, indexValue):
"""This is a docstring for decisionTreeVisuals
Returns:
_type_: _description_
"""
# loading data
#df = pd.read_csv("Iris.csv")
# data split to features matrix and target vector
# df stands for the dataframe
# feature matrix
#X = df.iloc[:,0:-1]
# target vector
#Y = df.iloc[:,-1:]
# splitting data into training and test sets
#not needed
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=7)
# instantiating model
model_logreg = DecisionTreeClassifier(max_depth=8, random_state=0)
# fit model on training set
model_logreg.fit(X_train, Y_train)
# grabbing unique class names
class_names=model_logreg.classes_
# grabbing specific row for model to use to make prediction
#ex_specie = X_test[3, ]
# lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
# LimeTabularExplainer is a class that explains predictions of models trained on tabular (matrix) data.
explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=Y,
class_names=class_names, discretize_continuous=True)
# grab count of columns from feature matrix
featureCount = len(X)
# explain_instance is a function that generates explanations for a prediction after using LimeTabularExplainer.
exp = explainer.explain_instance(X_test[indexValue],model_logreg.predict_proba,num_features=featureCount)
# converting explanations to a list
tupleTest = exp.as_list()
# converting tuples to list
NewList = [list(x) for x in tupleTest]
# converting all elements in list to strings
doubleStrList = [[str(s) for s in sublist] for sublist in NewList]
doublestring = ",\n ".join([' '.join([str(c) for c in lst]) for lst in NewList])
# separating comparisons from feature scores
Labels1 = [item[0] for item in NewList]
# outlst = " ".join([' '.join([str(c) for c in lst]) for lst in doubleStrList])
# separating feature scores from comparisons
featureNums = [item[1] for item in NewList]
# grabbing the count of features to determine the number of x-axis ticks in the chart
count = 0
newList = []
for i in featureNums:
count += 1
newList.append(count)
#p = figure(width=400, height=400)
#fig = plt.bar(newList, featureNums, align='center')
#plt.xticks(newList, Labels1)
#plt.xticks(rotation=60, ha='right')
#plt.title("Feature Importance graph")
#plt.show()
#print(doubleStrList[0][0])
#return Labels1#, doubleStrList[0][1], doubleStrList[0][2], doubleStrList[0][3]
#test = ', \n'.join([i for i in Labels1[0:]])
dfBokehChart = pd.DataFrame(list(zip(Labels1, featureNums)), columns =['Features', 'FeatureNumbers'])
p = figure(x_range = Labels1, plot_height = 400, title = "Feature Importance Scores")
p.vbar(x = Labels1, top = featureNums, width = 0.5, color = "#fc8d59")
#return (Labels1, featureNums)
#return dfBokehChart
return p
'''
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment