pjm363 (Philip Monaco) / WHY Senior Project / Commits / 56f9810a

Commit 56f9810a, authored 3 years ago by Abdullah Shah

final commit

Parent: 18e9dbdc
No related branches, tags, or merge requests found.
Showing 3 changed files with 8 additions and 211 deletions:

src/config.py     +2, -3
src/data_vis.py   +0, -65
src/modelling.py  +6, -143
src/config.py (+2, -3)
@@ -3,10 +3,9 @@ This is a docstring for config.
"""
from bokeh.models import Select, Slider, Row, Column, Dropdown, Paragraph
from bokeh.layouts import column, row
# from src.data_vis import *
# x = 0
# y = 0
X_train = 0
X_test = 0
y_train = 0
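Note on the hunk above: the zero-valued placeholders make config a shared-state module; modelling.py later reads config.X_train and friends after other code rebinds them. A minimal single-file sketch of that pattern, where types.SimpleNamespace stands in for the config module and load_data is a hypothetical helper, not code from this repo:

import types
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

# placeholders, rebound at runtime just like config.X_train = 0 above
config = types.SimpleNamespace(X_train=0, X_test=0, y_train=0, y_test=0)

def load_data(n_samples=200):
    # generate a toy binary dataset and publish the split through config
    X, y = make_moons(n_samples=n_samples, noise=0.2, random_state=7)
    (config.X_train, config.X_test,
     config.y_train, config.y_test) = train_test_split(
        X, y, test_size=0.30, random_state=7)

load_data()
print(config.X_train.shape, config.X_test.shape)  # (140, 2) (60, 2)

Every importer of the config module sees the rebinding, which is why the callbacks below can pass config.X_train straight into the models.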
src/data_vis.py (+0, -65)
@@ -103,8 +103,6 @@ def decisionTreemodel(X_train, X_test, Y_train, Y_test, record_value):
    # grabbing unique class names
    class_names = model_logreg.classes_
    # grabbing specific row for model to use to make prediction
    #ex_specie = X_test[3, ]
    # lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
    # LimeTabularExplainer is a function that explains predictions of tabular (matrix) data.
@@ -135,66 +133,3 @@ def decisionTreemodel(X_train, X_test, Y_train, Y_test, record_value):
    p.vbar(x=Labels1, top=featureNums, width=0.5, color="#fc8d59")
    return p
'''
def decisionTreemodel():
    """
    This is a docstring for decisionTreeVisuals
    Returns:
        _type_: _description_
    """
    # loading data
    df = pd.read_csv("src/Iris.csv")
    # data split to features matrix and target vector
    # df stands for the dataframe
    # feature matrix
    X = df.iloc[:, 0:-1]
    # target vector
    Y = df.iloc[:, -1:]
    # splitting data into training and test sets
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=7)
    # instantiating model
    model_logreg = DecisionTreeClassifier(max_depth=8, random_state=0)
    # fit model on training set
    model_logreg.fit(X_train, Y_train)
    # grabbing unique class names
    class_names = model_logreg.classes_
    # grabbing specific row for model to use to make prediction
    ex_specie = np.array(X_test.iloc[3]).reshape(1, -1)
    # lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
    # LimeTabularExplainer is a function that explains predictions of tabular (matrix) data.
    explainer = lime.lime_tabular.LimeTabularExplainer(X_train.values, feature_names=X_train.columns,
                                                       class_names=class_names, discretize_continuous=True)
    # grab count of columns from feature matrix
    featureCount = len(X.columns)
    # explain_instance is a function that generates explanations for a prediction after using LimeTabularExplainer.
    exp = explainer.explain_instance(X_test.iloc[3], model_logreg.predict_proba, num_features=featureCount, top_labels=1)
    # converting explanations to a list
    tupleTest = exp.as_list()
    # converting tuples to lists
    NewList = [list(x) for x in tupleTest]
    # converting all elements in list to strings
    doubleStrList = [[str(s) for s in sublist] for sublist in NewList]
    # print(doubleStrList[0][0])
    return doubleStrList[0][0]
#df = pd.read_csv("src/Iris.csv")
decisionTreemodel()
'''
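Context for the block deleted above: it walked the standard LIME tabular flow (fit a DecisionTreeClassifier, explain one held-out row, turn exp.as_list() into condition/weight pairs, chart them with bokeh). A self-contained sketch of that flow on the bundled iris data; this is an illustrative reconstruction under assumed imports, not the project's exact code:

import lime.lime_tabular
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from bokeh.plotting import figure, show

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.30, random_state=7)

# fit the classifier being explained
model = DecisionTreeClassifier(max_depth=8, random_state=0)
model.fit(X_train, y_train)

# LimeTabularExplainer perturbs rows of tabular data around one instance
explainer = lime.lime_tabular.LimeTabularExplainer(
    X_train,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
    discretize_continuous=True)

# explain the prediction for one held-out row
exp = explainer.explain_instance(
    X_test[3], model.predict_proba,
    num_features=X_train.shape[1], top_labels=1)

# as_list() yields (condition, weight) pairs, e.g. ('petal width (cm) > 1.80', 0.4)
pairs = exp.as_list(label=exp.available_labels()[0])
labels = [condition for condition, weight in pairs]
weights = [weight for condition, weight in pairs]

# bar chart of per-feature LIME weights, as the deleted code did with vbar
p = figure(x_range=labels, height=400, title="Feature Importance Scores")
p.vbar(x=labels, top=weights, width=0.5, color="#fc8d59")
show(p)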
src/modelling.py (+6, -143)
@@ -5,10 +5,7 @@ import config as config
import pandas as pd
from data_vis import vis_synthetic
from synthetic import *
# import numpy as np
# import math
# import matplotlib.pyplot as plt
# import seaborn as sns
# sklearn ML libraries/modules
# from sklearn import preprocessing
@@ -21,13 +18,6 @@ from bokeh.models import Select, Slider, Row, Column
from data_vis import decisionTreemodel, logisticRegressionmodel
from bokeh.io import curdoc
# bokeh libraries/modules
# from bokeh.io import output_file, show
# from bokeh.layouts import widgetbox
# from bokeh.models.Column import widgetbox
# from bokeh.models.widgets import Div
# from bokeh.models.widgets import Paragraph
# from bokeh.models.widgets import PreText
# lime modules
from lime import submodular_pick
@@ -58,10 +48,6 @@ class Models:
        return DecisionTreeClassifier()
    # def fit_to_model(self):
    #     here in the fit method you'll need to use the config.X_train, y_test

def model_callback(attrname, old, new):
    """
    Callback function that updates models with appropriate data and updates on
    slider/selector changes.
@@ -71,13 +57,10 @@ def model_callback(attrname, old, new):
        old (_type_): _description_
        new (_type_): _description_
    """
    #controls parameters when solver updates, add more elif statements as needed.
    # if config.models_select.value == "Logistic Regression":
    if config.models_select.value == "Logistic Regression" and (config.solver_select.value == "newton-cg" or config.solver_select.value == "sag" or config.solver_select.value == "lbfgs") and (config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
        # solver = "l2"
        # config.normalization_select.value = "l2"
        inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
        s = int(config.index_slider.value)
        testModel = logisticRegressionmodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
@@ -88,14 +71,11 @@ def model_callback(attrname, old, new):
        curdoc().clear()
        curdoc().add_root(Row(inputs, b, testModel))
    # multilabel classification not working
-    elif config.models_select.value == "Decision Tree" and (config.dataset_select.value == "Make Classification" or config.dataset_select.value == "Blobs" or config.dataset_select.value == "Make Classification" or config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
+    elif config.models_select.value == "Decision Tree" and (config.dataset_select.value == "Make Classification" or config.dataset_select.value == "Blobs" or config.dataset_select.value == 'Noisy Circles' or config.dataset_select.value == 'Noisy Moons'):
        s = int(config.index_slider.value)
        inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
        testModel = decisionTreemodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
        #normalization_selected = "l1", "l2"
        #config.normalization_select.value = list(normalization_selected.values)
        b = vis_synthetic()
        curdoc().clear()
@@ -105,37 +85,13 @@ def model_callback(attrname, old, new):
        s = int(config.index_slider.value)
        inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select_liblinear, config.index_slider)
        testModel = logisticRegressionmodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
        #normalization_selected = "l1", "l2"
        #config.normalization_select.value = list(normalization_selected.values)
        b = vis_synthetic()
        curdoc().clear()
        curdoc().add_root(Row(inputs, b, testModel))
#if config.models_select.value == "Logistic Regression":
'''
if config.model_names.value == 'Decision Tree':
    inputs = Column(config.selects, config.samples_slider, config.data_split_slider, config.models_select, config.solver_select, config.normalization_select, config.index_slider)
    s = int(config.index_slider.value)
    testModel = decisionTreemodel(config.X_train, config.X_test, config.y_train, config.y_test, s)
    b = vis_synthetic()
    curdoc().clear()
    curdoc().add_root(Row(inputs, b, testModel))
'''
#control statements for selecting the appropriate dataset needs to:
# 1. update the dataset (this will automatically update the layout)
#have to figure out how to update the dataset from here.
#probably need to update the selector to only show binary datasets. So make 2 separate dataset selectors: binary and multiclass
# add more call backs for each selector
#config.models_select.on_change('value', model_callback)
config.models_select.on_change('value', model_callback)
config.dataset_select.on_change('value', model_callback)
@@ -146,96 +102,3 @@ config.normalization_select.on_change('value', model_callback)
config.index_slider.on_change('value', model_callback)
#config.index_slider.on_change('value', model_callback)
'''
def decisionTreemodel(X, Y, indexValue):
    """
    This is a docstring for decisionTreeVisuals
    Returns:
        _type_: _description_
    """
    # loading data
    #df = pd.read_csv("Iris.csv")
    # data split to features matrix and target vector
    # df stands for the dataframe
    # feature matrix
    #X = df.iloc[:,0:-1]
    # target vector
    #Y = df.iloc[:,-1:]
    # splitting data into training and test sets
    #not needed
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.30, random_state=7)
    # instantiating model
    model_logreg = DecisionTreeClassifier(max_depth=8, random_state=0)
    # fit model on training set
    model_logreg.fit(X_train, Y_train)
    # grabbing unique class names
    class_names = model_logreg.classes_
    # grabbing specific row for model to use to make prediction
    #ex_specie = X_test[3, ]
    # lime_tabular is a module that contains functions that explain classifiers which use tabular data (matrices).
    # LimeTabularExplainer is a function that explains predictions of tabular (matrix) data.
    explainer = lime.lime_tabular.LimeTabularExplainer(X_train, feature_names=Y,
                                                       class_names=class_names, discretize_continuous=True)
    # grab count of columns from feature matrix
    featureCount = len(X)
    # explain_instance is a function that generates explanations for a prediction after using LimeTabularExplainer.
    exp = explainer.explain_instance(X_test[indexValue], model_logreg.predict_proba, num_features=featureCount)
    # converting explanations to a list
    tupleTest = exp.as_list()
    # converting tuples to lists
    NewList = [list(x) for x in tupleTest]
    # converting all elements in list to strings
    doubleStrList = [[str(s) for s in sublist] for sublist in NewList]
    doublestring = ",\n".join([''.join([str(c) for c in lst]) for lst in NewList])
    # separating comparisons from feature scores
    Labels1 = [item[0] for item in NewList]
    # outlst = "".join([''.join([str(c) for c in lst]) for lst in doubleStrList])
    # separating feature scores from comparisons
    featureNums = [item[1] for item in NewList]
    # grabbing count of number of features to determine number of x axis ticks in the chart
    count = 0
    newList = []
    for i in featureNums:
        count += 1
        newList.append(count)
    #p = figure(width=400, height=400)
    #fig = plt.bar(newList, featureNums, align='center')
    #plt.xticks(newList, Labels1)
    #plt.xticks(rotation=60, ha='right')
    #plt.title("Feature Importance graph")
    #plt.show()
    #print(doubleStrList[0][0])
    #return Labels1#, doubleStrList[0][1], doubleStrList[0][2], doubleStrList[0][3]
    #test = ',\n'.join([i for i in Labels1[0:]])
    dfBokehChart = pd.DataFrame(list(zip(Labels1, featureNums)), columns=['Features', 'FeatureNumbers'])
    p = figure(x_range=Labels1, plot_height=400, title="Feature Importance Scores")
    p.vbar(x=Labels1, top=featureNums, width=0.5, color="#fc8d59")
    #return (Labels1, featureNums)
    #return dfBokehChart
    return p
'''
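The on_change registrations above follow the usual bokeh-server pattern: every widget funnels into one callback that reads the current widget values, rebuilds the layout, and swaps the document root via curdoc().clear()/add_root(). A stripped-down sketch of that pattern; the widget names and the toy make_plot stand-in for vis_synthetic() are assumptions, not the project's code (run with `bokeh serve --show app.py`):

import numpy as np
from bokeh.io import curdoc
from bokeh.layouts import column, row
from bokeh.models import Select, Slider
from bokeh.plotting import figure

dataset_select = Select(title="Dataset", value="Noisy Moons",
                        options=["Noisy Moons", "Noisy Circles"])
samples_slider = Slider(title="Samples", start=50, end=500, step=50, value=200)

def make_plot():
    # toy stand-in for vis_synthetic(): scatter some random points
    rng = np.random.default_rng(7)
    n = int(samples_slider.value)
    p = figure(height=400, title=dataset_select.value)
    p.scatter(rng.normal(size=n), rng.normal(size=n), size=4)
    return p

def model_callback(attrname, old, new):
    # rebuild the whole layout and replace the document root,
    # mirroring the curdoc().clear() / add_root(Row(...)) calls above
    inputs = column(dataset_select, samples_slider)
    curdoc().clear()
    curdoc().add_root(row(inputs, make_plot()))

# every widget funnels into the same callback
dataset_select.on_change('value', model_callback)
samples_slider.on_change('value', model_callback)

curdoc().add_root(row(column(dataset_select, samples_slider), make_plot()))

Rebuilding the root on every change is simple but heavy-handed; updating a ColumnDataSource in place is the lighter alternative when only the data, not the layout, changes.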