Commit 97c238da authored by pjm363 (Philip Monaco)

Merge branch '16-decision-tree-data' into 'main'

remove deleted files

Closes #16

See merge request pjm363/why-senior-project!6
parents 01fefcac b2b3368e
Pipeline #1625 passed
Showing with 0 additions and 149 deletions
File deleted
File deleted
from sklearn import tree
from sklearn.preprocessing import StandardScaler


def load_algorithm(algorithm):
    """Return an untrained model for the given algorithm name, or None."""
    # normalize dataset for easier parameter selection
    # estimate bandwidth for mean shift
    # bandwidth = cluster.estimate_bandwidth(X, quantile=0.3)
    # connectivity matrix for structured Ward
    # connectivity = kneighbors_graph(X, n_neighbors=10, include_self=False)
    # make connectivity symmetric
    # connectivity = 0.5 * (connectivity + connectivity.T)
    # # Generate the new colors:
    model = None
    # NOTE: the only live branch currently returns a decision tree classifier.
    if algorithm == 'MiniBatchKMeans':
        model = tree.DecisionTreeClassifier()
    # elif algorithm == 'Birch':
    #     model = cluster.Birch(n_clusters=n_clusters)
    # elif algorithm == 'DBSCAN':
    #     model = cluster.DBSCAN(eps=.2)
    # elif algorithm == 'AffinityPropagation':
    #     model = cluster.AffinityPropagation(damping=.9,
    #                                         preference=-200)
    # elif algorithm == 'MeanShift':
    #     model = cluster.MeanShift(bandwidth=bandwidth,
    #                               bin_seeding=True)
    # elif algorithm == 'SpectralClustering':
    #     model = cluster.SpectralClustering(n_clusters=n_clusters,
    #                                        eigen_solver='arpack',
    #                                        affinity="nearest_neighbors")
    # elif algorithm == 'Ward':
    #     model = cluster.AgglomerativeClustering(n_clusters=n_clusters,
    #                                             linkage='ward',
    #                                             connectivity=connectivity)
    # elif algorithm == 'AgglomerativeClustering':
    #     model = cluster.AgglomerativeClustering(linkage="average",
    #                                             affinity="cityblock",
    #                                             n_clusters=n_clusters,
    #                                             connectivity=connectivity)
    return model
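Note: the sketch below is not part of the diff; it illustrates one way the deleted load_algorithm helper could be exercised, using sklearn's make_classification as a stand-in for the project's own synthetic data.

# Assumed usage sketch (not from the repository); requires load_algorithm
# from the module above to be in scope.
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=1500, n_features=4,
                           n_informative=2, n_redundant=0,
                           n_clusters_per_class=1, n_classes=3,
                           random_state=8)
model = load_algorithm('MiniBatchKMeans')  # currently yields a DecisionTreeClassifier
if model is not None:
    model.fit(X, y)
    print(model.score(X, y))  # training accuracy of the fitted tree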
File deleted
File deleted
File deleted
File deleted
import numpy as np
import math
from utils.data_processing.synthetic import synthetic_dataset
from bokeh.io import curdoc, show, output_notebook
from bokeh.layouts import column, row
from bokeh.models import ColumnDataSource, Select, Slider, Plot, Scatter
from bokeh.palettes import Spectral6
from bokeh.plotting import figure

# Repeat the palette so there are enough colors for many classes.
spectral = np.hstack([Spectral6] * 20)
n_clusters_p_class = 1


def update_samples_or_dataset(attrname,
                              old,
                              new,
                              # dataset_select,
                              # samples_slider,
                              # classes_slider,
                              # features_slider,
                              # inf_slider,
                              # source
                              ):
    """Bokeh widget callback: regenerate the synthetic dataset and redraw."""
    global x, y
    dataset = dataset_select.value
    n_samples = int(samples_slider.value)
    n_classes = int(classes_slider.value)
    n_features = int(features_slider.value)
    n_inf = int(inf_slider.value)
    # Informative features cannot exceed the total feature count.
    if n_inf > n_features:
        n_features = n_inf
        features_slider.update(value=n_inf)
    # make_classification requires n_classes * n_clusters_per_class <= 2**n_informative.
    if n_classes * n_clusters_p_class > 2**n_inf:
        # n_inf = math.floor(math.sqrt(n_classes*n_clusters_p_class)) + n_classes % 2
        n_inf = math.ceil(math.log2(n_classes))
        n_features = n_inf
        # print("this is v", n_inf)
        inf_slider.update(value=n_inf)
        features_slider.update(value=n_features)
    x, y = synthetic_dataset(dataset, n_samples, n_inf, n_features, n_classes)
    colors = [spectral[i] for i in y]
    source.data = dict(colors=colors, x=x[:, 0], y=x[:, 1])
\ No newline at end of file
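Note: the following wiring sketch is not in the repository; it shows one plausible way the callback above could be attached to Bokeh widgets. The widget titles, ranges, and defaults are assumed for illustration, and it reuses the imports already present in the file above.

# Hypothetical wiring sketch (assumed, not part of the diff).
dataset_select = Select(title="Dataset", value="Make Classification",
                        options=["Make Classification", "Blobs", "No Structure"])
samples_slider = Slider(title="Samples", start=100, end=3000, step=100, value=1500)
classes_slider = Slider(title="Classes", start=2, end=6, step=1, value=3)
features_slider = Slider(title="Features", start=2, end=10, step=1, value=4)
inf_slider = Slider(title="Informative features", start=2, end=10, step=1, value=2)
source = ColumnDataSource(data=dict(x=[], y=[], colors=[]))

# Re-run the callback whenever any control changes.
for widget in (dataset_select, samples_slider, classes_slider,
               features_slider, inf_slider):
    widget.on_change('value', update_samples_or_dataset)

plot = figure(title="Synthetic dataset preview")
plot.scatter('x', 'y', color='colors', source=source)
curdoc().add_root(column(dataset_select, samples_slider, classes_slider,
                         features_slider, inf_slider, plot))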
import numpy as np
from sklearn import datasets


class SyntheticData:
    """Wraps a handful of sklearn synthetic dataset generators."""

    def __init__(self,
                 dataset='Make Classification',
                 n_samples=1500,
                 n_features=4,
                 n_classes=3,
                 n_inf=2):
        self.dataset = dataset
        self.n_samples = n_samples
        self.n_features = n_features
        self.n_classes = n_classes
        self.n_inf = n_inf

    def generator(self):
        """Return (X, y) for the selected dataset; y is None for 'No Structure'."""
        if self.dataset == 'Blobs':
            return datasets.make_blobs(n_samples=self.n_samples,
                                       random_state=8)
        elif self.dataset == 'Make Classification':
            return datasets.make_classification(n_samples=self.n_samples,
                                                n_features=self.n_features,
                                                n_informative=self.n_inf,
                                                n_redundant=0,
                                                n_clusters_per_class=1,
                                                n_classes=self.n_classes,
                                                random_state=8)
        # if dataset == 'Noisy Circles':
        #     return datasets.make_circles(n_samples=n_samples,
        #                                  factor=0.5,
        #                                  noise=0.05)
        # elif dataset == 'Noisy Moons':
        #     return datasets.make_moons(n_samples=n_samples,
        #                                noise=0.05)
        # elif dataset == 'Multilabel Classification':
        #     return datasets.make_multilabel_classification(n_samples=n_samples,
        #                                                    n_features=n_features,
        #                                                    n_classes=n_classes,
        #                                                    random_state=8)
        elif self.dataset == "No Structure":
            return np.random.rand(self.n_samples, 2), None
\ No newline at end of file
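Note: a minimal, assumed usage example of the SyntheticData class above (not part of the diff).

# Assumed usage sketch: generate a labelled dataset and inspect its shape.
gen = SyntheticData(dataset='Make Classification', n_samples=500,
                    n_features=4, n_classes=3, n_inf=2)
X, y = gen.generator()
print(X.shape, None if y is None else y.shape)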