From 8607c26fbaba318b8fa6c5110db1c4e8b6d17837 Mon Sep 17 00:00:00 2001
From: Philip Monaco <philmonaco34@gmail.com>
Date: Fri, 1 Apr 2022 18:19:31 -0400
Subject: [PATCH] Class formation of synthetic datagen
---
__init__.py | 0
examples/decision_tree/__init__.py | 0
examples/decision_tree/main.py | 133 ++++++++++++++++++
examples/decision_tree/theme.yaml | 12 ++
src/__init__.py | 0
src/utils/__init__.py | 0
src/utils/__pycache__/__init__.cpython-39.pyc | Bin 0 -> 161 bytes
src/utils/__pycache__/data.cpython-39.pyc | Bin 0 -> 822 bytes
src/utils/algorithms/__init__.py | 0
src/utils/algorithms/callbacks.py | 0
src/utils/algorithms/loader.py | 49 +++++++
src/utils/data_processing/__init__.py | 0
.../__pycache__/__init__.cpython-39.pyc | Bin 0 -> 177 bytes
.../__pycache__/callbacks.cpython-39.pyc | Bin 0 -> 1482 bytes
.../__pycache__/synthetic.cpython-39.pyc | Bin 0 -> 1184 bytes
.../synthetic_generator.cpython-39.pyc | Bin 0 -> 1014 bytes
src/utils/data_processing/callbacks.py | 51 +++++++
src/utils/data_processing/synthetic.py | 49 +++++++
18 files changed, 294 insertions(+)
create mode 100644 __init__.py
create mode 100644 examples/decision_tree/__init__.py
create mode 100644 examples/decision_tree/main.py
create mode 100644 examples/decision_tree/theme.yaml
create mode 100644 src/__init__.py
create mode 100644 src/utils/__init__.py
create mode 100644 src/utils/__pycache__/__init__.cpython-39.pyc
create mode 100644 src/utils/__pycache__/data.cpython-39.pyc
create mode 100644 src/utils/algorithms/__init__.py
create mode 100644 src/utils/algorithms/callbacks.py
create mode 100644 src/utils/algorithms/loader.py
create mode 100644 src/utils/data_processing/__init__.py
create mode 100644 src/utils/data_processing/__pycache__/__init__.cpython-39.pyc
create mode 100644 src/utils/data_processing/__pycache__/callbacks.cpython-39.pyc
create mode 100644 src/utils/data_processing/__pycache__/synthetic.cpython-39.pyc
create mode 100644 src/utils/data_processing/__pycache__/synthetic_generator.cpython-39.pyc
create mode 100644 src/utils/data_processing/callbacks.py
create mode 100644 src/utils/data_processing/synthetic.py
diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/decision_tree/__init__.py b/examples/decision_tree/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/examples/decision_tree/main.py b/examples/decision_tree/main.py
new file mode 100644
index 0000000..90ab43a
--- /dev/null
+++ b/examples/decision_tree/main.py
@@ -0,0 +1,146 @@
+import numpy as np
+import math
+from sklearn import cluster, datasets
+from sklearn.neighbors import kneighbors_graph
+
+from sklearn.model_selection import train_test_split
+
+from sklearn.tree import export_graphviz
+from subprocess import call
+
+# from utils.data_processing.synthetic_generator import synthetic_dataset
+from utils.data_processing.synthetic import SyntheticData
+# from utils.data_processing.callbacks import update_samples_or_dataset
+# from utils.algorithms.loader import load_algorithm
+
+from bokeh.io import curdoc, show, output_notebook
+from bokeh.layouts import column, row
+from bokeh.models import ColumnDataSource, Select, Slider, Plot, Scatter
+from bokeh.palettes import Spectral6
+from bokeh.plotting import figure
+
+np.random.seed(0)
+
+# Initial dataset, built from SyntheticData's default settings.
+data = SyntheticData()
+x, y = data.generator()
+
+# Tile the 6-colour palette (120 entries) so class labels can index it.
+spectral = np.hstack([Spectral6] * 20)
+
+colors = [spectral[i] for i in y]
+
+# Only the first two feature columns are plotted.
+source = ColumnDataSource(dict(x=x[:, 0], y=x[:, 1], colors=colors))
+
+b = figure(title="Some title", width=400, height=400, min_border=0)
+
+glyph = Scatter(x="x", y="y", size=5, fill_color="colors")
+
+b.add_glyph(source, glyph)
+
+clf_algorithms = [
+    'Decision Tree',
+]
+
+# "Multilabel Classification" used to be offered here, but
+# SyntheticData.generator() does not handle that name, so selecting it made
+# the update callback fail. Only names the generator handles are listed.
+datasets_names = [
+    "Make Classification",
+    "Blobs",
+]
+
+algorithm_select = Select(value='Decision Tree',
+                          title='Select Algorithm:',
+                          width=200,
+                          options=clf_algorithms)
+
+dataset_select = Select(value='Make Classification',
+                        title='Select Dataset',
+                        width=200,
+                        options=datasets_names)
+
+samples_slider = Slider(title="Number of samples",
+                        value=1500.0,
+                        start=200.0,
+                        end=3000.0,
+                        step=100,
+                        width=400)
+
+classes_slider = Slider(title="Number of Classes",
+                        value=3,
+                        start=2,
+                        end=20,
+                        step=1,
+                        width=400)
+
+features_slider = Slider(title="Number of Features",
+                         value=3,
+                         start=2,
+                         end=1000,
+                         step=1,
+                         width=400)
+
+# n_inf is passed on as the number of informative *features*, so label it so.
+inf_slider = Slider(title='Informative Features',
+                    value=3,
+                    start=2,
+                    end=100,
+                    step=1,
+                    width=400)
+
+
+def update_samples_or_dataset(attrname, old, new):
+    """Regenerate the dataset from the widget values and refresh the plot.
+
+    Registered as a Bokeh ``on_change`` callback. The counts are first made
+    consistent (enough informative features for the requested classes, and at
+    least as many total features as informative ones) and the sliders are
+    updated to show the values actually used.
+
+    Args:
+        attrname: Name of the changed property (unused).
+        old: Previous property value (unused).
+        new: New property value (unused).
+    """
+    global x, y
+
+    dataset = dataset_select.value
+    n_samples = int(samples_slider.value)
+    n_classes = int(classes_slider.value)
+    n_features = int(features_slider.value)
+    n_inf = int(inf_slider.value)
+
+    # With one cluster per class, make_classification needs
+    # n_classes <= 2 ** n_informative; raise n_inf to the smallest valid value.
+    if n_classes > 2 ** n_inf:
+        n_inf = math.ceil(math.log2(n_classes))
+
+    # Informative features are part of the total, so only ever grow the
+    # total; a feature count that is already large enough is left untouched.
+    n_features = max(n_features, n_inf)
+
+    inf_slider.update(value=n_inf)
+    features_slider.update(value=n_features)
+
+    data = SyntheticData(dataset, n_samples, n_features, n_classes, n_inf)
+    x, y = data.generator()
+    colors = [spectral[i] for i in y]
+
+    source.data = dict(colors=colors, x=x[:, 0], y=x[:, 1])
+
+
+dataset_select.on_change('value', update_samples_or_dataset)
+samples_slider.on_change('value_throttled', update_samples_or_dataset)
+classes_slider.on_change('value_throttled', update_samples_or_dataset)
+features_slider.on_change('value', update_samples_or_dataset)
+inf_slider.on_change('value', update_samples_or_dataset)
+
+# set up layout
+selects = row(dataset_select, width=420)
+inputs = column(selects, samples_slider, classes_slider, inf_slider, features_slider)
+
+# add to document
+curdoc().add_root(row(inputs, b))
+curdoc().title = "Decision Tree"
diff --git a/examples/decision_tree/theme.yaml b/examples/decision_tree/theme.yaml
new file mode 100644
index 0000000..ce434d5
--- /dev/null
+++ b/examples/decision_tree/theme.yaml
@@ -0,0 +1,12 @@
+attrs:  # default property values, keyed by Bokeh model name
+    Figure:
+        width: 400
+        height: 400
+        background_fill_color: 'lightgrey'
+        background_fill_alpha: 0.2
+
+    Grid:
+        grid_line_color: null  # null: grid lines are not drawn
+
+    Title:
+        text_font_size: '13px'
\ No newline at end of file
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/utils/__init__.py b/src/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/utils/__pycache__/__init__.cpython-39.pyc b/src/utils/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..142bc23a60b5553413dc37265627f06b05e13b40
GIT binary patch
literal 161
zcmYe~<>g{vU|{H5Ymo$^AA<;F%*epN;K0DZP|U)>z>vZa%%I8Wx00a<B#a<_xn)?z
z6enlo=jX(v6s1<A=EN7L=4Iv=#TOLiXQd{W#FS@L>OzEcVM4`4$uZ#`kumY{nR%Hd
Z@$q^EmA5!-a`RJ4b5iX<=6?pc0{}kADHs3%
literal 0
HcmV?d00001
diff --git a/src/utils/__pycache__/data.cpython-39.pyc b/src/utils/__pycache__/data.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3f119a1c537929c5e708c11e68b31328964e0bd4
GIT binary patch
literal 822
zcmYe~<>g{vU|@)OC7i^|!octt#6iYP3=9ko3=9m#A`A=+DGVu$ISf&ZDNHHMEeugi
zDU87knk+9t>iv>IBn)#iFfgz&FfcfSbe;j}WK3boVaR2yWyoc!Wz1!+Wy)o#WoBe3
zVXa}vVryosWhr4TVK3pRVM$?>WN2mtvzS4wW~OGwTGkTI60RDS8dflmt%f0syM`@=
zrI)dmp@tF8W~^a?vzcm`;cVs-)*9w4o&~%M8JZbu*;80+IZF5z@YgVv@MQ@|Gc07R
z;gDvi<xF9#<*MOaAXvj$!<Egtka2-f4c9`(TFx4V8payV8s-|-8um1{U<OTgzamBk
z1_n)*TbyaBMJ1UznI)CC_;WJTQsYxA3R06x67!NPZ*djm7nJ5CmSpDV-C{3FP0!5F
zD}MR^|NsA*jJMd6^K(jb^NL@dU|?W)$qX_~I7%uhKRY!;uQ)NcASX2?u_RG1J+UA+
zGcP5zs0b7dMId!WEDQ_`MXU@A47XTvGK))!*g;IzqQt!P)LYD%c_p`4QZkcEZZTJ7
z7TjVhO3h2mO})kHUX)*2aEq-pFSD>T^%hfJ!7Y~J!lIH}Oa+;@Sj#g~i&Af~rDf*i
z<RxlyW3vltOJ!<eQE?PE#MF4OJv>mK#e*CZ#Z#P|n3EcxmY7_UUlhfU2!Z(G%&OEV
zj@-;VFl!}4kt72H!!IWftC-;AjQsqZ7?+~diqxF=;MBa#{G#}PqWrAX<dT^3j7r^N
zu#j#6RH(QpIVRjAG6oz%1(mmW3X1ZRQ;Un^K?0!ADV6~xLq-V}E=D;<J|-SUFl6Fm
zW?>XzWMLFxDv|~TIP)#mywco)$|509h=PQ`ZbBlYKtao4lbfGXnv-e=it=JmJa8}r
E0650nJOBUy
literal 0
HcmV?d00001
diff --git a/src/utils/algorithms/__init__.py b/src/utils/algorithms/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/utils/algorithms/callbacks.py b/src/utils/algorithms/callbacks.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/utils/algorithms/loader.py b/src/utils/algorithms/loader.py
new file mode 100644
index 0000000..eb34d3a
--- /dev/null
+++ b/src/utils/algorithms/loader.py
@@ -0,0 +1,24 @@
+from sklearn import tree
+from sklearn.preprocessing import StandardScaler
+
+
+def load_algorithm(algorithm):
+    """Return a new, unfitted estimator for the algorithm named ``algorithm``.
+
+    Args:
+        algorithm: Display name of the algorithm, e.g. ``'Decision Tree'``
+            (the label offered by the example's algorithm selector).
+
+    Returns:
+        A fresh ``sklearn.tree.DecisionTreeClassifier`` with default settings.
+
+    Raises:
+        ValueError: If ``algorithm`` is not a supported name.
+    """
+    # 'MiniBatchKMeans' is the only key the original code matched (apparently
+    # left over from a clustering example); it is still accepted as an alias
+    # so callers using that name are not broken.
+    if algorithm in ('Decision Tree', 'MiniBatchKMeans'):
+        return tree.DecisionTreeClassifier()
+
+    raise ValueError(f"Unsupported algorithm: {algorithm!r}")
diff --git a/src/utils/data_processing/__init__.py b/src/utils/data_processing/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/utils/data_processing/__pycache__/__init__.cpython-39.pyc b/src/utils/data_processing/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6cc1f6344c0ba9c5a6d4c2bd7fa332b1c5b4a5fd
GIT binary patch
literal 177
zcmYe~<>g{vU|{H3W03@+AA<;F%*epN;K0DZP|U)>z>vZa%%I8Wx00a<B#a<_g=JX9
z6enlo=jX(v6s1<A=EN7L=4Iv=#TOLiXQd{W#FS@L>OzEcVM4`4$uZ#`kufQWC5a$)
n$*IM~nR)3k@$s2?nI-Y@dIgoYIBatBQ%ZAE?LhW@2Du9WKpro*
literal 0
HcmV?d00001
diff --git a/src/utils/data_processing/__pycache__/callbacks.cpython-39.pyc b/src/utils/data_processing/__pycache__/callbacks.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a09fb3fae8aef45906a5c805b7542a8cf867fe4a
GIT binary patch
literal 1482
zcmYe~<>g{vU|@(yw@%u^#=!6x#6iX^3=9ko3=9m#Qy3T+QW#Pga~Pr+!8B78Qwl>0
zQx0=3OB4$uNRBy&HJ2@lEtfrtJ(nYj11!dp!<ow!#g)q)#SP}O=J4e5M)BtIMe*hG
zNAc$hL<!^yMhSxD*m8t&g`<SQZ1x<HT+t{|Mut?z1!Aef>5M5HA`A-|qr_7<qa;$e
zQn*_fq9juogBdh=UV_}~r^$Fru(&d>BqOyXGdVsbu_UoLwM3Ko7F%*@QA&RDEtcYp
z{PJ7;`K2WVr6uus`6a1I`T5zJOt;vQ^K(jb^KLO0<(F%+-V$&Ib6pZk5`*(gi;`1s
zu?45*q$Zc#Vhhg6Oi3-e#S)N{Uvi5*I61MTB(+GB@fK%rL27bIQDTmnCgUx(w9NF<
zqSTin3=9k}85tNDl9@s7gJMnw1_m|;1_ozPkX~S5U?^e8VytB@VJczHVr*utWhr4Q
zVF9yPOPESn!7R2CrV=(Vi@k=uhAo?=$gPIGmaT*}i@k<Dg|U~Bk)ejQghPZOg{g*J
zm?4{`sHuc=0app<0`3~tg^UXsYuUl-*%0c%df6a)OL%ISYgj-g*RVFT#xN!FCi0~)
zr?9jzl<>JQG&9z6l<?PZl<=l7q%gEHrLd+kNix8BY#?3=doOc|K)hfILokCT2Ph_3
z%Mx=+Q(rQHyblTksP7p-0-zKX4DvoGgwh#m7-D&98A}*y7$q4dG8M7}GpuCLWW2>u
z3=LCFrdv#U2Dcb9i-Z{%7{J7@$PBBP;^d6{{G6DSqST7iocQ9@yv+Qf_=2MRtkmR^
znDUHDU5Jn_OsKdhIi|EEGp9HPlr}&rlT(X}GxO48k`r@sk`j}%i}eaBZ*ke=WEPhs
z=jRsK88R?1d<MC^O1HEiC9xzmzBn<rASbmrKEDW-!1Qc#@{<#DitY3e`iek4E)rm1
zU;rn-{G9xv;#-Urw-_rmg>LbIY%fkNi7y5xsv<!K28LVAnRz9*_+X|NLy{R^a!z7#
zacXfql+T}*npjd=gb?7$%u9nwvO!#UODHcsIj6L^B(<nGz91fK!Y!8E#FC6#EXk>v
zIk#AH^3#oqK+#Yn4GK;fko7DnnV{rd3`y=RAQx(K-r`6sDJjZJ%uT(;oS&0&i#ac~
z{1!V%t~j;i7H1wTFhESOJ*maFxbos*j=aU17oVAzRwM<o1s-!n;voAZA&!NafE1kk
zpmd=C3UeMdMlJ|s;$UQ9<Y5wE5nyCt=3(Sv5@F<HDl!HIuhcEpywco)%3Dl%1x2bL
zqvgR-tOs%?QWWdK3kI;)Z*e5$XQyW9W#$)w@@5ezMHaDwwDUqFauO@^OG}E2KsmLD
z9VEd6k;u(YNzExP;sALHl$DD>g=Y~rh|2>~#s^VWkeHKNQj%I+#EUGLlV4JjnU`L~
z2NGn<C@x7%&MpFFf+7`=0U#fNlQ$9p4lWLx-29Z%oK!ndx+(@~;9%rn0)>MBqYNV`
JL<AUFm;gAimFWNg
literal 0
HcmV?d00001
diff --git a/src/utils/data_processing/__pycache__/synthetic.cpython-39.pyc b/src/utils/data_processing/__pycache__/synthetic.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bc1876944bcf2f5f237179b885d5b4a3e66c4d80
GIT binary patch
literal 1184
zcmYe~<>g{vU|?A8?w(}M!NBks#6iZ)3=9ko3=9m#Y77hvDGVu$ISf&ZDGVu0IZV0C
zQOt}C?hGl+DJ(4vDJ;!QQ7kEp!3>(LFF_{wX)@m8NJ%V7EKV&cP6mk}V^)~CatsU%
zsbDjsm{Pb?*izVA7^9d|I8r!U7@}BGxPlopd2aCrSLT&uq?TkRyCjw*{u1_0%uZEs
z&PgmT&P>ZpPAtjH&wF`?m4V?U3j+hgOOTsiGBGePB(ot|31Wi;ok6})VqjpXVXR?@
zXQ*MSVTfm}VXk3_XR2YTVTfn0VXa|^XGviQX3%8x(`3EH4)w$>&b;{I#N2|M)Z$xQ
zdGTqfi6x~)sl_1SWRO!*i*K>!#b@TFX|mm7DNfBvE8=8eU?>8GZxJ_$#RDRE85kH=
zG8FMMFfjaz%&>|nPR_{B&xuJXO07uEi7!sg%gir|FDS~-N=+__DbJ|Xg$U`wgo=xj
zV@gXhbBbd?&H|}SPAx9Z%uA0chJ}${LFFxu`1s7c%#!$cP(T-h+`z!b#t4B`Vkp6>
z2UD8Nh!k8PHYWoE130+SK*7b3#gM|7%~T{+!kEQW!;r<C!j!_?%M{E|1Cn9MW-4+i
zVa#Fy$<#1pvDPqTv8Ax4u+=bRv8OPlu=lb<RdQrA6(yB$WN|Lw0;x^mT*#Edkj50u
z01k34zgw(MIr&M&FF6<(7&Mu{;dhItC^0W3KR3R(B(Ws52o%djAP2q#5t{5^HQ+G5
z#S4ys{G!~%lFYKyTiki^MX4#Jc`1o`CAUQK;*)bqi%U|AisK7Xi{intRs@a}5e5c^
zDjvUlh2WB+(qvG)7J;qjhQuK#-Q42JP0UV>PXf6OY~d|oumIQ;crpaLq6iepV1sWl
z<rUmwgE;;c3y9WaEaC&D15g40r-C8@1_p*(oaw1~sYQt;`9*@D^a;up42)ci9E?o=
z*_c5z3nL376C)Fd{KY2D#aJbd8i{cIewwV100PB)5h&Fcv4boG`-c^jF!B;}Q{&@r
zamB~y=BJeAfY?0o@r9*{IS?7J6N*4ifH)A<sYRgd1&V=UkUuz>7#N}OHy00BPLuT(
uYhGz?L1mFN$Y<=u**U3+MR`S_Km;qmX6h{t8;E1<Kv7x@GM|HyhY0{^@))WB
literal 0
HcmV?d00001
diff --git a/src/utils/data_processing/__pycache__/synthetic_generator.cpython-39.pyc b/src/utils/data_processing/__pycache__/synthetic_generator.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..74cac755b843e94a96475186e69e96217deaa4dd
GIT binary patch
literal 1014
zcmYe~<>g{vU|^Ve!6HeQje+4Yh=Yuo7#J8F7#J9e6&M&8QW#Pga~Pr+Qy5a1a+q?N
zqnH^PQkYX%S{R~OQW%37G+AGQ^!jNs-r`6}EJ-X*Eh$cB1qnehh|S5sz~BrraSj6m
zLk&X;V>VNfL<vI{V+~^p6Uek)=3tOGYc^AnUI{}MQw?JZa|&B86Ihfzo2kg9gdvN$
zhB1XBg%c{umCaNXSHh6RQo~roT*H#Wk;2`}0#?tH%~VuZ!jQ!ZmSIZaP2mH}^Y^lY
z<pr{tie{9sWw9^ds9{VIT*#Edkj50ukirnmpef{6#p{=!SzM{$oLQ8dlUke(@x}xD
z^jR}OV5Pk#^DWN2_~OLef}GUiTWo2G$tC$kw^;M?GmBHJxFMQ-^YinHHJOT-85kIf
zKp}98)hQ=GsrV%a0|Nt8{1#79VqQvqZhUb`Vo7S1q;F|XNoG!BQfiKZb53G$ab{X(
za$-qlex4=^*qmEjdGTqfi6x~)sl^}{B!d*C78ikxtP=K3%uYqqSH#G`!0-}8XtIM%
zC<4X9E#AEN%)GSxqTIxi%(B#5+<EausVSv-DT#R{w?y*dlXFUoOHzx9;|o%Y;=y(l
zfszZv8XmuVh2WB+(qxb;UV;*eCT9_dy2X>5n4KD*3<<PbTwq=<C={UVB#`%S$w9>7
zK808S_vtNRusqmSgdkI1!7VmOSlwa)(VDEc*r90+?5|s_U@w8a2X^dAh9W5j28LgW
z8CEgH$r<_iIWZ|ksTHX?@x`fmnfXQW1x5K;smUcV<r$T_5FuTdP;pUmOt?p649EhI
zy5!X2;>^7CnBvO3l8n@n%;fm=)V$Q9#FG3Xy@JYHf(UVtj^fl3Hc&#h0VNwo4n{Uc
zCPt=zY%E~be>P?m8YK6ZjTI~gVngsRHVzKPA{kIZVZFtgSDIT;StQE9z;KJbI6Egb
nu_zB5;9vz{Um_9GAY~jjx%nxjIjMG_m@j5wU|`^2<Y55-lgSv;
literal 0
HcmV?d00001
diff --git a/src/utils/data_processing/callbacks.py b/src/utils/data_processing/callbacks.py
new file mode 100644
index 0000000..6df69d2
--- /dev/null
+++ b/src/utils/data_processing/callbacks.py
@@ -0,0 +1,90 @@
+import numpy as np
+import math
+
+from utils.data_processing.synthetic import SyntheticData
+
+from bokeh.io import curdoc, show, output_notebook
+from bokeh.layouts import column, row
+from bokeh.models import ColumnDataSource, Select, Slider, Plot, Scatter
+from bokeh.palettes import Spectral6
+from bokeh.plotting import figure
+
+# Tile the 6-colour palette (120 entries) so class labels can index it.
+spectral = np.hstack([Spectral6] * 20)
+# Clusters per class assumed when checking the informative-feature count.
+n_clusters_p_class = 1
+
+
+def update_samples_or_dataset(attrname,
+                              old,
+                              new,
+                              dataset_select=None,
+                              samples_slider=None,
+                              classes_slider=None,
+                              features_slider=None,
+                              inf_slider=None,
+                              source=None):
+    """Regenerate the dataset from the widget values and refresh ``source``.
+
+    The first three parameters follow Bokeh's ``on_change`` callback
+    signature and are not used. The widgets and the data source are not
+    defined in this module, so they must be passed in, e.g. by registering
+    ``functools.partial(update_samples_or_dataset, dataset_select=..., ...)``.
+
+    Args:
+        attrname: Name of the changed property (unused).
+        old: Previous property value (unused).
+        new: New property value (unused).
+        dataset_select: Widget whose ``value`` is the dataset name.
+        samples_slider: Widget whose ``value`` is the number of samples.
+        classes_slider: Widget whose ``value`` is the number of classes.
+        features_slider: Widget whose ``value`` is the number of features;
+            updated to the value actually used.
+        inf_slider: Widget whose ``value`` is the number of informative
+            features; updated to the value actually used.
+        source: Data source whose ``data`` is replaced with the new points.
+
+    Raises:
+        TypeError: If any widget or ``source`` is not supplied.
+    """
+    global x, y
+
+    required = {
+        'dataset_select': dataset_select,
+        'samples_slider': samples_slider,
+        'classes_slider': classes_slider,
+        'features_slider': features_slider,
+        'inf_slider': inf_slider,
+        'source': source,
+    }
+    missing = [name for name, value in required.items() if value is None]
+    if missing:
+        raise TypeError(
+            "update_samples_or_dataset() missing: " + ", ".join(missing))
+
+    dataset = dataset_select.value
+    n_samples = int(samples_slider.value)
+    n_classes = int(classes_slider.value)
+    n_features = int(features_slider.value)
+    n_inf = int(inf_slider.value)
+
+    # make_classification needs
+    # n_classes * n_clusters_per_class <= 2 ** n_informative; raise n_inf to
+    # the smallest value that satisfies it.
+    n_clusters = n_classes * n_clusters_p_class
+    if n_clusters > 2 ** n_inf:
+        n_inf = math.ceil(math.log2(n_clusters))
+
+    # Informative features are part of the total, so only ever grow the
+    # total; a feature count that is already large enough is left untouched.
+    n_features = max(n_features, n_inf)
+
+    inf_slider.update(value=n_inf)
+    features_slider.update(value=n_features)
+
+    # Same argument order as SyntheticData.__init__.
+    data = SyntheticData(dataset, n_samples, n_features, n_classes, n_inf)
+    x, y = data.generator()
+    colors = [spectral[i] for i in y]
+
+    source.data = dict(colors=colors, x=x[:, 0], y=x[:, 1])
\ No newline at end of file
diff --git a/src/utils/data_processing/synthetic.py b/src/utils/data_processing/synthetic.py
new file mode 100644
index 0000000..9cfb806
--- /dev/null
+++ b/src/utils/data_processing/synthetic.py
@@ -0,0 +1,69 @@
+import numpy as np
+from sklearn import datasets
+
+
+class SyntheticData:
+    """Settings for one synthetic dataset, generated on demand.
+
+    The constructor only stores its arguments; ``generator()`` builds the
+    data each time it is called.
+    """
+
+    def __init__(self,
+                 dataset='Make Classification',
+                 n_samples=1500,
+                 n_features=4,
+                 n_classes=3,
+                 n_inf=2):
+        """Store the generation settings.
+
+        Args:
+            dataset: Dataset kind: ``'Blobs'``, ``'Make Classification'``
+                or ``'No Structure'``.
+            n_samples: Number of samples to generate.
+            n_features: Total number of features; used by
+                ``'Make Classification'`` only.
+            n_classes: Number of classes; used by
+                ``'Make Classification'`` only.
+            n_inf: Number of informative features; used by
+                ``'Make Classification'`` only.
+        """
+        self.dataset = dataset
+        self.n_samples = n_samples
+        self.n_features = n_features
+        self.n_classes = n_classes
+        self.n_inf = n_inf
+
+    def generator(self):
+        """Generate the dataset described by the stored settings.
+
+        Returns:
+            A pair ``(X, y)``. For ``'Blobs'`` and ``'Make Classification'``
+            it is whatever the matching ``sklearn.datasets`` function
+            returns. For ``'No Structure'`` ``X`` is ``np.random.rand``
+            output of shape ``(n_samples, 2)`` and ``y`` is ``None``.
+
+        Raises:
+            ValueError: If ``self.dataset`` is not one of the supported
+                names (previously this silently returned ``None``).
+        """
+        if self.dataset == 'Blobs':
+            # NOTE(review): n_features / n_classes are not forwarded here,
+            # so make_blobs uses its own defaults for them.
+            return datasets.make_blobs(n_samples=self.n_samples,
+                                       random_state=8)
+
+        if self.dataset == 'Make Classification':
+            return datasets.make_classification(
+                n_samples=self.n_samples,
+                n_features=self.n_features,
+                n_informative=self.n_inf,
+                n_redundant=0,
+                n_clusters_per_class=1,
+                n_classes=self.n_classes,
+                random_state=8)
+
+        if self.dataset == 'No Structure':
+            return np.random.rand(self.n_samples, 2), None
+
+        raise ValueError(f"Unsupported dataset: {self.dataset!r}")
\ No newline at end of file
--
GitLab