Commit a73d1f67 authored by Chris MacLellan

working with decision tree and cobweb for tutor env

parent e607c7b6
import gym
from stable_baselines.common import make_vec_env
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import PPO2

import tutorenvs
from tutorenvs.multicolumn import MultiColumnAdditionDigitsEnv
from tutorenvs.multicolumn import MultiColumnAdditionSymbolic
import numpy as np
from pprint import pprint

from concept_formation.cobweb3 import Cobweb3Tree
from concept_formation.visualize import visualize

from tutorenvs.utils import DataShopLogger


def train_tree(n=10, logger=None):
    tree = Cobweb3Tree()
    env = MultiColumnAdditionSymbolic(logger=logger)

    p = 0
    nhints = 0
    while p < n:
        # make a copy of the state
        state = {a: env.state[a] for a in env.state}
        env.render()

        # Categorize the state into the Cobweb hierarchy and predict the
        # selection and input of the next action.
        concept = tree.categorize(state)
        sel = concept.predict('selection')
        inp = concept.predict('input')

        if sel == "done":
            act = 'ButtonPressed'
        else:
            act = "UpdateField"
        sai = (sel, act, inp)

        # Fall back on a demonstration whenever the tree cannot predict.
        if sel is None or inp is None:
            nhints += 1
            sai = env.request_demo()
            sai = (sai[0], sai[1], sai[2]['value'])

        reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]})
        # print('reward', reward)

        # On an incorrect step, request and apply a demonstration instead.
        if reward < 0:
            nhints += 1
            sai = env.request_demo()
            sai = (sai[0], sai[1], sai[2]['value'])
            reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]})

        # Train on the state labeled with the correct selection and input.
        state['selection'] = sai[0]
        state['input'] = str(sai[2])
        tree.ifit(state)

        if sai[0] == "done" and reward == 1.0:
            print('# hints =', nhints)
            nhints = 0
            print("Problem %s of %s" % (p, n))
            p += 1

    return tree


if __name__ == "__main__":
    logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field'])
    for _ in range(1):
        tree = train_tree(200, logger)
    visualize(tree)

    # env = MultiColumnAdditionSymbolic()
    # while True:
    #     sai = env.request_demo()
    #     env.apply_sai(sai[0], sai[1], sai[2])
    #     env.render()
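
For context, the Cobweb3Tree calls above come from the concept_formation library's incremental interface: ifit folds a single instance into the concept hierarchy, categorize sorts a (possibly partial) instance into the hierarchy without modifying it, and predict reads an attribute off the matched concept, returning None when it has no basis for a guess, which is what the hint fallback above appears to rely on. A minimal round-trip sketch, with made-up attribute values:

from concept_formation.cobweb3 import Cobweb3Tree

tree = Cobweb3Tree()

# Incrementally fit two toy states labeled with the demonstrated action.
tree.ifit({'upper_ones': '3', 'selection': 'answer_ones', 'input': '3'})
tree.ifit({'upper_ones': '5', 'selection': 'answer_ones', 'input': '5'})

# Categorize a fresh state that lacks the action attributes...
concept = tree.categorize({'upper_ones': '3'})

# ...and predict them; a None here would trigger request_demo() above.
print(concept.predict('selection'), concept.predict('input'))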
@@ -8,54 +8,70 @@ from tutorenvs.multicolumn import MultiColumnAdditionSymbolic
 import numpy as np
 from sklearn.tree import DecisionTreeClassifier
+from sklearn.feature_extraction import DictVectorizer
 
+from tutorenvs.utils import DataShopLogger
 
-def train_tree(n=10):
+
+def train_tree(n=10, logger=None):
     X = []
     y = []
+    dv = DictVectorizer()
+    actions = []
+    action_mapping = {}
+    rev_action_mapping = {}
     tree = DecisionTreeClassifier()
-    env = MultiColumnAdditionSymbolic()
+    env = MultiColumnAdditionSymbolic(logger=logger)
     p = 0
     while p < n:
-        state = env.get_rl_state()
+        # make a copy of the state
+        state = {a: env.state[a] for a in env.state}
         env.render()
-        try:
-            response = decision_tree.predict(state)
-        except:
-            response = None
-
-        if response is None:
+        if rev_action_mapping == {}:
+            sai = None
+        else:
+            vstate = dv.transform([state])
+            sai = rev_action_mapping[tree.predict(vstate)[0]]
+
+        if sai is None:
             print('hint')
             sai = env.request_demo()
-        else:
-            sai = (response['selection'],
-                   response['action'],
-                   response['inputs'])
-
-        reward = env.apply_sai(sai[0], sai[1], sai[2])
+            sai = (sai[0], sai[1], sai[2]['value'])
+
+        reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]})
         print('reward', reward)
 
         if reward < 0:
             print('hint')
             sai = env.request_demo()
-            reward = env.apply_sai(sai[0], sai[1], sai[2])
+            sai = (sai[0], sai[1], sai[2]['value'])
+            reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]})
 
         X.append(state)
         y.append(sai)
 
-        if sai.selection == "done" and reward == 1.0:
+        Xv = dv.fit_transform(X)
+        actions = list(set(y))
+        action_mapping = {l: i for i, l in enumerate(actions)}
+        rev_action_mapping = {i: l for i, l in enumerate(actions)}
+        yv = [action_mapping[l] for l in y]
+        tree.fit(Xv, yv)
+
+        if sai[0] == "done" and reward == 1.0:
             p += 1
 
     return tree
 
 
 if __name__ == "__main__":
-    # tree = train_tree(10)
-    env = MultiColumnAdditionSymbolic()
-
-    while True:
-        sai = env.request_demo()
-        env.apply_sai(sai[0], sai[1], sai[2])
-        env.render()
+    logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field'])
+    for _ in range(10):
+        tree = train_tree(100, logger)
+    # env = MultiColumnAdditionSymbolic()
+    # while True:
+    #     sai = env.request_demo()
+    #     env.apply_sai(sai[0], sai[1], sai[2])
+    #     env.render()
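
A note on the DictVectorizer pattern this diff introduces: the loop stores raw state dicts in X and calls fit_transform over the whole history on every step because the one-hot vocabulary only covers (field, value) pairs seen so far; transform with a stale vocabulary silently zeroes out unseen values rather than erroring. A small standalone illustration (field names made up; get_feature_names is the accessor from the sklearn era this codebase targets):

from sklearn.feature_extraction import DictVectorizer

dv = DictVectorizer()
X = [{'answer_ones': '', 'upper_ones': '3'},
     {'answer_ones': '7', 'upper_ones': '3'}]

# fit_transform learns one binary column per (field, string value) pair.
Xv = dv.fit_transform(X)
print(dv.get_feature_names())  # ['answer_ones=', 'answer_ones=7', 'upper_ones=3']
print(Xv.toarray())            # [[1. 0. 1.], [0. 1. 1.]]

# A value never seen during fitting simply encodes to all zeros, which is
# why the training loop refits the vectorizer on all of X each step.
print(dv.transform([{'answer_ones': '9'}]).toarray())  # [[0. 0. 0.]]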
import gym
from stable_baselines.common import make_vec_env
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import PPO2

import tutorenvs
from tutorenvs.multicolumn import MultiColumnAdditionDigitsEnv
from tutorenvs.multicolumn import MultiColumnAdditionSymbolic
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction import DictVectorizer

from tutorenvs.utils import DataShopLogger


def train_tree(n=10, logger=None):
    X = []
    y_sel = []
    y_inp = []

    dv = DictVectorizer()

    # Separate classifiers for the selection and the input of each action.
    selections = []
    selection_mapping = {}
    rev_selection_mapping = {}
    selection_tree = DecisionTreeClassifier()

    inputs = []
    input_mapping = {}
    rev_input_mapping = {}
    input_tree = DecisionTreeClassifier()

    env = MultiColumnAdditionSymbolic(logger=logger)

    p = 0
    while p < n:
        # make a copy of the state
        state = {a: env.state[a] for a in env.state}
        env.render()

        # Until the trees have been fit once, there is nothing to predict.
        if rev_selection_mapping == {}:
            sai = None
        else:
            vstate = dv.transform([state])
            sel = rev_selection_mapping[selection_tree.predict(vstate)[0]]
            if sel == 'done':
                act = 'ButtonPressed'
            else:
                act = "UpdateField"
            inp = rev_input_mapping[input_tree.predict(vstate)[0]]
            sai = (sel, act, inp)

        if sai is None:
            print('hint')
            sai = env.request_demo()
            sai = (sai[0], sai[1], sai[2]['value'])

        reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]})
        print('reward', reward)

        if reward < 0:
            print('hint')
            sai = env.request_demo()
            sai = (sai[0], sai[1], sai[2]['value'])
            reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]})

        X.append(state)
        y_sel.append(sai[0])
        y_inp.append(sai[2])

        # Refit the vectorizer and both trees on all data collected so far.
        Xv = dv.fit_transform(X)

        selections = list(set(y_sel))
        selection_mapping = {l: i for i, l in enumerate(selections)}
        rev_selection_mapping = {i: l for i, l in enumerate(selections)}

        inputs = list(set(y_inp))
        input_mapping = {l: i for i, l in enumerate(inputs)}
        rev_input_mapping = {i: l for i, l in enumerate(inputs)}

        yv_sel = [selection_mapping[l] for l in y_sel]
        yv_inp = [input_mapping[l] for l in y_inp]
        selection_tree.fit(Xv, yv_sel)
        input_tree.fit(Xv, yv_inp)

        if sai[0] == "done" and reward == 1.0:
            print("Problem %s of %s" % (p, n))
            p += 1

    return selection_tree, input_tree


if __name__ == "__main__":
    logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field'])
    for _ in range(1):
        tree = train_tree(1000, logger)

    # env = MultiColumnAdditionSymbolic()
    # while True:
    #     sai = env.request_demo()
    #     env.apply_sai(sai[0], sai[1], sai[2])
    #     env.render()
import gym
from stable_baselines.common import make_vec_env
from stable_baselines.common.policies import MlpPolicy
from stable_baselines import PPO2

import tutorenvs
from tutorenvs.multicolumn import MultiColumnAdditionDigitsEnv
from tutorenvs.multicolumn import MultiColumnAdditionSymbolic
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction import DictVectorizer

from tutorenvs.utils import DataShopLogger


def train_tree(n=10, logger=None):
    X = []
    y = []

    dv = DictVectorizer()

    # A single classifier that predicts the whole SAI tuple as one label.
    actions = []
    action_mapping = {}
    rev_action_mapping = {}
    tree = DecisionTreeClassifier()

    env = MultiColumnAdditionSymbolic(logger=logger)

    p = 0
    while p < n:
        # make a copy of the state
        state = {a: env.state[a] for a in env.state}
        env.render()

        # Until the tree has been fit once, there is nothing to predict.
        if rev_action_mapping == {}:
            sai = None
        else:
            vstate = dv.transform([state])
            sai = rev_action_mapping[tree.predict(vstate)[0]]

        if sai is None:
            print('hint')
            sai = env.request_demo()
            sai = (sai[0], sai[1], sai[2]['value'])

        reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]})
        print('reward', reward)

        if reward < 0:
            print('hint')
            sai = env.request_demo()
            sai = (sai[0], sai[1], sai[2]['value'])
            reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]})

        X.append(state)
        y.append(sai)

        # Refit the vectorizer and the tree on all data collected so far.
        Xv = dv.fit_transform(X)
        actions = list(set(y))
        action_mapping = {l: i for i, l in enumerate(actions)}
        rev_action_mapping = {i: l for i, l in enumerate(actions)}
        yv = [action_mapping[l] for l in y]
        tree.fit(Xv, yv)

        if sai[0] == "done" and reward == 1.0:
            p += 1

    return tree


if __name__ == "__main__":
    logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field'])
    for _ in range(10):
        tree = train_tree(100, logger)

    # env = MultiColumnAdditionSymbolic()
    # while True:
    #     sai = env.request_demo()
    #     env.apply_sai(sai[0], sai[1], sai[2])
    #     env.render()
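
One easy-to-miss reason for the integer action_mapping above: it is not just cosmetic. Handing the SAI tuples straight to fit would make scikit-learn coerce y into a (n_samples, 3) array and silently fit a multi-output tree, one head per tuple position, so encoding each distinct tuple as a single integer is what keeps this a single-output classifier over whole actions. A minimal sketch with illustrative labels:

import numpy as np
from sklearn.tree import DecisionTreeClassifier

y = [('answer_ones', 'UpdateField', '7'),
     ('done', 'ButtonPressed', '-1')]
X = [[0], [1]]

# As-is, the tuples form a (2, 3) array: three outputs, not one.
print(np.asarray(y).shape)

# Mapping each tuple to one integer restores single-output classification.
mapping = {l: i for i, l in enumerate(set(y))}
tree = DecisionTreeClassifier().fit(X, [mapping[l] for l in y])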
@@ -12,6 +12,7 @@ import numpy as np
 from PIL import Image, ImageDraw
 
 from tutorenvs.utils import BaseOppEnv
+from tutorenvs.utils import DataShopLogger
 
 def custom_add(a, b):
     if a == '':
@@ -22,10 +23,16 @@ def custom_add(a, b):
 class MultiColumnAdditionSymbolic:
 
-    def __init__(self):
+    def __init__(self, logger=None):
         """
         Creates a state and sets a random problem.
         """
+        if logger is None:
+            print("CREATING LOGGER")
+            self.logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field'])
+        else:
+            self.logger = logger
+        self.logger.set_student()
         self.set_random_problem()
         # self.reset("", "", "", "", "")
@@ -87,6 +94,7 @@ class MultiColumnAdditionSymbolic:
         self.num_correct_steps = 0
         self.num_incorrect_steps = 0
+        self.num_hints = 0
 
         self.state = {
             'hundreds_carry': '',
@@ -190,6 +198,7 @@ class MultiColumnAdditionSymbolic:
         if add_counts:
             d.text((0, 0), str(self.num_incorrect_steps), fill="red")
             d.text((0, 10), str(self.num_correct_steps), fill="green")
+            d.text((0, 20), str(self.num_hints), fill="blue")
 
         if add_dot:
             d.ellipse((add_dot[0]-3, add_dot[1]-3, add_dot[0]+3, add_dot[1]+3),
@@ -225,9 +234,12 @@ class MultiColumnAdditionSymbolic:
         return state_output
 
     def set_random_problem(self):
-        upper = str(randint(1,999))
-        lower = str(randint(1,999))
+        # upper = str(randint(1,999))
+        # lower = str(randint(1,999))
+        upper = str(randint(1,9))
+        lower = str(randint(1,9))
         self.reset(upper=upper, lower=lower)
+        self.logger.set_problem("%s_%s" % (upper, lower))
 
     def apply_sai(self, selection, action, inputs):
         """
@@ -236,10 +248,14 @@ class MultiColumnAdditionSymbolic:
         reward = self.evaluate_sai(selection, action, inputs)
 
         if reward > 0:
+            outcome = "CORRECT"
             self.num_correct_steps += 1
         else:
+            outcome = "INCORRECT"
             self.num_incorrect_steps += 1
 
+        self.logger.log_step(selection, action, inputs['value'], outcome, [selection])
+
         if reward == -1.0:
             return reward
@@ -352,6 +368,15 @@ class MultiColumnAdditionSymbolic:
 
     # TODO still need to rewrite for multi column arith
     def request_demo(self):
+        demo = self.get_demo()
+        feedback_text = "selection: %s, action: %s, input: %s" % (demo[0],
+                demo[1], demo[2]['value'])
+        self.logger.log_hint(feedback_text, [demo[0]])
+        self.num_hints += 1
+        return demo
+
+    def get_demo(self):
         """
         Returns a correct next-step SAI
         """
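
The environment changes above assume a DataShopLogger in tutorenvs.utils exposing set_student, set_problem, log_step, and log_hint. The class below is a hypothetical stand-in, not the real implementation: it covers only the surface this diff actually calls, printing instead of writing DataShop-style transaction rows:

class StubDataShopLogger:
    # Hypothetical substitute for tutorenvs.utils.DataShopLogger; only the
    # methods called by MultiColumnAdditionSymbolic above are sketched.

    def __init__(self, domain, extra_kcs=None):
        self.domain = domain
        self.extra_kcs = extra_kcs or []
        self.student = 0
        self.problem = None

    def set_student(self):
        # The real logger presumably assigns a fresh anonymous student id.
        self.student += 1

    def set_problem(self, name):
        self.problem = name

    def log_step(self, selection, action, inp, outcome, kcs):
        print(self.student, self.problem, selection, action, inp, outcome, kcs)

    def log_hint(self, feedback_text, kcs):
        print(self.student, self.problem, 'HINT', feedback_text, kcs)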