diff --git a/sandbox/run_cobweb_multi.py b/sandbox/run_cobweb_multi.py new file mode 100644 index 0000000000000000000000000000000000000000..9ade3af5a848eeafa60a4d22ce16e02637bb40c2 --- /dev/null +++ b/sandbox/run_cobweb_multi.py @@ -0,0 +1,76 @@ +import gym +from stable_baselines.common import make_vec_env +from stable_baselines.common.policies import MlpPolicy +from stable_baselines import PPO2 +import tutorenvs +from tutorenvs.multicolumn import MultiColumnAdditionDigitsEnv +from tutorenvs.multicolumn import MultiColumnAdditionSymbolic +import numpy as np +from pprint import pprint + +from concept_formation.cobweb3 import Cobweb3Tree +from concept_formation.visualize import visualize + +from tutorenvs.utils import DataShopLogger + +def train_tree(n=10, logger=None): + tree = Cobweb3Tree() + env = MultiColumnAdditionSymbolic(logger=logger) + + p = 0 + nhints = 0 + while p < n: + # make a copy of the state + state = {a: env.state[a] for a in env.state} + env.render() + + concept = tree.categorize(state) + sel = concept.predict('selection') + inp = concept.predict('input') + + if sel == "done": + act = 'ButtonPressed' + else: + act = "UpdateField" + + sai = (sel, act, inp) + + if sel is None or inp is None: + nhints += 1 + sai = env.request_demo() + sai = (sai[0], sai[1], sai[2]['value']) + + reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]}) + # print('reward', reward) + + if reward < 0: + nhints += 1 + sai = env.request_demo() + sai = (sai[0], sai[1], sai[2]['value']) + reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]}) + + state['selection'] = sai[0] + state['input'] = str(sai[2]) + tree.ifit(state) + + if sai[0] == "done" and reward == 1.0: + print('# hints =', nhints) + nhints = 0 + print("Problem %s of %s" % (p, n)) + p += 1 + + return tree + +if __name__ == "__main__": + + logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field']) + for _ in range(1): + tree = train_tree(200, logger) + visualize(tree) + + # env = 
MultiColumnAdditionSymbolic() + + # while True: + # sai = env.request_demo() + # env.apply_sai(sai[0], sai[1], sai[2]) + # env.render() diff --git a/sandbox/run_decision_tree_multi-v1.py b/sandbox/run_decision_tree_multi-v1.py index 225331a9bcdeb4e4068ba3e2a755fa61c13d09b1..bfe2d7d3706ea03245fae08e14b02f4bfc7fd811 100644 --- a/sandbox/run_decision_tree_multi-v1.py +++ b/sandbox/run_decision_tree_multi-v1.py @@ -8,54 +8,70 @@ from tutorenvs.multicolumn import MultiColumnAdditionSymbolic import numpy as np from sklearn.tree import DecisionTreeClassifier +from sklearn.feature_extraction import DictVectorizer -def train_tree(n=10): +from tutorenvs.utils import DataShopLogger + +def train_tree(n=10, logger=None): X = [] y = [] + dv = DictVectorizer() + actions = [] + action_mapping = {} + rev_action_mapping = {} tree = DecisionTreeClassifier() - env = MultiColumnAdditionSymbolic() + env = MultiColumnAdditionSymbolic(logger=logger) p = 0 while p < n: - state = env.get_rl_state() + # make a copy of the state + state = {a: env.state[a] for a in env.state} env.render() - try: - response = decision_tree.predict(state) - except: - response = None + if rev_action_mapping == {}: + sai = None + else: + vstate = dv.transform([state]) + sai = rev_action_mapping[tree.predict(vstate)[0]] - if response is None: + if sai is None: print('hint') sai = env.request_demo() + sai = (sai[0], sai[1], sai[2]['value']) - else: - sai = (response['selection'], - response['action'], - response['inputs']) - - reward = env.apply_sai(sai[0], sai[1], sai[2]) + reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]}) print('reward', reward) if reward < 0: print('hint') sai = env.request_demo() - reward = env.apply_sai(sai[0], sai[1], sai[2]) + sai = (sai[0], sai[1], sai[2]['value']) + reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]}) X.append(state) y.append(sai) - if sai.selection == "done" and reward == 1.0: + Xv = dv.fit_transform(X) + actions = list(set(y)) + action_mapping = {l: i for i, 
l in enumerate(actions)} + rev_action_mapping = {i: l for i, l in enumerate(actions)} + yv = [action_mapping[l] for l in y] + + tree.fit(Xv, yv) + + if sai[0] == "done" and reward == 1.0: p += 1 return tree if __name__ == "__main__": - # tree = train_tree(10) - env = MultiColumnAdditionSymbolic() + logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field']) + for _ in range(10): + tree = train_tree(100, logger) + # env = MultiColumnAdditionSymbolic() - while True: - sai = env.request_demo() - env.apply_sai(sai[0], sai[1], sai[2]) - env.render() + # while True: + # sai = env.request_demo() + # env.apply_sai(sai[0], sai[1], sai[2]) + # env.render() diff --git a/sandbox/run_dual_decision_tree_multi.py b/sandbox/run_dual_decision_tree_multi.py new file mode 100644 index 0000000000000000000000000000000000000000..21e5fed3b8dafce41b56ef44722f3aec19b27973 --- /dev/null +++ b/sandbox/run_dual_decision_tree_multi.py @@ -0,0 +1,100 @@ +import gym +from stable_baselines.common import make_vec_env +from stable_baselines.common.policies import MlpPolicy +from stable_baselines import PPO2 +import tutorenvs +from tutorenvs.multicolumn import MultiColumnAdditionDigitsEnv +from tutorenvs.multicolumn import MultiColumnAdditionSymbolic +import numpy as np + +from sklearn.tree import DecisionTreeClassifier +from sklearn.feature_extraction import DictVectorizer + +from tutorenvs.utils import DataShopLogger + +def train_tree(n=10, logger=None): + X = [] + y_sel = [] + y_inp = [] + dv = DictVectorizer() + selections = [] + selection_mapping = {} + rev_selection_mapping = {} + selection_tree = DecisionTreeClassifier() + + inputs = [] + input_mapping = {} + rev_input_mapping = {} + input_tree = DecisionTreeClassifier() + + env = MultiColumnAdditionSymbolic(logger=logger) + + p = 0 + while p < n: + # make a copy of the state + state = {a: env.state[a] for a in env.state} + env.render() + + if rev_selection_mapping == {}: + sai = None + else: + vstate = dv.transform([state]) + 
sel = rev_selection_mapping[selection_tree.predict(vstate)[0]] + if sel == 'done': + act = 'ButtonPressed' + else: + act = "UpdateField" + inp = rev_input_mapping[input_tree.predict(vstate)[0]] + sai = (sel, act, inp) + + if sai is None: + print('hint') + sai = env.request_demo() + sai = (sai[0], sai[1], sai[2]['value']) + + reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]}) + print('reward', reward) + + if reward < 0: + print('hint') + sai = env.request_demo() + sai = (sai[0], sai[1], sai[2]['value']) + reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]}) + + X.append(state) + y_sel.append(sai[0]) + y_inp.append(sai[2]) + + Xv = dv.fit_transform(X) + + selections = list(set(y_sel)) + selection_mapping = {l: i for i, l in enumerate(selections)} + rev_selection_mapping = {i: l for i, l in enumerate(selections)} + + inputs = list(set(y_inp)) + input_mapping = {l: i for i, l in enumerate(inputs)} + rev_input_mapping = {i: l for i, l in enumerate(inputs)} + + yv_sel = [selection_mapping[l] for l in y_sel] + yv_inp = [input_mapping[l] for l in y_inp] + + selection_tree.fit(Xv, yv_sel) + input_tree.fit(Xv, yv_inp) + + if sai[0] == "done" and reward == 1.0: + print("Problem %s of %s" % (p, n)) + p += 1 + + return selection_tree, input_tree + +if __name__ == "__main__": + + logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field']) + for _ in range(1): + tree = train_tree(1000, logger) + # env = MultiColumnAdditionSymbolic() + + # while True: + # sai = env.request_demo() + # env.apply_sai(sai[0], sai[1], sai[2]) + # env.render() diff --git a/sandbox/run_single_decision_tree_multi.py b/sandbox/run_single_decision_tree_multi.py new file mode 100644 index 0000000000000000000000000000000000000000..c360acd4d19376956841f97d3e140e2b23e95959 --- /dev/null +++ b/sandbox/run_single_decision_tree_multi.py @@ -0,0 +1,78 @@ +import gym +from stable_baselines.common import make_vec_env +from stable_baselines.common.policies import MlpPolicy +from 
stable_baselines import PPO2 +import tutorenvs +from tutorenvs.multicolumn import MultiColumnAdditionDigitsEnv +from tutorenvs.multicolumn import MultiColumnAdditionSymbolic +import numpy as np + +from sklearn.tree import DecisionTreeClassifier +from sklearn.feature_extraction import DictVectorizer + +from tutorenvs.utils import DataShopLogger + +def train_tree(n=10, logger=None): + X = [] + y = [] + dv = DictVectorizer() + actions = [] + action_mapping = {} + rev_action_mapping = {} + tree = DecisionTreeClassifier() + input_tree = DecisionTreeClassifier() + env = MultiColumnAdditionSymbolic(logger=logger) + + p = 0 + while p < n: + # make a copy of the state + state = {a: env.state[a] for a in env.state} + env.render() + + if rev_action_mapping == {}: + sai = None + else: + vstate = dv.transform([state]) + sai = rev_action_mapping[tree.predict(vstate)[0]] + + if sai is None: + print('hint') + sai = env.request_demo() + sai = (sai[0], sai[1], sai[2]['value']) + + reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]}) + print('reward', reward) + + if reward < 0: + print('hint') + sai = env.request_demo() + sai = (sai[0], sai[1], sai[2]['value']) + reward = env.apply_sai(sai[0], sai[1], {'value': sai[2]}) + + X.append(state) + y.append(sai) + + Xv = dv.fit_transform(X) + actions = set(y) + action_mapping = {l: i for i, l in enumerate(actions)} + rev_action_mapping = {i: l for i, l in enumerate(actions)} + yv = [action_mapping[l] for l in y] + + tree.fit(Xv, yv) + + if sai[0] == "done" and reward == 1.0: + p += 1 + + return tree + +if __name__ == "__main__": + + logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field']) + for _ in range(10): + tree = train_tree(100, logger) + # env = MultiColumnAdditionSymbolic() + + # while True: + # sai = env.request_demo() + # env.apply_sai(sai[0], sai[1], sai[2]) + # env.render() diff --git a/tutorenvs/multicolumn.py b/tutorenvs/multicolumn.py index
b3c7e848adfe8e2dd57964cdf9b63bca210f8ec9..c5e70d4600d677d1ed6b9a7352f8692ffdd63560 100644 --- a/tutorenvs/multicolumn.py +++ b/tutorenvs/multicolumn.py @@ -12,6 +12,7 @@ import numpy as np from PIL import Image, ImageDraw from tutorenvs.utils import BaseOppEnv +from tutorenvs.utils import DataShopLogger def custom_add(a, b): if a == '': @@ -22,10 +23,16 @@ def custom_add(a, b): class MultiColumnAdditionSymbolic: - def __init__(self): + def __init__(self, logger=None): """ Creates a state and sets a random problem. """ + if logger is None: + print("CREATING LOGGER") + self.logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field']) + else: + self.logger = logger + self.logger.set_student() self.set_random_problem() # self.reset("", "", "", "", "") @@ -87,6 +94,7 @@ class MultiColumnAdditionSymbolic: self.num_correct_steps = 0 self.num_incorrect_steps = 0 + self.num_hints = 0 self.state = { 'hundreds_carry': '', @@ -190,6 +198,7 @@ class MultiColumnAdditionSymbolic: if add_counts: d.text((0, 0), str(self.num_incorrect_steps), fill="red") d.text((0, 10), str(self.num_correct_steps), fill="green") + d.text((0, 20), str(self.num_hints), fill="blue") if add_dot: d.ellipse((add_dot[0]-3, add_dot[1]-3, add_dot[0]+3, add_dot[1]+3), @@ -225,9 +234,12 @@ class MultiColumnAdditionSymbolic: return state_output def set_random_problem(self): - upper = str(randint(1,999)) - lower = str(randint(1,999)) + # upper = str(randint(1,999)) + # lower = str(randint(1,999)) + upper = str(randint(1,9)) + lower = str(randint(1,9)) self.reset(upper=upper, lower=lower) + self.logger.set_problem("%s_%s" % (upper, lower)) def apply_sai(self, selection, action, inputs): """ @@ -236,10 +248,14 @@ class MultiColumnAdditionSymbolic: reward = self.evaluate_sai(selection, action, inputs) if reward > 0: + outcome = "CORRECT" self.num_correct_steps += 1 else: + outcome = "INCORRECT" self.num_incorrect_steps += 1 + self.logger.log_step(selection, action, inputs['value'], outcome, [selection]) 
+ if reward == -1.0: return reward @@ -352,6 +368,15 @@ class MultiColumnAdditionSymbolic: # TODO still need to rewrite for multi column arith def request_demo(self): + demo = self.get_demo() + feedback_text = "selection: %s, action: %s, input: %s" % (demo[0], + demo[1], demo[2]['value']) + self.logger.log_hint(feedback_text, [demo[0]]) + self.num_hints += 1 + + return demo + + def get_demo(self): """ Returns a correct next-step SAI """