diff --git a/sandbox/run_decision_tree_multi-v1.py b/sandbox/run_decision_tree_multi-v1.py
new file mode 100644
index 0000000000000000000000000000000000000000..225331a9bcdeb4e4068ba3e2a755fa61c13d09b1
--- /dev/null
+++ b/sandbox/run_decision_tree_multi-v1.py
@@ -0,0 +1,61 @@
+import gym
+from stable_baselines.common import make_vec_env
+from stable_baselines.common.policies import MlpPolicy
+from stable_baselines import PPO2
+import tutorenvs
+from tutorenvs.multicolumn import MultiColumnAdditionDigitsEnv
+from tutorenvs.multicolumn import MultiColumnAdditionSymbolic
+import numpy as np
+
+from sklearn.tree import DecisionTreeClassifier
+
+def train_tree(n=10):
+    X = []
+    y = []
+    tree = DecisionTreeClassifier()
+    env = MultiColumnAdditionSymbolic()
+
+    p = 0
+    while p < n:
+        state = env.get_rl_state()
+        env.render()
+
+        try:
+            response = tree.predict(state)
+        except Exception:  # e.g., the tree has not been fit yet
+            response = None
+
+        if response is None:
+            print('hint')
+            sai = env.request_demo()
+
+        else:
+            sai = (response['selection'],
+                   response['action'],
+                   response['inputs'])
+
+        reward = env.apply_sai(sai[0], sai[1], sai[2])
+        print('reward', reward)
+
+        if reward < 0:
+            print('hint')
+            sai = env.request_demo()
+            reward = env.apply_sai(sai[0], sai[1], sai[2])
+
+        X.append(state)
+        y.append(sai)
+
+        if sai[0] == "done" and reward == 1.0:
+            p += 1
+
+    # TODO: vectorize X (state dicts) and y (SAI tuples), then fit the tree here.
+    return tree
+
+if __name__ == "__main__":
+
+    # tree = train_tree(10)
+    env = MultiColumnAdditionSymbolic()
+
+    while True:
+        sai = env.request_demo()
+        env.apply_sai(sai[0], sai[1], sai[2])
+        env.render()
diff --git a/sandbox/run_ppo_fractions.py b/sandbox/run_ppo_fractions.py
index ab52f3cf1173fa1d4464b7534751e893e2b46246..3df1aaf0c32492e281e9c6b29211ced133ff9973
--- a/sandbox/run_ppo_fractions.py
+++ b/sandbox/run_ppo_fractions.py
@@ -10,7 +10,7 @@ import numpy as np
 if __name__ == "__main__":
 
     # multiprocess environment
-    env = make_vec_env('FractionArith-v1', n_envs=8)
+    env = make_vec_env('FractionArith-v0', n_envs=1)
     model = PPO2(MlpPolicy, env, verbose=1,
             gamma=0.5,
             tensorboard_log="./ppo_FractionArith-v0/")
diff --git a/sandbox/run_ppo_multi-v3.py b/sandbox/run_ppo_multi-v3.py
index 5270a23d05a6f09e08308bca85e7e7acb14bc588..4bfa8ec3270b3059a7a8c74e946adaadc6f4528c
--- a/sandbox/run_ppo_multi-v3.py
+++ b/sandbox/run_ppo_multi-v3.py
@@ -1,8 +1,10 @@
 import gym
-from stable_baselines.common import make_vec_env
-from stable_baselines.common.policies import MlpPolicy
-from stable_baselines.common.policies import CnnPolicy
-from stable_baselines import PPO2
+from stable_baselines3 import PPO
+from stable_baselines3.ppo import CnnPolicy
+# from stable_baselines3.ppo import MlpPolicy
+from stable_baselines3.common.env_util import make_vec_env
+# from stable_baselines3 import DQN
+# from stable_baselines3.dqn import CnnPolicy
 import tutorenvs
 import numpy as np
 
@@ -10,14 +12,27 @@ import numpy as np
 if __name__ == "__main__":
 
     # multiprocess environment
-    env = make_vec_env('MultiColumnArith-v3', n_envs=9)
-    model = PPO2(CnnPolicy, env, verbose=1,
-            gamma=0.95,
+    env = make_vec_env('MultiColumnArith-v3', n_envs=1)
+    model = PPO(CnnPolicy, env, verbose=1,
+            gamma=0.9,
             policy_kwargs={'net_arch': [65, 65, {'vf': [65], 'pi': [65]}]},
             tensorboard_log="./tensorboard/v3/")
 
+    # env = make_vec_env('MultiColumnArith-v3', n_envs=1)
+    # model = DQN(CnnPolicy, env, verbose=1,
+    #         gamma=0.8,
+    #         # exploration_fraction=.0,
+    #         # exploration_final_eps=0.3,
+    #         # prioritized_replay=True,
+    #         # policy_kwargs={'layers': [64, 64, 64]},
+    #         # policy_kwargs={'net_arch': [65, 65, {'vf': [65], 'pi': [65]}]},
+    #         tensorboard_log="./tensorboard/v3/")
+
+    # model = PPO.load('multi-v3')
+
     while True:
-        model.learn(total_timesteps=10)
+        model.learn(total_timesteps=100)
+        model.save('multi-v3')
 
     # To demonstrate saving and loading
     # model.save("ppo2_multicolumn-v0")
@@ -27,7 +42,7 @@ if __name__ == "__main__":
     # Enjoy trained agent
     obs = env.reset()
     rwd = 0
-    for _ in range(100000):
+    for _ in range(3000000):
         action, _states = model.predict(obs)
         obs, rewards, dones, info = env.step(action)
         rwd += np.sum(rewards)
diff --git a/tutorenvs/fractions.py b/tutorenvs/fractions.py
index 3e1944fcc8b75c6e07f49a0a69499e867aa68697..793c8d7109c86979ed0480590515fca675b09361
--- a/tutorenvs/fractions.py
+++ b/tutorenvs/fractions.py
@@ -2,6 +2,8 @@ from random import randint
 from random import choice
 from pprint import pprint
 
+import cv2 # pytype:disable=import-error
+from PIL import Image, ImageDraw
 import gym
 from gym import error, spaces, utils
 from gym.utils import seeding
@@ -9,6 +11,8 @@ from sklearn.feature_extraction import FeatureHasher
 from sklearn.feature_extraction import DictVectorizer
 import numpy as np
 
+from tutorenvs.utils import DataShopLogger
+
 
 class FractionArithSymbolic:
 
@@ -16,6 +20,12 @@ class FractionArithSymbolic:
         """
         Creates a state and sets a random problem.
         """
+        self.num_correct_steps = 0
+        self.num_incorrect_steps = 0
+        self.num_hints = 0
+
+        self.logger = DataShopLogger('FractionsTutor', extra_kcs=['ptype_field'])
+        self.logger.set_student()
         self.set_random_problem()
         # self.reset("", "", "", "", "")
 
@@ -65,8 +75,13 @@ class FractionArithSymbolic:
             'answer_denom'
         ]
 
-    def render(self):
-        output = "%s\t\t%s\n---\t%s\t---\t=\n%s\t\t%s\n\nConvert? | %s |\n\n%s\t\t%s\t\t%s\n---\t%s\t---\t=\t---\n%s\t\t%s\t\t%s\n" % (self.state['initial_num_left'],
+    def render(self, add_dot=None):
+        img = self.get_image(add_counts=True, add_dot=add_dot)
+        cv2.imshow('vecenv', np.array(img))
+        cv2.waitKey(1)
+
+    def get_image(self, add_counts=False, add_dot=None):
+        output = "{:>3} {:>3}\n---- {} ---- =\n{:>3} {:>3}\n\nConvert? | {} |\n\n{:>3} {:>3} {:>3}\n---- {} ---- = ----\n{:>3} {:>3} {:>3}\n".format(self.state['initial_num_left'],
             self.state['initial_num_right'],
             self.state['initial_operator'],
             self.state['initial_denom_left'],
@@ -80,10 +95,28 @@ class FractionArithSymbolic:
             self.state['convert_denom_right'],
             self.state['answer_denom'])
 
-        print("------------------------------------------------------")
-        print(output)
-        print("------------------------------------------------------")
-        print()
+        img = Image.new('RGB', (125, 150), color="white")
+        d = ImageDraw.Draw(img)
+        d.text((10, 10), output, fill='black')
+
+        # Draw input fields
+
+        # ones
+        # if state['answer_ones'] == " ":
+        #     d.rectangle(((34, 71), (38, 79)), fill=None, outline='black')
+
+        # append correct/incorrect counts
+        if add_counts:
+            d.text((100, 0), str(self.num_hints), fill="yellow")
+            d.text((100, 10), str(self.num_incorrect_steps), fill="red")
+            d.text((100, 20), str(self.num_correct_steps), fill="green")
+
+        # for eyes :)
+        # if add_dot:
+        #     d.ellipse((add_dot[0]-3, add_dot[1]-3, add_dot[0]+3, add_dot[1]+3),
+        #               fill=None, outline='blue')
+
+        return img
 
     def get_state(self):
         """
@@ -120,6 +153,14 @@ class FractionArithSymbolic:
         operator = choice(['+', '*'])
 
         self.reset(num1, denom1, operator, num2, denom2)
+        self.logger.set_problem("%s_%s_%s_%s_%s" % (num1, denom1, operator, num2, denom2))
+
+        if operator == "+" and denom1 == denom2:
+            self.ptype = 'AS'
+        elif operator == "+" and denom1 != denom2:
+            self.ptype = 'AD'
+        else:
+            self.ptype = 'M'
 
     def apply_sai(self, selection, action, inputs):
         """
@@ -127,6 +168,18 @@
         """
         self.steps += 1
         reward = self.evaluate_sai(selection, action, inputs)
+
+        if reward > 0:
+            outcome = "CORRECT"
+            self.num_correct_steps += 1
+        else:
+            outcome = "INCORRECT"
+            self.num_incorrect_steps += 1
+
+        self.logger.log_step(selection, action, inputs['value'], outcome, [self.ptype + '_' + selection])
+
+        # Render output?
+        self.render()
 
         if reward == -1.0:
             return reward
@@ -140,6 +193,7 @@ class FractionArithSymbolic:
         else:
             self.state[selection] = inputs['value']
 
+        return reward
 
     def evaluate_sai(self, selection, action, inputs):
@@ -237,6 +291,15 @@ class FractionArithSymbolic:
         raise Exception("evaluate_sai logic missing")
 
     def request_demo(self):
+        demo = self.get_demo()
+        feedback_text = "selection: %s, action: %s, input: %s" % (demo[0],
+                demo[1], demo[2]['value'])
+        self.logger.log_hint(feedback_text, [self.ptype + '_' + demo[0]])
+        self.num_hints += 1
+
+        return demo
+
+    def get_demo(self):
         """
         Returns a correct next-step SAI
         """
diff --git a/tutorenvs/multicolumn.py b/tutorenvs/multicolumn.py
index e5091f50bb94e7231760426af082cbf377ca0b88..b3c7e848adfe8e2dd57964cdf9b63bca210f8ec9
--- a/tutorenvs/multicolumn.py
+++ b/tutorenvs/multicolumn.py
@@ -244,7 +244,7 @@ class MultiColumnAdditionSymbolic:
             return reward
 
         if selection == "done":
-            # print("DONE! Only took %i steps." % (self.num_correct_steps + self.num_incorrect_steps))
+            print("DONE! Only took %i steps." % (self.num_correct_steps + self.num_incorrect_steps))
             # self.render()
             # print()
             # pprint(self.state)
@@ -355,68 +355,50 @@ class MultiColumnAdditionSymbolic:
         """
         Returns a correct next-step SAI
         """
-        if (self.state['initial_operator'] == '+' and
-                self.state['initial_denom_left'] == self.state['initial_denom_right']):
-            if self.state['answer_num'] == "":
-                return ('answer_num', "UpdateField",
-                        {'value': str(int(self.state['initial_num_left']) +
-                                      int(self.state['initial_num_right']))})
-
-            if self.state['answer_denom'] == "":
-                return ('answer_denom', "UpdateField",
-                        {'value': self.state['initial_denom_left']})
-
-            return ('done', "ButtonPressed", {'value': -1})
-
-        if (self.state['initial_operator'] == "+" and
-                self.state['initial_denom_left'] != self.state['initial_denom_right']):
-
-            if self.state['check_convert'] == "":
-                return ('check_convert', 'UpdateField', {"value": 'x'})
-
-            if self.state['convert_denom_left'] == "":
-                return ('convert_denom_left', "UpdateField",
-                        {'value': str(int(self.state['initial_denom_left']) *
-                                      int(self.state['initial_denom_right']))})
-
-            if self.state['convert_num_left'] == "":
-                return ('convert_num_left', "UpdateField",
-                        {'value': str(int(self.state['initial_num_left']) *
-                                      int(self.state['initial_denom_right']))})
-
-            if self.state['convert_denom_right'] == "":
-                return ('convert_denom_right', "UpdateField",
-                        {'value': str(int(self.state['initial_denom_left']) *
-                                      int(self.state['initial_denom_right']))})
-
-            if self.state['convert_num_right'] == "":
-                return ('convert_num_right', "UpdateField",
-                        {'value': str(int(self.state['initial_denom_left']) *
-                                      int(self.state['initial_num_right']))})
-
-            if self.state['answer_num'] == "":
-                return ('answer_num', "UpdateField",
-                        {'value': str(int(self.state['convert_num_left']) +
-                                      int(self.state['convert_num_right']))})
-
-            if self.state['answer_denom'] == "":
-                return ('answer_denom', "UpdateField",
-                        {'value': self.state['convert_denom_right']})
+        if (self.state['answer_ones'] == self.correct_ones and
+                self.state['answer_tens'] == self.correct_tens and
+                self.state['answer_hundreds'] == self.correct_hundreds and
+                self.state['answer_thousands'] == self.correct_thousands):
 
             return ('done', "ButtonPressed", {'value': -1})
 
-        if (self.state['initial_operator'] == "*"):
-            if self.state['answer_num'] == "":
-                return ('answer_num', "UpdateField",
-                        {'value': str(int(self.state['initial_num_left']) *
-                                      int(self.state['initial_num_right']))})
+        if self.state['answer_ones'] == '':
+            return ('answer_ones', 'UpdateField', {'value': str(self.correct_ones)})
 
-            if self.state['answer_denom'] == "":
-                return ('answer_denom', "UpdateField",
-                        {'value': str(int(self.state['initial_denom_left']) *
-                                      int(self.state['initial_denom_right']))})
-
-            return ('done', "ButtonPressed", {'value': -1})
+        if (self.state["ones_carry"] == '' and
+                len(custom_add(self.state['upper_ones'],
+                               self.state['lower_ones'])) == 2):
+            return ('ones_carry', 'UpdateField',
+                    {'value': custom_add(self.state['upper_ones'],
+                                         self.state['lower_ones'])[0]})
+
+        if self.state['answer_tens'] == '':
+            return ('answer_tens', 'UpdateField', {'value': str(self.correct_tens)})
+
+        if self.state["tens_carry"] == '':
+            if (len(custom_add(custom_add(self.state['upper_tens'],
+                    self.state['lower_tens']), self.state['ones_carry'])) == 2):
+                return ('tens_carry', 'UpdateField',
+                        {'value':
+                         custom_add(custom_add(self.state['upper_tens'],
+                                               self.state['lower_tens']),
+                                    self.state['ones_carry'])[0]})
+
+        if self.state['answer_hundreds'] == '':
+            return ('answer_hundreds', 'UpdateField', {'value': str(self.correct_hundreds)})
+
+        if self.state["hundreds_carry"] == '':
+            if (len(custom_add(custom_add(self.state['upper_hundreds'],
+                    self.state['lower_hundreds']),
+                    self.state['tens_carry'])) == 2):
+                return ('hundreds_carry', 'UpdateField',
+                        {'value':
+                         custom_add(custom_add(self.state['upper_hundreds'],
+                                               self.state['lower_hundreds']),
+                                    self.state['tens_carry'])[0]})
+
+        if self.state['answer_thousands'] == '':
+            return ('answer_thousands', 'UpdateField', {'value': str(self.correct_thousands)})
 
         raise Exception("request demo - logic missing")
 
@@ -511,7 +493,7 @@ class MultiColumnAdditionPixelEnv(gym.Env):
 
     def get_rl_state(self):
         img = self.tutor.get_image().convert('L')
-        return np.expand_dims(np.array(img)/255, axis=2)
+        return np.expand_dims(np.array(img), axis=2)
 
     def __init__(self):
         self.tutor = MultiColumnAdditionSymbolic()
@@ -519,8 +501,8 @@
 
         print('shape = ', self.get_rl_state().shape)
 
-        self.observation_space = spaces.Box(low=0.0,
-            high=1.0, shape=self.get_rl_state().shape, dtype=np.float32)
+        self.observation_space = spaces.Box(low=0,
+            high=255, shape=self.get_rl_state().shape, dtype=np.uint8)
         self.action_space = spaces.MultiDiscrete([n_selections, 10])
 
     def step(self, action):
@@ -577,13 +559,13 @@ class MultiColumnAdditionPerceptEnv(gym.Env):
             'tens_carry', 'answer_hundreds', 'hundreds_carry',
             'answer_thousands']
         self.target_xy = [
-            (36, 83),
+            (36, 75),
             (30, 15),
-            (30, 83),
+            (30, 75),
             (24, 15),
-            (24, 83),
+            (24, 75),
             (18, 15),
-            (18, 83)
+            (18, 75)
         ]
 
         self.current_target = 0
@@ -595,41 +577,52 @@
 
         print('shape = ', self.get_rl_state().shape)
 
-        self.observation_space = spaces.Box(low=0.0,
-            high=1.0, shape=self.get_rl_state().shape, dtype=np.float32)
+        self.observation_space = spaces.Box(low=0,
+            high=255, shape=self.get_rl_state().shape, dtype=np.uint8)
         # self.action_space = spaces.MultiDiscrete([n_selections, 10])
-        self.action_space = spaces.Discrete(15)
+        self.action_space = spaces.Discrete(12)
 
     def set_xy(self):
         self.x, self.y = self.target_xy[self.current_target]
 
     def get_rl_state(self):
         img = self.tutor.get_image().convert('L')
-        x = self.x - 50
-        y = self.y - 90
+        x_multiplier = 0.75
+        y_multiplier = 1.5
+        x = round(self.x - (25 * x_multiplier))
+        y = round(self.y - (45 * y_multiplier))
+
+        translate = img.transform((round(img.size[0]*x_multiplier),
+            round(img.size[1]*y_multiplier)), Image.AFFINE, (1, 0, x, 0, 1, y), fillcolor='white')
+
+        # Pretty output
+        cv2.imshow('translated', np.array(translate))
+        cv2.waitKey(1)
+        self.render()
 
-        translate = img.transform((img.size[0]*2, img.size[1]*2), Image.AFFINE, (1, 0, x, 0, 1, y))
-        # cv2.imshow('translated', np.array(translate))
-        # cv2.waitKey(1)
-        return np.expand_dims(np.array(translate)/255, axis=2)
+        return np.expand_dims(np.array(translate), axis=2)
 
     def step(self, action):
         s = None
-        reward = -0.01
-
+        reward = -1
+
+        # if action == 0:
+        #     # left
+        #     self.x -= 5
+        # elif action == 1:
+        #     # right
+        #     self.x += 5
+        # elif action == 2:
+        #     # up
+        #     self.y += 5
+        # elif action == 3:
+        #     # down
+        #     self.y -= 5
         if action == 0:
-            # left
-            self.x -= 5
+            self.current_target = (self.current_target + 1) % len(self.targets)
+            self.set_xy()
+
         elif action == 1:
-            # right
-            self.x += 5
-        elif action == 2:
-            # up
-            self.y += 5
-        elif action == 3:
-            # down
-            self.y -= 5
-        elif action == 4:
             s = "done"
             a = "ButtonPressed"
             i = -1
@@ -654,28 +647,30 @@
             s = "hundreds_carry"
= "hundreds_carry" a = 'UpdateField' - i = {'value': str(action - 5)} + i = {'value': str(action - 2)} if s != None: reward = self.tutor.apply_sai(s, a, i) + # code to skip completed fields + # skipper = 0 + # original_target = self.current_target + # while self.tutor.state[self.targets[self.current_target]] != '': + # self.current_target = (self.current_target + 1) % len(self.targets) + # skipper += 1 + # if skipper > 7: + # self.current_target = original_target + # break + # self.set_xy() + self.x = min(max(self.x, 0), 50) self.y = min(max(self.y, 0), 90) obs = self.get_rl_state() done = (s == 'done' and reward == 1.0) info = {} - return obs, reward, done, info - # s, a, i = self.decode(action) - # # print(s, a, i) - # # print() - # reward = self.tutor.apply_sai(s, a, i) - # # print(reward) - # - # obs = self.get_rl_state() - # # pprint(state) - # info = {} + # self.render() return obs, reward, done, info diff --git a/tutorenvs/utils.py b/tutorenvs/utils.py index 43c100c5a6d1bf1affac01db5839306adfc1b5db..07decd2cecbb6a3313ca2b282a6d987a10328196 100644 --- a/tutorenvs/utils.py +++ b/tutorenvs/utils.py @@ -1,3 +1,7 @@ +import os +import time +import uuid +from datetime import datetime from pprint import pprint import gym @@ -5,6 +9,145 @@ from gym import error, spaces, utils from sklearn.feature_extraction import DictVectorizer import numpy as np +class DataShopLogger(): + + def __init__(self, domain = "tutorenv", extra_kcs=None): + # Create log file + if not os.path.exists("log/"): + os.mkdir("log/") + self.filename = "log/" + domain + "_" + time.strftime("%Y-%m-%d-%H-%M-%s") + ".txt" + + headers = ['Anon Student Id', + 'Session Id', + 'Transaction Id', + 'Time', + 'Time Zone', + 'Student Response Type', + 'Tutor Response Type', + 'Level (Domain)', + 'Problem Name', + 'Problem Start Time', + 'Step Name', + 'Selection', + 'Action', + 'Input', + 'Feedback Text', + 'Outcome', + 'CF (Problem Context)', + 'KC (Single-KC)'] + + if extra_kcs is not None: + for kc in extra_kcs: + headers.append('KC ({})'.format(kc)) + + with open(self.filename, 'a+') as fout: + fout.write("\t".join(headers) + "\n") + + self.time = datetime.now().timestamp() + + self.student_id = None + self.session_id = None + self.level_domain = domain + self.timezone = "UTC" + + def set_student(self, student_id=None): + if student_id is None: + student_id = uuid.uuid4() + self.student_id = student_id + self.session_id = uuid.uuid4() + + def set_problem(self, problem_name=None): + if problem_name is None: + problem_name = uuid.uuid4() + self.problem_name = problem_name + self.time += 1 + self.problem_start = datetime.fromtimestamp(self.time).strftime('%m/%d/%Y %H:%M:%S') + self.step_count = 1 + + def log_hint(self, feedback_text, kcs=None): + if self.student_id is None: + raise Exception("No student ID") + if self.problem_name is None: + raise Exception("No problem name") + + transaction_id = uuid.uuid4() + self.time += 1 + time = datetime.fromtimestamp(self.time).strftime('%m/%d/%Y %H:%M:%S') + student_response = "" + tutor_response = "HINT_MSG" + self.step_count += 1 + selection = "" + action = "" + inp = "" + outcome = "HINT" + + datum = [self.student_id, + self.session_id, + transaction_id, + time, + self.timezone, + student_response, + tutor_response, + self.level_domain, + self.problem_name, + self.problem_start, + self.step_count, + selection, + action, + inp, + feedback_text, + outcome, + "", + "Single-KC"] + + if kcs is not None: + for kc in kcs: + datum.append(kc) + + with open(self.filename, 'a+') as fout: + 
fout.write("\t".join(str(v) for v in datum) + "\n") + + def log_step(self, selection, action, inp, outcome, kcs=None): + if self.student_id is None: + raise Exception("No student ID") + if self.problem_name is None: + raise Exception("No problem name") + + transaction_id = uuid.uuid4() + self.time += 1 + time = datetime.fromtimestamp(self.time).strftime('%m/%d/%Y %H:%M:%S') + student_response = "ATTEMPT" + tutor_response = "HINT_MSG" + self.step_count += 1 + feedback_text = "" + + datum = [self.student_id, + self.session_id, + transaction_id, + time, + self.timezone, + student_response, + tutor_response, + self.level_domain, + self.problem_name, + self.problem_start, + self.step_count, + selection, + action, + inp, + feedback_text, + outcome, + "", + "Single-KC"] + + if kcs is not None: + for kc in kcs: + datum.append(kc) + + with open(self.filename, 'a+') as fout: + fout.write("\t".join(str(v) for v in datum) + "\n") + + class OnlineDictVectorizer(): def __init__(self, n_features):
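
# --- Usage sketch (editor's note, not part of the diff) --------------------
# Minimal example of the DataShopLogger API added in tutorenvs/utils.py above,
# mirroring how tutorenvs/fractions.py drives it. The domain, problem name,
# input values, and KC labels below are illustrative assumptions only.
from tutorenvs.utils import DataShopLogger

logger = DataShopLogger('FractionsTutor', extra_kcs=['ptype_field'])
logger.set_student()             # generates an anonymous student and session id
logger.set_problem('1_2_+_1_3')  # call once per generated problem
logger.log_step('answer_denom', 'UpdateField', '6', 'CORRECT',
                ['AD_answer_denom'])
logger.log_hint('selection: answer_num, action: UpdateField, input: 5',
                ['AD_answer_num'])
# Each call appends one tab-separated DataShop transaction row to
# log/FractionsTutor_<timestamp>.txt.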