diff --git a/sandbox/fractions/run_dual_decision_tree.py b/sandbox/fractions/run_dual_decision_tree.py
index 5a6443f34752e8eca0a700281ef148d807937312..4ecae226cdb953c4cdf5ea64ef7727d3eb8531b7 100644
--- a/sandbox/fractions/run_dual_decision_tree.py
+++ b/sandbox/fractions/run_dual_decision_tree.py
@@ -3,6 +3,8 @@ from tutorenvs.fractions import FractionArithSymbolic
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction import DictVectorizer
+from tutorenvs.utils import DataShopLogger
+
def train_tree(n=10, logger=None):
X = []
@@ -19,7 +21,7 @@ def train_tree(n=10, logger=None):
rev_input_mapping = {}
input_tree = DecisionTreeClassifier()
- env = FractionArithSymbolic()
+ env = FractionArithSymbolic(logger)
p = 0
hints = 0
@@ -89,5 +91,6 @@ def train_tree(n=10, logger=None):
if __name__ == "__main__":
+ logger = DataShopLogger('FractionsTutor', extra_kcs=['field'])
for _ in range(1):
- tree = train_tree(500)
+ tree = train_tree(800, logger)
diff --git a/sandbox/fractions/train_ppo.py b/sandbox/fractions/train_ppo_number.py
similarity index 100%
rename from sandbox/fractions/train_ppo.py
rename to sandbox/fractions/train_ppo_number.py
diff --git a/sandbox/fractions/tune_ppo.py b/sandbox/fractions/tune_ppo_number.py
similarity index 100%
rename from sandbox/fractions/tune_ppo.py
rename to sandbox/fractions/tune_ppo_number.py
diff --git a/sandbox/multicolumn/train_ppo.py b/sandbox/multicolumn/train_ppo_number.py
similarity index 100%
rename from sandbox/multicolumn/train_ppo.py
rename to sandbox/multicolumn/train_ppo_number.py
diff --git a/sandbox/multicolumn/tune_ppo.py b/sandbox/multicolumn/tune_ppo_number.py
similarity index 100%
rename from sandbox/multicolumn/tune_ppo.py
rename to sandbox/multicolumn/tune_ppo_number.py
diff --git a/tutorenvs/__init__.py b/tutorenvs/__init__.py
index 5f23777d8fbf804a565a7bd682f0e37953e74a26..04a1fef8ba481e8fd183da3f5eca58660dda109c 100644
--- a/tutorenvs/__init__.py
+++ b/tutorenvs/__init__.py
@@ -13,12 +13,12 @@ register(
register(
id='FractionArith-v1',
- entry_point='tutorenvs:FractionArithDigitsEnv',
+ entry_point='tutorenvs:FractionArithOppEnv',
)
register(
id='FractionArith-v2',
- entry_point='tutorenvs:FractionArithOppEnv',
+ entry_point='tutorenvs:FractionArithDigitsEnv',
)
# TODO no pixel fractions yet.
diff --git a/tutorenvs/fractions.py b/tutorenvs/fractions.py
index 1637756750fee9514f49d869ba566e63c85e4a3f..34d64f9c9868be407359102b0c3a06229d98536f 100644
--- a/tutorenvs/fractions.py
+++ b/tutorenvs/fractions.py
@@ -73,7 +73,7 @@ class FractionArithSymbolic:
def get_possible_args(self):
return ['initial_num_left',
'initial_denom_left',
- 'initial_operator',
+ # 'initial_operator',
'initial_num_right',
'initial_denom_right',
'convert_num_left',
@@ -396,9 +396,6 @@ class FractionArithNumberEnv(gym.Env):
self.n_steps = 0
self.max_steps = 100000
- def get_rl_state(self):
- return self.tutor.state
-
def step(self, action):
self.n_steps += 1
@@ -446,7 +443,7 @@ class FractionArithNumberEnv(gym.Env):
self.n_steps = 0
self.tutor.set_random_problem()
# self.render()
- state = self.get_rl_state()
+ state = self.tutor.state
obs = self.dv.fit_transform([state])[0]
return obs
@@ -560,59 +557,40 @@ class FractionArithDigitsEnv(gym.Env):
class FractionArithOppEnv(gym.Env):
metadata = {'render.modes': ['human']}
+ def __init__(self):
+ self.tutor = FractionArithSymbolic()
+ n_selections = len(self.tutor.get_possible_selections())
+ n_features = 2000
+ n_operators = len(self.get_rl_operators())
+ n_args = len(self.tutor.get_possible_args())
+ self.dv = OnlineDictVectorizer(n_features)
+ self.observation_space = spaces.Box(
+ low=0.0, high=1.0, shape=(1, n_features), dtype=np.float32)
+ self.action_space = spaces.MultiDiscrete([n_selections, n_operators,
+ n_args, n_args])
+ self.n_steps = 0
+ self.max_steps = 100000
+
+
def get_rl_operators(self):
return ['copy',
'add',
'multiply']
def get_rl_state(self):
- # self.state = {
- # 'initial_num_left': num1,
- # 'initial_denom_left': denom1,
- # 'initial_operator': operator,
- # 'initial_num_right': num2,
- # 'initial_denom_right': denom2,
- # 'check_convert': '',
- # 'convert_num_left': '',
- # 'convert_denom_left': '',
- # 'convert_operator': operator,
- # 'convert_num_right': '',
- # 'convert_denom_right': '',
- # 'answer_num': '',
- # 'answer_denom': '',
- # }
-
- state = {}
+ state = self.tutor.state.copy()
for attr in self.tutor.state:
- if attr == "initial_operator" or attr == "convert_operator":
- state[attr] = self.tutor.state[attr] == "+"
- continue
-
- # just whether or not there is a value
- state[attr] = self.tutor.state[attr] != ""
-
- # equality
for attr2 in self.tutor.state:
+ if attr == "initial_operator" or attr == "convert_operator":
+ continue
+ if attr2 == "initial_operator" or attr2 == "convert_operator":
+ continue
if attr >= attr2:
continue
state['eq(%s,%s)' % (attr, attr2)] = self.tutor.state[attr] == self.tutor.state[attr2]
return state
- def __init__(self):
- self.tutor = FractionArithSymbolic()
- n_selections = len(self.tutor.get_possible_selections())
- n_features = len(self.get_rl_state())
- n_operators = len(self.get_rl_operators())
- n_args = len(self.tutor.get_possible_args())
- self.dv = DictVectorizer()
- self.dv.fit([self.get_rl_state()])
-
- self.observation_space = spaces.Box(low=0.0,
- high=1.0, shape=(1, n_features), dtype=np.float32)
- self.action_space = spaces.MultiDiscrete([n_selections, n_operators,
- n_args, n_args])
-
def step(self, action):
try:
s, a, i = self.decode(action)
@@ -628,7 +606,7 @@ class FractionArithOppEnv(gym.Env):
state = self.get_rl_state()
# pprint(state)
- obs = self.dv.transform([state])[0].toarray()
+ obs = self.dv.fit_transform([state])[0]
info = {}
return obs, reward, done, info
@@ -668,7 +646,7 @@ class FractionArithOppEnv(gym.Env):
def reset(self):
self.tutor.set_random_problem()
state = self.get_rl_state()
- obs = self.dv.transform([state])[0].toarray()
+ obs = self.dv.fit_transform([state])[0]
return obs
def render(self, mode='human', close=False):