Commit 07b125ab authored by Chris MacLellan

set fractions back to normal difficulty

parent 1c5ca9c8
@@ -136,35 +136,12 @@ class TrialEvalCallback(EvalCallback):
if __name__ == "__main__":
# params = {
# 'batch_size': 32,
# 'n_steps': 16,
# 'gamma': 0.0,
# 'lr': 0.00017980950834568327,
# 'lr_schedule': 'constant',
# 'ent_coef': 0.07439893598338435,
# 'clip_range': 0.4,
# 'n_epochs': 10,
# 'gae_lambda': 0.95,
# 'max_grad_norm': 0.8,
# 'vf_coef': 0.13214811411452415,
# 'net_arch': 'medium',
# 'shared_arch': False,
# 'activation_fn': 'tanh'
# }
# params = {'activation_fn': 'relu', 'batch_size': 32, 'clip_range': 0.1,
# 'ent_coef': 0.008425259906148678, 'gae_lambda': 0.98, 'gamma':
# 0.0, 'lr': 0.0014548935455020253, 'lr_schedule': 'linear',
# 'max_grad_norm': 0.6, 'n_epochs': 5, 'n_steps': 64, 'net_arch':
# 'medium', 'shared_arch': True, 'vf_coef': 0.6725952403531438}
params = {'n_step_pow': 5.0, 'batches_pow': 5.0, 'gamma': 0.0, 'lr':
0.0014291278312354846, 'lr_schedule': 'linear', 'ent_coef':
0.042102094710275415, 'clip_range': 0.2, 'n_epochs': 5,
'gae_lambda': 0.92, 'max_grad_norm': 0.7, 'vf_coef':
0.40158288555773314, 'net_arch': 'medium', 'shared_arch': False,
'activation_fn': 'relu'}
params = {'activation_fn': 'relu', 'batches_pow': 9.0, 'clip_range': 0.2,
'ent_coef': 0.013748019113722395, 'gae_lambda': 0.99, 'gamma':
0.0, 'lr': 0.005533935261484844, 'lr_schedule': 'linear',
'max_grad_norm': 2, 'n_epochs': 5, 'n_step_pow': 9.0, 'net_arch':
'small', 'shared_arch': False, 'vf_coef': 0.5470657324084635}
kwargs = get_args(params)
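The 'n_step_pow' and 'batches_pow' keys suggest that get_args converts sampled exponents into PPO's n_steps and batch_size as powers of two. A minimal sketch of that decoding, assuming the 2**pow convention (the body below is a guess; only the get_args name comes from this file):

def get_args(params):
    # Assumed decoding: the *_pow entries are base-2 exponents,
    # e.g. n_step_pow=9.0 -> n_steps=512, batches_pow=9.0 -> batch_size=512.
    p = dict(params)
    return {
        'n_steps': int(2 ** p['n_step_pow']),
        'batch_size': int(2 ** p['batches_pow']),
        'learning_rate': p['lr'],
        'gamma': p['gamma'],
        # net_arch, activation_fn, etc. would map to policy_kwargs here.
    }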
@@ -136,35 +136,13 @@ class TrialEvalCallback(EvalCallback):
if __name__ == "__main__":
# params = {
# 'batch_size': 32,
# 'n_steps': 16,
# 'gamma': 0.0,
# 'lr': 0.00017980950834568327,
# 'lr_schedule': 'constant',
# 'ent_coef': 0.07439893598338435,
# 'clip_range': 0.4,
# 'n_epochs': 10,
# 'gae_lambda': 0.95,
# 'max_grad_norm': 0.8,
# 'vf_coef': 0.13214811411452415,
# 'net_arch': 'medium',
# 'shared_arch': False,
# 'activation_fn': 'tanh'
# }
# params = {'activation_fn': 'relu', 'batch_size': 32, 'clip_range': 0.1,
# 'ent_coef': 0.008425259906148678, 'gae_lambda': 0.98, 'gamma':
# 0.0, 'lr': 0.0014548935455020253, 'lr_schedule': 'linear',
# 'max_grad_norm': 0.6, 'n_epochs': 5, 'n_steps': 64, 'net_arch':
# 'medium', 'shared_arch': True, 'vf_coef': 0.6725952403531438}
params = {'n_step_pow': 5.0, 'batches_pow': 5.0, 'gamma': 0.0, 'lr':
0.0014291278312354846, 'lr_schedule': 'linear', 'ent_coef':
0.042102094710275415, 'clip_range': 0.2, 'n_epochs': 5,
'gae_lambda': 0.92, 'max_grad_norm': 0.7, 'vf_coef':
0.40158288555773314, 'net_arch': 'medium', 'shared_arch': False,
'activation_fn': 'relu'}
# Best objective 6.266
params = {'activation_fn': 'tanh', 'batches_pow': 5.0, 'clip_range': 0.1,
'ent_coef': 0.032794340644757655, 'gae_lambda': 0.99, 'gamma':
0.0, 'lr': 4.5573009134737684e-05, 'lr_schedule': 'constant',
'max_grad_norm': 0.5, 'n_epochs': 10, 'n_step_pow': 8.0,
'net_arch': 'tiny', 'shared_arch': True, 'vf_coef':
0.23962206187507926}
kwargs = get_args(params)
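The TrialEvalCallback(EvalCallback) class and the '# Best objective 6.266' note point to an Optuna search producing these configurations. A minimal sketch of such an objective, assuming Optuna and stable-baselines3 and reusing the get_args decoding sketched above (the search ranges, timestep budget, and import path are illustrative, not taken from this repo):

import optuna
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy
from tutorenvs.fractions import FractionArithNumberEnv  # import path assumed

def objective(trial):
    params = {
        'n_step_pow': trial.suggest_float('n_step_pow', 3.0, 9.0, step=1.0),
        'batches_pow': trial.suggest_float('batches_pow', 3.0, 9.0, step=1.0),
        'lr': trial.suggest_float('lr', 1e-5, 1e-2, log=True),
        'gamma': 0.0,  # every tuned config above settled on gamma=0.0
    }
    env = FractionArithNumberEnv()
    model = PPO('MlpPolicy', env, **get_args(params))
    model.learn(total_timesteps=100_000)
    mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=10)
    return mean_reward

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)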
@@ -136,34 +136,11 @@ class TrialEvalCallback(EvalCallback):
if __name__ == "__main__":
# params = {
# 'batch_size': 32,
# 'n_steps': 16,
# 'gamma': 0.0,
# 'lr': 0.00017980950834568327,
# 'lr_schedule': 'constant',
# 'ent_coef': 0.07439893598338435,
# 'clip_range': 0.4,
# 'n_epochs': 10,
# 'gae_lambda': 0.95,
# 'max_grad_norm': 0.8,
# 'vf_coef': 0.13214811411452415,
# 'net_arch': 'medium',
# 'shared_arch': False,
# 'activation_fn': 'tanh'
# }
# params = {'activation_fn': 'relu', 'batch_size': 32, 'clip_range': 0.1,
# 'ent_coef': 0.008425259906148678, 'gae_lambda': 0.98, 'gamma':
# 0.0, 'lr': 0.0014548935455020253, 'lr_schedule': 'linear',
# 'max_grad_norm': 0.6, 'n_epochs': 5, 'n_steps': 64, 'net_arch':
# 'medium', 'shared_arch': True, 'vf_coef': 0.6725952403531438}
params = {'n_step_pow': 5.0, 'batches_pow': 5.0, 'gamma': 0.0, 'lr':
0.0014291278312354846, 'lr_schedule': 'linear', 'ent_coef':
0.042102094710275415, 'clip_range': 0.2, 'n_epochs': 5,
'gae_lambda': 0.92, 'max_grad_norm': 0.7, 'vf_coef':
0.40158288555773314, 'net_arch': 'medium', 'shared_arch': False,
params = {'n_step_pow': 7.0, 'batches_pow': 7.0, 'gamma': 0.0, 'lr':
0.0002916406263715553, 'lr_schedule': 'constant', 'ent_coef':
0.005743227072532813, 'clip_range': 0.4, 'n_epochs': 10,
'gae_lambda': 0.99, 'max_grad_norm': 0.5, 'vf_coef':
0.8088573261336596, 'net_arch': 'medium', 'shared_arch': True,
'activation_fn': 'relu'}
kwargs = get_args(params)
@@ -174,7 +151,7 @@ if __name__ == "__main__":
MlpPolicy,
env,
verbose=1,
tensorboard_log="./tensorboard_ppo_multi/",
tensorboard_log="./tensorboard_ppo/",
**kwargs
)
# gamma=0.1,
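For context, this hunk sits in the training entry point, which builds the learner roughly as follows (a sketch assuming stable-baselines3; the env variable and timestep budget are placeholders for what the surrounding script defines):

from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy

kwargs = get_args(params)
model = PPO(
    MlpPolicy,
    env,  # constructed earlier in the script
    verbose=1,
    tensorboard_log="./tensorboard_ppo/",
    **kwargs
)
model.learn(total_timesteps=1_000_000)  # budget illustrative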
@@ -26,8 +26,8 @@ class FractionArithSymbolic:
Creates a state and sets a random problem.
"""
if logger is None:
# self.logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field'])
self.logger = StubLogger()
self.logger = DataShopLogger('FractionsTutor', extra_kcs=['field'])
# self.logger = StubLogger()
else:
self.logger = logger
self.logger.set_student()
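This hunk flips the fractions tutor's default from the no-op StubLogger to a real DataShopLogger (with 'field' as an extra knowledge-component column). Judging only from the calls visible in this file, a stand-in logger would need at least this interface (a hypothetical sketch; the real StubLogger lives elsewhere in the repo and presumably also logs individual transactions):

class StubLogger:
    # No-op logger exposing the methods the tutor calls here.
    def set_student(self, student_id=None):
        pass

    def set_problem(self, problem_name):
        pass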
@@ -155,14 +155,15 @@ class FractionArithSymbolic:
return state_output
def set_random_problem(self):
num1 = str(randint(1, 5))
num2 = str(randint(1, 5))
denom1 = str(randint(2, 5))
denom2 = str(randint(2, 5))
num1 = str(randint(1, 15))
num2 = str(randint(1, 15))
denom1 = str(randint(2, 15))
denom2 = str(randint(2, 15))
operator = choice(['+', '*'])
self.reset(num1, denom1, operator, num2, denom2)
self.logger.set_problem("%s_%s_%s_%s_%s" % (num1, denom1, operator, num2, denom2))
self.logger.set_problem("%s_%s_%s_%s_%s" % (num1, denom1, operator,
num2, denom2))
if operator == "+" and denom1 == denom2:
self.ptype = 'AS'
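Widening the randint bounds from 5 to 15 changes difficulty by blowing up the problem space; counting the independent choices directly:

# problems = |num1| * |num2| * |denom1| * |denom2| * |operators|
easy = 5 * 5 * 4 * 4 * 2        # randint(1, 5), randint(2, 5)   -> 800
normal = 15 * 15 * 14 * 14 * 2  # randint(1, 15), randint(2, 15) -> 88200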
@@ -388,11 +389,11 @@ class FractionArithNumberEnv(gym.Env):
def __init__(self):
self.tutor = FractionArithSymbolic()
n_selections = len(self.tutor.get_possible_selections())
n_features = 2000
n_features = 900
self.dv = OnlineDictVectorizer(n_features)
self.observation_space = spaces.Box(
low=0.0, high=1.0, shape=(1, n_features), dtype=np.float32)
self.action_space = spaces.MultiDiscrete([n_selections, 50])
self.action_space = spaces.MultiDiscrete([n_selections, 450])
self.n_steps = 0
self.max_steps = 100000
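The action-space change tracks the harder problems: with denominators up to 15, answers (denominator products, cross-multiplied numerator sums) can exceed 50, so the second action dimension grows to 450. With gym's MultiDiscrete, one action is a vector of independent choices (a quick sketch; how value_idx maps to the typed number is defined by the env's decode):

from gym import spaces

action_space = spaces.MultiDiscrete([30, 450])  # 30 stands in for n_selections
selection_idx, value_idx = action_space.sample()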
@@ -592,6 +593,7 @@ class FractionArithOppEnv(gym.Env):
return state
def step(self, action):
self.n_steps += 1
try:
s, a, i = self.decode(action)
reward = self.tutor.apply_sai(s, a, i)
@@ -609,6 +611,9 @@ class FractionArithOppEnv(gym.Env):
obs = self.dv.fit_transform([state])[0]
info = {}
if self.n_steps > self.max_steps:
done = True
return obs, reward, done, info
@@ -644,6 +649,7 @@ class FractionArithOppEnv(gym.Env):
return s, a, i
def reset(self):
self.n_steps = 0
self.tutor.set_random_problem()
state = self.get_rl_state()
obs = self.dv.fit_transform([state])[0]
@@ -203,8 +203,8 @@ class MultiColumnAdditionSymbolic:
# append correct/incorrect counts
if add_counts:
d.text((0, 0), "h:{}".format(self.num_hints), fill=(0,0,0))
d.text((0, 10), "-:{}".format(self.num_incorrect_steps), fill=(0,0,0))
d.text((0, 20), "+:{}".format(self.num_correct_steps), fill=(0,0,0))
d.text((0, 80), "-:{}".format(self.num_incorrect_steps), fill=(0,0,0))
d.text((20, 0), "+:{}".format(self.num_correct_steps), fill=(0,0,0))
if add_dot:
d.ellipse((add_dot[0]-3, add_dot[1]-3, add_dot[0]+3, add_dot[1]+3),
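This render hunk only moves the incorrect/correct counters; in PIL's ImageDraw, the first argument of d.text is the (x, y) pixel origin, with y growing downward. A self-contained sketch of the overlay pattern using the new coordinates:

from PIL import Image, ImageDraw

img = Image.new('RGB', (200, 100), color=(255, 255, 255))
d = ImageDraw.Draw(img)
d.text((0, 0), "h:{}".format(3), fill=(0, 0, 0))   # hints at the top-left
d.text((20, 0), "+:{}".format(7), fill=(0, 0, 0))  # correct count beside them
d.text((0, 80), "-:{}".format(1), fill=(0, 0, 0))  # incorrect count lower down
img.save('counts.png')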
@@ -564,7 +564,7 @@ class MultiColumnAdditionOppEnv(gym.Env):
def __init__(self):
self.tutor = MultiColumnAdditionSymbolic()
n_selections = len(self.tutor.get_possible_selections())
n_features = 2000
n_features = 5000
n_operators = len(self.get_rl_operators())
n_args = len(self.tutor.get_possible_args())
self.dv = OnlineDictVectorizer(n_features)
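n_features pins the width of the hashed observation vector, so the Box observation space keeps a constant shape no matter which state keys appear; raising it to 5000 presumably reduces hash collisions for the larger multi-column feature set. The call pattern, as used throughout this diff (OnlineDictVectorizer is the repo's own class; the fixed-width output shape is assumed):

dv = OnlineDictVectorizer(5000)
obs = dv.fit_transform([{'cell_a': '7', 'cell_b': '8'}])[0]
# obs: a vector of length 5000, regardless of how many keys the dict has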
@@ -586,47 +586,37 @@ class MultiColumnAdditionOppEnv(gym.Env):
def get_rl_state(self):
state = self.tutor.state.copy()
for attr in self.tutor.state:
if attr == "operator":
if attr == "operator" or state[attr] == "":
continue
for attr2 in self.tutor.state:
if attr2 == "operator":
if attr2 == "operator" or state[attr2] == "":
continue
if attr >= attr2:
continue
try:
ones2 = int2_float_add_then_ones(state[attr], state[attr2])
state['add2-ones(%s,%s)' % (attr, attr2)] = ones2
except Exception:
pass
try:
tens2 = int2_float_add_then_tens(state[attr], state[attr2])
state['add2-tens(%s,%s)' % (attr, attr2)] = tens2
except Exception:
pass
for attr3 in self.tutor.state:
if attr3 == "operator":
if attr3 == "operator" or state[attr3] == "":
continue
if attr2 >= attr3:
continue
try:
ones3 = int3_float_add_then_ones(state[attr], state[attr2],
state[attr3])
state['add2-ones(%s,%s,%s)' % (attr, attr2, attr3)] = ones3
state['add3-ones(%s,%s,%s)' % (attr, attr2, attr3)] = ones3
except Exception:
pass
try:
tens3 = int3_float_add_then_tens(state[attr], state[attr2],
state[attr3])
state['add2-tens(%s,%s,%s)' % (attr, attr2, attr3)] = tens3
state['add3-tens(%s,%s,%s)' % (attr, attr2, attr3)] = tens3
except Exception:
pass
return state
def step(self, action):
self.n_steps += 1
try:
s, a, i = self.decode(action)
reward = self.tutor.apply_sai(s, a, i)
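The derived features in get_rl_state above hand the agent digit-level arithmetic the raw strings hide: for every pair and triple of non-empty fields, the ones and tens digits of their sum are added to the state (the hunk also fixes the triple features' labels from 'add2-' to 'add3-'). A sketch of the assumed helper semantics (names from the diff; the implementations are my guess):

def int2_float_add_then_ones(a, b):
    # Assumed: ones digit of the sum, e.g. '7' + '8' -> 15 -> 5
    return (int(float(a)) + int(float(b))) % 10

def int2_float_add_then_tens(a, b):
    # Assumed: tens digit of the sum, e.g. '7' + '8' -> 15 -> 1
    return ((int(float(a)) + int(float(b))) // 10) % 10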
@@ -635,6 +625,8 @@ class MultiColumnAdditionOppEnv(gym.Env):
reward = -1
done = False
# self.tutor.render()
# print(s, a, i)
# print()
# print(reward)
@@ -644,6 +636,12 @@ class MultiColumnAdditionOppEnv(gym.Env):
obs = self.dv.fit_transform([state])[0]
info = {}
# have a max steps for a given problem.
# When we hit that we're done regardless.
if self.n_steps > self.max_steps:
done = True
return obs, reward, done, info
def apply_rl_op(self, op, arg1, arg2, arg3):
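Together with the self.n_steps = 0 added to reset() below, the max-steps cap guarantees every episode terminates even if the agent never finishes a problem. A minimal rollout loop exercising the API shown in this diff (random policy purely for illustration):

env = MultiColumnAdditionOppEnv()
obs = env.reset()
done = False
episode_return = 0.0
while not done:
    action = env.action_space.sample()
    obs, reward, done, info = env.step(action)
    episode_return += reward
print('episode return:', episode_return)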
@@ -689,6 +687,7 @@ class MultiColumnAdditionOppEnv(gym.Env):
return s, a, i
def reset(self):
self.n_steps = 0
self.tutor.set_random_problem()
state = self.get_rl_state()
obs = self.dv.fit_transform([state])[0]