diff --git a/sandbox/multicolumn/train_ppo.py b/sandbox/multicolumn/train_ppo.py
index bf39a402aaa0e99ccafcdac444063a97a9b307f4..41e745029174c7d4b7f3e36a1b3685dcf2a11816 100644
--- a/sandbox/multicolumn/train_ppo.py
+++ b/sandbox/multicolumn/train_ppo.py
@@ -19,7 +19,7 @@ def get_args(params: Dict[str, Any]) -> Dict[str, Any]:
     :return:
     """
     batch_size = int(2**params['batches_pow'])
-    n_steps = int(params['n_step_pow'])
+    n_steps = int(2**params['n_step_pow'])
     gamma = params['gamma']
     learning_rate = params['lr']
     lr_schedule = params['lr_schedule']
@@ -153,11 +153,19 @@ if __name__ == "__main__":
     #     'activation_fn': 'tanh'
     # }
 
-    params = {'activation_fn': 'relu', 'batch_size': 32, 'clip_range': 0.1,
-              'ent_coef': 0.008425259906148678, 'gae_lambda': 0.98, 'gamma':
-              0.0, 'lr': 0.0014548935455020253, 'lr_schedule': 'linear',
-              'max_grad_norm': 0.6, 'n_epochs': 5, 'n_steps': 64, 'net_arch':
-              'medium', 'shared_arch': True, 'vf_coef': 0.6725952403531438}
+    # params = {'activation_fn': 'relu', 'batch_size': 32, 'clip_range': 0.1,
+    #           'ent_coef': 0.008425259906148678, 'gae_lambda': 0.98, 'gamma':
+    #           0.0, 'lr': 0.0014548935455020253, 'lr_schedule': 'linear',
+    #           'max_grad_norm': 0.6, 'n_epochs': 5, 'n_steps': 64, 'net_arch':
+    #           'medium', 'shared_arch': True, 'vf_coef': 0.6725952403531438}
+
+    params = {'n_step_pow': 5.0, 'batches_pow': 5.0, 'gamma': 0.0, 'lr':
+              0.0014291278312354846, 'lr_schedule': 'linear', 'ent_coef':
+              0.042102094710275415, 'clip_range': 0.2, 'n_epochs': 5,
+              'gae_lambda': 0.92, 'max_grad_norm': 0.7, 'vf_coef':
+              0.40158288555773314, 'net_arch': 'medium', 'shared_arch': False,
+              'activation_fn': 'relu'}
+
     kwargs = get_args(params)
 
     # multiprocess environment
@@ -174,7 +182,7 @@ if __name__ == "__main__":
 
     # while True:
     # Train
-    model.learn(total_timesteps=5000000)
+    model.learn(total_timesteps=1000000)
 
     # Test
     # obs = env.reset()
diff --git a/sandbox/multicolumn/tune_ppo.py b/sandbox/multicolumn/tune_ppo.py
index 95c87c44102bb4a3c5f3f70dd89847b53051af2d..d28f03b3b3ef4d29e1113cdd5786b560e4b45eb3 100644
--- a/sandbox/multicolumn/tune_ppo.py
+++ b/sandbox/multicolumn/tune_ppo.py
@@ -170,7 +170,7 @@ class TrialCallback(BaseCallback):
 def objective(trial: optuna.Trial) -> float:
     n_eval_episodes = 15
     eval_freq = 5000
-    n_steps = 10000
+    n_steps = 350000
 
     with tempfile.TemporaryDirectory() as log_dir:
         env = DummyVecEnv([
diff --git a/tutorenvs/__init__.py b/tutorenvs/__init__.py
index 0e7c94d2264ddf3256a4ca6793c48da219e23c17..5f23777d8fbf804a565a7bd682f0e37953e74a26 100644
--- a/tutorenvs/__init__.py
+++ b/tutorenvs/__init__.py
@@ -1,13 +1,14 @@
 from gym.envs.registration import register
-from tutorenvs.fractions import FractionArithDigitsEnv
-from tutorenvs.fractions import FractionArithOppEnv
-from tutorenvs.multicolumn import MultiColumnAdditionDigitsEnv
-from tutorenvs.multicolumn import MultiColumnAdditionPixelEnv
-from tutorenvs.multicolumn import MultiColumnAdditionPerceptEnv
+from tutorenvs.fractions import FractionArithNumberEnv  # noqa: F401
+from tutorenvs.fractions import FractionArithDigitsEnv  # noqa: F401
+from tutorenvs.fractions import FractionArithOppEnv  # noqa: F401
+from tutorenvs.multicolumn import MultiColumnAdditionDigitsEnv  # noqa: F401
+from tutorenvs.multicolumn import MultiColumnAdditionPixelEnv  # noqa: F401
+from tutorenvs.multicolumn import MultiColumnAdditionPerceptEnv  # noqa: F401
 
 register(
     id='FractionArith-v0',
-    entry_point='tutorenvs:FractionArithOppEnv',
+    entry_point='tutorenvs:FractionArithNumberEnv',
 )
 
 register(
@@ -15,6 +16,11 @@ register(
     entry_point='tutorenvs:FractionArithDigitsEnv',
 )
 
+register(
+    id='FractionArith-v2',
+    entry_point='tutorenvs:FractionArithOppEnv',
+)
+
 # TODO no pixel fractions yet.
 # register(
 #     id='FractionArith-v2',
diff --git a/tutorenvs/fractions.py b/tutorenvs/fractions.py
index 1fc9d4428b7cd1a68a2b9751a0fe958ef0985b6e..fd325d8cd05d57e41fc52f002cb316c4f1f9a2da 100644
--- a/tutorenvs/fractions.py
+++ b/tutorenvs/fractions.py
@@ -9,9 +9,11 @@ from gym import error, spaces, utils
 from gym.utils import seeding
 from sklearn.feature_extraction import FeatureHasher
 from sklearn.feature_extraction import DictVectorizer
+from tutorenvs.utils import OnlineDictVectorizer
 import numpy as np
 
 from tutorenvs.utils import DataShopLogger
+from tutorenvs.utils import StubLogger
 
 
 class FractionArithSymbolic:
@@ -20,11 +22,8 @@ class FractionArithSymbolic:
         """
         Creates a state and sets a random problem.
         """
-        self.num_correct_steps = 0
-        self.num_incorrect_steps = 0
-        self.num_hints = 0
-
-        self.logger = DataShopLogger('FractionsTutor', extra_kcs=['ptype_field'])
+        # self.logger = DataShopLogger('FractionsTutor', extra_kcs=['ptype_field'])
+        self.logger = StubLogger()
         self.logger.set_student()
         self.set_random_problem()
         # self.reset("", "", "", "", "")
@@ -35,6 +34,10 @@ class FractionArithSymbolic:
         provided arguments.
         """
         self.steps = 0
+        self.num_correct_steps = 0
+        self.num_incorrect_steps = 0
+        self.num_hints = 0
+
         self.state = {
             'initial_num_left': num1,
             'initial_denom_left': denom1,
@@ -146,10 +149,10 @@ class FractionArithSymbolic:
         return state_output
 
     def set_random_problem(self):
-        num1 = str(randint(1, 15))
-        num2 = str(randint(1, 15))
-        denom1 = str(randint(2, 15))
-        denom2 = str(randint(2, 15))
+        num1 = str(randint(1, 7))
+        num2 = str(randint(1, 7))
+        denom1 = str(randint(2, 7))
+        denom2 = str(randint(2, 7))
         operator = choice(['+', '*'])
 
         self.reset(num1, denom1, operator, num2, denom2)
@@ -164,11 +167,12 @@ class FractionArithSymbolic:
 
     def apply_sai(self, selection, action, inputs):
         """
-        Give a SAI, it applies it. This method returns feedback (i.e., -1 or 1).
+        Given a SAI, applies it and returns feedback
+        (i.e., -1 or 1).
         """
         self.steps += 1
         reward = self.evaluate_sai(selection, action, inputs)
-        
+
         if reward > 0:
             outcome = "CORRECT"
             self.num_correct_steps += 1
@@ -177,11 +181,11 @@ class FractionArithSymbolic:
             self.num_incorrect_steps += 1
 
         self.logger.log_step(selection, action, inputs['value'], outcome,
-                             step_name=self.ptype + '_' + demo[0],
+                             step_name=self.ptype + '_' + selection,
                              kcs=[self.ptype + '_' + selection])
 
         # Render output?
-        self.render()
+        # self.render()
 
         if reward == -1.0:
             return reward
@@ -371,6 +375,83 @@ class FractionArithSymbolic:
 
         raise Exception("request demo - logic missing")
 
+
+class FractionArithNumberEnv(gym.Env):
+    metadata = {'render.modes': ['human']}
+
+    def __init__(self):
+        self.tutor = FractionArithSymbolic()
+        n_selections = len(self.tutor.get_possible_selections())
+        n_features = 2000
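+        # The tutor's dict state is vectorized into a fixed-length
+        # observation of n_features entries.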
+        self.dv = OnlineDictVectorizer(n_features)
+        self.observation_space = spaces.Box(
+            low=0.0, high=1.0, shape=(1, n_features), dtype=np.float32)
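+        # Action = (selection index, value index); decode() maps the value
+        # index to the string typed into the selected field.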
+        self.action_space = spaces.MultiDiscrete([n_selections, 98])
+        self.n_steps = 0
+        self.max_steps = 100000
+
+    def get_rl_state(self):
+        return self.tutor.state
+
+    def step(self, action):
+        self.n_steps += 1
+
+        s, a, i = self.decode(action)
+        # print(s, a, i)
+        # print()
+        reward = self.tutor.apply_sai(s, a, i)
+        # self.render()
+        # print(reward)
+        state = self.tutor.state
+        # pprint(state)
+        obs = self.dv.fit_transform([state])[0]
+        done = (s == 'done' and reward == 1.0)
+
+        # Enforce a max number of steps for a given problem.
+        # When we hit that, we're done regardless.
+        if self.n_steps > self.max_steps:
+            done = True
+
+        info = {}
+
+        return obs, reward, done, info
+
+    def decode(self, action):
+        # print(action)
+        s = self.tutor.get_possible_selections()[action[0]]
+
+        if s == "done":
+            a = "ButtonPressed"
+        else:
+            a = "UpdateField"
+
+        if s == "done":
+            v = -1
+        if s == "check_convert":
+            v = "x"
+        else:
+            v = action[1] + 1
+
+        i = {'value': str(v)}
+
+        return s, a, i
+
+    def reset(self):
+        self.n_steps = 0
+        self.tutor.set_random_problem()
+        # self.render()
+        state = self.get_rl_state()
+        obs = self.dv.fit_transform([state])[0]
+        return obs
+
+    def render(self, mode='human', close=False):
+        self.tutor.render()
+
+
 class FractionArithDigitsEnv(gym.Env):
     metadata = {'render.modes': ['human']}
 
diff --git a/tutorenvs/multicolumn.py b/tutorenvs/multicolumn.py
index 940fc0e08ffffe1e2d4a58a81665528ce7ecd984..92df12ece40ae633cb40fe72bbba5b93b821c13e 100644
--- a/tutorenvs/multicolumn.py
+++ b/tutorenvs/multicolumn.py
@@ -29,8 +29,8 @@ class MultiColumnAdditionSymbolic:
         Creates a state and sets a random problem.
         """
         if logger is None:
-            # self.logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field'])
-            self.logger = StubLogger()
+            self.logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field'])
+            # self.logger = StubLogger()
         else:
             self.logger = logger
         self.logger.set_student()