From ad3146329f40c388d8a8739a16857f9066d3eca0 Mon Sep 17 00:00:00 2001
From: Chris MacLellan <2348-cm3786@users.noreply.gitlab.cci.drexel.edu>
Date: Fri, 22 Jan 2021 17:01:42 -0500
Subject: [PATCH] fixed multicolumn bug where agent can receive reward for
submitting empty string to a field that should be empty
---
tutorenvs/multicolumn.py | 10 +++++++---
1 file changed, 7 insertions(+), 3 deletions(-)
diff --git a/tutorenvs/multicolumn.py b/tutorenvs/multicolumn.py
index a8a93a2..41341d5 100644
--- a/tutorenvs/multicolumn.py
+++ b/tutorenvs/multicolumn.py
@@ -34,8 +34,8 @@ class MultiColumnAdditionSymbolic:
Creates a state and sets a random problem.
"""
if logger is None:
- # self.logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field'])
- self.logger = StubLogger()
+ self.logger = DataShopLogger('MulticolumnAdditionTutor', extra_kcs=['field'])
+ # self.logger = StubLogger()
else:
self.logger = logger
self.logger.set_student()
@@ -309,6 +309,11 @@ class MultiColumnAdditionSymbolic:
if self.state[selection] != "":
return -1.0
+ # You can't send the empty string (this is an edge case that can cause
+ # problems).
+ if inputs['value'] == "":
+ return -1.0
+
if (selection == "answer_ones" and
inputs['value'] == self.correct_ones):
return 1.0
@@ -641,7 +646,6 @@ class MultiColumnAdditionOppEnv(gym.Env):
if self.n_steps > self.max_steps:
done = True
-
return obs, reward, done, info
def apply_rl_op(self, op, arg1, arg2, arg3):
--
GitLab