Skip to content

Commit d58fbbf

Browse files
Fix unreliable capture of standard/error outputs
Fix potentially incomplete output capture when running unit tests, as seen with, e.g., GNU Guile. Two changes: (1) drain the standard output and standard error pipes after the process exits; (2) remove the hard-coded truncation of stdout/stderr to 2 kB.
1 parent 3025a53 commit d58fbbf

2 files changed

Lines changed: 21 additions & 9 deletions

File tree

evaluation/src/containerized_eval.py

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -78,9 +78,6 @@ def eval_string_script(language, program):
7878
f.write(program.encode("utf-8"))
7979
f.flush()
8080
result = eval_script(Path(f.name))
81-
# Only save the first 2K of output from the running program. Any futher
82-
# output is very likely an exceptionally long stack trace or a long
83-
# series of prints.
8481
if type(result["stdout"]) == bytes:
8582
result["stdout"] = result["stdout"].decode("utf-8", errors="ignore")
8683
if result["stdout"] is None:
@@ -93,8 +90,8 @@ def eval_string_script(language, program):
9390
assert type(result["stderr"]) == str
9491
return {
9592
"program": program,
96-
"stdout": result['stdout'].replace("!!int", "")[:2048],
97-
"stderr": result['stderr'][:2048],
93+
"stdout": result['stdout'].replace("!!int", ""),
94+
"stderr": result['stderr'],
9895
"exit_code": result['exit_code'],
9996
"status": result['status']
10097
}

evaluation/src/safe_subprocess/__init__.py

Lines changed: 19 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,9 @@ def run(
4747
stdout=subprocess.PIPE,
4848
stderr=subprocess.PIPE,
4949
start_new_session=True,
50-
bufsize=MAX_BYTES_PER_READ,
50+
# Use raw IO because BufferedReader returns None for partial
51+
# reads on non-blocking pipes which can lead to data loss.
52+
bufsize=0,
5153
cwd=cwd
5254
)
5355
set_nonblocking(p.stdout)
@@ -61,6 +63,7 @@ def run(
6163
stderr_saved_bytes = []
6264
stdout_bytes_read = 0
6365
stderr_bytes_read = 0
66+
exit_code = None
6467

6568
for _ in range(max_iterations):
6669
this_stdout_read = p.stdout.read(MAX_BYTES_PER_READ)
@@ -74,9 +77,21 @@ def run(
7477
stderr_saved_bytes.append(this_stderr_read)
7578
stderr_bytes_read += len(this_stderr_read)
7679
exit_code = p.poll()
77-
if exit_code is not None:
78-
break
79-
time.sleep(SLEEP_BETWEEN_READS)
80+
if exit_code is None:
81+
time.sleep(SLEEP_BETWEEN_READS)
82+
else:
83+
# After exit, keep looping without sleeping to drain any output
84+
# remaining in the pipe buffers; stop once both pipes are empty.
85+
this_stdout_read = p.stdout.read(MAX_BYTES_PER_READ)
86+
this_stderr_read = p.stderr.read(MAX_BYTES_PER_READ)
87+
if this_stdout_read is not None and stdout_bytes_read < max_output_size:
88+
stdout_saved_bytes.append(this_stdout_read)
89+
stdout_bytes_read += len(this_stdout_read)
90+
if this_stderr_read is not None and stderr_bytes_read < max_output_size:
91+
stderr_saved_bytes.append(this_stderr_read)
92+
stderr_bytes_read += len(this_stderr_read)
93+
if not this_stdout_read and not this_stderr_read:
94+
break
8095

8196
try:
8297
# Kills the process group. Without this line, test_fork_once fails.

0 commit comments

Comments
 (0)