Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 16 additions & 6 deletions packages/js-sdk/src/sandbox/commands/commandHandle.ts
Original file line number Diff line number Diff line change
Expand Up @@ -84,8 +84,10 @@ export class CommandHandle
Omit<CommandResult, 'exitCode' | 'error'>,
Partial<Pick<CommandResult, 'exitCode' | 'error'>>
{
private _stdout = ''
private _stderr = ''
private _stdoutChunks: string[] = []
private _stderrChunks: string[] = []
private _stdoutCached?: string
private _stderrCached?: string

private result?: CommandResult
private iterationError?: Error
Expand Down Expand Up @@ -130,14 +132,20 @@ export class CommandHandle
* Command execution stderr output.
*/
get stderr() {
  // Join the buffered chunks lazily and cache the result; the cache is
  // cleared whenever a new stderr chunk is appended, so repeated reads
  // between chunks are O(1) instead of re-joining the whole buffer.
  if (this._stderrCached === undefined) {
    this._stderrCached = this._stderrChunks.join('')
  }
  return this._stderrCached
}

/**
* Command execution stdout output.
*/
get stdout() {
  // Join the buffered chunks lazily and cache the result; the cache is
  // cleared whenever a new stdout chunk is appended, so repeated reads
  // between chunks are O(1) instead of re-joining the whole buffer.
  if (this._stdoutCached === undefined) {
    this._stdoutCached = this._stdoutChunks.join('')
  }
  return this._stdoutCached
}

/**
Expand Down Expand Up @@ -196,12 +204,14 @@ export class CommandHandle
switch (e.value.output.case) {
case 'stdout':
out = new TextDecoder().decode(e.value.output.value)
this._stdout += out
this._stdoutChunks.push(out)
this._stdoutCached = undefined
yield [out as Stdout, null, null]
break
case 'stderr':
out = new TextDecoder().decode(e.value.output.value)
this._stderr += out
this._stderrChunks.push(out)
this._stderrCached = undefined
yield [null, out as Stderr, null]
break
case 'pty':
Expand Down
92 changes: 92 additions & 0 deletions packages/js-sdk/tests/memory-repro.mts
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
import { Sandbox } from '../dist/index.mjs'

const MB = 1024 * 1024

/**
 * Logs the current process memory usage (heapUsed / heapTotal / rss),
 * formatted in megabytes, prefixed with the given label.
 */
function logMemory(label: string) {
  const { heapUsed, heapTotal, rss } = process.memoryUsage()
  const fmt = (bytes: number) => (bytes / MB).toFixed(1)
  console.log(
    `[${label}] heapUsed: ${fmt(heapUsed)} MB, heapTotal: ${fmt(heapTotal)} MB, rss: ${fmt(rss)} MB`
  )
}

async function main() {
const apiKey = process.env.E2B_API_KEY
if (!apiKey) {
console.error('E2B_API_KEY env var is required')
process.exit(1)
}

logMemory('before sandbox')

const sandbox = await Sandbox.create({ apiKey })
console.log(`Sandbox created: ${sandbox.sandboxId}`)
logMemory('after sandbox create')

// Generate ~50MB of stdout to trigger O(n²) memory amplification
// Use start() + reading .stdout in callback to force V8 string flattening
const stdoutSizeMB = 200
console.log(`Running command to generate ~${stdoutSizeMB}MB of stdout...`)
console.log(
'Reading .stdout in callback to force V8 string flattening (simulates readLines indexOf behavior)...'
)

const startTime = Date.now()

// Track peak memory during execution
let peakHeap = 0
let peakRss = 0
let chunkCount = 0
const memInterval = setInterval(() => {
const mem = process.memoryUsage()
if (mem.heapUsed > peakHeap) peakHeap = mem.heapUsed
if (mem.rss > peakRss) peakRss = mem.rss
}, 50)

try {
const cmd = await sandbox.commands.start(
`python3 -c "
import sys
line = 'x' * 1000 + '\\n'
for _ in range(${stdoutSizeMB * 1000}):
sys.stdout.write(line)
sys.stdout.flush()
"`,
{
timeout: 600,
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Use the correct timeout option in memory repro

This script passes timeout: 600, but the JS command API consumes timeoutMs, so this value is ignored and the default 60s timeout is used. For large-output runs near a minute, that makes the repro flaky and can invalidate the benchmark conclusions by timing out unexpectedly.

Useful? React with 👍 / 👎.

onStdout: () => {
chunkCount++
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Actually trigger flattening in the repro callback

The script states it is "reading .stdout in callback" to force flattening, but the callback only increments chunkCount. As written, the repro does not exercise the claimed flattening path, so results from this script cannot validate the specific regression/fix scenario described in the commit.

Useful? React with 👍 / 👎.

},
}
)
const result = await cmd.wait()

clearInterval(memInterval)
const elapsed = ((Date.now() - startTime) / 1000).toFixed(1)

// Final memory check
const finalMem = process.memoryUsage()
if (finalMem.heapUsed > peakHeap) peakHeap = finalMem.heapUsed
if (finalMem.rss > peakRss) peakRss = finalMem.rss

console.log(`\nCommand completed in ${elapsed}s`)
console.log(`stdout length: ${(result.stdout.length / MB).toFixed(1)} MB`)
console.log(`chunks received: ${chunkCount}`)
console.log(`exit code: ${result.exitCode}`)
logMemory('after command')
console.log(`Peak heapUsed: ${(peakHeap / MB).toFixed(1)} MB`)
console.log(`Peak RSS: ${(peakRss / MB).toFixed(1)} MB`)
console.log(
`Memory amplification: ${(peakHeap / (stdoutSizeMB * 1_000_000)).toFixed(1)}x`
)
} catch (e) {
clearInterval(memInterval)
console.error('Command failed:', e)
logMemory('after error')
console.log(`Peak heapUsed: ${(peakHeap / MB).toFixed(1)} MB`)
console.log(`Peak RSS: ${(peakRss / MB).toFixed(1)} MB`)
} finally {
await sandbox.kill()
}
}

main().catch(console.error)
20 changes: 10 additions & 10 deletions packages/python-sdk/e2b/sandbox_async/commands/command_handle.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,18 +38,18 @@

@property
def stdout(self):
    """
    Command stdout output.

    The buffered chunks are joined lazily and the joined string is cached
    together with the chunk count, so repeated reads between chunk arrivals
    are O(1) instead of re-joining the whole buffer (avoids re-introducing
    O(n^2) churn when polled from an on_stdout callback).
    """
    cache = getattr(self, "_stdout_cache", None)
    # Chunks are append-only, so an unchanged length means the cache is fresh.
    if cache is not None and cache[0] == len(self._stdout_chunks):
        return cache[1]
    joined = "".join(self._stdout_chunks)
    self._stdout_cache = (len(self._stdout_chunks), joined)
    return joined
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Cache joined async output between chunk updates

The new stdout/stderr properties now call "".join(...) on every access, which re-copies the full buffered output each time even when no new chunk arrived. This is a regression from the previous O(1) getter behavior and can reintroduce quadratic CPU/memory churn for callers that poll cmd.stdout while a process is running; adding a cached joined value (invalidated on append) would avoid this.

Useful? React with 👍 / 👎.


@property
def stderr(self):
    """
    Command stderr output.

    The buffered chunks are joined lazily and the joined string is cached
    together with the chunk count, so repeated reads between chunk arrivals
    are O(1) instead of re-joining the whole buffer (avoids re-introducing
    O(n^2) churn when polled from an on_stderr callback).
    """
    cache = getattr(self, "_stderr_cache", None)
    # Chunks are append-only, so an unchanged length means the cache is fresh.
    if cache is not None and cache[0] == len(self._stderr_chunks):
        return cache[1]
    joined = "".join(self._stderr_chunks)
    self._stderr_cache = (len(self._stderr_chunks), joined)
    return joined

Check failure on line 52 in packages/python-sdk/e2b/sandbox_async/commands/command_handle.py

View check run for this annotation

Claude / Claude Code Review

Python async SDK: stdout/stderr properties lack caching, reintroducing O(n²) for callback access

The Python async SDK's `stdout` and `stderr` properties call `''.join(self._stdout_chunks)` on every access with no caching, reintroducing the same O(n²) pattern the PR set out to fix. If a user accesses `handle.stdout` inside an `on_stdout` callback (as code-interpreter's `readLines` does), each callback invocation triggers a full re-join of all accumulated chunks — O(total_size) per callback, O(n²) total. Fix by adding `_stdout_cached: Optional[str] = None`, returning the cached value in the getter, and setting it to `None` on each append, mirroring the JS implementation.
Comment on lines 41 to 56
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 The Python async SDK's stdout and stderr properties call ''.join(self._stdout_chunks) on every access with no caching, reintroducing the same O(n²) pattern the PR set out to fix. If a user accesses handle.stdout inside an on_stdout callback (as code-interpreter's readLines does), each callback invocation triggers a full re-join of all accumulated chunks — O(total_size) per callback, O(n²) total. Fix by adding _stdout_cached: Optional[str] = None, returning the cached value in the getter, and setting it to None on each append, mirroring the JS implementation.

Extended reasoning...

What the bug is: The Python async SDK's stdout and stderr properties in command_handle.py (lines 41–52) always call ''.join(self._stdout_chunks) on every property access with no caching. The JS SDK correctly maintains _stdoutCached/_stderrCached fields that are set to undefined on each chunk push and lazily computed in the getter. The Python async SDK received the array-buffering change (replacing +=) but did not receive the corresponding caching layer.

The specific code path: When a user passes on_stdout to sandbox.commands.start(), the async SDK calls that callback for each chunk in _handle_events(). If the callback (or any downstream consumer) accesses handle.stdout to get the accumulated output — exactly the pattern code-interpreter's readLines uses to scan for newlines via str.find() — each callback invocation hits the property getter, which calls ''.join(self._stdout_chunks) from scratch.

Why existing code doesn't prevent it: The PR replaced self._stdout += out (O(n) per append) with self._stdout_chunks.append(out) (O(1) per append), which is correct. But it did not add a cache guard in the getter. Before the PR, reading self._stdout in a callback was O(1) (a simple attribute read). After the PR, reading handle.stdout in a callback is O(accumulated_size) because join() must traverse all chunks every time.

The impact: With N chunks totaling T bytes, accessing handle.stdout once per callback produces O(T) work per call and O(N × T) total — quadratic in T when N grows proportionally to T (fixed chunk sizes). The PR's own benchmark documents this scenario ('string flattening') measuring 669 MB peak heap / 3.5x amplification for 200 MB of output in the JS SDK after the fix. The Python async SDK has no protection at all, so this scenario will be at least as bad as the pre-fix JS numbers.

How to fix: Mirror the JS implementation exactly. Add self._stdout_cached: Optional[str] = None and self._stderr_cached: Optional[str] = None as instance variables in __init__. In the stdout property getter, return self._stdout_cached if it is not None, otherwise compute ''.join(self._stdout_chunks), store it in self._stdout_cached, and return it. In _iterate_events, after self._stdout_chunks.append(out), set self._stdout_cached = None. Apply the same pattern for stderr.

Step-by-step proof: Suppose a user runs sandbox.commands.start(cmd, on_stdout=lambda _: print(len(handle.stdout))) against a command producing 200,000 chunks of ~1,000 bytes each (200 MB total, matching the PR's benchmark). Chunk 1 arrives → _stdout_chunks has 1 element → handle.stdout joins 1 string, ~1,000 bytes of work. Chunk 2 → joins 2 strings, ~2,000 bytes. Chunk k → joins k strings, ~k×1,000 bytes. Total work = 1,000 × (1 + 2 + … + 200,000) ≈ 2×10¹³ bytes of string copying — clearly O(n²). This is the exact scenario the PR benchmarks, and the Python async SDK has no defense against it.

@property
def error(self):
"""
Expand Down Expand Up @@ -87,8 +87,8 @@
self._handle_kill = handle_kill
self._events = events

self._stdout: str = ""
self._stderr: str = ""
self._stdout_chunks: list[str] = []
self._stderr_chunks: list[str] = []

self._on_stdout = on_stdout
self._on_stderr = on_stderr
Expand All @@ -113,18 +113,18 @@
if event.event.HasField("data"):
if event.event.data.stdout:
out = event.event.data.stdout.decode("utf-8", "replace")
self._stdout += out
self._stdout_chunks.append(out)
yield out, None, None
if event.event.data.stderr:
out = event.event.data.stderr.decode("utf-8", "replace")
self._stderr += out
self._stderr_chunks.append(out)
yield None, out, None
if event.event.data.pty:
yield None, None, event.event.data.pty
if event.event.HasField("end"):
self._result = CommandResult(
stdout=self._stdout,
stderr=self._stderr,
stdout="".join(self._stdout_chunks),
stderr="".join(self._stderr_chunks),
exit_code=event.event.end.exit_code,
error=event.event.end.error,
)
Expand Down Expand Up @@ -176,8 +176,8 @@

if self._result.exit_code != 0:
raise CommandExitException(
stdout=self._stdout,
stderr=self._stderr,
stdout=self._result.stdout,
stderr=self._result.stderr,
exit_code=self._result.exit_code,
error=self._result.error,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,14 @@
],
):
self._pid = pid
self._handle_kill = handle_kill
self._events = events

self._stdout: str = ""
self._stderr: str = ""
self._stdout_chunks: list[str] = []
self._stderr_chunks: list[str] = []

self._result: Optional[CommandResult] = None
self._iteration_exception: Optional[Exception] = None

Check notice on line 44 in packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py

View check run for this annotation

Claude / Claude Code Review

Python sync SDK: missing public stdout/stderr properties (pre-existing API inconsistency with async SDK)

The sync `CommandHandle` has no public `stdout` or `stderr` properties, while the async `AsyncCommandHandle` exposes them via `@property`. This is a pre-existing API inconsistency: before this PR the sync SDK stored private `_stdout: str`, and after this PR it stores private `_stdout_chunks: list[str]` — neither exposes a public accessor. Users holding a background sync handle cannot inspect accumulated output via `handle.stdout` and would receive `AttributeError`.
Comment on lines 37 to 44
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🟣 The sync CommandHandle has no public stdout or stderr properties, while the async AsyncCommandHandle exposes them via @property. This is a pre-existing API inconsistency: before this PR the sync SDK stored private _stdout: str, and after this PR it stores private _stdout_chunks: list[str] — neither exposes a public accessor. Users holding a background sync handle cannot inspect accumulated output via handle.stdout and would receive AttributeError.

Extended reasoning...

What the bug is

The async AsyncCommandHandle has always exposed stdout and stderr as @property accessors returning ''.join(self._stdout_chunks). The sync CommandHandle has never had equivalent public properties — before this PR it had self._stdout: str = '' (private), and after this PR it has self._stdout_chunks: list[str] = [] (still private). There is no public stdout or stderr attribute on the sync handle.

The specific code path

In packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py, the class defines only a pid property. The _stdout_chunks and _stderr_chunks fields added by this PR at lines 42-43 are purely internal. The async counterpart in packages/python-sdk/e2b/sandbox_async/commands/command_handle.py defines @property stdout and @property stderr that are part of the documented public API.

Why existing code does not prevent it

The PR only changes internal storage from str to list[str] and correctly wires up the CommandResult returned by wait(). It does not add or remove any public properties. The public API surface of the sync handle is unchanged before and after this PR.

Addressing the refutation

The refutation argues this asymmetry is intentional: the sync SDK processes events lazily (only when iterated), so accumulated output mid-run is less meaningful, and users can use wait(on_stdout=...) callbacks or iterate via for stdout, stderr, pty in handle:. This is architecturally accurate. However, a user who uses both SDKs or who reads the async SDK docs would reasonably expect handle.stdout to work on either handle type. The AsyncCommandHandle treats it as standard public API, creating a discoverable inconsistency. The refutation is correct that this is intentional design, but it does not eliminate the confusion users encounter when the APIs diverge.

Impact

Any code that does handle = sandbox.commands.run(cmd, background=True) followed by handle.stdout will raise AttributeError: 'CommandHandle' object has no attribute 'stdout'. The fix would be trivially additive: @property stdout: return ''.join(self._stdout_chunks).

Step-by-step proof

  1. User calls handle = sandbox.commands.run('sleep 5 && echo done', background=True)
  2. User tries print(handle.stdout) to check intermediate output (as they would with AsyncCommandHandle)
  3. Python raises AttributeError: 'CommandHandle' object has no attribute 'stdout'
  4. Same code with the async SDK works fine because @property stdout is defined on AsyncCommandHandle
  5. This is pre-existing: even before this PR, step 3 would raise the same error — the private _stdout field was never a public property

Pre-existing nature

Before this PR: self._stdout: str = '' — private, no public property. After this PR: self._stdout_chunks: list[str] = [] — still private, still no public property. The inconsistency predates this PR.


def __iter__(self):
"""
Expand All @@ -67,18 +67,18 @@
if event.event.HasField("data"):
if event.event.data.stdout:
out = event.event.data.stdout.decode("utf-8", "replace")
self._stdout += out
self._stdout_chunks.append(out)
yield out, None, None
if event.event.data.stderr:
out = event.event.data.stderr.decode("utf-8", "replace")
self._stderr += out
self._stderr_chunks.append(out)
yield None, out, None
if event.event.data.pty:
yield None, None, event.event.data.pty
if event.event.HasField("end"):
self._result = CommandResult(
stdout=self._stdout,
stderr=self._stderr,
stdout="".join(self._stdout_chunks),
stderr="".join(self._stderr_chunks),
exit_code=event.event.end.exit_code,
error=event.event.end.error,
)
Expand Down Expand Up @@ -131,8 +131,8 @@

if self._result.exit_code != 0:
raise CommandExitException(
stdout=self._stdout,
stderr=self._stderr,
stdout=self._result.stdout,
stderr=self._result.stderr,
exit_code=self._result.exit_code,
error=self._result.error,
)
Expand Down
Loading