From b8cefe96f5bcea507c191e4fb70aeb56155678e5 Mon Sep 17 00:00:00 2001 From: Tomas Beran Date: Tue, 14 Apr 2026 15:20:18 +0200 Subject: [PATCH 1/3] =?UTF-8?q?fix:=20replace=20O(n=C2=B2)=20string=20conc?= =?UTF-8?q?atenation=20with=20array=20buffering=20for=20stdout/stderr?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace `this._stdout += out` with array-based buffering (`push` + lazy `join`) in JS and Python SDKs to eliminate quadratic memory amplification when accumulating large command output. Fixes e2b-dev/code-interpreter#251 --- .../src/sandbox/commands/commandHandle.ts | 22 +++-- packages/js-sdk/tests/memory-repro.mts | 92 +++++++++++++++++++ .../sandbox_async/commands/command_handle.py | 20 ++-- .../sandbox_sync/commands/command_handle.py | 16 ++-- 4 files changed, 126 insertions(+), 24 deletions(-) create mode 100644 packages/js-sdk/tests/memory-repro.mts diff --git a/packages/js-sdk/src/sandbox/commands/commandHandle.ts b/packages/js-sdk/src/sandbox/commands/commandHandle.ts index 67852a900c..c7175cac44 100644 --- a/packages/js-sdk/src/sandbox/commands/commandHandle.ts +++ b/packages/js-sdk/src/sandbox/commands/commandHandle.ts @@ -84,8 +84,10 @@ export class CommandHandle Omit, Partial> { - private _stdout = '' - private _stderr = '' + private _stdoutChunks: string[] = [] + private _stderrChunks: string[] = [] + private _stdoutCached?: string + private _stderrCached?: string private result?: CommandResult private iterationError?: Error @@ -130,14 +132,20 @@ export class CommandHandle * Command execution stderr output. */ get stderr() { - return this._stderr + if (this._stderrCached === undefined) { + this._stderrCached = this._stderrChunks.join('') + } + return this._stderrCached } /** * Command execution stdout output. */ get stdout() { - return this._stdout + if (this._stdoutCached === undefined) { + this._stdoutCached = this._stdoutChunks.join('') + } + return this._stdoutCached } /** @@ -196,12 +204,14 @@ export class CommandHandle switch (e.value.output.case) { case 'stdout': out = new TextDecoder().decode(e.value.output.value) - this._stdout += out + this._stdoutChunks.push(out) + this._stdoutCached = undefined yield [out as Stdout, null, null] break case 'stderr': out = new TextDecoder().decode(e.value.output.value) - this._stderr += out + this._stderrChunks.push(out) + this._stderrCached = undefined yield [null, out as Stderr, null] break case 'pty': diff --git a/packages/js-sdk/tests/memory-repro.mts b/packages/js-sdk/tests/memory-repro.mts new file mode 100644 index 0000000000..5a5818d02e --- /dev/null +++ b/packages/js-sdk/tests/memory-repro.mts @@ -0,0 +1,92 @@ +import { Sandbox } from '../dist/index.mjs' + +const MB = 1024 * 1024 + +function logMemory(label: string) { + const mem = process.memoryUsage() + console.log( + `[${label}] heapUsed: ${(mem.heapUsed / MB).toFixed(1)} MB, heapTotal: ${(mem.heapTotal / MB).toFixed(1)} MB, rss: ${(mem.rss / MB).toFixed(1)} MB` + ) +} + +async function main() { + const apiKey = process.env.E2B_API_KEY + if (!apiKey) { + console.error('E2B_API_KEY env var is required') + process.exit(1) + } + + logMemory('before sandbox') + + const sandbox = await Sandbox.create({ apiKey }) + console.log(`Sandbox created: ${sandbox.sandboxId}`) + logMemory('after sandbox create') + + // Generate ~50MB of stdout to trigger O(n²) memory amplification + // Use start() + reading .stdout in callback to force V8 string flattening + const stdoutSizeMB = 200 + console.log(`Running command to generate ~${stdoutSizeMB}MB of stdout...`) + console.log( + 'Reading .stdout in callback to force V8 string flattening (simulates readLines indexOf behavior)...' + ) + + const startTime = Date.now() + + // Track peak memory during execution + let peakHeap = 0 + let peakRss = 0 + let chunkCount = 0 + const memInterval = setInterval(() => { + const mem = process.memoryUsage() + if (mem.heapUsed > peakHeap) peakHeap = mem.heapUsed + if (mem.rss > peakRss) peakRss = mem.rss + }, 50) + + try { + const cmd = await sandbox.commands.start( + `python3 -c " +import sys +line = 'x' * 1000 + '\\n' +for _ in range(${stdoutSizeMB * 1000}): + sys.stdout.write(line) +sys.stdout.flush() +"`, + { + timeout: 600, + onStdout: () => { + chunkCount++ + }, + } + ) + const result = await cmd.wait() + + clearInterval(memInterval) + const elapsed = ((Date.now() - startTime) / 1000).toFixed(1) + + // Final memory check + const finalMem = process.memoryUsage() + if (finalMem.heapUsed > peakHeap) peakHeap = finalMem.heapUsed + if (finalMem.rss > peakRss) peakRss = finalMem.rss + + console.log(`\nCommand completed in ${elapsed}s`) + console.log(`stdout length: ${(result.stdout.length / MB).toFixed(1)} MB`) + console.log(`chunks received: ${chunkCount}`) + console.log(`exit code: ${result.exitCode}`) + logMemory('after command') + console.log(`Peak heapUsed: ${(peakHeap / MB).toFixed(1)} MB`) + console.log(`Peak RSS: ${(peakRss / MB).toFixed(1)} MB`) + console.log( + `Memory amplification: ${(peakHeap / (stdoutSizeMB * 1_000_000)).toFixed(1)}x` + ) + } catch (e) { + clearInterval(memInterval) + console.error('Command failed:', e) + logMemory('after error') + console.log(`Peak heapUsed: ${(peakHeap / MB).toFixed(1)} MB`) + console.log(`Peak RSS: ${(peakRss / MB).toFixed(1)} MB`) + } finally { + await sandbox.kill() + } +} + +main().catch(console.error) diff --git a/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py b/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py index dfe34e8383..306d1c41ba 100644 --- a/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py +++ b/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py @@ -41,14 +41,14 @@ def stdout(self): """ Command stdout output. """ - return self._stdout + return "".join(self._stdout_chunks) @property def stderr(self): """ Command stderr output. """ - return self._stderr + return "".join(self._stderr_chunks) @property def error(self): @@ -87,8 +87,8 @@ def __init__( self._handle_kill = handle_kill self._events = events - self._stdout: str = "" - self._stderr: str = "" + self._stdout_chunks: list[str] = [] + self._stderr_chunks: list[str] = [] self._on_stdout = on_stdout self._on_stderr = on_stderr @@ -113,18 +113,18 @@ async def _iterate_events( if event.event.HasField("data"): if event.event.data.stdout: out = event.event.data.stdout.decode("utf-8", "replace") - self._stdout += out + self._stdout_chunks.append(out) yield out, None, None if event.event.data.stderr: out = event.event.data.stderr.decode("utf-8", "replace") - self._stderr += out + self._stderr_chunks.append(out) yield None, out, None if event.event.data.pty: yield None, None, event.event.data.pty if event.event.HasField("end"): self._result = CommandResult( - stdout=self._stdout, - stderr=self._stderr, + stdout="".join(self._stdout_chunks), + stderr="".join(self._stderr_chunks), exit_code=event.event.end.exit_code, error=event.event.end.error, ) @@ -176,8 +176,8 @@ async def wait(self) -> CommandResult: if self._result.exit_code != 0: raise CommandExitException( - stdout=self._stdout, - stderr=self._stderr, + stdout=self._result.stdout, + stderr=self._result.stderr, exit_code=self._result.exit_code, error=self._result.error, ) diff --git a/packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py b/packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py index a58a613e02..b2416d2eee 100644 --- a/packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py +++ b/packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py @@ -37,8 +37,8 @@ def __init__( self._handle_kill = handle_kill self._events = events - self._stdout: str = "" - self._stderr: str = "" + self._stdout_chunks: list[str] = [] + self._stderr_chunks: list[str] = [] self._result: Optional[CommandResult] = None self._iteration_exception: Optional[Exception] = None @@ -67,18 +67,18 @@ def _handle_events( if event.event.HasField("data"): if event.event.data.stdout: out = event.event.data.stdout.decode("utf-8", "replace") - self._stdout += out + self._stdout_chunks.append(out) yield out, None, None if event.event.data.stderr: out = event.event.data.stderr.decode("utf-8", "replace") - self._stderr += out + self._stderr_chunks.append(out) yield None, out, None if event.event.data.pty: yield None, None, event.event.data.pty if event.event.HasField("end"): self._result = CommandResult( - stdout=self._stdout, - stderr=self._stderr, + stdout="".join(self._stdout_chunks), + stderr="".join(self._stderr_chunks), exit_code=event.event.end.exit_code, error=event.event.end.error, ) @@ -131,8 +131,8 @@ def wait( if self._result.exit_code != 0: raise CommandExitException( - stdout=self._stdout, - stderr=self._stderr, + stdout=self._result.stdout, + stderr=self._result.stderr, exit_code=self._result.exit_code, error=self._result.error, ) From 0b4ee1f291fceb45f56b91ea274fddabff6246d7 Mon Sep 17 00:00:00 2001 From: Tomas Beran Date: Tue, 14 Apr 2026 15:24:22 +0200 Subject: [PATCH 2/3] chore: remove memory reproduction script --- packages/js-sdk/tests/memory-repro.mts | 92 -------------------------- 1 file changed, 92 deletions(-) delete mode 100644 packages/js-sdk/tests/memory-repro.mts diff --git a/packages/js-sdk/tests/memory-repro.mts b/packages/js-sdk/tests/memory-repro.mts deleted file mode 100644 index 5a5818d02e..0000000000 --- a/packages/js-sdk/tests/memory-repro.mts +++ /dev/null @@ -1,92 +0,0 @@ -import { Sandbox } from '../dist/index.mjs' - -const MB = 1024 * 1024 - -function logMemory(label: string) { - const mem = process.memoryUsage() - console.log( - `[${label}] heapUsed: ${(mem.heapUsed / MB).toFixed(1)} MB, heapTotal: ${(mem.heapTotal / MB).toFixed(1)} MB, rss: ${(mem.rss / MB).toFixed(1)} MB` - ) -} - -async function main() { - const apiKey = process.env.E2B_API_KEY - if (!apiKey) { - console.error('E2B_API_KEY env var is required') - process.exit(1) - } - - logMemory('before sandbox') - - const sandbox = await Sandbox.create({ apiKey }) - console.log(`Sandbox created: ${sandbox.sandboxId}`) - logMemory('after sandbox create') - - // Generate ~50MB of stdout to trigger O(n²) memory amplification - // Use start() + reading .stdout in callback to force V8 string flattening - const stdoutSizeMB = 200 - console.log(`Running command to generate ~${stdoutSizeMB}MB of stdout...`) - console.log( - 'Reading .stdout in callback to force V8 string flattening (simulates readLines indexOf behavior)...' - ) - - const startTime = Date.now() - - // Track peak memory during execution - let peakHeap = 0 - let peakRss = 0 - let chunkCount = 0 - const memInterval = setInterval(() => { - const mem = process.memoryUsage() - if (mem.heapUsed > peakHeap) peakHeap = mem.heapUsed - if (mem.rss > peakRss) peakRss = mem.rss - }, 50) - - try { - const cmd = await sandbox.commands.start( - `python3 -c " -import sys -line = 'x' * 1000 + '\\n' -for _ in range(${stdoutSizeMB * 1000}): - sys.stdout.write(line) -sys.stdout.flush() -"`, - { - timeout: 600, - onStdout: () => { - chunkCount++ - }, - } - ) - const result = await cmd.wait() - - clearInterval(memInterval) - const elapsed = ((Date.now() - startTime) / 1000).toFixed(1) - - // Final memory check - const finalMem = process.memoryUsage() - if (finalMem.heapUsed > peakHeap) peakHeap = finalMem.heapUsed - if (finalMem.rss > peakRss) peakRss = finalMem.rss - - console.log(`\nCommand completed in ${elapsed}s`) - console.log(`stdout length: ${(result.stdout.length / MB).toFixed(1)} MB`) - console.log(`chunks received: ${chunkCount}`) - console.log(`exit code: ${result.exitCode}`) - logMemory('after command') - console.log(`Peak heapUsed: ${(peakHeap / MB).toFixed(1)} MB`) - console.log(`Peak RSS: ${(peakRss / MB).toFixed(1)} MB`) - console.log( - `Memory amplification: ${(peakHeap / (stdoutSizeMB * 1_000_000)).toFixed(1)}x` - ) - } catch (e) { - clearInterval(memInterval) - console.error('Command failed:', e) - logMemory('after error') - console.log(`Peak heapUsed: ${(peakHeap / MB).toFixed(1)} MB`) - console.log(`Peak RSS: ${(peakRss / MB).toFixed(1)} MB`) - } finally { - await sandbox.kill() - } -} - -main().catch(console.error) From 438a9b61365a50f75fee2bacdb8ff37341417710 Mon Sep 17 00:00:00 2001 From: Tomas Beran Date: Tue, 14 Apr 2026 15:34:21 +0200 Subject: [PATCH 3/3] fix: add cached lazy join for Python async SDK stdout/stderr properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mirror the JS SDK's caching pattern — avoid re-joining all chunks on every property access when no new data has arrived. --- .../e2b/sandbox_async/commands/command_handle.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py b/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py index 306d1c41ba..fe63ef0f9c 100644 --- a/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py +++ b/packages/python-sdk/e2b/sandbox_async/commands/command_handle.py @@ -41,14 +41,18 @@ def stdout(self): """ Command stdout output. """ - return "".join(self._stdout_chunks) + if self._stdout_cached is None: + self._stdout_cached = "".join(self._stdout_chunks) + return self._stdout_cached @property def stderr(self): """ Command stderr output. """ - return "".join(self._stderr_chunks) + if self._stderr_cached is None: + self._stderr_cached = "".join(self._stderr_chunks) + return self._stderr_cached @property def error(self): @@ -89,6 +93,8 @@ def __init__( self._stdout_chunks: list[str] = [] self._stderr_chunks: list[str] = [] + self._stdout_cached: Optional[str] = None + self._stderr_cached: Optional[str] = None self._on_stdout = on_stdout self._on_stderr = on_stderr @@ -114,10 +120,12 @@ async def _iterate_events( if event.event.data.stdout: out = event.event.data.stdout.decode("utf-8", "replace") self._stdout_chunks.append(out) + self._stdout_cached = None yield out, None, None if event.event.data.stderr: out = event.event.data.stderr.decode("utf-8", "replace") self._stderr_chunks.append(out) + self._stderr_cached = None yield None, out, None if event.event.data.pty: yield None, None, event.event.data.pty