-
Notifications
You must be signed in to change notification settings - Fork 887
fix: replace O(n²) string concat with array buffering for stdout/stderr #1271
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
| import { Sandbox } from '../dist/index.mjs' | ||
|
|
||
| const MB = 1024 * 1024 | ||
|
|
||
| function logMemory(label: string) { | ||
| const mem = process.memoryUsage() | ||
| console.log( | ||
| `[${label}] heapUsed: ${(mem.heapUsed / MB).toFixed(1)} MB, heapTotal: ${(mem.heapTotal / MB).toFixed(1)} MB, rss: ${(mem.rss / MB).toFixed(1)} MB` | ||
| ) | ||
| } | ||
|
|
||
| async function main() { | ||
| const apiKey = process.env.E2B_API_KEY | ||
| if (!apiKey) { | ||
| console.error('E2B_API_KEY env var is required') | ||
| process.exit(1) | ||
| } | ||
|
|
||
| logMemory('before sandbox') | ||
|
|
||
| const sandbox = await Sandbox.create({ apiKey }) | ||
| console.log(`Sandbox created: ${sandbox.sandboxId}`) | ||
| logMemory('after sandbox create') | ||
|
|
||
| // Generate ~50MB of stdout to trigger O(n²) memory amplification | ||
| // Use start() + reading .stdout in callback to force V8 string flattening | ||
| const stdoutSizeMB = 200 | ||
| console.log(`Running command to generate ~${stdoutSizeMB}MB of stdout...`) | ||
| console.log( | ||
| 'Reading .stdout in callback to force V8 string flattening (simulates readLines indexOf behavior)...' | ||
| ) | ||
|
|
||
| const startTime = Date.now() | ||
|
|
||
| // Track peak memory during execution | ||
| let peakHeap = 0 | ||
| let peakRss = 0 | ||
| let chunkCount = 0 | ||
| const memInterval = setInterval(() => { | ||
| const mem = process.memoryUsage() | ||
| if (mem.heapUsed > peakHeap) peakHeap = mem.heapUsed | ||
| if (mem.rss > peakRss) peakRss = mem.rss | ||
| }, 50) | ||
|
|
||
| try { | ||
| const cmd = await sandbox.commands.start( | ||
| `python3 -c " | ||
| import sys | ||
| line = 'x' * 1000 + '\\n' | ||
| for _ in range(${stdoutSizeMB * 1000}): | ||
| sys.stdout.write(line) | ||
| sys.stdout.flush() | ||
| "`, | ||
| { | ||
| timeout: 600, | ||
| onStdout: () => { | ||
| chunkCount++ | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The script states it is "reading .stdout in callback" to force flattening, but the callback only increments Useful? React with 👍 / 👎. |
||
| }, | ||
| } | ||
| ) | ||
| const result = await cmd.wait() | ||
|
|
||
| clearInterval(memInterval) | ||
| const elapsed = ((Date.now() - startTime) / 1000).toFixed(1) | ||
|
|
||
| // Final memory check | ||
| const finalMem = process.memoryUsage() | ||
| if (finalMem.heapUsed > peakHeap) peakHeap = finalMem.heapUsed | ||
| if (finalMem.rss > peakRss) peakRss = finalMem.rss | ||
|
|
||
| console.log(`\nCommand completed in ${elapsed}s`) | ||
| console.log(`stdout length: ${(result.stdout.length / MB).toFixed(1)} MB`) | ||
| console.log(`chunks received: ${chunkCount}`) | ||
| console.log(`exit code: ${result.exitCode}`) | ||
| logMemory('after command') | ||
| console.log(`Peak heapUsed: ${(peakHeap / MB).toFixed(1)} MB`) | ||
| console.log(`Peak RSS: ${(peakRss / MB).toFixed(1)} MB`) | ||
| console.log( | ||
| `Memory amplification: ${(peakHeap / (stdoutSizeMB * 1_000_000)).toFixed(1)}x` | ||
| ) | ||
| } catch (e) { | ||
| clearInterval(memInterval) | ||
| console.error('Command failed:', e) | ||
| logMemory('after error') | ||
| console.log(`Peak heapUsed: ${(peakHeap / MB).toFixed(1)} MB`) | ||
| console.log(`Peak RSS: ${(peakRss / MB).toFixed(1)} MB`) | ||
| } finally { | ||
| await sandbox.kill() | ||
| } | ||
| } | ||
|
|
||
| main().catch(console.error) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -38,18 +38,18 @@ | |
|
|
||
| @property | ||
| def stdout(self): | ||
| """ | ||
| Command stdout output. | ||
| """ | ||
| return self._stdout | ||
| return "".join(self._stdout_chunks) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
The new Useful? React with 👍 / 👎. |
||
|
|
||
| @property | ||
| def stderr(self): | ||
| """ | ||
| Command stderr output. | ||
| """ | ||
| return self._stderr | ||
| return "".join(self._stderr_chunks) | ||
|
|
||
|
Check failure on line 52 in packages/python-sdk/e2b/sandbox_async/commands/command_handle.py
|
||
|
Comment on lines
41
to
56
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔴 The Python async SDK's Extended reasoning...What the bug is: The Python async SDK's The specific code path: When a user passes Why existing code doesn't prevent it: The PR replaced The impact: With N chunks totaling T bytes, accessing How to fix: Mirror the JS implementation exactly. Add Step-by-step proof: Suppose a user runs |
||
| @property | ||
| def error(self): | ||
| """ | ||
|
|
@@ -87,8 +87,8 @@ | |
| self._handle_kill = handle_kill | ||
| self._events = events | ||
|
|
||
| self._stdout: str = "" | ||
| self._stderr: str = "" | ||
| self._stdout_chunks: list[str] = [] | ||
| self._stderr_chunks: list[str] = [] | ||
|
|
||
| self._on_stdout = on_stdout | ||
| self._on_stderr = on_stderr | ||
|
|
@@ -113,18 +113,18 @@ | |
| if event.event.HasField("data"): | ||
| if event.event.data.stdout: | ||
| out = event.event.data.stdout.decode("utf-8", "replace") | ||
| self._stdout += out | ||
| self._stdout_chunks.append(out) | ||
| yield out, None, None | ||
| if event.event.data.stderr: | ||
| out = event.event.data.stderr.decode("utf-8", "replace") | ||
| self._stderr += out | ||
| self._stderr_chunks.append(out) | ||
| yield None, out, None | ||
| if event.event.data.pty: | ||
| yield None, None, event.event.data.pty | ||
| if event.event.HasField("end"): | ||
| self._result = CommandResult( | ||
| stdout=self._stdout, | ||
| stderr=self._stderr, | ||
| stdout="".join(self._stdout_chunks), | ||
| stderr="".join(self._stderr_chunks), | ||
| exit_code=event.event.end.exit_code, | ||
| error=event.event.end.error, | ||
| ) | ||
|
|
@@ -176,8 +176,8 @@ | |
|
|
||
| if self._result.exit_code != 0: | ||
| raise CommandExitException( | ||
| stdout=self._stdout, | ||
| stderr=self._stderr, | ||
| stdout=self._result.stdout, | ||
| stderr=self._result.stderr, | ||
| exit_code=self._result.exit_code, | ||
| error=self._result.error, | ||
| ) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -34,14 +34,14 @@ | |
| ], | ||
| ): | ||
| self._pid = pid | ||
| self._handle_kill = handle_kill | ||
| self._events = events | ||
|
|
||
| self._stdout: str = "" | ||
| self._stderr: str = "" | ||
| self._stdout_chunks: list[str] = [] | ||
| self._stderr_chunks: list[str] = [] | ||
|
|
||
| self._result: Optional[CommandResult] = None | ||
| self._iteration_exception: Optional[Exception] = None | ||
|
Check notice on line 44 in packages/python-sdk/e2b/sandbox_sync/commands/command_handle.py
|
||
|
Comment on lines
37
to
44
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟣 The sync Extended reasoning...What the bug is The async The specific code path In Why existing code does not prevent it The PR only changes internal storage from Addressing the refutation The refutation argues this asymmetry is intentional: the sync SDK processes events lazily (only when iterated), so accumulated output mid-run is less meaningful, and users can use Impact Any code that does Step-by-step proof
Pre-existing nature Before this PR: |
||
|
|
||
| def __iter__(self): | ||
| """ | ||
|
|
@@ -67,18 +67,18 @@ | |
| if event.event.HasField("data"): | ||
| if event.event.data.stdout: | ||
| out = event.event.data.stdout.decode("utf-8", "replace") | ||
| self._stdout += out | ||
| self._stdout_chunks.append(out) | ||
| yield out, None, None | ||
| if event.event.data.stderr: | ||
| out = event.event.data.stderr.decode("utf-8", "replace") | ||
| self._stderr += out | ||
| self._stderr_chunks.append(out) | ||
| yield None, out, None | ||
| if event.event.data.pty: | ||
| yield None, None, event.event.data.pty | ||
| if event.event.HasField("end"): | ||
| self._result = CommandResult( | ||
| stdout=self._stdout, | ||
| stderr=self._stderr, | ||
| stdout="".join(self._stdout_chunks), | ||
| stderr="".join(self._stderr_chunks), | ||
| exit_code=event.event.end.exit_code, | ||
| error=event.event.end.error, | ||
| ) | ||
|
|
@@ -131,8 +131,8 @@ | |
|
|
||
| if self._result.exit_code != 0: | ||
| raise CommandExitException( | ||
| stdout=self._stdout, | ||
| stderr=self._stderr, | ||
| stdout=self._result.stdout, | ||
| stderr=self._result.stderr, | ||
| exit_code=self._result.exit_code, | ||
| error=self._result.error, | ||
| ) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This script passes
timeout: 600, but the JS command API consumestimeoutMs, so this value is ignored and the default 60s timeout is used. For large-output runs near a minute, that makes the repro flaky and can invalidate the benchmark conclusions by timing out unexpectedly.Useful? React with 👍 / 👎.