Skip to content

Commit 20c434d

Browse files
committed
feat(resume-build): add -shell flag for envd-backed PTY access
Opens an interactive PTY session via envd's Process.Start RPC instead of relying on sshd in the template image. Host stdin is put into raw mode, output streams back to stdout, and SIGWINCH (terminal resize) is forwarded via Update. TERM and locale vars are passed through so curses apps (htop, tmux, vim) initialise correctly — envd only forwards PATH/HOME/USER/LOGNAME by default. In interactive mode, Ctrl+D exits and tears down the sandbox. In -signal-pause mode, the shell runs alongside the signal wait; Ctrl+D before the signal skips the snapshot. The existing nsenter+ssh hint is still printed for images with running sshd.
1 parent d59b47c commit 20c434d

3 files changed

Lines changed: 395 additions & 9 deletions

File tree

packages/orchestrator/cmd/resume-build/main.go

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ func main() {
7171
cmdPause := flag.String("cmd-pause", "", "execute command in sandbox, then pause on success")
7272
cmdSignalPause := flag.String("cmd-signal-pause", "", "execute command in sandbox, then wait for SIGUSR1 before pausing")
7373
optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)")
74+
shell := flag.Bool("shell", false, "attach an interactive PTY shell via envd (no sshd required in the sandbox)")
7475

7576
flag.Parse()
7677

@@ -125,6 +126,21 @@ func main() {
125126
log.Fatal("-optimize is incompatible with -iterations (benchmarking doesn't upload)")
126127
}
127128

129+
if *shell {
130+
if isCmdMode {
131+
log.Fatal("-shell is incompatible with -cmd")
132+
}
133+
if *cmdPause != "" || *cmdSignalPause != "" {
134+
log.Fatal("-shell is incompatible with -cmd-pause and -cmd-signal-pause")
135+
}
136+
if *pause {
137+
log.Fatal("-shell is incompatible with -pause (sandbox is paused immediately, no time to interact)")
138+
}
139+
if *iterations > 0 {
140+
log.Fatal("-shell is incompatible with -iterations")
141+
}
142+
}
143+
128144
// Generate new build ID if not specified and pause mode is enabled
129145
outputBuildID := *toBuild
130146
if isPauseMode && outputBuildID == "" {
@@ -159,7 +175,7 @@ func main() {
159175
iterations: *iterations,
160176
}
161177

162-
err := run(ctx, *fromBuild, *iterations, *coldStart, *noPrefetch, *noEgress, *verbose, pauseOpts, runOpts)
178+
err := run(ctx, *fromBuild, *iterations, *coldStart, *noPrefetch, *noEgress, *verbose, *shell, pauseOpts, runOpts)
163179
cancel()
164180

165181
if err != nil {
@@ -274,6 +290,7 @@ type runner struct {
274290
cache *template.Cache
275291
coldStart bool
276292
noPrefetch bool
293+
shell bool
277294
config cfg.BuilderConfig
278295
storage storage.StorageProvider
279296
}
@@ -314,11 +331,23 @@ func (r *runner) interactive(ctx context.Context) error {
314331

315332
fmt.Printf("✅ Running (resumed in %s)\n", time.Since(t0))
316333
fmt.Printf(" sudo nsenter --net=/var/run/netns/%s ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no root@169.254.0.21\n", sbx.Slot.NamespaceID())
317-
fmt.Println("Ctrl+C to stop")
318334

335+
defer func() {
336+
fmt.Println("🧹 Cleanup...")
337+
sbx.Close(context.WithoutCancel(ctx))
338+
}()
339+
340+
if r.shell {
341+
err := attachShell(ctx, sbx)
342+
if err != nil && !isShellExited(err) {
343+
return err
344+
}
345+
346+
return nil
347+
}
348+
349+
fmt.Println("Ctrl+C to stop")
319350
<-ctx.Done()
320-
fmt.Println("🧹 Cleanup...")
321-
sbx.Close(context.WithoutCancel(ctx))
322351

323352
return nil
324353
}
@@ -576,11 +605,17 @@ func (r *runner) pauseOnce(ctx context.Context, opts pauseOptions, verbose bool)
576605
fmt.Printf("🔧 Starting command: %s\n", opts.commandSignal)
577606
}
578607
cmdErrCh := runCommandInSandboxAsync(ctx, sbx, opts.commandSignal)
579-
if err := waitForPauseSignal(ctx, sbx, "SIGUSR1", cmdErrCh); err != nil {
608+
if err := waitForPauseSignal(ctx, sbx, "SIGUSR1", cmdErrCh, false); err != nil {
580609
return pauseTimings{resume: resumeDur, err: err}, err
581610
}
582611
case opts.signalName != "":
583-
if err := waitForPauseSignal(ctx, sbx, opts.signalName, nil); err != nil {
612+
if err := waitForPauseSignal(ctx, sbx, opts.signalName, nil, r.shell); err != nil {
613+
if errors.Is(err, errShellExitedBeforePauseSignal) {
614+
fmt.Println("ℹ️ Shell exited before pause signal — skipping snapshot")
615+
616+
return pauseTimings{resume: resumeDur}, nil
617+
}
618+
584619
return pauseTimings{resume: resumeDur, err: err}, err
585620
}
586621
}
@@ -959,7 +994,7 @@ func (r *runner) benchmark(ctx context.Context, n int) error {
959994
return lastErr
960995
}
961996

962-
func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefetch, noEgress, verbose bool, pauseOpts pauseOptions, runOpts runOptions) error {
997+
func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefetch, noEgress, verbose, shell bool, pauseOpts pauseOptions, runOpts runOptions) error {
963998
// Silence other loggers unless verbose mode
964999
var l logger.Logger
9651000
if !verbose {
@@ -1122,6 +1157,7 @@ func run(ctx context.Context, buildID string, iterations int, coldStart, noPrefe
11221157
cache: cache,
11231158
coldStart: coldStart,
11241159
noPrefetch: noPrefetch,
1160+
shell: shell,
11251161
config: config.BuilderConfig,
11261162
storage: persistence,
11271163
sbxConfig: sbxCfg,
@@ -1242,7 +1278,12 @@ func runCommandInSandboxAsync(ctx context.Context, sbx *sandbox.Sandbox, command
12421278
return errCh
12431279
}
12441280

1245-
func waitForPauseSignal(ctx context.Context, sbx *sandbox.Sandbox, signalName string, cmdErrCh <-chan error) error {
1281+
// errShellExitedBeforePauseSignal indicates the interactive shell session ended
1282+
// (e.g. the user pressed Ctrl+D) before the configured pause signal arrived. The
1283+
// caller should abort cleanly without taking a snapshot.
1284+
var errShellExitedBeforePauseSignal = errors.New("shell exited before pause signal")
1285+
1286+
func waitForPauseSignal(ctx context.Context, sbx *sandbox.Sandbox, signalName string, cmdErrCh <-chan error, shell bool) error {
12461287
sig := parseSignal(signalName)
12471288
if sig == nil {
12481289
return fmt.Errorf("unknown signal: %s", signalName)
@@ -1255,6 +1296,18 @@ func waitForPauseSignal(ctx context.Context, sbx *sandbox.Sandbox, signalName st
12551296
signal.Notify(sigCh, sig)
12561297
defer signal.Stop(sigCh)
12571298

1299+
var shellDoneCh <-chan struct{}
1300+
if shell {
1301+
done := make(chan struct{})
1302+
shellDoneCh = done
1303+
go func() {
1304+
defer close(done)
1305+
if err := attachShell(ctx, sbx); err != nil && !isShellExited(err) {
1306+
fmt.Printf("⚠️ Shell error: %v\n", err)
1307+
}
1308+
}()
1309+
}
1310+
12581311
for {
12591312
select {
12601313
case <-ctx.Done():
@@ -1263,6 +1316,8 @@ func waitForPauseSignal(ctx context.Context, sbx *sandbox.Sandbox, signalName st
12631316
fmt.Printf("📨 Received %s signal\n", signalName)
12641317

12651318
return nil
1319+
case <-shellDoneCh:
1320+
return errShellExitedBeforePauseSignal
12661321
case err, ok := <-cmdErrCh:
12671322
if !ok {
12681323
cmdErrCh = nil

0 commit comments

Comments
 (0)