Skip to content

Commit 3fe4149

Browse files
committed
draft(uffd,fc): balloon free-page-hinting + envd reclaim on pause
Adds a pre-pause guest reclaim step (sync + drop_caches + compact_memory + fstrim, run via the existing envd Process service with Connect-Timeout-Ms) and a virtio-balloon free-page-hinting drain to MADV_DONTNEED freed pages out of the memfile before the snapshot. The balloon is installed with FPH=true whenever FPR is on; both behaviors are off by default and gated by separate LD flags (free-page-hinting, reclaim-on-pause), so they can be flipped at runtime without rebuilding templates.
1 parent 417ed97 commit 3fe4149

6 files changed

Lines changed: 163 additions & 10 deletions

File tree

packages/orchestrator/cmd/resume-build/main.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,19 @@ func main() {
7272
cmdSignalPause := flag.String("cmd-signal-pause", "", "execute command in sandbox, then wait for SIGUSR1 before pausing")
7373
optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)")
7474

75+
// Pause-time reclaim/FPH overrides — both off by default. Pass >0 to enable.
76+
fphTimeoutMs := flag.Int("fph-timeout-ms", 0, "override free-page-hinting-timeout-ms LD flag (0 = use LD default)")
77+
reclaimTimeoutMs := flag.Int("reclaim-timeout-ms", 0, "override reclaim-on-pause-timeout-ms LD flag (0 = use LD default)")
78+
7579
flag.Parse()
7680

81+
if *fphTimeoutMs > 0 {
82+
featureflags.NewIntFlag("free-page-hinting-timeout-ms", *fphTimeoutMs)
83+
}
84+
if *reclaimTimeoutMs > 0 {
85+
featureflags.NewIntFlag("reclaim-on-pause-timeout-ms", *reclaimTimeoutMs)
86+
}
87+
7788
if *fromBuild == "" {
7889
log.Fatal("-from-build required")
7990
}

packages/orchestrator/pkg/sandbox/fc/client.go

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -427,8 +427,11 @@ func (c *apiClient) startVM(ctx context.Context) error {
427427
return nil
428428
}
429429

430-
func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
431-
ctx, span := tracer.Start(ctx, "enable-free-page-reporting")
430+
// installBalloon installs the virtio-balloon pre-boot with target size 0.
431+
// FreePageHinting is always armed (pure runtime toggle, used by DrainBalloon);
432+
// FreePageReporting is set per template-build gate.
433+
func (c *apiClient) installBalloon(ctx context.Context, freePageReporting bool) error {
434+
ctx, span := tracer.Start(ctx, "install-balloon")
432435
defer span.End()
433436

434437
amountMib := int64(0)
@@ -439,7 +442,8 @@ func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
439442
Body: &models.Balloon{
440443
AmountMib: &amountMib,
441444
DeflateOnOom: &deflateOnOom,
442-
FreePageReporting: true,
445+
FreePageReporting: freePageReporting,
446+
FreePageHinting: true,
443447
},
444448
}
445449

@@ -451,6 +455,33 @@ func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
451455
return nil
452456
}
453457

458+
func (c *apiClient) startBalloonHinting(ctx context.Context, acknowledgeOnStop bool) error {
459+
params := operations.StartBalloonHintingParams{
460+
Context: ctx,
461+
Body: &models.BalloonStartCmd{AcknowledgeOnStop: acknowledgeOnStop},
462+
}
463+
_, err := c.client.Operations.StartBalloonHinting(&params)
464+
if err != nil {
465+
return fmt.Errorf("error starting balloon hinting: %w", err)
466+
}
467+
468+
return nil
469+
}
470+
471+
func (c *apiClient) describeBalloonHinting(ctx context.Context) (hostCmd, guestCmd int64, err error) {
472+
params := operations.DescribeBalloonHintingParams{Context: ctx}
473+
res, err := c.client.Operations.DescribeBalloonHinting(&params)
474+
if err != nil {
475+
return 0, 0, err
476+
}
477+
if res.Payload.HostCmd != nil {
478+
hostCmd = *res.Payload.HostCmd
479+
}
480+
guestCmd = res.Payload.GuestCmd
481+
482+
return hostCmd, guestCmd, nil
483+
}
484+
454485
func (c *apiClient) memoryMapping(ctx context.Context) (*memory.Mapping, error) {
455486
params := operations.GetMemoryMappingsParams{
456487
Context: ctx,

packages/orchestrator/pkg/sandbox/fc/process.go

Lines changed: 41 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,8 @@ type Process struct {
133133
Exit *utils.ErrorOnce
134134

135135
client *apiClient
136+
137+
balloonInstalled bool
136138
}
137139

138140
func NewProcess(
@@ -440,13 +442,13 @@ func (p *Process) Create(
440442
telemetry.ReportEvent(ctx, "set fc entropy config")
441443

442444
if freePageReporting {
443-
err = p.client.enableFreePageReporting(ctx)
444-
if err != nil {
445+
if err := p.client.installBalloon(ctx, freePageReporting); err != nil {
445446
fcStopErr := p.Stop(ctx)
446447

447-
return errors.Join(fmt.Errorf("error enabling free page reporting: %w", err), fcStopErr)
448+
return errors.Join(fmt.Errorf("error installing balloon device: %w", err), fcStopErr)
448449
}
449-
telemetry.ReportEvent(ctx, "enabled free page reporting")
450+
p.balloonInstalled = true
451+
telemetry.ReportEvent(ctx, "installed balloon device")
450452
}
451453

452454
err = p.client.startVM(ctx)
@@ -710,6 +712,41 @@ func (p *Process) Pause(ctx context.Context) error {
710712
return p.client.pauseVM(ctx)
711713
}
712714

715+
// DrainBalloon triggers a free-page-hinting run and blocks until the guest
716+
// acknowledges or ctx fires. No-op when the balloon wasn't installed.
717+
func (p *Process) DrainBalloon(ctx context.Context) error {
718+
if !p.balloonInstalled {
719+
return nil
720+
}
721+
722+
ctx, span := tracer.Start(ctx, "drain-balloon")
723+
defer span.End()
724+
725+
if err := p.client.startBalloonHinting(ctx, true /* ackOnStop */); err != nil {
726+
return fmt.Errorf("start balloon hinting: %w", err)
727+
}
728+
729+
backoff := 5 * time.Millisecond
730+
for {
731+
select {
732+
case <-ctx.Done():
733+
return ctx.Err()
734+
case <-time.After(backoff):
735+
}
736+
737+
host, guest, err := p.client.describeBalloonHinting(ctx)
738+
if err != nil {
739+
return fmt.Errorf("balloon hinting status: %w", err)
740+
}
741+
if guest >= host {
742+
return nil
743+
}
744+
if backoff < 50*time.Millisecond {
745+
backoff *= 2
746+
}
747+
}
748+
}
749+
713750
// CreateSnapshot VM needs to be paused before creating a snapshot.
714751
func (p *Process) CreateSnapshot(ctx context.Context, snapfilePath string) error {
715752
ctx, childSpan := tracer.Start(ctx, "create-snapshot-fc")
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
package sandbox
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"net/http"
7+
"strconv"
8+
"time"
9+
10+
"connectrpc.com/connect"
11+
"go.uber.org/zap"
12+
13+
"github.com/e2b-dev/infra/packages/shared/pkg/consts"
14+
"github.com/e2b-dev/infra/packages/shared/pkg/grpc"
15+
"github.com/e2b-dev/infra/packages/shared/pkg/grpc/envd/process"
16+
"github.com/e2b-dev/infra/packages/shared/pkg/grpc/envd/process/processconnect"
17+
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
18+
)
19+
20+
// Steps separated by ';' so each runs even if a previous one fails. On
21+
// timeout envd kills bash; the in-flight syscall finishes and remaining
22+
// steps are skipped.
23+
const reclaimScript = `sync; echo 3 > /proc/sys/vm/drop_caches 2>/dev/null; echo 1 > /proc/sys/vm/compact_memory 2>/dev/null; fstrim -av 2>/dev/null`
24+
25+
// bestEffortReclaim asks envd to reclaim guest memory + disk before pause.
26+
// All failures are swallowed.
27+
func (s *Sandbox) bestEffortReclaim(ctx context.Context, timeout time.Duration) {
28+
ctx, span := tracer.Start(ctx, "envd-reclaim")
29+
defer span.End()
30+
31+
rcCtx, cancel := context.WithTimeout(ctx, timeout)
32+
defer cancel()
33+
34+
addr := fmt.Sprintf("http://%s:%d", s.Slot.HostIPString(), consts.DefaultEnvdServerPort)
35+
pc := processconnect.NewProcessClient(&http.Client{Transport: sandboxHttpClient.Transport}, addr)
36+
37+
req := connect.NewRequest(&process.StartRequest{
38+
Process: &process.ProcessConfig{Cmd: "/bin/bash", Args: []string{"-c", reclaimScript}},
39+
})
40+
req.Header().Set("Connect-Timeout-Ms", strconv.FormatInt(int64(timeout/time.Millisecond), 10))
41+
grpc.SetUserHeader(req.Header(), "root")
42+
43+
stream, err := pc.Start(rcCtx, req)
44+
if err != nil {
45+
logger.L().Warn(ctx, "envd reclaim failed", logger.WithSandboxID(s.Runtime.SandboxID), zap.Error(err))
46+
47+
return
48+
}
49+
defer stream.Close()
50+
51+
for stream.Receive() {
52+
}
53+
}

packages/orchestrator/pkg/sandbox/sandbox.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ type Sandbox struct {
218218
files *storage.SandboxFiles
219219
cleanup *Cleanup
220220

221+
featureFlags *featureflags.Client
222+
221223
process *fc.Process
222224
cgroupHandle *cgroup.CgroupHandle
223225

@@ -458,7 +460,8 @@ func (f *Factory) CreateSandbox(
458460
files: sandboxFiles,
459461
process: fcHandle,
460462

461-
cleanup: cleanup,
463+
cleanup: cleanup,
464+
featureFlags: f.featureFlags,
462465

463466
APIStoredConfig: apiConfigToStore,
464467

@@ -799,7 +802,8 @@ func (f *Factory) ResumeSandbox(
799802
files: sandboxFiles,
800803
process: fcHandle,
801804

802-
cleanup: cleanup,
805+
cleanup: cleanup,
806+
featureFlags: f.featureFlags,
803807

804808
APIStoredConfig: apiConfigToStore,
805809
CABundle: f.egressProxy.CABundle(),
@@ -1053,6 +1057,19 @@ func (s *Sandbox) Pause(
10531057
// Stop the health check before pausing the VM
10541058
s.Checks.Stop()
10551059

1060+
// Best-effort pre-pause guest reclaim, then FPH drain. Both run on the
1061+
// live VM and are non-fatal. Timeout=0 disables the step.
1062+
if t := time.Duration(s.featureFlags.IntFlag(ctx, featureflags.ReclaimOnPauseTimeoutMs)) * time.Millisecond; t > 0 {
1063+
s.bestEffortReclaim(ctx, t)
1064+
}
1065+
if t := time.Duration(s.featureFlags.IntFlag(ctx, featureflags.FreePageHintingTimeoutMs)) * time.Millisecond; t > 0 {
1066+
drainCtx, cancel := context.WithTimeout(ctx, t)
1067+
if err := s.process.DrainBalloon(drainCtx); err != nil {
1068+
telemetry.ReportError(ctx, "balloon hinting drain failed (continuing pause)", err)
1069+
}
1070+
cancel()
1071+
}
1072+
10561073
if err := s.process.Pause(ctx); err != nil {
10571074
return nil, fmt.Errorf("failed to pause VM: %w", err)
10581075
}

packages/shared/pkg/featureflags/flags.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,11 @@ var (
158158
BestOfKMaxOvercommit = NewIntFlag("best-of-k-max-overcommit", 400) // Default R=4 (stored as percentage, max over-commit ratio)
159159
BestOfKAlpha = NewIntFlag("best-of-k-alpha", 50) // Default Alpha=0.5 (stored as percentage for int flag, current usage weight)
160160
EnvdInitTimeoutMilliseconds = NewIntFlag("envd-init-request-timeout-milliseconds", 50) // Timeout for envd init request in milliseconds
161-
HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s)
161+
// 0 disables the step. Both default off; operator opts in once the host
162+
// kernel has the FPH race fix and the fleet is ready.
163+
FreePageHintingTimeoutMs = NewIntFlag("free-page-hinting-timeout-ms", 0)
164+
ReclaimOnPauseTimeoutMs = NewIntFlag("reclaim-on-pause-timeout-ms", 0)
165+
HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s)
162166
MaxCacheWriterConcurrencyFlag = NewIntFlag("max-cache-writer-concurrency", 10)
163167

164168
// BuildCacheMaxUsagePercentage the maximum percentage of the cache disk storage

0 commit comments

Comments
 (0)