Skip to content

Commit 263a0d0

Browse files
committed
feat(fc): drain virtio-balloon free-page-hinting before pause
Arm free-page-hinting on the existing balloon device (always set when the balloon is installed; pure runtime toggle), and on pause do a host-initiated hint+wait so MADV_DONTNEED-reclaimed pages are settled before the snapshot. Pages reclaimed this way generate UFFD_EVENT_REMOVE, which the orchestrator already tracks (parent FPR PR), so the snapshot captures them as removed instead of zero-filled.

- fc/client.go: rename enableFreePageReporting -> installBalloon; always set FreePageHinting=true; add startBalloonHinting + describeBalloonHinting helpers.
- fc/process.go: track balloonInstalled; add DrainBalloon (start + poll guest_cmd >= host_cmd, with host>0 guard against transient nil/zero responses).
- sandbox.go: wire featureFlags into Sandbox; call DrainBalloon from Pause behind the flag. Failures are logged but non-fatal.

Gated by free-page-hinting-timeout-ms (LD int flag, ms; default 0 = disabled). resume-build gains --fph-timeout-ms for local exercise.
1 parent 417ed97 commit 263a0d0

5 files changed

Lines changed: 118 additions & 10 deletions

File tree

packages/orchestrator/cmd/resume-build/main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,15 @@ func main() {
7272
cmdSignalPause := flag.String("cmd-signal-pause", "", "execute command in sandbox, then wait for SIGUSR1 before pausing")
7373
optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)")
7474

75+
// Pause-time FPH override; 0 = use LD default (off).
76+
fphTimeoutMs := flag.Int("fph-timeout-ms", 0, "override free-page-hinting-timeout-ms LD flag (0 = use LD default)")
77+
7578
flag.Parse()
7679

80+
if *fphTimeoutMs > 0 {
81+
featureflags.NewIntFlag("free-page-hinting-timeout-ms", *fphTimeoutMs)
82+
}
83+
7784
if *fromBuild == "" {
7885
log.Fatal("-from-build required")
7986
}

packages/orchestrator/pkg/sandbox/fc/client.go

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -427,8 +427,11 @@ func (c *apiClient) startVM(ctx context.Context) error {
427427
return nil
428428
}
429429

430-
func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
431-
ctx, span := tracer.Start(ctx, "enable-free-page-reporting")
430+
// installBalloon installs the virtio-balloon pre-boot with target size 0.
431+
// FreePageHinting is always armed (pure runtime toggle, used by DrainBalloon);
432+
// FreePageReporting is set per template-build gate.
433+
func (c *apiClient) installBalloon(ctx context.Context, freePageReporting bool) error {
434+
ctx, span := tracer.Start(ctx, "install-balloon")
432435
defer span.End()
433436

434437
amountMib := int64(0)
@@ -439,7 +442,8 @@ func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
439442
Body: &models.Balloon{
440443
AmountMib: &amountMib,
441444
DeflateOnOom: &deflateOnOom,
442-
FreePageReporting: true,
445+
FreePageReporting: freePageReporting,
446+
FreePageHinting: true,
443447
},
444448
}
445449

@@ -451,6 +455,33 @@ func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
451455
return nil
452456
}
453457

458+
func (c *apiClient) startBalloonHinting(ctx context.Context, acknowledgeOnStop bool) error {
459+
params := operations.StartBalloonHintingParams{
460+
Context: ctx,
461+
Body: &models.BalloonStartCmd{AcknowledgeOnStop: acknowledgeOnStop},
462+
}
463+
_, err := c.client.Operations.StartBalloonHinting(&params)
464+
if err != nil {
465+
return fmt.Errorf("error starting balloon hinting: %w", err)
466+
}
467+
468+
return nil
469+
}
470+
471+
func (c *apiClient) describeBalloonHinting(ctx context.Context) (hostCmd, guestCmd int64, err error) {
472+
params := operations.DescribeBalloonHintingParams{Context: ctx}
473+
res, err := c.client.Operations.DescribeBalloonHinting(&params)
474+
if err != nil {
475+
return 0, 0, err
476+
}
477+
if res.Payload.HostCmd != nil {
478+
hostCmd = *res.Payload.HostCmd
479+
}
480+
guestCmd = res.Payload.GuestCmd
481+
482+
return hostCmd, guestCmd, nil
483+
}
484+
454485
func (c *apiClient) memoryMapping(ctx context.Context) (*memory.Mapping, error) {
455486
params := operations.GetMemoryMappingsParams{
456487
Context: ctx,

packages/orchestrator/pkg/sandbox/fc/process.go

Lines changed: 56 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/rootfs"
2727
"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/socket"
2828
"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/template"
29+
"github.com/e2b-dev/infra/packages/shared/pkg/fc/client/operations"
2930
"github.com/e2b-dev/infra/packages/shared/pkg/keys"
3031
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
3132
sbxlogger "github.com/e2b-dev/infra/packages/shared/pkg/logger/sandbox"
@@ -440,13 +441,12 @@ func (p *Process) Create(
440441
telemetry.ReportEvent(ctx, "set fc entropy config")
441442

442443
if freePageReporting {
443-
err = p.client.enableFreePageReporting(ctx)
444-
if err != nil {
444+
if err := p.client.installBalloon(ctx, freePageReporting); err != nil {
445445
fcStopErr := p.Stop(ctx)
446446

447-
return errors.Join(fmt.Errorf("error enabling free page reporting: %w", err), fcStopErr)
447+
return errors.Join(fmt.Errorf("error installing balloon device: %w", err), fcStopErr)
448448
}
449-
telemetry.ReportEvent(ctx, "enabled free page reporting")
449+
telemetry.ReportEvent(ctx, "installed balloon device")
450450
}
451451

452452
err = p.client.startVM(ctx)
@@ -710,6 +710,58 @@ func (p *Process) Pause(ctx context.Context) error {
710710
return p.client.pauseVM(ctx)
711711
}
712712

713+
// DrainBalloon triggers a free-page-hinting run and blocks until the guest
714+
// acknowledges or ctx fires. No-op when no balloon device is configured
715+
// (FC returns 400) so it survives snapshot/resume without local state.
716+
// Records `drain-balloon.outcome` on the span: ok | not-configured | timeout |
717+
// start-failed | describe-failed.
718+
func (p *Process) DrainBalloon(ctx context.Context) error {
719+
ctx, span := tracer.Start(ctx, "drain-balloon")
720+
outcome := "ok"
721+
defer func() {
722+
span.SetAttributes(attribute.String("drain-balloon.outcome", outcome))
723+
span.End()
724+
}()
725+
726+
if err := p.client.startBalloonHinting(ctx, true /* ackOnStop */); err != nil {
727+
var notConfigured *operations.StartBalloonHintingBadRequest
728+
if errors.As(err, &notConfigured) {
729+
outcome = "not-configured"
730+
731+
return nil
732+
}
733+
734+
outcome = "start-failed"
735+
736+
return fmt.Errorf("start balloon hinting: %w", err)
737+
}
738+
739+
backoff := 5 * time.Millisecond
740+
for {
741+
select {
742+
case <-ctx.Done():
743+
outcome = "timeout"
744+
745+
return ctx.Err()
746+
case <-time.After(backoff):
747+
}
748+
749+
host, guest, err := p.client.describeBalloonHinting(ctx)
750+
if err != nil {
751+
outcome = "describe-failed"
752+
753+
return fmt.Errorf("balloon hinting status: %w", err)
754+
}
755+
// host_cmd is monotonic and we just called start, so host > 0
756+
// after FC accepts it. Require it to guard against transient
757+
// nil/zero responses returning a false-positive completion.
758+
if host > 0 && guest >= host {
759+
return nil
760+
}
761+
backoff = min(backoff*2, 50*time.Millisecond)
762+
}
763+
}
764+
713765
// CreateSnapshot VM needs to be paused before creating a snapshot.
714766
func (p *Process) CreateSnapshot(ctx context.Context, snapfilePath string) error {
715767
ctx, childSpan := tracer.Start(ctx, "create-snapshot-fc")

packages/orchestrator/pkg/sandbox/sandbox.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ type Sandbox struct {
218218
files *storage.SandboxFiles
219219
cleanup *Cleanup
220220

221+
featureFlags *featureflags.Client
222+
221223
process *fc.Process
222224
cgroupHandle *cgroup.CgroupHandle
223225

@@ -458,7 +460,8 @@ func (f *Factory) CreateSandbox(
458460
files: sandboxFiles,
459461
process: fcHandle,
460462

461-
cleanup: cleanup,
463+
cleanup: cleanup,
464+
featureFlags: f.featureFlags,
462465

463466
APIStoredConfig: apiConfigToStore,
464467

@@ -799,7 +802,8 @@ func (f *Factory) ResumeSandbox(
799802
files: sandboxFiles,
800803
process: fcHandle,
801804

802-
cleanup: cleanup,
805+
cleanup: cleanup,
806+
featureFlags: f.featureFlags,
803807

804808
APIStoredConfig: apiConfigToStore,
805809
CABundle: f.egressProxy.CABundle(),
@@ -1053,6 +1057,17 @@ func (s *Sandbox) Pause(
10531057
// Stop the health check before pausing the VM
10541058
s.Checks.Stop()
10551059

1060+
// Drain free-page-hinting before pause so the snapshot doesn't capture
1061+
// pages the guest already considers free. No-op when no balloon. Failures
1062+
// are logged but non-fatal. Timeout=0 disables the step.
1063+
if t := time.Duration(s.featureFlags.IntFlag(ctx, featureflags.FreePageHintingTimeoutMs)) * time.Millisecond; t > 0 {
1064+
drainCtx, cancel := context.WithTimeout(ctx, t)
1065+
if err := s.process.DrainBalloon(drainCtx); err != nil {
1066+
telemetry.ReportError(ctx, "balloon hinting drain failed (continuing pause)", err)
1067+
}
1068+
cancel()
1069+
}
1070+
10561071
if err := s.process.Pause(ctx); err != nil {
10571072
return nil, fmt.Errorf("failed to pause VM: %w", err)
10581073
}

packages/shared/pkg/featureflags/flags.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,10 @@ var (
158158
BestOfKMaxOvercommit = NewIntFlag("best-of-k-max-overcommit", 400) // Default R=4 (stored as percentage, max over-commit ratio)
159159
BestOfKAlpha = NewIntFlag("best-of-k-alpha", 50) // Default Alpha=0.5 (stored as percentage for int flag, current usage weight)
160160
EnvdInitTimeoutMilliseconds = NewIntFlag("envd-init-request-timeout-milliseconds", 50) // Timeout for envd init request in milliseconds
161-
HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s)
161+
// FreePageHintingTimeoutMs gates a pre-pause virtio-balloon free-page-hinting
162+
// drain. 0 disables it. Operator opts in once the kernel has the FPH race fix.
163+
FreePageHintingTimeoutMs = NewIntFlag("free-page-hinting-timeout-ms", 0)
164+
HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s)
162165
MaxCacheWriterConcurrencyFlag = NewIntFlag("max-cache-writer-concurrency", 10)
163166

164167
// BuildCacheMaxUsagePercentage the maximum percentage of the cache disk storage

0 commit comments

Comments
 (0)