Skip to content

Commit f4e3ab0

Browse files
committed
feat(fc): drain virtio-balloon free-page-hinting before pause
Arm free-page-hinting on the existing balloon device (always set when the balloon is installed; pure runtime toggle), and on pause do a host-initiated hint+wait so that pages reclaimed via MADV_DONTNEED are settled before the snapshot is taken. Pages reclaimed this way generate UFFD_EVENT_REMOVE, which the orchestrator already tracks (parent FPR PR), so the snapshot captures them as removed instead of zero-filled.

- fc/client.go: rename enableFreePageReporting -> installBalloon; always set FreePageHinting=true; add startBalloonHinting + describeBalloonHinting helpers.
- fc/process.go: track balloonInstalled; add DrainBalloon (start, then poll until guest_cmd >= host_cmd, with a host>0 guard against transient nil/zero responses).
- sandbox.go: wire featureFlags into Sandbox; call DrainBalloon from Pause behind the flag. Failures are logged but non-fatal.

Gated by free-page-hinting-timeout-ms (LaunchDarkly (LD) int flag, in milliseconds; default 0 = disabled). resume-build gains --fph-timeout-ms for local exercise.
1 parent 417ed97 commit f4e3ab0

6 files changed

Lines changed: 143 additions & 10 deletions

File tree

packages/orchestrator/cmd/resume-build/main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,15 @@ func main() {
7272
cmdSignalPause := flag.String("cmd-signal-pause", "", "execute command in sandbox, then wait for SIGUSR1 before pausing")
7373
optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)")
7474

75+
// Pause-time FPH override; 0 = use LD default (off).
76+
fphTimeoutMs := flag.Int("fph-timeout-ms", 0, "override free-page-hinting-timeout-ms LD flag (0 = use LD default)")
77+
7578
flag.Parse()
7679

80+
if *fphTimeoutMs > 0 {
81+
featureflags.NewIntFlag("free-page-hinting-timeout-ms", *fphTimeoutMs)
82+
}
83+
7784
if *fromBuild == "" {
7885
log.Fatal("-from-build required")
7986
}

packages/orchestrator/pkg/sandbox/fc/client.go

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -427,8 +427,11 @@ func (c *apiClient) startVM(ctx context.Context) error {
427427
return nil
428428
}
429429

430-
func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
431-
ctx, span := tracer.Start(ctx, "enable-free-page-reporting")
430+
// installBalloon installs the virtio-balloon pre-boot with target size 0.
431+
// FreePageReporting and FreePageHinting are each gated independently at
432+
// template build time (FC version for FPR; guest kernel version for FPH).
433+
func (c *apiClient) installBalloon(ctx context.Context, freePageReporting, freePageHinting bool) error {
434+
ctx, span := tracer.Start(ctx, "install-balloon")
432435
defer span.End()
433436

434437
amountMib := int64(0)
@@ -439,7 +442,8 @@ func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
439442
Body: &models.Balloon{
440443
AmountMib: &amountMib,
441444
DeflateOnOom: &deflateOnOom,
442-
FreePageReporting: true,
445+
FreePageReporting: freePageReporting,
446+
FreePageHinting: freePageHinting,
443447
},
444448
}
445449

@@ -451,6 +455,33 @@ func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
451455
return nil
452456
}
453457

458+
func (c *apiClient) startBalloonHinting(ctx context.Context, acknowledgeOnStop bool) error {
459+
params := operations.StartBalloonHintingParams{
460+
Context: ctx,
461+
Body: &models.BalloonStartCmd{AcknowledgeOnStop: acknowledgeOnStop},
462+
}
463+
_, err := c.client.Operations.StartBalloonHinting(&params)
464+
if err != nil {
465+
return fmt.Errorf("error starting balloon hinting: %w", err)
466+
}
467+
468+
return nil
469+
}
470+
471+
func (c *apiClient) describeBalloonHinting(ctx context.Context) (hostCmd, guestCmd int64, err error) {
472+
params := operations.DescribeBalloonHintingParams{Context: ctx}
473+
res, err := c.client.Operations.DescribeBalloonHinting(&params)
474+
if err != nil {
475+
return 0, 0, err
476+
}
477+
if res.Payload.HostCmd != nil {
478+
hostCmd = *res.Payload.HostCmd
479+
}
480+
guestCmd = res.Payload.GuestCmd
481+
482+
return hostCmd, guestCmd, nil
483+
}
484+
454485
func (c *apiClient) memoryMapping(ctx context.Context) (*memory.Mapping, error) {
455486
params := operations.GetMemoryMappingsParams{
456487
Context: ctx,
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package fc
2+
3+
import (
4+
"strings"
5+
6+
"github.com/e2b-dev/infra/packages/shared/pkg/utils"
7+
)
8+
9+
// MinFreePageHintingKernelVersion is the minimum guest kernel version that
10+
// contains the virtio-balloon free-page-hinting race fix. Templates built
11+
// against an older kernel get the balloon installed with FreePageHinting
12+
// disabled so the race can't be triggered, regardless of any runtime
13+
// LaunchDarkly toggle. Bump this once the fixed kernel is published to
14+
// e2b-dev/fc-kernels.
15+
const MinFreePageHintingKernelVersion = "999.0.0"
16+
17+
// kernelSupportsFreePageHinting reports whether kernelVersion (e.g.
18+
// "vmlinux-6.1.158") includes the FPH/MADV_DONTNEED race fix.
19+
func kernelSupportsFreePageHinting(kernelVersion string) bool {
20+
v := strings.TrimPrefix(kernelVersion, "vmlinux-")
21+
ok, _ := utils.IsGTEVersion(v, MinFreePageHintingKernelVersion)
22+
23+
return ok
24+
}

packages/orchestrator/pkg/sandbox/fc/process.go

Lines changed: 57 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/rootfs"
2727
"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/socket"
2828
"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/template"
29+
"github.com/e2b-dev/infra/packages/shared/pkg/fc/client/operations"
2930
"github.com/e2b-dev/infra/packages/shared/pkg/keys"
3031
"github.com/e2b-dev/infra/packages/shared/pkg/logger"
3132
sbxlogger "github.com/e2b-dev/infra/packages/shared/pkg/logger/sandbox"
@@ -440,13 +441,13 @@ func (p *Process) Create(
440441
telemetry.ReportEvent(ctx, "set fc entropy config")
441442

442443
if freePageReporting {
443-
err = p.client.enableFreePageReporting(ctx)
444-
if err != nil {
444+
freePageHinting := kernelSupportsFreePageHinting(p.Versions.KernelVersion)
445+
if err := p.client.installBalloon(ctx, freePageReporting, freePageHinting); err != nil {
445446
fcStopErr := p.Stop(ctx)
446447

447-
return errors.Join(fmt.Errorf("error enabling free page reporting: %w", err), fcStopErr)
448+
return errors.Join(fmt.Errorf("error installing balloon device: %w", err), fcStopErr)
448449
}
449-
telemetry.ReportEvent(ctx, "enabled free page reporting")
450+
telemetry.ReportEvent(ctx, "installed balloon device", attribute.Bool("balloon.free_page_hinting", freePageHinting))
450451
}
451452

452453
err = p.client.startVM(ctx)
@@ -710,6 +711,58 @@ func (p *Process) Pause(ctx context.Context) error {
710711
return p.client.pauseVM(ctx)
711712
}
712713

714+
// DrainBalloon triggers a free-page-hinting run and blocks until the guest
715+
// acknowledges or ctx fires. No-op when no balloon device is configured
716+
// (FC returns 400) so it survives snapshot/resume without local state.
717+
// Records `drain-balloon.outcome` on the span: ok | not-configured | timeout |
718+
// start-failed | describe-failed.
719+
func (p *Process) DrainBalloon(ctx context.Context) error {
720+
ctx, span := tracer.Start(ctx, "drain-balloon")
721+
outcome := "ok"
722+
defer func() {
723+
span.SetAttributes(attribute.String("drain-balloon.outcome", outcome))
724+
span.End()
725+
}()
726+
727+
if err := p.client.startBalloonHinting(ctx, true /* ackOnStop */); err != nil {
728+
var notConfigured *operations.StartBalloonHintingBadRequest
729+
if errors.As(err, &notConfigured) {
730+
outcome = "not-configured"
731+
732+
return nil
733+
}
734+
735+
outcome = "start-failed"
736+
737+
return fmt.Errorf("start balloon hinting: %w", err)
738+
}
739+
740+
backoff := 5 * time.Millisecond
741+
for {
742+
select {
743+
case <-ctx.Done():
744+
outcome = "timeout"
745+
746+
return ctx.Err()
747+
case <-time.After(backoff):
748+
}
749+
750+
host, guest, err := p.client.describeBalloonHinting(ctx)
751+
if err != nil {
752+
outcome = "describe-failed"
753+
754+
return fmt.Errorf("balloon hinting status: %w", err)
755+
}
756+
// host_cmd is monotonic and we just called start, so host > 0
757+
// after FC accepts it. Require it to guard against transient
758+
// nil/zero responses returning a false-positive completion.
759+
if host > 0 && guest >= host {
760+
return nil
761+
}
762+
backoff = min(backoff*2, 50*time.Millisecond)
763+
}
764+
}
765+
713766
// CreateSnapshot VM needs to be paused before creating a snapshot.
714767
func (p *Process) CreateSnapshot(ctx context.Context, snapfilePath string) error {
715768
ctx, childSpan := tracer.Start(ctx, "create-snapshot-fc")

packages/orchestrator/pkg/sandbox/sandbox.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,8 @@ type Sandbox struct {
218218
files *storage.SandboxFiles
219219
cleanup *Cleanup
220220

221+
featureFlags *featureflags.Client
222+
221223
process *fc.Process
222224
cgroupHandle *cgroup.CgroupHandle
223225

@@ -458,7 +460,8 @@ func (f *Factory) CreateSandbox(
458460
files: sandboxFiles,
459461
process: fcHandle,
460462

461-
cleanup: cleanup,
463+
cleanup: cleanup,
464+
featureFlags: f.featureFlags,
462465

463466
APIStoredConfig: apiConfigToStore,
464467

@@ -799,7 +802,8 @@ func (f *Factory) ResumeSandbox(
799802
files: sandboxFiles,
800803
process: fcHandle,
801804

802-
cleanup: cleanup,
805+
cleanup: cleanup,
806+
featureFlags: f.featureFlags,
803807

804808
APIStoredConfig: apiConfigToStore,
805809
CABundle: f.egressProxy.CABundle(),
@@ -1053,6 +1057,17 @@ func (s *Sandbox) Pause(
10531057
// Stop the health check before pausing the VM
10541058
s.Checks.Stop()
10551059

1060+
// Drain free-page-hinting before pause so the snapshot doesn't capture
1061+
// pages the guest already considers free. No-op when no balloon. Failures
1062+
// are logged but non-fatal. Timeout=0 disables the step.
1063+
if t := time.Duration(s.featureFlags.IntFlag(ctx, featureflags.FreePageHintingTimeoutMs)) * time.Millisecond; t > 0 {
1064+
drainCtx, cancel := context.WithTimeout(ctx, t)
1065+
if err := s.process.DrainBalloon(drainCtx); err != nil {
1066+
telemetry.ReportError(ctx, "balloon hinting drain failed (continuing pause)", err)
1067+
}
1068+
cancel()
1069+
}
1070+
10561071
if err := s.process.Pause(ctx); err != nil {
10571072
return nil, fmt.Errorf("failed to pause VM: %w", err)
10581073
}

packages/shared/pkg/featureflags/flags.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,10 @@ var (
158158
BestOfKMaxOvercommit = NewIntFlag("best-of-k-max-overcommit", 400) // Default R=4 (stored as percentage, max over-commit ratio)
159159
BestOfKAlpha = NewIntFlag("best-of-k-alpha", 50) // Default Alpha=0.5 (stored as percentage for int flag, current usage weight)
160160
EnvdInitTimeoutMilliseconds = NewIntFlag("envd-init-request-timeout-milliseconds", 50) // Timeout for envd init request in milliseconds
161-
HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s)
161+
// FreePageHintingTimeoutMs gates a pre-pause virtio-balloon free-page-hinting
162+
// drain. 0 disables it. Operator opts in once the kernel has the FPH race fix.
163+
FreePageHintingTimeoutMs = NewIntFlag("free-page-hinting-timeout-ms", 0)
164+
HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s)
162165
MaxCacheWriterConcurrencyFlag = NewIntFlag("max-cache-writer-concurrency", 10)
163166

164167
// BuildCacheMaxUsagePercentage the maximum percentage of the cache disk storage

0 commit comments

Comments
 (0)