diff --git a/packages/orchestrator/cmd/resume-build/main.go b/packages/orchestrator/cmd/resume-build/main.go index fe66689035..d90e3e2604 100644 --- a/packages/orchestrator/cmd/resume-build/main.go +++ b/packages/orchestrator/cmd/resume-build/main.go @@ -72,8 +72,15 @@ func main() { cmdSignalPause := flag.String("cmd-signal-pause", "", "execute command in sandbox, then wait for SIGUSR1 before pausing") optimize := flag.Bool("optimize", false, "collect fresh prefetch mapping after pause (resumes snapshot to record page faults)") + // Pause-time FPH override; 0 = use LD default (off). + fphTimeoutMs := flag.Int("fph-timeout-ms", 0, "override free-page-hinting-timeout-ms LD flag (0 = use LD default)") + flag.Parse() + if *fphTimeoutMs > 0 { + featureflags.NewIntFlag("free-page-hinting-timeout-ms", *fphTimeoutMs) + } + if *fromBuild == "" { log.Fatal("-from-build required") } diff --git a/packages/orchestrator/pkg/sandbox/fc/client.go b/packages/orchestrator/pkg/sandbox/fc/client.go index d35d28040c..5a21d74e46 100644 --- a/packages/orchestrator/pkg/sandbox/fc/client.go +++ b/packages/orchestrator/pkg/sandbox/fc/client.go @@ -427,8 +427,11 @@ func (c *apiClient) startVM(ctx context.Context) error { return nil } -func (c *apiClient) enableFreePageReporting(ctx context.Context) error { - ctx, span := tracer.Start(ctx, "enable-free-page-reporting") +// installBalloon installs the virtio-balloon pre-boot with target size 0. +// FreePageReporting and FreePageHinting are each gated independently at +// template build time (FC version for FPR; guest kernel version for FPH). 
+func (c *apiClient) installBalloon(ctx context.Context, freePageReporting, freePageHinting bool) error {
+	ctx, span := tracer.Start(ctx, "install-balloon")
 	defer span.End()
 
 	amountMib := int64(0)
@@ -439,7 +442,8 @@ func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
 		Body: &models.Balloon{
 			AmountMib:         &amountMib,
 			DeflateOnOom:      &deflateOnOom,
-			FreePageReporting: true,
+			FreePageReporting: freePageReporting,
+			FreePageHinting:   freePageHinting,
 		},
 	}
 
@@ -451,6 +455,40 @@ func (c *apiClient) enableFreePageReporting(ctx context.Context) error {
 	return nil
 }
 
+// startBalloonHinting issues the StartBalloonHinting operation, passing
+// acknowledgeOnStop through as the command's AcknowledgeOnStop field. Any
+// client error is returned wrapped.
+func (c *apiClient) startBalloonHinting(ctx context.Context, acknowledgeOnStop bool) error {
+	params := operations.StartBalloonHintingParams{
+		Context: ctx,
+		Body:    &models.BalloonStartCmd{AcknowledgeOnStop: acknowledgeOnStop},
+	}
+	_, err := c.client.Operations.StartBalloonHinting(&params)
+	if err != nil {
+		return fmt.Errorf("error starting balloon hinting: %w", err)
+	}
+
+	return nil
+}
+
+// describeBalloonHinting fetches the balloon hinting status and returns its
+// host and guest command values. HostCmd is a pointer in the payload, so a
+// missing value is reported as 0. The client error is returned as-is, without
+// wrapping.
+func (c *apiClient) describeBalloonHinting(ctx context.Context) (hostCmd, guestCmd int64, err error) {
+	params := operations.DescribeBalloonHintingParams{Context: ctx}
+	res, err := c.client.Operations.DescribeBalloonHinting(&params)
+	if err != nil {
+		return 0, 0, err
+	}
+	if res.Payload.HostCmd != nil {
+		hostCmd = *res.Payload.HostCmd
+	}
+	guestCmd = res.Payload.GuestCmd
+
+	return hostCmd, guestCmd, nil
+}
+
 func (c *apiClient) memoryMapping(ctx context.Context) (*memory.Mapping, error) {
 	params := operations.GetMemoryMappingsParams{
 		Context: ctx,
diff --git a/packages/orchestrator/pkg/sandbox/fc/fph_gates.go b/packages/orchestrator/pkg/sandbox/fc/fph_gates.go
new file mode 100644
index 0000000000..3be8c483e6
--- /dev/null
+++ b/packages/orchestrator/pkg/sandbox/fc/fph_gates.go
@@ -0,0 +1,36 @@
+package fc
+
+import (
+	"strings"
+
+	"github.com/e2b-dev/infra/packages/shared/pkg/fcversion"
+	"github.com/e2b-dev/infra/packages/shared/pkg/utils"
+)
+
+// MinFreePageHintingKernelVersion is the minimum
+// guest kernel version that contains the virtio-balloon free-page-hinting
+// race fix. Templates built against an older kernel get the balloon installed
+// with FreePageHinting disabled so the race can't be triggered, regardless of
+// any runtime LaunchDarkly toggle. The 999.0.0 placeholder matches no real
+// kernel; bump it once the fixed kernel is published to e2b-dev/fc-kernels.
+const MinFreePageHintingKernelVersion = "999.0.0"
+
+// kernelSupportsFreePageHinting reports whether kernelVersion (e.g. "vmlinux-6.1.158")
+// has the FPH/MADV_DONTNEED race fix; uncomparable versions count as unsupported.
+func kernelSupportsFreePageHinting(kernelVersion string) bool {
+	v := strings.TrimPrefix(kernelVersion, "vmlinux-")
+	ok, err := utils.IsGTEVersion(v, MinFreePageHintingKernelVersion)
+
+	return err == nil && ok
+}
+
+// fcSupportsFreePageHinting reports whether the Firecracker version exposes
+// the start_balloon_hinting / describe_balloon_hinting API (v1.14+).
+func fcSupportsFreePageHinting(fcVersion string) bool {
+	info, err := fcversion.New(fcVersion)
+	if err != nil {
+		return false
+	}
+
+	return info.HasFreePageHinting()
+}
diff --git a/packages/orchestrator/pkg/sandbox/fc/process.go b/packages/orchestrator/pkg/sandbox/fc/process.go
index 977091304d..297a051ec6 100644
--- a/packages/orchestrator/pkg/sandbox/fc/process.go
+++ b/packages/orchestrator/pkg/sandbox/fc/process.go
@@ -26,6 +26,7 @@ import (
 	"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/rootfs"
 	"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/socket"
 	"github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/template"
+	"github.com/e2b-dev/infra/packages/shared/pkg/fc/client/operations"
 	"github.com/e2b-dev/infra/packages/shared/pkg/keys"
 	"github.com/e2b-dev/infra/packages/shared/pkg/logger"
 	sbxlogger "github.com/e2b-dev/infra/packages/shared/pkg/logger/sandbox"
@@ -440,13 +441,13 @@ func (p *Process) Create(
 	telemetry.ReportEvent(ctx, "set fc entropy config")
 
 	if freePageReporting {
-		err = p.client.enableFreePageReporting(ctx)
-		if err != nil {
+		freePageHinting
:= fcSupportsFreePageHinting(p.Versions.FirecrackerVersion) && kernelSupportsFreePageHinting(p.Versions.KernelVersion) + if err := p.client.installBalloon(ctx, freePageReporting, freePageHinting); err != nil { fcStopErr := p.Stop(ctx) - return errors.Join(fmt.Errorf("error enabling free page reporting: %w", err), fcStopErr) + return errors.Join(fmt.Errorf("error installing balloon device: %w", err), fcStopErr) } - telemetry.ReportEvent(ctx, "enabled free page reporting") + telemetry.ReportEvent(ctx, "installed balloon device", attribute.Bool("balloon.free_page_hinting", freePageHinting)) } err = p.client.startVM(ctx) @@ -710,6 +711,64 @@ func (p *Process) Pause(ctx context.Context) error { return p.client.pauseVM(ctx) } +// DrainBalloon triggers a free-page-hinting run and blocks until the guest +// acknowledges or ctx fires. No-op on FC < v1.14 (no API) and when no balloon +// device is configured (FC returns 400) so it survives snapshot/resume +// without local state. Records `drain-balloon.outcome` on the span: +// ok | fc-unsupported | not-configured | timeout | start-failed | describe-failed. 
+//
+// A status poll that fails after ctx has expired is recorded as timeout rather
+// than describe-failed. Returns nil on completion or no-op, ctx.Err() when ctx
+// fires between polls, and a wrapped client error otherwise.
+func (p *Process) DrainBalloon(ctx context.Context) error {
+	ctx, span := tracer.Start(ctx, "drain-balloon")
+	outcome := "ok"
+	// Deferred so every return path below reports the outcome it set.
+	defer func() {
+		span.SetAttributes(attribute.String("drain-balloon.outcome", outcome))
+		span.End()
+	}()
+
+	if !fcSupportsFreePageHinting(p.Versions.FirecrackerVersion) {
+		outcome = "fc-unsupported"
+
+		return nil
+	}
+
+	if err := p.client.startBalloonHinting(ctx, true /* ackOnStop */); err != nil {
+		// A bad-request response is treated as "no balloon device": nothing to drain.
+		var notConfigured *operations.StartBalloonHintingBadRequest
+		if errors.As(err, &notConfigured) {
+			outcome = "not-configured"
+
+			return nil
+		}
+
+		outcome = "start-failed"
+
+		return fmt.Errorf("start balloon hinting: %w", err)
+	}
+
+	// Poll with exponential backoff: 5ms, doubling up to a 50ms cap.
+	backoff := 5 * time.Millisecond
+	for {
+		select {
+		case <-ctx.Done():
+			outcome = "timeout"
+
+			return ctx.Err()
+		case <-time.After(backoff):
+		}
+
+		host, guest, err := p.client.describeBalloonHinting(ctx)
+		if err != nil {
+			// The poll shares ctx, so a deadline that lands mid-request
+			// surfaces here; report it as the timeout it is.
+			outcome = "describe-failed"
+			if ctx.Err() != nil {
+				outcome = "timeout"
+			}
+
+			return fmt.Errorf("balloon hinting status: %w", err)
+		}
+		// host_cmd is monotonic and we just called start, so host > 0
+		// after FC accepts it. Require it to guard against transient
+		// nil/zero responses returning a false-positive completion.
+		if host > 0 && guest >= host {
+			return nil
+		}
+		backoff = min(backoff*2, 50*time.Millisecond)
+	}
+}
+
 // CreateSnapshot VM needs to be paused before creating a snapshot.
func (p *Process) CreateSnapshot(ctx context.Context, snapfilePath string) error { ctx, childSpan := tracer.Start(ctx, "create-snapshot-fc") diff --git a/packages/orchestrator/pkg/sandbox/sandbox.go b/packages/orchestrator/pkg/sandbox/sandbox.go index 23ca0bda82..3583e67c4c 100644 --- a/packages/orchestrator/pkg/sandbox/sandbox.go +++ b/packages/orchestrator/pkg/sandbox/sandbox.go @@ -218,6 +218,8 @@ type Sandbox struct { files *storage.SandboxFiles cleanup *Cleanup + featureFlags *featureflags.Client + process *fc.Process cgroupHandle *cgroup.CgroupHandle @@ -458,7 +460,8 @@ func (f *Factory) CreateSandbox( files: sandboxFiles, process: fcHandle, - cleanup: cleanup, + cleanup: cleanup, + featureFlags: f.featureFlags, APIStoredConfig: apiConfigToStore, @@ -799,7 +802,8 @@ func (f *Factory) ResumeSandbox( files: sandboxFiles, process: fcHandle, - cleanup: cleanup, + cleanup: cleanup, + featureFlags: f.featureFlags, APIStoredConfig: apiConfigToStore, CABundle: f.egressProxy.CABundle(), @@ -1053,6 +1057,17 @@ func (s *Sandbox) Pause( // Stop the health check before pausing the VM s.Checks.Stop() + // Drain free-page-hinting before pause so the snapshot doesn't capture + // pages the guest already considers free. No-op when no balloon. Failures + // are logged but non-fatal. Timeout=0 disables the step. 
+ if t := time.Duration(s.featureFlags.IntFlag(ctx, featureflags.FreePageHintingTimeoutMs)) * time.Millisecond; t > 0 { + drainCtx, cancel := context.WithTimeout(ctx, t) + if err := s.process.DrainBalloon(drainCtx); err != nil { + telemetry.ReportError(ctx, "balloon hinting drain failed (continuing pause)", err) + } + cancel() + } + if err := s.process.Pause(ctx); err != nil { return nil, fmt.Errorf("failed to pause VM: %w", err) } diff --git a/packages/shared/pkg/fcversion/sandbox_features.go b/packages/shared/pkg/fcversion/sandbox_features.go index b768f199d4..1985cb61d4 100644 --- a/packages/shared/pkg/fcversion/sandbox_features.go +++ b/packages/shared/pkg/fcversion/sandbox_features.go @@ -11,3 +11,10 @@ func (v *Info) HasHugePages() bool { func (v *Info) HasFreePageReporting() bool { return v.lastReleaseVersion.Major() > 1 || (v.lastReleaseVersion.Major() == 1 && v.lastReleaseVersion.Minor() >= 14) } + +// HasFreePageHinting reports whether the Firecracker version exposes the +// balloon free-page-hinting API (start_balloon_hinting / describe_balloon_hinting). +// Introduced in v1.14. 
+func (v *Info) HasFreePageHinting() bool { + return v.lastReleaseVersion.Major() > 1 || (v.lastReleaseVersion.Major() == 1 && v.lastReleaseVersion.Minor() >= 14) +} diff --git a/packages/shared/pkg/featureflags/flags.go b/packages/shared/pkg/featureflags/flags.go index 27cf349da8..955e442b24 100644 --- a/packages/shared/pkg/featureflags/flags.go +++ b/packages/shared/pkg/featureflags/flags.go @@ -158,7 +158,10 @@ var ( BestOfKMaxOvercommit = NewIntFlag("best-of-k-max-overcommit", 400) // Default R=4 (stored as percentage, max over-commit ratio) BestOfKAlpha = NewIntFlag("best-of-k-alpha", 50) // Default Alpha=0.5 (stored as percentage for int flag, current usage weight) EnvdInitTimeoutMilliseconds = NewIntFlag("envd-init-request-timeout-milliseconds", 50) // Timeout for envd init request in milliseconds - HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s) + // FreePageHintingTimeoutMs gates a pre-pause virtio-balloon free-page-hinting + // drain. 0 disables it. Operator opts in once the kernel has the FPH race fix. + FreePageHintingTimeoutMs = NewIntFlag("free-page-hinting-timeout-ms", 0) + HostStatsSamplingInterval = NewIntFlag("host-stats-sampling-interval", 5000) // Host stats sampling interval in milliseconds (default 5s) MaxCacheWriterConcurrencyFlag = NewIntFlag("max-cache-writer-concurrency", 10) // BuildCacheMaxUsagePercentage the maximum percentage of the cache disk storage