diff --git a/.claude/worktrees/agent-a3e892705328d7738 b/.claude/worktrees/agent-a3e892705328d7738 new file mode 160000 index 0000000000..ea3676c682 --- /dev/null +++ b/.claude/worktrees/agent-a3e892705328d7738 @@ -0,0 +1 @@ +Subproject commit ea3676c682f3166094f3fb06a2e80118fc731e17 diff --git a/packages/orchestrator/benchmarks/benchmark_test.go b/packages/orchestrator/benchmarks/benchmark_test.go index 68178b38e5..64f9427f68 100644 --- a/packages/orchestrator/benchmarks/benchmark_test.go +++ b/packages/orchestrator/benchmarks/benchmark_test.go @@ -275,6 +275,7 @@ func BenchmarkBaseImageLaunch(b *testing.B) { sandboxes, templateCache, buildMetrics, + nil, ) buildPath := filepath.Join(os.Getenv("LOCAL_TEMPLATE_STORAGE_BASE_PATH"), buildID, "rootfs.ext4") diff --git a/packages/orchestrator/benchmarks/concurrent_benchmark_test.go b/packages/orchestrator/benchmarks/concurrent_benchmark_test.go index 0143e7462e..594ab92774 100644 --- a/packages/orchestrator/benchmarks/concurrent_benchmark_test.go +++ b/packages/orchestrator/benchmarks/concurrent_benchmark_test.go @@ -324,6 +324,7 @@ func BenchmarkConcurrentResume(b *testing.B) { config.BuilderConfig, l, featureFlags, sandboxFactory, persistenceTemplate, persistenceBuild, artifactRegistry, dockerhubRepository, sandboxProxy, sandboxes, templateCache, buildMetrics, + nil, ) // build template if not cached diff --git a/packages/orchestrator/chunks.proto b/packages/orchestrator/chunks.proto index 55a1a539db..89993d17f2 100644 --- a/packages/orchestrator/chunks.proto +++ b/packages/orchestrator/chunks.proto @@ -14,12 +14,6 @@ message PeerAvailability { // use_storage is true when the GCS upload has completed and the caller // should switch to reading from GCS/NFS directly instead of this peer. bool use_storage = 2; - // memfile_header contains the serialized V4 header (with FrameTables) - // for the memfile, included when use_storage is true and the upload was compressed. 
- bytes memfile_header = 3; - // rootfs_header contains the serialized V4 header (with FrameTables) - // for the rootfs, included when use_storage is true and the upload was compressed. - bytes rootfs_header = 4; } message GetBuildFileSizeRequest { diff --git a/packages/orchestrator/cmd/create-build/main.go b/packages/orchestrator/cmd/create-build/main.go index dfbcca6779..6a8690b4f3 100644 --- a/packages/orchestrator/cmd/create-build/main.go +++ b/packages/orchestrator/cmd/create-build/main.go @@ -320,10 +320,18 @@ func doBuild( buildMetrics, _ := metrics.NewBuildMetrics(noop.MeterProvider{}) sandboxFactory := sandbox.NewFactory(c.BuilderConfig, networkPool, devicePool, featureFlags, hoststats.NewNoopDelivery(), cgroup.NewNoopManager(), network.NewNoopEgressProxy(), sandboxes) + // Layered V4 builds need the upload coordinator so child layers wait on + // their parents' header finalization. Redis is nil (CLI is single-host — + // no cross-orch signaling needed); local same-orch coordination via + // futures is what matters here. 
+ uploads := sandbox.NewUploads(templateCache, persistenceTemplate, nil) + defer uploads.Stop() + builder := build.NewBuilder( builderConfig, l, featureFlags, sandboxFactory, persistenceTemplate, persistenceBuild, artifactRegistry, dockerhubRepo, sandboxProxy, sandboxes, templateCache, buildMetrics, + uploads, ) l = l.With(zap.String("envID", templateID)).With(zap.String("buildID", buildID)) diff --git a/packages/orchestrator/cmd/resume-build/main.go b/packages/orchestrator/cmd/resume-build/main.go index 517ef0b437..bb4568b7a3 100644 --- a/packages/orchestrator/cmd/resume-build/main.go +++ b/packages/orchestrator/cmd/resume-build/main.go @@ -638,14 +638,18 @@ func (r *runner) pauseOnce(ctx context.Context, opts pauseOptions, verbose bool) // Only upload when not in benchmark mode (verbose = true means single run) if verbose { - paths := storage.Paths{BuildID: opts.newBuildID} if opts.isRemoteStorage { fmt.Println("πŸ“€ Uploading snapshot...") } else { fmt.Println("πŸ’Ύ Saving snapshot to local storage...") } - if _, _, err := snapshot.Upload(ctx, r.storage, paths, storage.CompressConfig{}, nil, ""); err != nil { + upload, err := sandbox.NewUpload(ctx, nil, snapshot, r.storage, storage.CompressConfig{}, nil, "") + if err != nil { + return timings, fmt.Errorf("failed to prepare upload: %w", err) + } + + if err := upload.Run(ctx); err != nil { return timings, fmt.Errorf("failed to upload snapshot: %w", err) } diff --git a/packages/orchestrator/cmd/smoketest/smoke_test.go b/packages/orchestrator/cmd/smoketest/smoke_test.go index 5134c6379b..dc20cd89c3 100644 --- a/packages/orchestrator/cmd/smoketest/smoke_test.go +++ b/packages/orchestrator/cmd/smoketest/smoke_test.go @@ -237,6 +237,7 @@ func newTestInfra(t *testing.T, ctx context.Context) *testInfra { builderConfig, l, flags, factory, persistenceTemplate, persistenceBuild, artifactRegistry, dockerhubRepo, sandboxProxy, sandboxes, templateCache, buildMetrics, + nil, ) return ti diff --git 
a/packages/orchestrator/pkg/factories/run.go b/packages/orchestrator/pkg/factories/run.go index 7a625bdb4f..65397103de 100644 --- a/packages/orchestrator/pkg/factories/run.go +++ b/packages/orchestrator/pkg/factories/run.go @@ -548,6 +548,13 @@ func run(config cfg.Config, opts Options) (success bool) { builder := chrooted.NewBuilder(config) volumeService := volumes.New(config, builder) + uploads := sandbox.NewUploads(templateCache, persistence, redisClient) + closers = append(closers, closer{"pending uploads", func(context.Context) error { + uploads.Stop() + + return nil + }}) + orchestratorService, err := server.New(server.ServiceConfig{ Config: config, SandboxFactory: sandboxFactory, @@ -561,10 +568,14 @@ func run(config cfg.Config, opts Options) (success bool) { FeatureFlags: featureFlags, SbxEventsService: events.NewEventsService(sbxEventsDeliveryTargets), PeerRegistry: peerRegistry, + Uploads: uploads, }) if err != nil { logger.L().Fatal(ctx, "failed to create orchestrator server", zap.Error(err)) } + closers = append(closers, closer{"orchestrator server", func(context.Context) error { + return orchestratorService.Close() + }}) // template manager sandbox logger tmplSbxLoggerExternal := sbxlogger.NewLogger( @@ -639,6 +650,7 @@ func run(config cfg.Config, opts Options) (success bool) { templateCache, persistence, buildPersistence, + uploads, ) if err != nil { logger.L().Fatal(ctx, "failed to create template manager", zap.Error(err)) diff --git a/packages/orchestrator/pkg/sandbox/block/device.go b/packages/orchestrator/pkg/sandbox/block/device.go index 5cd6c0ba79..9f81d58abe 100644 --- a/packages/orchestrator/pkg/sandbox/block/device.go +++ b/packages/orchestrator/pkg/sandbox/block/device.go @@ -36,6 +36,7 @@ type ReadonlyDevice interface { Slicer BlockSize() int64 Header() *header.Header + SwapHeader(h *header.Header) } type Device interface { diff --git a/packages/orchestrator/pkg/sandbox/block/empty.go b/packages/orchestrator/pkg/sandbox/block/empty.go index 
8574b2a75d..e7e1795af8 100644 --- a/packages/orchestrator/pkg/sandbox/block/empty.go +++ b/packages/orchestrator/pkg/sandbox/block/empty.go @@ -4,6 +4,7 @@ import ( "context" "errors" "fmt" + "sync/atomic" "github.com/google/uuid" @@ -11,7 +12,7 @@ import ( ) type Empty struct { - header *header.Header + header atomic.Pointer[header.Header] } var _ ReadonlyDevice = (*Empty)(nil) @@ -26,9 +27,10 @@ func NewEmpty(size int64, blockSize int64, buildID uuid.UUID) (*Empty, error) { return nil, fmt.Errorf("failed to create header: %w", err) } - return &Empty{ - header: h, - }, nil + e := &Empty{} + e.header.Store(h) + + return e, nil } func (e *Empty) ReadAt(ctx context.Context, p []byte, off int64) (int, error) { @@ -41,11 +43,11 @@ func (e *Empty) ReadAt(ctx context.Context, p []byte, off int64) (int, error) { } func (e *Empty) Size(_ context.Context) (int64, error) { - return int64(e.header.Metadata.Size), nil + return int64(e.Header().Metadata.Size), nil } func (e *Empty) BlockSize() int64 { - return int64(e.header.Metadata.BlockSize) + return int64(e.Header().Metadata.BlockSize) } func (e *Empty) Close() error { @@ -54,7 +56,7 @@ func (e *Empty) Close() error { func (e *Empty) Slice(_ context.Context, off, length int64) ([]byte, error) { end := off + length - size := int64(e.header.Metadata.Size) + size := int64(e.Header().Metadata.Size) if end > size { end = size length = end - off @@ -65,7 +67,11 @@ func (e *Empty) Slice(_ context.Context, off, length int64) ([]byte, error) { } func (e *Empty) Header() *header.Header { - return e.header + return e.header.Load() +} + +func (e *Empty) SwapHeader(h *header.Header) { + e.header.Store(h) } func (e *Empty) UpdateSize() error { diff --git a/packages/orchestrator/pkg/sandbox/block/local.go b/packages/orchestrator/pkg/sandbox/block/local.go index 013c4b8940..8400c3cc0f 100644 --- a/packages/orchestrator/pkg/sandbox/block/local.go +++ b/packages/orchestrator/pkg/sandbox/block/local.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" 
"os" + "sync/atomic" "github.com/google/uuid" @@ -15,7 +16,7 @@ type Local struct { f *os.File path string - header *header.Header + header atomic.Pointer[header.Header] } var _ ReadonlyDevice = (*Local)(nil) @@ -44,11 +45,10 @@ func NewLocal(path string, blockSize int64, buildID uuid.UUID) (*Local, error) { return nil, fmt.Errorf("failed to create header: %w", err) } - return &Local{ - f: f, - path: path, - header: h, - }, nil + d := &Local{f: f, path: path} + d.header.Store(h) + + return d, nil } func (d *Local) Path() string { @@ -65,11 +65,11 @@ func (d *Local) ReadAt(ctx context.Context, p []byte, off int64) (int, error) { } func (d *Local) Size(_ context.Context) (int64, error) { - return int64(d.header.Metadata.Size), nil + return int64(d.Header().Metadata.Size), nil } func (d *Local) BlockSize() int64 { - return int64(d.header.Metadata.BlockSize) + return int64(d.Header().Metadata.BlockSize) } func (d *Local) Close() (e error) { @@ -83,7 +83,7 @@ func (d *Local) Close() (e error) { func (d *Local) Slice(_ context.Context, off, length int64) ([]byte, error) { end := off + length - size := int64(d.header.Metadata.Size) + size := int64(d.Header().Metadata.Size) if end > size { end = size length = end - off @@ -99,7 +99,11 @@ func (d *Local) Slice(_ context.Context, off, length int64) ([]byte, error) { } func (d *Local) Header() *header.Header { - return d.header + return d.header.Load() +} + +func (d *Local) SwapHeader(h *header.Header) { + d.header.Store(h) } func (d *Local) UpdateHeaderSize() error { @@ -108,7 +112,16 @@ func (d *Local) UpdateHeaderSize() error { return fmt.Errorf("failed to get file info: %w", err) } - d.header.Metadata.Size = uint64(info.Size()) + h := d.Header() + metaCopy := *h.Metadata + metaCopy.Size = uint64(info.Size()) + + updated := &header.Header{ + Metadata: &metaCopy, + Builds: h.Builds, + Mapping: h.Mapping, + } + d.SwapHeader(updated) return nil } diff --git a/packages/orchestrator/pkg/sandbox/block/mocks/mockreadonlydevice.go 
b/packages/orchestrator/pkg/sandbox/block/mocks/mockreadonlydevice.go index 8f0a2e5717..8336b299c1 100644 --- a/packages/orchestrator/pkg/sandbox/block/mocks/mockreadonlydevice.go +++ b/packages/orchestrator/pkg/sandbox/block/mocks/mockreadonlydevice.go @@ -377,3 +377,43 @@ func (_c *MockReadonlyDevice_Slice_Call) RunAndReturn(run func(ctx context.Conte _c.Call.Return(run) return _c } + +// SwapHeader provides a mock function for the type MockReadonlyDevice +func (_mock *MockReadonlyDevice) SwapHeader(h *header.Header) { + _mock.Called(h) + return +} + +// MockReadonlyDevice_SwapHeader_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'SwapHeader' +type MockReadonlyDevice_SwapHeader_Call struct { + *mock.Call +} + +// SwapHeader is a helper method to define mock.On call +// - h *header.Header +func (_e *MockReadonlyDevice_Expecter) SwapHeader(h interface{}) *MockReadonlyDevice_SwapHeader_Call { + return &MockReadonlyDevice_SwapHeader_Call{Call: _e.mock.On("SwapHeader", h)} +} + +func (_c *MockReadonlyDevice_SwapHeader_Call) Run(run func(h *header.Header)) *MockReadonlyDevice_SwapHeader_Call { + _c.Call.Run(func(args mock.Arguments) { + var arg0 *header.Header + if args[0] != nil { + arg0 = args[0].(*header.Header) + } + run( + arg0, + ) + }) + return _c +} + +func (_c *MockReadonlyDevice_SwapHeader_Call) Return() *MockReadonlyDevice_SwapHeader_Call { + _c.Call.Return() + return _c +} + +func (_c *MockReadonlyDevice_SwapHeader_Call) RunAndReturn(run func(h *header.Header)) *MockReadonlyDevice_SwapHeader_Call { + _c.Run(run) + return _c +} diff --git a/packages/orchestrator/pkg/sandbox/block/overlay.go b/packages/orchestrator/pkg/sandbox/block/overlay.go index 499aa23ada..0e9987937f 100644 --- a/packages/orchestrator/pkg/sandbox/block/overlay.go +++ b/packages/orchestrator/pkg/sandbox/block/overlay.go @@ -89,3 +89,7 @@ func (o *Overlay) Close() error { func (o *Overlay) Header() *header.Header { return o.device.Header() } + 
+func (o *Overlay) SwapHeader(h *header.Header) { + o.device.SwapHeader(h) +} diff --git a/packages/orchestrator/pkg/sandbox/build/build.go b/packages/orchestrator/pkg/sandbox/build/build.go index 4db9bd2771..c01cd4f3c1 100644 --- a/packages/orchestrator/pkg/sandbox/build/build.go +++ b/packages/orchestrator/pkg/sandbox/build/build.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "sync/atomic" + "time" "github.com/google/uuid" "go.uber.org/zap" @@ -16,6 +17,10 @@ import ( "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" ) +// swapReadHeaderBudget bounds how long the read-path swap polls GCS for the +// V4 header to appear. +const swapReadHeaderBudget = 30 * time.Second + type File struct { header atomic.Pointer[header.Header] store *DiffStore @@ -42,21 +47,15 @@ func NewFile( return f } -// Header returns the current header. After a peer transition the header may -// have been atomically swapped to a V4 header containing FrameTables. func (b *File) Header() *header.Header { return b.header.Load() } -// maxTransitionRetries caps the number of header-swap retries when the peer -// signals upload completion via PeerTransitionedError. After a successful CAS, -// subsequent swapHeader calls are no-ops, so without a limit the loop would -// retry the same failing read forever. 
-const maxTransitionRetries = 2 +func (b *File) SwapHeader(h *header.Header) { + b.header.Store(h) +} func (b *File) ReadAt(ctx context.Context, p []byte, off int64) (n int, err error) { - transitionRetries := 0 - for n < len(p) { h := b.header.Load() @@ -109,7 +108,7 @@ func (b *File) ReadAt(ctx context.Context, p []byte, off int64) (n int, err erro ft, ) if err != nil { - if retry, swapErr := b.retryOnTransition(ctx, err, &transitionRetries); retry { + if retry, swapErr := b.retryOnTransition(ctx, err); retry { continue } else if swapErr != nil { return 0, swapErr @@ -126,8 +125,6 @@ func (b *File) ReadAt(ctx context.Context, p []byte, off int64) (n int, err erro // The slice access must be in the predefined blocksize of the build. func (b *File) Slice(ctx context.Context, off, _ int64) ([]byte, error) { - transitionRetries := 0 - for { h := b.header.Load() @@ -150,7 +147,7 @@ func (b *File) Slice(ctx context.Context, off, _ int64) ([]byte, error) { result, err := diff.Slice(ctx, int64(mappedBuild.Offset), int64(h.Metadata.BlockSize), ft) if err != nil { - if retry, swapErr := b.retryOnTransition(ctx, err, &transitionRetries); retry { + if retry, swapErr := b.retryOnTransition(ctx, err); retry { continue } else if swapErr != nil { return nil, swapErr @@ -163,58 +160,30 @@ func (b *File) Slice(ctx context.Context, off, _ int64) ([]byte, error) { } } -// retryOnTransition checks if err is a PeerTransitionedError and swaps the -// header if the retry budget allows. Returns (true, nil) to signal the caller -// should continue the loop, or (false, swapErr) if the swap itself failed. -func (b *File) retryOnTransition(ctx context.Context, err error, retries *int) (retry bool, swapErr error) { +// retryOnTransition catches a PeerTransitionedError and swaps the header from +// storage. Returns (true, nil) to signal the caller should continue the loop, +// or (false, swapErr) if the swap itself failed. 
peerSeekable emits the +// transition error at most once per seekable, so the loop is naturally +// bounded — no retry counter needed here. +func (b *File) retryOnTransition(ctx context.Context, err error) (bool, error) { var transErr *storage.PeerTransitionedError - if !errors.As(err, &transErr) || *retries >= maxTransitionRetries { + if !errors.As(err, &transErr) { return false, nil } - *retries++ - logger.L().Info(ctx, "peer transition detected, swapping header", zap.String("file_type", string(b.fileType)), - zap.Int("retry", *retries), ) - if swapErr := b.swapHeader(transErr); swapErr != nil { - return false, fmt.Errorf("failed to swap header: %w", swapErr) + h, loadErr := PollRemoteStorageForHeader(ctx, b.persistence, b.header.Load().Metadata.BuildId, b.fileType, nil, swapReadHeaderBudget) + if loadErr != nil { + return false, fmt.Errorf("failed to swap header: %w", loadErr) } + b.SwapHeader(h) return true, nil } -// swapHeader atomically replaces the header when the peer signals upload -// completion. Only the first goroutine to CAS succeeds; others just retry -// with the already-swapped header. The caller's retry counter bounds -// repeated attempts. -func (b *File) swapHeader(transErr *storage.PeerTransitionedError) error { - var headerBytes []byte - - switch b.fileType { - case Memfile: - headerBytes = transErr.MemfileHeader - case Rootfs: - headerBytes = transErr.RootfsHeader - } - - if len(headerBytes) == 0 { - return errors.New("no header bytes available") - } - - newH, err := header.DeserializeBytes(headerBytes) - if err != nil { - return fmt.Errorf("failed to swap header: %w", err) - } - - old := b.header.Load() - b.header.CompareAndSwap(old, newH) - - return nil -} - // buildFileSize returns the uncompressed file size for a build. Returns 0 for // V3 headers, which signals the read path to fall back to a Size() RPC. 
func (b *File) buildFileSize(h *header.Header, buildID uuid.UUID) int64 { diff --git a/packages/orchestrator/pkg/sandbox/build/header_load.go b/packages/orchestrator/pkg/sandbox/build/header_load.go new file mode 100644 index 0000000000..f779aac6b8 --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/build/header_load.go @@ -0,0 +1,76 @@ +package build + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/google/uuid" + + "github.com/e2b-dev/infra/packages/shared/pkg/storage" + "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" +) + +const ( + loadV4InitialBackoff = 100 * time.Millisecond + loadV4MaxBackoff = 5 * time.Second + loadV4MaxTransientErrors = 3 +) + +// PollRemoteStorageForHeader polls storage for the post-upload V4 header for buildID/fileType. +// ErrObjectNotExist is retried until the budget expires; other LoadHeader +// errors are tolerated up to loadV4MaxTransientErrors consecutive occurrences +// (e.g. transient GCS hiccups during the rare window between the upload-done +// signal and object visibility) before giving up. +// +// hint is an optional accelerator. A nil error received on the channel says +// "the upload just finished, poll storage now"; a non-nil error says "the +// upload failed" and PollRemoteStorageForHeader returns it immediately without further polling. +// A nil channel never fires, so callers without hint plumbing fall through to +// the ticker-only path. budget bounds total wait time. 
+func PollRemoteStorageForHeader( + ctx context.Context, + store storage.StorageProvider, + buildID uuid.UUID, + t DiffType, + hint <-chan error, + budget time.Duration, +) (*header.Header, error) { + hdrPath := storage.Paths{BuildID: buildID.String()}.HeaderFile(string(t)) + deadline := time.Now().Add(budget) + + backoff := loadV4InitialBackoff + transientErrs := 0 + for { + h, err := header.LoadHeader(ctx, store, hdrPath) + if err == nil { + return h, nil + } + if !errors.Is(err, storage.ErrObjectNotExist) { + transientErrs++ + if transientErrs >= loadV4MaxTransientErrors { + return nil, fmt.Errorf("load V4 header for %s/%s after %d attempts: %w", buildID, t, transientErrs, err) + } + } else { + transientErrs = 0 + } + if !time.Now().Before(deadline) { + return nil, fmt.Errorf("V4 header for %s/%s not visible after %s: %w", buildID, t, budget, err) + } + + select { + case <-ctx.Done(): + return nil, ctx.Err() + case hintErr := <-hint: + if hintErr != nil { + return nil, fmt.Errorf("upload signaled failure for %s/%s: %w", buildID, t, hintErr) + } + backoff = loadV4InitialBackoff + case <-time.After(backoff): + if backoff < loadV4MaxBackoff { + backoff *= 2 + } + } + } +} diff --git a/packages/orchestrator/pkg/sandbox/build_upload.go b/packages/orchestrator/pkg/sandbox/build_upload.go index 870e5c2954..080e1dee5b 100644 --- a/packages/orchestrator/pkg/sandbox/build_upload.go +++ b/packages/orchestrator/pkg/sandbox/build_upload.go @@ -2,141 +2,95 @@ package sandbox import ( "context" - "sync" + "errors" + "fmt" "github.com/google/uuid" + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/build" "github.com/e2b-dev/infra/packages/shared/pkg/featureflags" "github.com/e2b-dev/infra/packages/shared/pkg/storage" headers "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" + "github.com/e2b-dev/infra/packages/shared/pkg/utils" ) -// BuildUploader uploads a paused snapshot's files to storage. 
-type BuildUploader interface { - // UploadData uploads data files, snapfile, and metadata. - UploadData(ctx context.Context) error - // FinalizeHeaders uploads final headers after all upstream layers are done. - // Returns serialized V4 header bytes for peer transition (nil for uncompressed). - FinalizeHeaders(ctx context.Context) (memfileHeader, rootfsHeader []byte, err error) +type Upload struct { + buildID uuid.UUID + snap *Snapshot + paths storage.Paths + uploads *Uploads + store storage.StorageProvider + mem storage.CompressConfig + root storage.CompressConfig + future *utils.ErrorOnce } -// NewBuildUploader creates a BuildUploader for the given snapshot. -// -// Compression config is resolved per file (memfile, rootfs) using the base -// config, feature flags, and use case. If neither file has compression enabled, -// returns a V3 (uncompressed) uploader; otherwise a V4 (compressed) uploader -// with the two resolved configs. -// -// pending is shared across layers for multi-layer builds; nil is fine for -// single-layer. 
-func NewBuildUploader(ctx context.Context, snapshot *Snapshot, persistence storage.StorageProvider, paths storage.Paths, cfg storage.CompressConfig, ff *featureflags.Client, useCase string, pending *PendingBuildInfo) BuildUploader { - base := buildUploader{ - paths: paths, - persistence: persistence, - snapshot: snapshot, +func NewUpload( + ctx context.Context, + uploads *Uploads, + snap *Snapshot, + store storage.StorageProvider, + cfg storage.CompressConfig, + ff *featureflags.Client, + useCase string, +) (*Upload, error) { + u := &Upload{ + buildID: snap.BuildID, + snap: snap, + paths: storage.Paths{BuildID: snap.BuildID.String()}, + uploads: uploads, + store: store, + mem: storage.ResolveCompressConfig(ctx, cfg, ff, storage.MemfileName, useCase), + root: storage.ResolveCompressConfig(ctx, cfg, ff, storage.RootfsName, useCase), } - memCfg := storage.ResolveCompressConfig(ctx, cfg, ff, storage.MemfileName, useCase) - rootfsCfg := storage.ResolveCompressConfig(ctx, cfg, ff, storage.RootfsName, useCase) - - if !memCfg.IsCompressionEnabled() && !rootfsCfg.IsCompressionEnabled() { - return &uncompressedUploader{buildUploader: base} - } - - if pending == nil { - pending = &PendingBuildInfo{} - } - - return &compressedUploader{ - buildUploader: base, - pending: pending, - memCfg: memCfg, - rootfsCfg: rootfsCfg, + if uploads != nil { + fut, err := uploads.Start(snap.BuildID) + if err != nil { + return nil, err + } + u.future = fut } -} -// buildUploader contains fields and helpers shared by both implementations. -type buildUploader struct { - paths storage.Paths - persistence storage.StorageProvider - snapshot *Snapshot + return u, nil } -type pendingBuildInfo struct { - ft *storage.FrameTable - fileSize int64 - checksum [32]byte -} - -// PendingBuildInfo collects FrameTables and file sizes from compressed data -// uploads across all layers. 
After all data files are uploaded, the collected -// tables are applied to headers before the compressed headers are serialized -// and uploaded. Safe for concurrent use from errgroup goroutines. -type PendingBuildInfo struct { - mu sync.Mutex - m map[string]pendingBuildInfo -} +func (u *Upload) Run(ctx context.Context) error { + if !u.mem.IsCompressionEnabled() && !u.root.IsCompressionEnabled() { + return u.runV3(ctx) + } -func pendingBuildInfoKey(buildID, fileType string) string { - return buildID + "/" + fileType + return u.runV4(ctx) } -func (p *PendingBuildInfo) add(key string, ft *storage.FrameTable, fileSize int64, checksum [32]byte) { - p.mu.Lock() - defer p.mu.Unlock() - - if p.m == nil { - p.m = make(map[string]pendingBuildInfo) +// Finish signals the upload's terminal outcome. Same-orch waiters wake on the +// future; cross-orch waiters wake on the Redis hint published here. +func (u *Upload) Finish(ctx context.Context, uploadErr error) { + if u.future != nil { + _ = u.future.SetError(uploadErr) + } + if u.uploads != nil { + u.uploads.publishUploadDoneToRedis(ctx, u.buildID, uploadErr) } - - p.m[key] = pendingBuildInfo{ft: ft, fileSize: fileSize, checksum: checksum} } -func (p *PendingBuildInfo) get(key string) *pendingBuildInfo { - p.mu.Lock() - defer p.mu.Unlock() - - info, ok := p.m[key] - if !ok { +// publish swaps a finalized header into the local cached device so peers and +// Wait()ers see the build as complete. ErrBuildNotInCache is the one acceptable +// failure mode: nothing was cached locally, nothing to swap. +func (u *Upload) publish(ctx context.Context, t build.DiffType, h *headers.Header) error { + if u.uploads == nil { return nil } - return &info -} - -// PrepareV4Header clones src for upload, applies pending build metadata for -// fileType, and sets the V4 version. The clone is safe to serialize without -// racing with concurrent readers of the original (e.g. UFFD handlers). 
-func (p *PendingBuildInfo) PrepareV4Header(h *headers.Header, fileType string) *headers.Header { - h = h.CloneForUpload(headers.MetadataVersionV4) - - seen := make(map[uuid.UUID]struct{}, len(h.Mapping)) - - for _, m := range h.Mapping { - if _, ok := seen[m.BuildId]; ok { - continue - } - seen[m.BuildId] = struct{}{} - - key := pendingBuildInfoKey(m.BuildId.String(), fileType) - info := p.get(key) - if info == nil { - // Parent builds and uuid.Nil (empty blocks) have no pending entry. - // Parent builds are either already in h.Builds (copied by ToDiffHeader), - // or h.Builds is nil (V3 base with no Builds map at all). - continue - } - - bd := headers.BuildData{ - Size: info.fileSize, - Checksum: info.checksum, - } - if info.ft != nil && info.ft.IsCompressed() { - bd.FrameData = info.ft - } - - h.SetBuild(m.BuildId, bd) + dev, err := u.uploads.find(ctx, u.buildID, t) + if errors.Is(err, ErrBuildNotInCache) { + return nil + } + if err != nil { + return fmt.Errorf("load %s for swap: %w", t, err) } - return h + dev.SwapHeader(h) + + return nil } diff --git a/packages/orchestrator/pkg/sandbox/build_upload_v3.go b/packages/orchestrator/pkg/sandbox/build_upload_v3.go index 258bb7e12e..e482a321d7 100644 --- a/packages/orchestrator/pkg/sandbox/build_upload_v3.go +++ b/packages/orchestrator/pkg/sandbox/build_upload_v3.go @@ -6,56 +6,46 @@ import ( "golang.org/x/sync/errgroup" + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/build" "github.com/e2b-dev/infra/packages/shared/pkg/storage" headers "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" ) -// uncompressedUploader implements BuildUploader for V3 (uncompressed) builds. 
-type uncompressedUploader struct { - buildUploader -} - -func (u *uncompressedUploader) UploadData(ctx context.Context) error { - memfilePath, err := u.snapshot.MemfileDiff.CachePath() +func (u *Upload) runV3(ctx context.Context) error { + memfilePath, err := u.snap.MemfileDiff.CachePath() if err != nil { return fmt.Errorf("error getting memfile diff path: %w", err) } - rootfsPath, err := u.snapshot.RootfsDiff.CachePath() + rootfsPath, err := u.snap.RootfsDiff.CachePath() if err != nil { return fmt.Errorf("error getting rootfs diff path: %w", err) } eg, ctx := errgroup.WithContext(ctx) - // V3 headers eg.Go(func() error { - if u.snapshot.MemfileDiffHeader == nil { + if u.snap.MemfileDiffHeader == nil { return nil } - _, err := headers.StoreHeader(ctx, u.persistence, u.paths.MemfileHeader(), u.snapshot.MemfileDiffHeader) - - return err + return headers.StoreHeader(ctx, u.store, u.paths.MemfileHeader(), finalizeV3(u.snap.MemfileDiffHeader)) }) eg.Go(func() error { - if u.snapshot.RootfsDiffHeader == nil { + if u.snap.RootfsDiffHeader == nil { return nil } - _, err := headers.StoreHeader(ctx, u.persistence, u.paths.RootfsHeader(), u.snapshot.RootfsDiffHeader) - - return err + return headers.StoreHeader(ctx, u.store, u.paths.RootfsHeader(), finalizeV3(u.snap.RootfsDiffHeader)) }) - // Uncompressed data eg.Go(func() error { if memfilePath == "" { return nil } - _, _, err := storage.UploadFramed(ctx, u.persistence, u.paths.Memfile(), storage.MemfileObjectType, memfilePath, storage.CompressConfig{}) + _, _, err := storage.UploadFramed(ctx, u.store, u.paths.Memfile(), storage.MemfileObjectType, memfilePath, storage.CompressConfig{}) return err }) @@ -65,25 +55,45 @@ func (u *uncompressedUploader) UploadData(ctx context.Context) error { return nil } - _, _, err := storage.UploadFramed(ctx, u.persistence, u.paths.Rootfs(), storage.RootFSObjectType, rootfsPath, storage.CompressConfig{}) + _, _, err := storage.UploadFramed(ctx, u.store, u.paths.Rootfs(), 
storage.RootFSObjectType, rootfsPath, storage.CompressConfig{}) return err }) eg.Go(func() error { - return storage.UploadBlob(ctx, u.persistence, u.paths.Snapfile(), storage.SnapfileObjectType, u.snapshot.Snapfile.Path()) + return storage.UploadBlob(ctx, u.store, u.paths.Snapfile(), storage.SnapfileObjectType, u.snap.Snapfile.Path()) }) eg.Go(func() error { - return storage.UploadBlob(ctx, u.persistence, u.paths.Metadata(), storage.MetadataObjectType, u.snapshot.Metafile.Path()) + return storage.UploadBlob(ctx, u.store, u.paths.Metadata(), storage.MetadataObjectType, u.snap.Metafile.Path()) }) - return eg.Wait() -} + if err := eg.Wait(); err != nil { + return err + } -func (u *uncompressedUploader) FinalizeHeaders(context.Context) ([]byte, []byte, error) { - return nil, nil, nil + if h := finalizeV3(u.snap.MemfileDiffHeader); h != nil { + if err := u.publish(ctx, build.Memfile, h); err != nil { + return err + } + } + if h := finalizeV3(u.snap.RootfsDiffHeader); h != nil { + if err := u.publish(ctx, build.Rootfs, h); err != nil { + return err + } + } + + return nil } -// Ensure uncompressedUploader implements BuildUploader. -var _ BuildUploader = (*uncompressedUploader)(nil) +// finalizeV3 returns a shallow copy of src with IncompletePendingUpload cleared, +// or nil if src is nil. Safe shallow copy: only the bool field is mutated. 
+func finalizeV3(src *headers.Header) *headers.Header { + if src == nil { + return nil + } + h := *src + h.IncompletePendingUpload = false + + return &h +} diff --git a/packages/orchestrator/pkg/sandbox/build_upload_v4.go b/packages/orchestrator/pkg/sandbox/build_upload_v4.go index 85338f1e51..0cd6706390 100644 --- a/packages/orchestrator/pkg/sandbox/build_upload_v4.go +++ b/packages/orchestrator/pkg/sandbox/build_upload_v4.go @@ -4,128 +4,145 @@ import ( "context" "fmt" + "github.com/google/uuid" "golang.org/x/sync/errgroup" + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/build" "github.com/e2b-dev/infra/packages/shared/pkg/storage" headers "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" ) -// compressedUploader implements BuildUploader for V4 (compressed) builds. -// Per-file configs are resolved in NewBuildUploader and passed in directly. -type compressedUploader struct { - buildUploader - - pending *PendingBuildInfo - memCfg storage.CompressConfig - rootfsCfg storage.CompressConfig -} - -func (c *compressedUploader) UploadData(ctx context.Context) error { - memfilePath, err := c.snapshot.MemfileDiff.CachePath() +func (u *Upload) runV4(ctx context.Context) error { + memSrc, err := u.snap.MemfileDiff.CachePath() if err != nil { - return fmt.Errorf("error getting memfile diff path: %w", err) + return fmt.Errorf("memfile diff path: %w", err) } - rootfsPath, err := c.snapshot.RootfsDiff.CachePath() + rootfsSrc, err := u.snap.RootfsDiff.CachePath() if err != nil { - return fmt.Errorf("error getting rootfs diff path: %w", err) + return fmt.Errorf("rootfs diff path: %w", err) } eg, ctx := errgroup.WithContext(ctx) - if memfilePath != "" { + if u.snap.MemfileDiffHeader != nil { eg.Go(func() error { - if !c.memCfg.IsCompressionEnabled() { - _, _, err := storage.UploadFramed(ctx, c.persistence, c.paths.Memfile(), storage.MemfileObjectType, memfilePath, storage.CompressConfig{}) - - return err - } - - ft, checksum, err := 
storage.UploadFramed(ctx, c.persistence, c.paths.MemfileCompressed(c.memCfg.CompressionType()), storage.MemfileObjectType, memfilePath, c.memCfg) - if err != nil { - return fmt.Errorf("compressed memfile upload: %w", err) - } - - c.pending.add(pendingBuildInfoKey(c.paths.BuildID, storage.MemfileName), ft, ft.UncompressedSize(), checksum) - - return nil + return u.uploadFramed(ctx, build.Memfile, memSrc, u.snap.MemfileDiffHeader, u.mem) }) } - if rootfsPath != "" { + if u.snap.RootfsDiffHeader != nil { eg.Go(func() error { - if !c.rootfsCfg.IsCompressionEnabled() { - _, _, err := storage.UploadFramed(ctx, c.persistence, c.paths.Rootfs(), storage.RootFSObjectType, rootfsPath, storage.CompressConfig{}) - - return err - } - - ft, checksum, err := storage.UploadFramed(ctx, c.persistence, c.paths.RootfsCompressed(c.rootfsCfg.CompressionType()), storage.RootFSObjectType, rootfsPath, c.rootfsCfg) - if err != nil { - return fmt.Errorf("compressed rootfs upload: %w", err) - } - - c.pending.add(pendingBuildInfoKey(c.paths.BuildID, storage.RootfsName), ft, ft.UncompressedSize(), checksum) - - return nil + return u.uploadFramed(ctx, build.Rootfs, rootfsSrc, u.snap.RootfsDiffHeader, u.root) }) } eg.Go(func() error { - return storage.UploadBlob(ctx, c.persistence, c.paths.Snapfile(), storage.SnapfileObjectType, c.snapshot.Snapfile.Path()) + return storage.UploadBlob(ctx, u.store, u.paths.Snapfile(), storage.SnapfileObjectType, u.snap.Snapfile.Path()) }) eg.Go(func() error { - return storage.UploadBlob(ctx, c.persistence, c.paths.Metadata(), storage.MetadataObjectType, c.snapshot.Metafile.Path()) + return storage.UploadBlob(ctx, u.store, u.paths.Metadata(), storage.MetadataObjectType, u.snap.Metafile.Path()) }) return eg.Wait() } -// FinalizeHeaders applies pending frame tables to headers and uploads them as V4 format. -// -// The snapshot headers are cloned before mutation because the originals may be -// concurrently read by sandboxes resumed from the template cache (e.g. 
the -// optimize phase's UFFD handlers). -func (c *compressedUploader) FinalizeHeaders(ctx context.Context) (memfileHeader, rootfsHeader []byte, err error) { - eg, ctx := errgroup.WithContext(ctx) - - if c.snapshot.MemfileDiffHeader != nil { - eg.Go(func() error { - h := c.pending.PrepareV4Header(c.snapshot.MemfileDiffHeader, storage.MemfileName) - - data, err := headers.StoreHeader(ctx, c.persistence, c.paths.MemfileHeader(), h) - if err != nil { - return err - } +func (u *Upload) uploadFramed( + ctx context.Context, + fileType build.DiffType, + srcPath string, + srcHeader *headers.Header, + cfg storage.CompressConfig, +) error { + var selfBuild headers.BuildData + + if srcPath != "" { + ft, checksum, err := storage.UploadFramed(ctx, u.store, u.paths.DataFile(string(fileType), cfg.CompressionType()), seekableTypeFor(fileType), srcPath, cfg) + if err != nil { + return fmt.Errorf("%s upload: %w", fileType, err) + } + + // FrameTable count, not os.Stat: sparse memfile diffs stream less than + // they appear on disk. + selfBuild = headers.BuildData{Size: ft.UncompressedSize(), Checksum: checksum} + if ft.IsCompressed() { + selfBuild.FrameData = ft + } + } - memfileHeader = data + h := srcHeader.CloneForUpload(headers.MetadataVersionV4) + h.IncompletePendingUpload = false - return nil - }) + // Dependency closure is the set of buildIDs referenced by mappings, minus + // self. Each ancestor's BuildData lives in its own finalized header's + // self-entry; Wait routes to local future, peer, or GCS as appropriate. + // Already-final ancestors resolve immediately (GCS round-trip beats + // blocking on whatever the immediate parent's upload is doing). 
+ ancestors, err := u.collectAncestorBuilds(ctx, srcHeader.Mapping, fileType) + if err != nil { + return err } - if c.snapshot.RootfsDiffHeader != nil { - eg.Go(func() error { - h := c.pending.PrepareV4Header(c.snapshot.RootfsDiffHeader, storage.RootfsName) + // Empty diffs still represent a layer descendants must record as an ancestor. + h.Builds = ancestors + h.Builds[u.buildID] = selfBuild - data, err := headers.StoreHeader(ctx, c.persistence, c.paths.RootfsHeader(), h) - if err != nil { - return err - } + if err := headers.StoreHeader(ctx, u.store, u.paths.HeaderFile(string(fileType)), h); err != nil { + return fmt.Errorf("store %s header: %w", fileType, err) + } - rootfsHeader = data + return u.publish(ctx, fileType, h) +} - return nil - }) +// collectAncestorBuilds resolves every unique buildID referenced by mappings +// (excluding self) to its finalized BuildData. Local ancestors resolve from +// the in-memory futures map without any I/O; cross-orch ancestors take a +// single GCS round-trip each. Sequential β€” the critical path is the slowest +// pending Wait either way, and serial keeps the code simple. +func (u *Upload) collectAncestorBuilds( + ctx context.Context, + mappings []headers.BuildMap, + fileType build.DiffType, +) (map[uuid.UUID]headers.BuildData, error) { + out := make(map[uuid.UUID]headers.BuildData) + if u.uploads == nil { + return out, nil } - if err = eg.Wait(); err != nil { - return nil, nil, err + for _, m := range mappings { + if m.BuildId == u.buildID || m.BuildId == uuid.Nil { + continue + } + if _, dup := out[m.BuildId]; dup { + continue + } + + h, err := u.uploads.Wait(ctx, m.BuildId, fileType) + if err != nil { + return nil, fmt.Errorf("wait for ancestor %s/%s: %w", m.BuildId, fileType, err) + } + // V3 ancestors have Builds=nil (FrameTable is V4-only); their data is + // raw bytes and the read path doesn't consult Builds for them. Skip + // silently so V4 descendants of V3 ancestors still upload. 
+ bd, ok := h.Builds[m.BuildId] + if !ok { + continue + } + + out[m.BuildId] = bd } - return memfileHeader, rootfsHeader, nil + return out, nil } -// Ensure compressedUploader implements BuildUploader. -var _ BuildUploader = (*compressedUploader)(nil) +func seekableTypeFor(fileType build.DiffType) storage.SeekableObjectType { + switch fileType { + case build.Memfile: + return storage.MemfileObjectType + case build.Rootfs: + return storage.RootFSObjectType + } + + return storage.UnknownSeekableObjectType +} diff --git a/packages/orchestrator/pkg/sandbox/nbd/path_direct_slow_test.go b/packages/orchestrator/pkg/sandbox/nbd/path_direct_slow_test.go index d1316247dc..82a12dacee 100644 --- a/packages/orchestrator/pkg/sandbox/nbd/path_direct_slow_test.go +++ b/packages/orchestrator/pkg/sandbox/nbd/path_direct_slow_test.go @@ -57,6 +57,10 @@ func (s *SlowDevice) Header() *header.Header { return s.inner.Header() } +func (s *SlowDevice) SwapHeader(h *header.Header) { + s.inner.SwapHeader(h) +} + func (s *SlowDevice) Close() error { return s.inner.Close() } diff --git a/packages/orchestrator/pkg/sandbox/nbd/testutils/build_device.go b/packages/orchestrator/pkg/sandbox/nbd/testutils/build_device.go index 1041107be1..bc4e94ab00 100644 --- a/packages/orchestrator/pkg/sandbox/nbd/testutils/build_device.go +++ b/packages/orchestrator/pkg/sandbox/nbd/testutils/build_device.go @@ -37,6 +37,10 @@ func (m *BuildDevice) Header() *header.Header { return m.header } +func (m *BuildDevice) SwapHeader(h *header.Header) { + m.header = h +} + func (m *BuildDevice) Size(_ context.Context) (int64, error) { return int64(m.header.Metadata.Size), nil } diff --git a/packages/orchestrator/pkg/sandbox/nbd/testutils/logger_overlay.go b/packages/orchestrator/pkg/sandbox/nbd/testutils/logger_overlay.go index ea33af60cd..932e5fae23 100644 --- a/packages/orchestrator/pkg/sandbox/nbd/testutils/logger_overlay.go +++ b/packages/orchestrator/pkg/sandbox/nbd/testutils/logger_overlay.go @@ -61,6 +61,10 @@ func 
(l *LoggerOverlay) Header() *header.Header { return l.overlay.Header() } +func (l *LoggerOverlay) SwapHeader(h *header.Header) { + l.overlay.SwapHeader(h) +} + func (l *LoggerOverlay) Close() error { return l.overlay.Close() } diff --git a/packages/orchestrator/pkg/sandbox/nbd/testutils/zero_device.go b/packages/orchestrator/pkg/sandbox/nbd/testutils/zero_device.go index e4733ea20c..357e6e7a89 100644 --- a/packages/orchestrator/pkg/sandbox/nbd/testutils/zero_device.go +++ b/packages/orchestrator/pkg/sandbox/nbd/testutils/zero_device.go @@ -62,6 +62,10 @@ func (z *ZeroDevice) Header() *header.Header { return z.header } +func (z *ZeroDevice) SwapHeader(h *header.Header) { + z.header = h +} + func (z *ZeroDevice) Close() error { return nil } diff --git a/packages/orchestrator/pkg/sandbox/sandbox.go b/packages/orchestrator/pkg/sandbox/sandbox.go index f69100015d..94f2ed7467 100644 --- a/packages/orchestrator/pkg/sandbox/sandbox.go +++ b/packages/orchestrator/pkg/sandbox/sandbox.go @@ -1125,6 +1125,8 @@ func (s *Sandbox) Pause( RootfsDiff: rootfsDiff, RootfsDiffHeader: rootfsDiffHeader, + BuildID: buildID, + cleanup: cleanup, }, nil } diff --git a/packages/orchestrator/pkg/sandbox/snapshot.go b/packages/orchestrator/pkg/sandbox/snapshot.go index 224dd30532..37ab988e19 100644 --- a/packages/orchestrator/pkg/sandbox/snapshot.go +++ b/packages/orchestrator/pkg/sandbox/snapshot.go @@ -4,10 +4,10 @@ import ( "context" "fmt" + "github.com/google/uuid" + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/build" "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/template" - "github.com/e2b-dev/infra/packages/shared/pkg/featureflags" - "github.com/e2b-dev/infra/packages/shared/pkg/storage" "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" ) @@ -18,29 +18,11 @@ type Snapshot struct { RootfsDiffHeader *header.Header Snapfile template.File Metafile template.File + BuildID uuid.UUID cleanup *Cleanup } -// Upload uploads snapshot files to storage and 
returns serialized V4 header -// bytes for peer transition (nil for uncompressed builds). -func (s *Snapshot) Upload( - ctx context.Context, - persistence storage.StorageProvider, - paths storage.Paths, - cfg storage.CompressConfig, - ff *featureflags.Client, - useCase string, -) (memfileHdr, rootfsHdr []byte, err error) { - uploader := NewBuildUploader(ctx, s, persistence, paths, cfg, ff, useCase, nil) - - if err := uploader.UploadData(ctx); err != nil { - return nil, nil, fmt.Errorf("error uploading template files: %w", err) - } - - return uploader.FinalizeHeaders(ctx) -} - func (s *Snapshot) Close(ctx context.Context) error { err := s.cleanup.Run(ctx) if err != nil { diff --git a/packages/orchestrator/pkg/sandbox/template/peerclient/blob.go b/packages/orchestrator/pkg/sandbox/template/peerclient/blob.go index 38f9b7a2df..439b0f25bb 100644 --- a/packages/orchestrator/pkg/sandbox/template/peerclient/blob.go +++ b/packages/orchestrator/pkg/sandbox/template/peerclient/blob.go @@ -93,7 +93,7 @@ func openPeerBlobStream( ctx context.Context, client orchestrator.ChunkServiceClient, req *orchestrator.GetBuildBlobRequest, - uploaded *atomic.Pointer[UploadedHeaders], + uploaded *atomic.Bool, ) (func() ([]byte, error), error) { stream, err := client.GetBuildBlob(ctx, req) if err != nil { diff --git a/packages/orchestrator/pkg/sandbox/template/peerclient/blob_test.go b/packages/orchestrator/pkg/sandbox/template/peerclient/blob_test.go index b8587ca582..e1c7025409 100644 --- a/packages/orchestrator/pkg/sandbox/template/peerclient/blob_test.go +++ b/packages/orchestrator/pkg/sandbox/template/peerclient/blob_test.go @@ -34,7 +34,7 @@ func TestPeerBlob_WriteTo_PeerSucceeds(t *testing.T) { client: client, buildID: "build-1", fileName: "snapfile", - uploaded: &atomic.Pointer[UploadedHeaders]{}, + uploaded: &atomic.Bool{}, }} var buf bytes.Buffer @@ -66,7 +66,7 @@ func TestPeerBlob_WriteTo_PeerNotAvailable_FallsBackToBase(t *testing.T) { client: client, buildID: "build-1", 
fileName: "snapfile", - uploaded: &atomic.Pointer[UploadedHeaders]{}, + uploaded: &atomic.Bool{}, openFn: func(ctx context.Context) (storage.Blob, error) { return base.OpenBlob(ctx, "build-1/snapfile", storage.SnapfileObjectType) }, @@ -98,7 +98,7 @@ func TestPeerBlob_WriteTo_PeerError_FallsBackToBase(t *testing.T) { client: client, buildID: "build-1", fileName: "snapfile", - uploaded: &atomic.Pointer[UploadedHeaders]{}, + uploaded: &atomic.Bool{}, openFn: func(ctx context.Context) (storage.Blob, error) { return base.OpenBlob(ctx, "build-1/snapfile", storage.SnapfileObjectType) }, @@ -113,14 +113,14 @@ func TestPeerBlob_WriteTo_PeerError_FallsBackToBase(t *testing.T) { func TestPeerBlob_WriteTo_UploadedSetMidStream_CompletesFromPeerThenFallsBack(t *testing.T) { t.Parallel() - uploaded := &atomic.Pointer[UploadedHeaders]{} + uploaded := &atomic.Bool{} // Peer streams three chunks; the second Recv sets uploaded=true // (simulating a concurrent operation receiving UseStorage). stream := orchestratormocks.NewMockChunkService_GetBuildBlobClient(t) stream.EXPECT().Recv().Return(&orchestrator.GetBuildBlobResponse{Data: []byte("aaa")}, nil).Once() stream.EXPECT().Recv().RunAndReturn(func() (*orchestrator.GetBuildBlobResponse, error) { - uploaded.Store(&UploadedHeaders{}) + uploaded.Store(true) return &orchestrator.GetBuildBlobResponse{Data: []byte("bbb")}, nil }).Once() @@ -173,7 +173,7 @@ func TestPeerBlob_Exists_PeerHasFile(t *testing.T) { return req.GetBuildId() == "build-1" && req.GetFileName() == "snapfile" })).Return(&orchestrator.GetBuildFileExistsResponse{}, nil) - blob := &peerBlob{peerHandle: peerHandle[storage.Blob]{client: client, buildID: "build-1", fileName: "snapfile", uploaded: &atomic.Pointer[UploadedHeaders]{}}} + blob := &peerBlob{peerHandle: peerHandle[storage.Blob]{client: client, buildID: "build-1", fileName: "snapfile", uploaded: &atomic.Bool{}}} ok, err := blob.Exists(t.Context()) require.NoError(t, err) assert.True(t, ok) @@ -194,7 +194,7 @@ func 
TestPeerBlob_Exists_PeerNotAvailable_FallsBackToBase(t *testing.T) { client: client, buildID: "build-1", fileName: "snapfile", - uploaded: &atomic.Pointer[UploadedHeaders]{}, + uploaded: &atomic.Bool{}, openFn: func(ctx context.Context) (storage.Blob, error) { return base.OpenBlob(ctx, "build-1/snapfile", storage.SnapfileObjectType) }, @@ -216,7 +216,7 @@ func TestPeerBlob_Exists_UseStorage_FallsBackToBase(t *testing.T) { base := storage.NewMockStorageProvider(t) base.EXPECT().OpenBlob(mock.Anything, "build-1/snapfile", storage.SnapfileObjectType).Return(baseBlob, nil) - uploaded := &atomic.Pointer[UploadedHeaders]{} + uploaded := &atomic.Bool{} blob := &peerBlob{peerHandle: peerHandle[storage.Blob]{ client: client, buildID: "build-1", diff --git a/packages/orchestrator/pkg/sandbox/template/peerclient/resolver.go b/packages/orchestrator/pkg/sandbox/template/peerclient/resolver.go index 999ccf5c2a..49bd708bd7 100644 --- a/packages/orchestrator/pkg/sandbox/template/peerclient/resolver.go +++ b/packages/orchestrator/pkg/sandbox/template/peerclient/resolver.go @@ -30,17 +30,9 @@ type Resolver interface { Close() } -// UploadedHeaders holds the serialized V4 headers received from the peer's -// use_storage response. These are used by build.File to atomically swap headers -// when transitioning from P2P to compressed GCS reads. -type UploadedHeaders struct { - MemfileHeader []byte - RootfsHeader []byte -} - type resolveResult struct { client orchestrator.ChunkServiceClient - uploaded *atomic.Pointer[UploadedHeaders] + uploaded *atomic.Bool addr string } @@ -57,11 +49,11 @@ func (nopResolver) Close() {} // peerResolver is the real implementation that looks up peers via the Registry. 
type peerResolver struct { - registry Registry - selfAddress string - peerConns sync.Map // address β†’ *grpc.ClientConn - uploaded sync.Map // buildID β†’ *atomic.Pointer[UploadedHeaders] - dialGroup singleflight.Group + registry Registry + selfAddress string + peerConns sync.Map // address β†’ *grpc.ClientConn + uploadedBuilds sync.Map // buildID β†’ *atomic.Bool + dialGroup singleflight.Group } func NewResolver(registry Registry, selfAddress string) Resolver { @@ -112,33 +104,32 @@ func (r *peerResolver) isSelfAddress(address string) bool { return address == r.selfAddress } -// uploadedPtr returns a shared atomic pointer for the given build ID. -// Non-nil value means the build is uploaded (use_storage). The UploadedHeaders -// may contain serialized V4 headers for the peer transition protocol, or be -// empty (for uncompressed builds). -func (r *peerResolver) uploadedPtr(buildID string) *atomic.Pointer[UploadedHeaders] { - if v, ok := r.uploaded.Load(buildID); ok { - return v.(*atomic.Pointer[UploadedHeaders]) +// uploadedFlag returns a shared atomic flag for the given build ID. +// Once any reader sets the flag (via use_storage), all subsequent opens for +// that build skip the peer. +func (r *peerResolver) uploadedFlag(buildID string) *atomic.Bool { + if v, ok := r.uploadedBuilds.Load(buildID); ok { + return v.(*atomic.Bool) } - ptr := &atomic.Pointer[UploadedHeaders]{} - actual, _ := r.uploaded.LoadOrStore(buildID, ptr) + flag := &atomic.Bool{} + actual, _ := r.uploadedBuilds.LoadOrStore(buildID, flag) - return actual.(*atomic.Pointer[UploadedHeaders]) + return actual.(*atomic.Bool) } // Purge removes the uploaded state for a build, called on template // cache eviction so the entry doesn't accumulate forever. func (r *peerResolver) Purge(buildID string) { - r.uploaded.Delete(buildID) + r.uploadedBuilds.Delete(buildID) } // resolve looks up the peer for the given build and returns a gRPC client if // a remote peer is found. 
Returns a nil client when the base provider should // be used instead (uploaded, no peer, self, or error). func (r *peerResolver) resolve(ctx context.Context, buildID string) (attribute.KeyValue, resolveResult) { - hdrs := r.uploadedPtr(buildID) - if hdrs.Load() != nil { + uploaded := r.uploadedFlag(buildID) + if uploaded.Load() { return attrResolveUploaded, resolveResult{} } @@ -162,7 +153,7 @@ func (r *peerResolver) resolve(ctx context.Context, buildID string) (attribute.K return attrResolvePeer, resolveResult{ client: orchestrator.NewChunkServiceClient(conn), - uploaded: hdrs, + uploaded: uploaded, addr: addr, } } diff --git a/packages/orchestrator/pkg/sandbox/template/peerclient/seekable.go b/packages/orchestrator/pkg/sandbox/template/peerclient/seekable.go index c1b0537803..1b08d77411 100644 --- a/packages/orchestrator/pkg/sandbox/template/peerclient/seekable.go +++ b/packages/orchestrator/pkg/sandbox/template/peerclient/seekable.go @@ -20,6 +20,14 @@ var _ storage.Seekable = (*peerSeekable)(nil) // calls (e.g. ReadAt then OpenRangeReader) do not re-open the underlying GCS object. type peerSeekable struct { peerHandle[storage.Seekable] + + // transitionEmitted ensures we signal PeerTransitionedError at most once + // after the peer flips uploaded=true. The caller (build.File) reacts by + // loading the post-upload header from storage; whether that ends up V4 + // (compressed) or V3 (no upgrade) determines how subsequent reads route. + // Either way, after the first emission we fall through to base so V3 + // builds don't loop forever against PeerTransitionedError. + transitionEmitted atomic.Bool } func (s *peerSeekable) Size(ctx context.Context) (int64, error) { @@ -69,14 +77,11 @@ func (s *peerSeekable) OpenRangeReader(ctx context.Context, off int64, length in }, nil }, func(ctx context.Context, base storage.Seekable) (io.ReadCloser, error) { - // Signal the caller to swap to V4 headers if compressed headers are available. 
- if s.uploaded != nil { - if hdrs := s.uploaded.Load(); hdrs != nil && (len(hdrs.MemfileHeader) > 0 || len(hdrs.RootfsHeader) > 0) { - return nil, &storage.PeerTransitionedError{ - MemfileHeader: hdrs.MemfileHeader, - RootfsHeader: hdrs.RootfsHeader, - } - } + // Signal the caller once to fetch the post-upload header from storage; + // thereafter fall through so V3 builds (no V4 to upgrade to) don't + // loop against PeerTransitionedError. + if s.uploaded != nil && s.uploaded.Load() && s.transitionEmitted.CompareAndSwap(false, true) { + return nil, &storage.PeerTransitionedError{} } return base.OpenRangeReader(ctx, off, length, frameTable) @@ -101,7 +106,7 @@ func openPeerSeekableStream( ctx context.Context, client orchestrator.ChunkServiceClient, req *orchestrator.ReadAtBuildSeekableRequest, - uploaded *atomic.Pointer[UploadedHeaders], + uploaded *atomic.Bool, ) (func() ([]byte, error), error) { stream, err := client.ReadAtBuildSeekable(ctx, req) if err != nil { diff --git a/packages/orchestrator/pkg/sandbox/template/peerclient/seekable_test.go b/packages/orchestrator/pkg/sandbox/template/peerclient/seekable_test.go index 15f0a0c051..60ae758604 100644 --- a/packages/orchestrator/pkg/sandbox/template/peerclient/seekable_test.go +++ b/packages/orchestrator/pkg/sandbox/template/peerclient/seekable_test.go @@ -25,7 +25,7 @@ func TestPeerSeekable_Size_PeerSucceeds(t *testing.T) { return req.GetBuildId() == "build-1" && req.GetFileName() == storage.MemfileName })).Return(&orchestrator.GetBuildFileSizeResponse{TotalSize: 4096}, nil) - s := &peerSeekable{peerHandle: peerHandle[storage.Seekable]{client: client, buildID: "build-1", fileName: storage.MemfileName, uploaded: &atomic.Pointer[UploadedHeaders]{}}} + s := &peerSeekable{peerHandle: peerHandle[storage.Seekable]{client: client, buildID: "build-1", fileName: storage.MemfileName, uploaded: &atomic.Bool{}}} size, err := s.Size(t.Context()) require.NoError(t, err) assert.Equal(t, int64(4096), size) @@ -47,7 +47,7 @@ 
func TestPeerSeekable_Size_PeerNotAvailable_FallsBackToBase(t *testing.T) { client: client, buildID: "build-1", fileName: storage.MemfileName, - uploaded: &atomic.Pointer[UploadedHeaders]{}, + uploaded: &atomic.Bool{}, openFn: func(ctx context.Context) (storage.Seekable, error) { return base.OpenSeekable(ctx, "build-1/memfile", storage.MemfileObjectType) }, @@ -71,7 +71,7 @@ func TestPeerSeekable_OpenRangeReader_PeerSucceeds(t *testing.T) { return req.GetOffset() == 10 && req.GetLength() == int64(len(data)) })).Return(stream, nil) - s := &peerSeekable{peerHandle: peerHandle[storage.Seekable]{client: client, buildID: "build-1", fileName: storage.MemfileName, uploaded: &atomic.Pointer[UploadedHeaders]{}}} + s := &peerSeekable{peerHandle: peerHandle[storage.Seekable]{client: client, buildID: "build-1", fileName: storage.MemfileName, uploaded: &atomic.Bool{}}} rc, err := s.OpenRangeReader(t.Context(), 10, int64(len(data)), nil) require.NoError(t, err) defer rc.Close() @@ -98,7 +98,7 @@ func TestPeerSeekable_OpenRangeReader_PeerError_FallsBackToBase(t *testing.T) { client: client, buildID: "build-1", fileName: storage.MemfileName, - uploaded: &atomic.Pointer[UploadedHeaders]{}, + uploaded: &atomic.Bool{}, openFn: func(ctx context.Context) (storage.Seekable, error) { return base.OpenSeekable(ctx, "build-1/memfile", storage.MemfileObjectType) }, @@ -112,19 +112,13 @@ func TestPeerSeekable_OpenRangeReader_PeerError_FallsBackToBase(t *testing.T) { assert.Equal(t, baseData, got) } -func TestPeerSeekable_OpenRangeReader_UploadedHeaders_ReturnsPeerTransitionedError(t *testing.T) { +func TestPeerSeekable_OpenRangeReader_Uploaded_ReturnsPeerTransitionedError(t *testing.T) { t.Parallel() - memHeader := []byte("mem-header-v4") - rootHeader := []byte("root-header-v4") - client := orchestratormocks.NewMockChunkServiceClient(t) - uploaded := &atomic.Pointer[UploadedHeaders]{} - uploaded.Store(&UploadedHeaders{ - MemfileHeader: memHeader, - RootfsHeader: rootHeader, - }) + uploaded := 
&atomic.Bool{} + uploaded.Store(true) baseSeekable := storage.NewMockSeekable(t) base := storage.NewMockStorageProvider(t) @@ -140,46 +134,9 @@ func TestPeerSeekable_OpenRangeReader_UploadedHeaders_ReturnsPeerTransitionedErr }, }} - // frameTable=nil triggers the transition header check in the fallback path _, err := s.OpenRangeReader(t.Context(), 0, 100, nil) require.Error(t, err) var transErr *storage.PeerTransitionedError require.ErrorAs(t, err, &transErr) - assert.Equal(t, memHeader, transErr.MemfileHeader) - assert.Equal(t, rootHeader, transErr.RootfsHeader) -} - -func TestPeerSeekable_OpenRangeReader_UploadedSkipsPeer(t *testing.T) { - t.Parallel() - - client := orchestratormocks.NewMockChunkServiceClient(t) - - uploaded := &atomic.Pointer[UploadedHeaders]{} - uploaded.Store(&UploadedHeaders{}) - - baseData := []byte("from gcs") - baseSeekable := storage.NewMockSeekable(t) - baseSeekable.EXPECT().OpenRangeReader(mock.Anything, int64(0), int64(len(baseData)), (*storage.FrameTable)(nil)).Return(io.NopCloser(bytes.NewReader(baseData)), nil) - - base := storage.NewMockStorageProvider(t) - base.EXPECT().OpenSeekable(mock.Anything, "build-1/memfile", storage.MemfileObjectType).Return(baseSeekable, nil) - - s := &peerSeekable{peerHandle: peerHandle[storage.Seekable]{ - client: client, - buildID: "build-1", - fileName: storage.MemfileName, - uploaded: uploaded, - openFn: func(ctx context.Context) (storage.Seekable, error) { - return base.OpenSeekable(ctx, "build-1/memfile", storage.MemfileObjectType) - }, - }} - - rc, err := s.OpenRangeReader(t.Context(), 0, int64(len(baseData)), nil) - require.NoError(t, err) - defer rc.Close() - - got, err := io.ReadAll(rc) - require.NoError(t, err) - assert.Equal(t, baseData, got) } diff --git a/packages/orchestrator/pkg/sandbox/template/peerclient/storage.go b/packages/orchestrator/pkg/sandbox/template/peerclient/storage.go index f2f9b25289..ec9fc46945 100644 --- a/packages/orchestrator/pkg/sandbox/template/peerclient/storage.go 
+++ b/packages/orchestrator/pkg/sandbox/template/peerclient/storage.go @@ -110,15 +110,15 @@ var _ storage.StorageProvider = (*peerStorageProvider)(nil) type peerStorageProvider struct { base storage.StorageProvider peerClient orchestrator.ChunkServiceClient - // uploaded is set when the peer signals GCS upload is complete (use_storage=true). - // Once non-nil, all subsequent reads skip the peer and go to base. - uploaded *atomic.Pointer[UploadedHeaders] + // uploaded is set to true when the peer signals that GCS upload is complete + // (use_storage=true). Once set, all subsequent reads skip the peer and go to base. + uploaded *atomic.Bool } func newPeerStorageProvider( base storage.StorageProvider, peerClient orchestrator.ChunkServiceClient, - uploaded *atomic.Pointer[UploadedHeaders], + uploaded *atomic.Bool, ) storage.StorageProvider { return &peerStorageProvider{ base: base, @@ -167,18 +167,14 @@ func (p *peerStorageProvider) GetDetails() string { return p.base.GetDetails() } -// checkPeerAvailability marks the build as uploaded when UseStorage is set. -func checkPeerAvailability(avail *orchestrator.PeerAvailability, uploaded *atomic.Pointer[UploadedHeaders]) bool { +// checkPeerAvailability also marks the uploaded flag when UseStorage is set. 
+func checkPeerAvailability(avail *orchestrator.PeerAvailability, uploaded *atomic.Bool) bool { if avail.GetNotAvailable() { return false } if avail.GetUseStorage() { - hdrs := &UploadedHeaders{ - MemfileHeader: avail.GetMemfileHeader(), - RootfsHeader: avail.GetRootfsHeader(), - } - uploaded.Store(hdrs) + uploaded.Store(true) return false } @@ -190,7 +186,7 @@ type peerHandle[Base any] struct { client orchestrator.ChunkServiceClient buildID string fileName string - uploaded *atomic.Pointer[UploadedHeaders] + uploaded *atomic.Bool mu sync.Mutex base Base @@ -241,7 +237,7 @@ func withPeerFallback[Base, T any]( )) defer span.End() - if h.uploaded.Load() == nil { + if !h.uploaded.Load() { timer := peerReadTimerFactory.Begin(opAttr) res, err := peerFn(ctx) diff --git a/packages/orchestrator/pkg/sandbox/template/peerclient/storage_test.go b/packages/orchestrator/pkg/sandbox/template/peerclient/storage_test.go index 8ec3f79c70..7e9e895c3f 100644 --- a/packages/orchestrator/pkg/sandbox/template/peerclient/storage_test.go +++ b/packages/orchestrator/pkg/sandbox/template/peerclient/storage_test.go @@ -29,7 +29,7 @@ func TestPeerStorageProvider_OpenBlob_ExtractsFileName(t *testing.T) { base := storage.NewMockStorageProvider(t) - p := newPeerStorageProvider(base, client, &atomic.Pointer[UploadedHeaders]{}) + p := newPeerStorageProvider(base, client, &atomic.Bool{}) blob, err := p.OpenBlob(t.Context(), "build-1/snapfile", storage.SnapfileObjectType) require.NoError(t, err) @@ -49,7 +49,7 @@ func TestPeerStorageProvider_OpenSeekable_ExtractsFileName(t *testing.T) { base := storage.NewMockStorageProvider(t) - p := newPeerStorageProvider(base, client, &atomic.Pointer[UploadedHeaders]{}) + p := newPeerStorageProvider(base, client, &atomic.Bool{}) ff, err := p.OpenSeekable(t.Context(), "build-1/memfile", storage.MemfileObjectType) require.NoError(t, err) diff --git a/packages/orchestrator/pkg/sandbox/template/peerserver/header.go 
b/packages/orchestrator/pkg/sandbox/template/peerserver/header.go index 44de5c56bd..835553d18f 100644 --- a/packages/orchestrator/pkg/sandbox/template/peerserver/header.go +++ b/packages/orchestrator/pkg/sandbox/template/peerserver/header.go @@ -35,7 +35,18 @@ func (f *headerSource) Stream(ctx context.Context, sender Sender) error { return ErrNotAvailable } - data, err := header.SerializeHeader(h) + // V4 headers served via P2P are always for in-flight builds β€” peers stop + // being routed once the upload finalizes (peerStorageProvider switches to + // base/GCS via the uploaded flag). Force the wire bit on regardless of + // the in-memory state so consumers reliably treat these bytes as a + // pending diff and refresh from GCS once the upload lands. V3 has no + // in-flight notion on the wire, so it ships as-is and is treated as final. + wire := *h + if wire.Metadata.Version >= header.MetadataVersionV4 { + wire.IncompletePendingUpload = true + } + + data, err := header.SerializeHeader(&wire) if err != nil { span.RecordError(err) diff --git a/packages/orchestrator/pkg/sandbox/template/storage.go b/packages/orchestrator/pkg/sandbox/template/storage.go index e7febea4a1..3e0308e178 100644 --- a/packages/orchestrator/pkg/sandbox/template/storage.go +++ b/packages/orchestrator/pkg/sandbox/template/storage.go @@ -147,6 +147,10 @@ func (d *Storage) Header() *header.Header { return d.source.Header() } +func (d *Storage) SwapHeader(h *header.Header) { + d.source.SwapHeader(h) +} + func (d *Storage) Close() error { return nil } diff --git a/packages/orchestrator/pkg/sandbox/uploads.go b/packages/orchestrator/pkg/sandbox/uploads.go new file mode 100644 index 0000000000..82f9c36d83 --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/uploads.go @@ -0,0 +1,218 @@ +package sandbox + +import ( + "context" + "errors" + "fmt" + "time" + + "github.com/google/uuid" + "github.com/jellydator/ttlcache/v3" + "github.com/redis/go-redis/v9" + "go.opentelemetry.io/otel/attribute" + 
"go.opentelemetry.io/otel/trace" + "go.uber.org/zap" + + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/block" + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/build" + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/template" + "github.com/e2b-dev/infra/packages/shared/pkg/logger" + "github.com/e2b-dev/infra/packages/shared/pkg/storage" + "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" + "github.com/e2b-dev/infra/packages/shared/pkg/telemetry" + "github.com/e2b-dev/infra/packages/shared/pkg/utils" +) + +var ( + errUploadInFlight = errors.New("upload already in flight for build") + ErrBuildNotInCache = errors.New("build not in template cache") +) + +const ( + futureTTL = 1 * time.Hour + + // refreshHeaderBudget bounds how long an upload Wait polls GCS for a + // parent's V4 header. Crosses orchestrators: A may still be uploading on a + // remote orch when B's runV4 calls Wait(A) here. Matches the per-upload + // bound in server.uploadTimeout β€” anything longer means the parent's + // upload is itself stuck and would have failed on its own. + refreshHeaderBudget = 20 * time.Minute + + // uploadDoneChannelPrefix is the Redis pub/sub channel prefix for per-build + // upload-finished signals. Empty payload = success; non-empty = upload error. + uploadDoneChannelPrefix = "orchestrator.upload.done." // followed by buildID String +) + +type templateLookup interface { + GetCachedTemplate(buildID string) (template.Template, bool) +} + +// Uploads is the in-flight upload table. Each entry's future fires when its +// build's V4 header has been swapped, gating child layers that depend on it. +// +// Cross-orch coordination uses Redis pub/sub on per-build channels: the +// uploader publishes on Finish, consumers subscribe inside Wait while polling +// GCS. The Redis client is optional β€” nil falls back to ticker-only polling. 
+type Uploads struct { + tc templateLookup + persistence storage.StorageProvider + redis redis.UniversalClient + + futures *ttlcache.Cache[uuid.UUID, *utils.ErrorOnce] +} + +func NewUploads(tc *template.Cache, persistence storage.StorageProvider, redisClient redis.UniversalClient) *Uploads { + futures := ttlcache.New( + ttlcache.WithTTL[uuid.UUID, *utils.ErrorOnce](futureTTL), + ) + go futures.Start() + + return &Uploads{tc: tc, persistence: persistence, redis: redisClient, futures: futures} +} + +func (u *Uploads) Stop() { + u.futures.Stop() +} + +// Start replaces a finished future at the same key; rejects an in-flight one. +// Build IDs are unique per upload so concurrent Starts for the same key are +// not expected — the in-flight check only guards against accidental misuse. +func (u *Uploads) Start(buildID uuid.UUID) (*utils.ErrorOnce, error) { + if existing := u.futures.Get(buildID); existing != nil { + select { + case <-existing.Value().Done(): + default: + return nil, fmt.Errorf("%w: %s", errUploadInFlight, buildID) + } + } + + fut := utils.NewErrorOnce() + u.futures.Set(buildID, fut, ttlcache.DefaultTTL) + + return fut, nil +} + +// Wait returns the parent's post-upload V4 header. Same-orch waits on the local +// future; cross-orch refreshes from GCS when the locally-cached header is +// stale, optionally accelerated by a per-call Redis subscription. 
+func (u *Uploads) Wait(ctx context.Context, buildID uuid.UUID, t build.DiffType) (*header.Header, error) { + ctx, span := tracer.Start(ctx, "wait-for-parent-upload", trace.WithAttributes( + telemetry.WithBuildID(buildID.String()), + attribute.String("file_type", string(t)), + )) + defer span.End() + + if item := u.futures.Get(buildID); item != nil { + if err := item.Value().WaitWithContext(ctx); err != nil { + return nil, fmt.Errorf("wait for upload %s: %w", buildID, err) + } + } + + d, err := u.find(ctx, buildID, t) + if errors.Is(err, ErrBuildNotInCache) { + // Ancestor never resumed locally (typical for grand-grandparents + // reached via mappings). It's necessarily finalized — load directly + // from GCS without an in-memory device or future to track. + hdrPath := storage.Paths{BuildID: buildID.String()}.HeaderFile(string(t)) + + return header.LoadHeader(ctx, u.persistence, hdrPath) + } + if err != nil { + return nil, err + } + + h := d.Header() + if h.IncompletePendingUpload { + // The only way we can still have an incomplete header at this point is + // the P2P path. We already waited on the local upload future and it did + // not finalize the header. 
+ ctx, cancel := context.WithCancel(ctx) + defer cancel() + + h, err = build.PollRemoteStorageForHeader(ctx, u.persistence, buildID, t, u.subscribe(ctx, buildID), refreshHeaderBudget) + if err != nil { + return nil, err + } + d.SwapHeader(h) + } + + return h, nil +} + +func (u *Uploads) find(ctx context.Context, buildID uuid.UUID, t build.DiffType) (block.ReadonlyDevice, error) { + tpl, ok := u.tc.GetCachedTemplate(buildID.String()) + if !ok { + return nil, fmt.Errorf("build %s: %w", buildID, ErrBuildNotInCache) + } + + switch t { + case build.Memfile: + return tpl.Memfile(ctx) + case build.Rootfs: + return tpl.Rootfs() + default: + return nil, fmt.Errorf("unsupported file type: %s", t) + } +} + +// --- Cross-orch upload-done signaling (Redis pub/sub on per-build channels) --- + +func uploadDoneChannel(buildID uuid.UUID) string { + return uploadDoneChannelPrefix + buildID.String() +} + +// publishUploadDoneToRedis broadcasts an upload-finished signal so cross-orch waiters can stop +// polling. Best-effort; failures fall through to the ticker poll. Empty +// payload = success; non-empty = the upload error message. +func (u *Uploads) publishUploadDoneToRedis(ctx context.Context, buildID uuid.UUID, uploadErr error) { + if u.redis == nil { + return + } + + payload := "" + if uploadErr != nil { + payload = uploadErr.Error() + } + + if err := u.redis.Publish(ctx, uploadDoneChannel(buildID), payload).Err(); err != nil { + logger.L().Warn(ctx, "failed to publish upload-done signal", + logger.WithBuildID(buildID.String()), + zap.Error(err), + ) + } +} + +// subscribe opens a per-call SUBSCRIBE on buildID's upload-done channel and +// returns a channel that fires once with the upload outcome. The subscription +// is torn down when ctx cancels (caller must use a derived context). Returns +// a nil channel when Redis is not configured — nil channels never fire, so +// LoadV4 cleanly degrades to ticker-only polling. 
+func (u *Uploads) subscribe(ctx context.Context, buildID uuid.UUID) <-chan error { + if u.redis == nil { + return nil + } + + out := make(chan error, 1) + + go func() { + ps := u.redis.Subscribe(ctx, uploadDoneChannel(buildID)) + defer ps.Close() + + msg, err := ps.ReceiveMessage(ctx) + if err != nil { + return // ctx cancelled or connection error: silent (ticker covers) + } + + var uploadErr error + if msg.Payload != "" { + uploadErr = errors.New(msg.Payload) + } + + select { + case out <- uploadErr: + case <-ctx.Done(): + } + }() + + return out +} diff --git a/packages/orchestrator/pkg/sandbox/uploads_test.go b/packages/orchestrator/pkg/sandbox/uploads_test.go new file mode 100644 index 0000000000..d502364b33 --- /dev/null +++ b/packages/orchestrator/pkg/sandbox/uploads_test.go @@ -0,0 +1,231 @@ +package sandbox + +import ( + "context" + "errors" + "sync" + "sync/atomic" + "testing" + "time" + + "github.com/google/uuid" + "github.com/jellydator/ttlcache/v3" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + blockmocks "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/block/mocks" + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/build" + "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/template" + templatemocks "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox/template/mocks" + headers "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" + "github.com/e2b-dev/infra/packages/shared/pkg/utils" +) + +type fakeCache struct { + mu sync.Mutex + m map[string]template.Template +} + +func newFakeCache() *fakeCache { + return &fakeCache{m: make(map[string]template.Template)} +} + +func (f *fakeCache) GetCachedTemplate(buildID string) (template.Template, bool) { + f.mu.Lock() + defer f.mu.Unlock() + t, ok := f.m[buildID] + + return t, ok +} + +func (f *fakeCache) put(buildID string, tpl template.Template) { + f.mu.Lock() + defer f.mu.Unlock() + 
f.m[buildID] = tpl +} + +func newUploads(t *testing.T) (*Uploads, *fakeCache) { + t.Helper() + cache := newFakeCache() + futures := ttlcache.New( + ttlcache.WithTTL[uuid.UUID, *utils.ErrorOnce](futureTTL), + ) + go futures.Start() + t.Cleanup(futures.Stop) + + return &Uploads{ + tc: cache, + futures: futures, + }, cache +} + +func putFinalHeader(t *testing.T, cache *fakeCache, buildID uuid.UUID, fileType build.DiffType) { + t.Helper() + tpl := templatemocks.NewMockTemplate(t) + dev := blockmocks.NewMockReadonlyDevice(t) + dev.EXPECT().Header().Return(&headers.Header{ + Metadata: &headers.Metadata{Version: headers.MetadataVersionV4}, + Builds: map[uuid.UUID]headers.BuildData{buildID: {}}, // self-entry → not stale + }).Maybe() + + switch fileType { + case build.Memfile: + tpl.EXPECT().Memfile(mock.Anything).Return(dev, nil).Maybe() + case build.Rootfs: + tpl.EXPECT().Rootfs().Return(dev, nil).Maybe() + } + + cache.put(buildID.String(), tpl) +} + +func TestUploads_BeginDistinctIDsAreIndependent(t *testing.T) { + t.Parallel() + c, _ := newUploads(t) + + a := uuid.New() + b := uuid.New() + + futA, err := c.Start(a) + require.NoError(t, err) + futB, err := c.Start(b) + require.NoError(t, err) + + require.NotSame(t, futA, futB) + require.NoError(t, futA.SetSuccess()) + + select { + case <-futB.Done(): + t.Fatal("futB should not be done after only futA fires") + default: + } +} + +func TestUploads_Wait_BlocksUntilSet(t *testing.T) { + t.Parallel() + c, cache := newUploads(t) + + id := uuid.New() + putFinalHeader(t, cache, id, build.Memfile) + fut, err := c.Start(id) + require.NoError(t, err) + + done := make(chan struct{}) + go func() { + _, _ = c.Wait(context.Background(), id, build.Memfile) + close(done) + }() + + select { + case <-done: + t.Fatal("Wait should block until the future fires") + case <-time.After(50 * time.Millisecond): + } + + require.NoError(t, fut.SetSuccess()) + + select { + case <-done: + case <-time.After(time.Second): + t.Fatal("Wait should return 
after future fires") + } +} + +func TestUploads_Wait_PropagatesUploadError(t *testing.T) { + t.Parallel() + c, cache := newUploads(t) + + id := uuid.New() + putFinalHeader(t, cache, id, build.Memfile) + fut, err := c.Start(id) + require.NoError(t, err) + + uploadErr := errors.New("upload exploded") + require.NoError(t, fut.SetError(uploadErr)) + + _, err = c.Wait(context.Background(), id, build.Memfile) + require.ErrorIs(t, err, uploadErr) +} + +func TestUploads_Wait_ContextCancellation(t *testing.T) { + t.Parallel() + c, _ := newUploads(t) + + id := uuid.New() + _, err := c.Start(id) // never signaled + require.NoError(t, err) + + ctx, cancel := context.WithCancel(context.Background()) + + errCh := make(chan error, 1) + go func() { + _, err := c.Wait(ctx, id, build.Memfile) + errCh <- err + }() + + cancel() + + select { + case err := <-errCh: + require.ErrorIs(t, err, context.Canceled) + case <-time.After(time.Second): + t.Fatal("Wait should return on context cancel") + } +} + +func TestUploads_Wait_NoFuture_ReadsFromCache(t *testing.T) { + t.Parallel() + c, cache := newUploads(t) + + id := uuid.New() + want := &headers.Header{ + Metadata: &headers.Metadata{Version: headers.MetadataVersionV4}, + Builds: map[uuid.UUID]headers.BuildData{id: {}}, + } + + tpl := templatemocks.NewMockTemplate(t) + dev := blockmocks.NewMockReadonlyDevice(t) + dev.EXPECT().Header().Return(want) + tpl.EXPECT().Rootfs().Return(dev, nil) + cache.put(id.String(), tpl) + + got, err := c.Wait(context.Background(), id, build.Rootfs) + require.NoError(t, err) + require.Same(t, want, got) +} + +func TestUploads_ConcurrentBeginsAndWaits(t *testing.T) { + t.Parallel() + c, cache := newUploads(t) + + const n = 10 + + ids := make([]uuid.UUID, n) + futs := make([]*utils.ErrorOnce, n) + for i := range n { + ids[i] = uuid.New() + putFinalHeader(t, cache, ids[i], build.Memfile) + fut, err := c.Start(ids[i]) + require.NoError(t, err) + futs[i] = fut + } + + var done atomic.Int32 + var wg sync.WaitGroup + 
for i := range n { + wg.Add(1) + go func(i int) { + defer wg.Done() + if _, err := c.Wait(context.Background(), ids[i], build.Memfile); err == nil { + done.Add(1) + } + }(i) + } + + for i := range n { + require.NoError(t, futs[i].SetSuccess()) + } + + wg.Wait() + assert.Equal(t, int32(n), done.Load()) +} diff --git a/packages/orchestrator/pkg/server/chunks.go b/packages/orchestrator/pkg/server/chunks.go index 387532e590..39f2857a46 100644 --- a/packages/orchestrator/pkg/server/chunks.go +++ b/packages/orchestrator/pkg/server/chunks.go @@ -45,18 +45,11 @@ func toGRPCError(err error) error { } func (s *Server) buildUploadedResponse(buildID string) *orchestrator.PeerAvailability { - cacheItem := s.uploadedBuilds.Get(buildID) - if cacheItem == nil { + if s.uploadedBuilds.Get(buildID) == nil { return nil } - hdrs := cacheItem.Value() - - return &orchestrator.PeerAvailability{ - UseStorage: true, - MemfileHeader: hdrs.memfileHeader, - RootfsHeader: hdrs.rootfsHeader, - } + return &orchestrator.PeerAvailability{UseStorage: true} } func (s *Server) GetBuildFileSize(ctx context.Context, req *orchestrator.GetBuildFileSizeRequest) (*orchestrator.GetBuildFileSizeResponse, error) { diff --git a/packages/orchestrator/pkg/server/main.go b/packages/orchestrator/pkg/server/main.go index e56357e907..fff2ee386a 100644 --- a/packages/orchestrator/pkg/server/main.go +++ b/packages/orchestrator/pkg/server/main.go @@ -28,11 +28,6 @@ import ( // templates they refer to and are cleaned up automatically. 
const uploadedBuildsTTL = 1 * time.Hour -type uploadedBuildHeaders struct { - memfileHeader []byte - rootfsHeader []byte -} - type Server struct { orchestrator.UnimplementedSandboxServiceServer orchestrator.UnimplementedChunkServiceServer @@ -49,7 +44,8 @@ type Server struct { sbxEventsService *events.EventsService startingSandboxes *semaphore.Weighted peerRegistry peerclient.Registry - uploadedBuilds *ttlcache.Cache[string, *uploadedBuildHeaders] + uploadedBuilds *ttlcache.Cache[string, struct{}] + uploads *sandbox.Uploads sandboxCreateDuration metric.Int64Histogram } @@ -66,11 +62,12 @@ type ServiceConfig struct { FeatureFlags *featureflags.Client SbxEventsService *events.EventsService PeerRegistry peerclient.Registry + Uploads *sandbox.Uploads } func New(cfg ServiceConfig) (*Server, error) { uploadedBuilds := ttlcache.New( - ttlcache.WithTTL[string, *uploadedBuildHeaders](uploadedBuildsTTL), + ttlcache.WithTTL[string, struct{}](uploadedBuildsTTL), ) go uploadedBuilds.Start() @@ -88,6 +85,7 @@ func New(cfg ServiceConfig) (*Server, error) { startingSandboxes: semaphore.NewWeighted(maxStartingInstancesPerNode), peerRegistry: cfg.PeerRegistry, uploadedBuilds: uploadedBuilds, + uploads: cfg.Uploads, } meter := cfg.Tel.MeterProvider.Meter("github.com/e2b-dev/infra/packages/orchestrator/pkg/server") @@ -109,3 +107,9 @@ func New(cfg ServiceConfig) (*Server, error) { return server, nil } + +func (s *Server) Close() error { + s.uploadedBuilds.Stop() + + return nil +} diff --git a/packages/orchestrator/pkg/server/sandboxes.go b/packages/orchestrator/pkg/server/sandboxes.go index e53df28e43..bf65f2e93d 100644 --- a/packages/orchestrator/pkg/server/sandboxes.go +++ b/packages/orchestrator/pkg/server/sandboxes.go @@ -666,8 +666,8 @@ func (s *Server) Checkpoint(ctx context.Context, in *orchestrator.SandboxCheckpo uploadCtx, cancel := context.WithTimeout(context.WithoutCancel(ctx), uploadTimeout) defer cancel() - memHdr, rootHdr, err := res.snapshot.Upload(uploadCtx, 
s.persistence, res.paths, s.config.StorageConfig.CompressConfig, s.featureFlags, storage.UseCasePause) - defer res.completeUpload(uploadCtx, memHdr, rootHdr) + err := res.upload.Run(uploadCtx) + defer res.completeUpload(uploadCtx, err) if err != nil { telemetry.ReportCriticalError(ctx, "error uploading snapshot for checkpoint", err, telemetry.WithSandboxID(in.GetSandboxId())) @@ -721,9 +721,8 @@ func (s *Server) getSandboxExecutionData(sbx *sandbox.Sandbox) map[string]any { // need to start the background GCS upload. type snapshotResult struct { meta metadata.Template - snapshot *sandbox.Snapshot - paths storage.Paths - completeUpload func(ctx context.Context, memfileHdr, rootfsHdr []byte) + upload *sandbox.Upload + completeUpload func(ctx context.Context, uploadErr error) } // snapshotAndCacheSandbox creates a snapshot of a sandbox and adds it to the local @@ -764,42 +763,43 @@ func (s *Server) snapshotAndCacheSandbox( return nil, fmt.Errorf("error adding snapshot to template cache: %w", err) } + // Register the upload only after the snapshot is in the local cache, so a + // failed AddSnapshot doesn't leave an orphan future blocking re-registration. + upload, err := sandbox.NewUpload(ctx, s.uploads, snapshot, s.persistence, s.config.StorageConfig.CompressConfig, s.featureFlags, storage.UseCasePause) + if err != nil { + return nil, fmt.Errorf("register upload: %w", err) + } + telemetry.ReportEvent(ctx, "added snapshot to template cache") - paths := storage.Paths{BuildID: meta.Template.BuildID} + // Capture once so Register and the symmetric Unregister inside + // completeUpload don't drift if the flag flips mid-upload. + peerEnabled := s.featureFlags.BoolFlag(ctx, featureflags.PeerToPeerChunkTransferFlag) - // Register in Redis so other orchestrators can find us for peer routing. 
- if s.featureFlags.BoolFlag(ctx, featureflags.PeerToPeerChunkTransferFlag) { - if err := s.peerRegistry.Register(ctx, meta.Template.BuildID, redisPeerKeyTTL); err != nil { - logger.L().Warn(ctx, "failed to register peer address for routing", zap.String("build_id", meta.Template.BuildID), zap.Error(err)) + completeUpload := func(ctx context.Context, uploadErr error) { + upload.Finish(ctx, uploadErr) + + if !peerEnabled { + return } - completeUpload := func(ctx context.Context, memfileHdr, rootfsHdr []byte) { - // Signal in-flight peer streams to switch to GCS. - s.uploadedBuilds.Set(meta.Template.BuildID, &uploadedBuildHeaders{ - memfileHeader: memfileHdr, - rootfsHeader: rootfsHdr, - }, ttlcache.DefaultTTL) + s.uploadedBuilds.Set(meta.Template.BuildID, struct{}{}, ttlcache.DefaultTTL) - // Remove from Redis so new nodes go directly to GCS. - if err := s.peerRegistry.Unregister(ctx, meta.Template.BuildID); err != nil { - logger.L().Warn(ctx, "failed to unregister peer address from routing", zap.String("build_id", meta.Template.BuildID), zap.Error(err)) - } + if err := s.peerRegistry.Unregister(ctx, meta.Template.BuildID); err != nil { + logger.L().Warn(ctx, "failed to unregister peer address from routing", zap.String("build_id", meta.Template.BuildID), zap.Error(err)) } + } - return &snapshotResult{ - meta: meta, - snapshot: snapshot, - paths: paths, - completeUpload: completeUpload, - }, nil + if peerEnabled { + if err := s.peerRegistry.Register(ctx, meta.Template.BuildID, redisPeerKeyTTL); err != nil { + logger.L().Warn(ctx, "failed to register peer address for routing", zap.String("build_id", meta.Template.BuildID), zap.Error(err)) + } } return &snapshotResult{ meta: meta, - snapshot: snapshot, - paths: paths, - completeUpload: func(context.Context, []byte, []byte) {}, + upload: upload, + completeUpload: completeUpload, }, nil } @@ -812,14 +812,17 @@ func (s *Server) uploadSnapshotAsync(ctx context.Context, sbx *sandbox.Sandbox, go func() { defer cancel() - 
memHdr, rootHdr, err := res.snapshot.Upload(ctx, s.persistence, res.paths, s.config.StorageConfig.CompressConfig, s.featureFlags, storage.UseCasePause) + ctx, span := tracer.Start(ctx, "upload snapshot") + defer span.End() + + err := res.upload.Run(ctx) if err != nil { sbxlogger.I(sbx).Error(ctx, "error uploading snapshot files", zap.Error(err)) } else { sbxlogger.I(sbx).Info(ctx, "snapshot finished uploading successfully") } - res.completeUpload(ctx, memHdr, rootHdr) + res.completeUpload(ctx, err) }() } diff --git a/packages/orchestrator/pkg/template/build/builder.go b/packages/orchestrator/pkg/template/build/builder.go index f30a069d5d..425ab7501c 100644 --- a/packages/orchestrator/pkg/template/build/builder.go +++ b/packages/orchestrator/pkg/template/build/builder.go @@ -61,6 +61,7 @@ type Builder struct { templateCache *sbxtemplate.Cache metrics *metrics.BuildMetrics featureFlags *featureflags.Client + uploads *sandbox.Uploads } func NewBuilder( @@ -76,6 +77,7 @@ func NewBuilder( sandboxes *sandbox.Map, templateCache *sbxtemplate.Cache, buildMetrics *metrics.BuildMetrics, + uploads *sandbox.Uploads, ) *Builder { return &Builder{ config: config, @@ -90,6 +92,7 @@ func NewBuilder( sandboxes: sandboxes, templateCache: templateCache, metrics: buildMetrics, + uploads: uploads, } } @@ -259,8 +262,6 @@ func runBuild( index := cache.NewHashIndex(bc.CacheScope, builder.buildStorage, templateStorage) - uploadTracker := layer.NewUploadTracker() - layerExecutor := layer.NewLayerExecutor( bc, builder.logger, @@ -270,7 +271,7 @@ func runBuild( templateStorage, builder.buildStorage, index, - uploadTracker, + builder.uploads, builder.config.StorageConfig.CompressConfig, builder.featureFlags, ) diff --git a/packages/orchestrator/pkg/template/build/layer/layer_executor.go b/packages/orchestrator/pkg/template/build/layer/layer_executor.go index 739e6878ca..cf5294ee42 100644 --- a/packages/orchestrator/pkg/template/build/layer/layer_executor.go +++ 
b/packages/orchestrator/pkg/template/build/layer/layer_executor.go @@ -34,7 +34,7 @@ type LayerExecutor struct { templateStorage storage.StorageProvider buildStorage storage.StorageProvider index cache.Index - uploadTracker *UploadTracker + uploads *sandbox.Uploads compressConfig storage.CompressConfig ff *featureflags.Client } @@ -48,7 +48,7 @@ func NewLayerExecutor( templateStorage storage.StorageProvider, buildStorage storage.StorageProvider, index cache.Index, - uploadTracker *UploadTracker, + uploads *sandbox.Uploads, compressConfig storage.CompressConfig, ff *featureflags.Client, ) *LayerExecutor { @@ -63,7 +63,7 @@ func NewLayerExecutor( templateStorage: templateStorage, buildStorage: buildStorage, index: index, - uploadTracker: uploadTracker, + uploads: uploads, compressConfig: compressConfig, ff: ff, } @@ -283,35 +283,21 @@ func (lb *LayerExecutor) PauseAndUpload( // Upload snapshot async, it's added to the template cache immediately userLogger.Debug(ctx, fmt.Sprintf("Saving: %s", meta.Template.BuildID)) - // Register this upload and get functions to signal completion and wait for previous uploads - completeUpload, waitForPreviousUploads := lb.uploadTracker.StartUpload() - uploader := sandbox.NewBuildUploader(ctx, snapshot, lb.templateStorage, storage.Paths{BuildID: meta.Template.BuildID}, lb.compressConfig, lb.ff, storage.UseCaseBuild, lb.uploadTracker.Pending()) + upload, err := sandbox.NewUpload(ctx, lb.uploads, snapshot, lb.templateStorage, lb.compressConfig, lb.ff, storage.UseCaseBuild) + if err != nil { + return fmt.Errorf("register upload: %w", err) + } - lb.UploadErrGroup.Go(func() error { + lb.UploadErrGroup.Go(func() (uploadErr error) { ctx := context.WithoutCancel(ctx) ctx, span := tracer.Start(ctx, "upload snapshot") defer span.End() - // Always signal completion to unblock waiting goroutines, even on error. 
- // This prevents deadlocks when an earlier layer fails - later layers can - // still unblock and the errgroup can properly collect all errors. - defer completeUpload() - - if err := uploader.UploadData(ctx); err != nil { - return fmt.Errorf("error uploading data files: %w", err) - } - - // Wait for all previous layer uploads to complete before saving the cache entry. - // This prevents race conditions where another build hits this cache entry - // before its dependencies (previous layers) are available in storage. - // For compressed builds, this also ensures all ancestor frame tables are - // available so headers can reference mappings from earlier layers. - if err := waitForPreviousUploads(ctx); err != nil { - return fmt.Errorf("error waiting for previous uploads: %w", err) - } + // Signal even on error so child layers waiting on this build can abort. + defer func() { upload.Finish(ctx, uploadErr) }() - if _, _, err := uploader.FinalizeHeaders(ctx); err != nil { - return fmt.Errorf("error finalizing headers: %w", err) + if err := upload.Run(ctx); err != nil { + return fmt.Errorf("error uploading snapshot: %w", err) } if err := lb.index.SaveLayerMeta(ctx, hash, cache.LayerMetadata{ diff --git a/packages/orchestrator/pkg/template/build/layer/upload_tracker.go b/packages/orchestrator/pkg/template/build/layer/upload_tracker.go deleted file mode 100644 index 72db831eea..0000000000 --- a/packages/orchestrator/pkg/template/build/layer/upload_tracker.go +++ /dev/null @@ -1,71 +0,0 @@ -package layer - -import ( - "context" - "sync" - - "github.com/e2b-dev/infra/packages/orchestrator/pkg/sandbox" -) - -// UploadTracker tracks in-flight uploads and allows waiting for all previous uploads to complete. -// This prevents race conditions where a layer's cache entry is saved before its -// dependencies (previous layers) are fully uploaded. -// -// It also owns a shared PendingBuildInfo that collects frame tables from compressed -// uploads across all layers. 
waitForPreviousUploads guarantees that by the time -// layer N finalizes its compressed headers, all upstream layers (0..N-1) have -// completed both their data and header uploads, so all upstream frame tables -// are available for cross-pollination. -type UploadTracker struct { - mu sync.Mutex - waitChs []chan struct{} - - // pending collects frame tables from compressed uploads across all layers. - pending *sandbox.PendingBuildInfo -} - -func NewUploadTracker() *UploadTracker { - return &UploadTracker{ - waitChs: make([]chan struct{}, 0), - pending: &sandbox.PendingBuildInfo{}, - } -} - -// Pending returns the shared PendingBuildInfo for collecting frame tables. -func (t *UploadTracker) Pending() *sandbox.PendingBuildInfo { - return t.pending -} - -// StartUpload registers that a new upload has started. -// Returns a function that should be called when the upload completes. -func (t *UploadTracker) StartUpload() (complete func(), waitForPrevious func(context.Context) error) { - t.mu.Lock() - defer t.mu.Unlock() - - // Create a channel for this upload - ch := make(chan struct{}) - t.waitChs = append(t.waitChs, ch) - - // Capture the channels we need to wait for (all previous uploads) - previousChs := make([]chan struct{}, len(t.waitChs)-1) - copy(previousChs, t.waitChs[:len(t.waitChs)-1]) - - complete = func() { - close(ch) - } - - waitForPrevious = func(ctx context.Context) error { - for _, prevCh := range previousChs { - select { - case <-prevCh: - // Previous upload completed - case <-ctx.Done(): - return ctx.Err() - } - } - - return nil - } - - return complete, waitForPrevious -} diff --git a/packages/orchestrator/pkg/template/build/layer/upload_tracker_test.go b/packages/orchestrator/pkg/template/build/layer/upload_tracker_test.go deleted file mode 100644 index 8b0923c6cb..0000000000 --- a/packages/orchestrator/pkg/template/build/layer/upload_tracker_test.go +++ /dev/null @@ -1,281 +0,0 @@ -package layer - -import ( - "context" - "sync" - "sync/atomic" - 
"testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestUploadTracker_SingleUpload(t *testing.T) { - t.Parallel() - - tracker := NewUploadTracker() - - complete, waitForPrevious := tracker.StartUpload() - - // First upload has no previous uploads to wait for - ctx := context.Background() - err := waitForPrevious(ctx) - require.NoError(t, err) - - complete() -} - -func TestUploadTracker_SequentialUploads(t *testing.T) { - t.Parallel() - - tracker := NewUploadTracker() - - // Start first upload - complete1, waitForPrevious1 := tracker.StartUpload() - - // Start second upload - complete2, waitForPrevious2 := tracker.StartUpload() - - // Start third upload - complete3, waitForPrevious3 := tracker.StartUpload() - - ctx := context.Background() - - // First upload has no dependencies - err := waitForPrevious1(ctx) - require.NoError(t, err) - complete1() - - // Second upload waits for first - err = waitForPrevious2(ctx) - require.NoError(t, err) - complete2() - - // Third upload waits for first and second - err = waitForPrevious3(ctx) - require.NoError(t, err) - complete3() -} - -func TestUploadTracker_WaitBlocksUntilComplete(t *testing.T) { - t.Parallel() - - tracker := NewUploadTracker() - - // Start first upload - complete1, _ := tracker.StartUpload() - - // Start second upload - _, waitForPrevious2 := tracker.StartUpload() - - // Second upload should block until first completes - done := make(chan struct{}) - go func() { - ctx := context.Background() - _ = waitForPrevious2(ctx) - close(done) - }() - - // Should not complete immediately - select { - case <-done: - t.Fatal("waitForPrevious should have blocked") - case <-time.After(50 * time.Millisecond): - // Expected - still waiting - } - - // Complete first upload - complete1() - - // Now second should complete - select { - case <-done: - // Expected - case <-time.After(time.Second): - t.Fatal("waitForPrevious should have completed after first upload finished") 
- } -} - -func TestUploadTracker_ContextCancellation(t *testing.T) { - t.Parallel() - - tracker := NewUploadTracker() - - // Start first upload (don't complete it) - _, _ = tracker.StartUpload() - - // Start second upload - _, waitForPrevious2 := tracker.StartUpload() - - // Create a cancellable context - ctx, cancel := context.WithCancel(context.Background()) - - // Start waiting in a goroutine - errCh := make(chan error, 1) - go func() { - errCh <- waitForPrevious2(ctx) - }() - - // Cancel the context - cancel() - - // Should return context error - select { - case err := <-errCh: - require.ErrorIs(t, err, context.Canceled) - case <-time.After(time.Second): - t.Fatal("waitForPrevious should have returned after context cancellation") - } -} - -func TestUploadTracker_ConcurrentUploads(t *testing.T) { - t.Parallel() - - tracker := NewUploadTracker() - - const numUploads = 10 - var completeFuncs []func() - var waitFuncs []func(context.Context) error - - // Start all uploads - for range numUploads { - complete, wait := tracker.StartUpload() - completeFuncs = append(completeFuncs, complete) - waitFuncs = append(waitFuncs, wait) - } - - // Track completion order and errors - var completionOrder []int - var mu sync.Mutex - var wg sync.WaitGroup - errCh := make(chan error, numUploads) - - // Start all waits concurrently - for i := range numUploads { - wg.Add(1) - go func(idx int) { - defer wg.Done() - ctx := context.Background() - err := waitFuncs[idx](ctx) - if err != nil { - errCh <- err - - return - } - - mu.Lock() - completionOrder = append(completionOrder, idx) - mu.Unlock() - }(i) - } - - // Complete uploads in order - for i := range numUploads { - completeFuncs[i]() - // Small delay to allow goroutines to process - time.Sleep(10 * time.Millisecond) - } - - wg.Wait() - close(errCh) - - // Check for errors - for err := range errCh { - require.NoError(t, err) - } - - // Verify all completed - assert.Len(t, completionOrder, numUploads) -} - -func 
TestUploadTracker_OutOfOrderCompletion(t *testing.T) { - t.Parallel() - - tracker := NewUploadTracker() - - // Start three uploads - complete1, waitForPrevious1 := tracker.StartUpload() - complete2, waitForPrevious2 := tracker.StartUpload() - complete3, waitForPrevious3 := tracker.StartUpload() - - ctx := context.Background() - - // Track when each wait completes - var wait1Done, wait2Done, wait3Done atomic.Bool - - var wg sync.WaitGroup - wg.Add(3) - - go func() { - defer wg.Done() - _ = waitForPrevious1(ctx) - wait1Done.Store(true) - }() - - go func() { - defer wg.Done() - _ = waitForPrevious2(ctx) - wait2Done.Store(true) - }() - - go func() { - defer wg.Done() - _ = waitForPrevious3(ctx) - wait3Done.Store(true) - }() - - // Wait 1 should complete immediately (no dependencies) - time.Sleep(50 * time.Millisecond) - assert.True(t, wait1Done.Load(), "wait1 should complete immediately") - assert.False(t, wait2Done.Load(), "wait2 should still be waiting") - assert.False(t, wait3Done.Load(), "wait3 should still be waiting") - - // Complete upload 1 - complete1() - time.Sleep(50 * time.Millisecond) - - // Wait 2 should now complete - assert.True(t, wait2Done.Load(), "wait2 should complete after upload1") - assert.False(t, wait3Done.Load(), "wait3 should still be waiting for upload2") - - // Complete upload 2 - complete2() - time.Sleep(50 * time.Millisecond) - - // Wait 3 should now complete - assert.True(t, wait3Done.Load(), "wait3 should complete after upload2") - - // Complete upload 3 for cleanup - complete3() - - wg.Wait() -} - -func TestUploadTracker_CompleteBeforeWait(t *testing.T) { - t.Parallel() - - tracker := NewUploadTracker() - - // Start and complete first upload before second even starts waiting - complete1, _ := tracker.StartUpload() - complete1() - - // Start second upload - _, waitForPrevious2 := tracker.StartUpload() - - // Should not block since first is already complete - ctx := context.Background() - done := make(chan struct{}) - go func() { - _ = 
waitForPrevious2(ctx) - close(done) - }() - - select { - case <-done: - // Expected - should complete immediately - case <-time.After(time.Second): - t.Fatal("waitForPrevious should have completed immediately since previous upload is done") - } -} diff --git a/packages/orchestrator/pkg/template/server/main.go b/packages/orchestrator/pkg/template/server/main.go index 30e240eb0c..1ece7defda 100644 --- a/packages/orchestrator/pkg/template/server/main.go +++ b/packages/orchestrator/pkg/template/server/main.go @@ -61,6 +61,7 @@ func New( templateCache *sbxtemplate.Cache, templatePersistence storage.StorageProvider, buildPersistence storage.StorageProvider, + uploads *sandbox.Uploads, ) (s *ServerStore, e error) { logger.Info(ctx, "Initializing template manager") @@ -107,6 +108,7 @@ func New( sandboxFactory.Sandboxes, templateCache, buildMetrics, + uploads, ) store := &ServerStore{ diff --git a/packages/shared/pkg/grpc/orchestrator/chunks.pb.go b/packages/shared/pkg/grpc/orchestrator/chunks.pb.go index e02396c301..388c9bd808 100644 --- a/packages/shared/pkg/grpc/orchestrator/chunks.pb.go +++ b/packages/shared/pkg/grpc/orchestrator/chunks.pb.go @@ -33,12 +33,6 @@ type PeerAvailability struct { // use_storage is true when the GCS upload has completed and the caller // should switch to reading from GCS/NFS directly instead of this peer. UseStorage bool `protobuf:"varint,2,opt,name=use_storage,json=useStorage,proto3" json:"use_storage,omitempty"` - // memfile_header contains the serialized V4 header (with FrameTables) - // for the memfile, included when use_storage is true and the upload was compressed. - MemfileHeader []byte `protobuf:"bytes,3,opt,name=memfile_header,json=memfileHeader,proto3" json:"memfile_header,omitempty"` - // rootfs_header contains the serialized V4 header (with FrameTables) - // for the rootfs, included when use_storage is true and the upload was compressed. 
- RootfsHeader []byte `protobuf:"bytes,4,opt,name=rootfs_header,json=rootfsHeader,proto3" json:"rootfs_header,omitempty"` } func (x *PeerAvailability) Reset() { @@ -87,20 +81,6 @@ func (x *PeerAvailability) GetUseStorage() bool { return false } -func (x *PeerAvailability) GetMemfileHeader() []byte { - if x != nil { - return x.MemfileHeader - } - return nil -} - -func (x *PeerAvailability) GetRootfsHeader() []byte { - if x != nil { - return x.RootfsHeader - } - return nil -} - type GetBuildFileSizeRequest struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache @@ -557,90 +537,85 @@ func (x *GetBuildBlobResponse) GetAvailability() *PeerAvailability { var File_chunks_proto protoreflect.FileDescriptor var file_chunks_proto_rawDesc = []byte{ - 0x0a, 0x0c, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0xa4, - 0x01, 0x0a, 0x10, 0x50, 0x65, 0x65, 0x72, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, - 0x69, 0x74, 0x79, 0x12, 0x23, 0x0a, 0x0d, 0x6e, 0x6f, 0x74, 0x5f, 0x61, 0x76, 0x61, 0x69, 0x6c, - 0x61, 0x62, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x6e, 0x6f, 0x74, 0x41, - 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x75, 0x73, 0x65, 0x5f, - 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x75, - 0x73, 0x65, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x12, 0x25, 0x0a, 0x0e, 0x6d, 0x65, 0x6d, - 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x0c, 0x52, 0x0d, 0x6d, 0x65, 0x6d, 0x66, 0x69, 0x6c, 0x65, 0x48, 0x65, 0x61, 0x64, 0x65, 0x72, - 0x12, 0x23, 0x0a, 0x0d, 0x72, 0x6f, 0x6f, 0x74, 0x66, 0x73, 0x5f, 0x68, 0x65, 0x61, 0x64, 0x65, - 0x72, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x0c, 0x72, 0x6f, 0x6f, 0x74, 0x66, 0x73, 0x48, - 0x65, 0x61, 0x64, 0x65, 0x72, 0x22, 0x51, 0x0a, 0x17, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, - 0x64, 0x46, 0x69, 0x6c, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x52, 
0x65, 0x71, 0x75, 0x65, 0x73, 0x74, - 0x12, 0x19, 0x0a, 0x08, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, - 0x28, 0x09, 0x52, 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x66, - 0x69, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, - 0x66, 0x69, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x22, 0x70, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x42, - 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x52, 0x65, 0x73, 0x70, - 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x5f, 0x73, 0x69, - 0x7a, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x6f, 0x74, 0x61, 0x6c, 0x53, - 0x69, 0x7a, 0x65, 0x12, 0x35, 0x0a, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, - 0x69, 0x74, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x50, 0x65, 0x65, 0x72, - 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x52, 0x0c, 0x61, 0x76, - 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x22, 0x53, 0x0a, 0x19, 0x47, 0x65, - 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x45, 0x78, 0x69, 0x73, 0x74, 0x73, - 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x62, 0x75, 0x69, 0x6c, 0x64, - 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, - 0x49, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, - 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x22, - 0x53, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x45, - 0x78, 0x69, 0x73, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x35, 0x0a, - 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x50, 0x65, 0x65, 0x72, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, - 
0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x52, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, - 0x6c, 0x69, 0x74, 0x79, 0x22, 0x84, 0x01, 0x0a, 0x1a, 0x52, 0x65, 0x61, 0x64, 0x41, 0x74, 0x42, - 0x75, 0x69, 0x6c, 0x64, 0x53, 0x65, 0x65, 0x6b, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, 0x71, 0x75, + 0x0a, 0x0c, 0x63, 0x68, 0x75, 0x6e, 0x6b, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x22, 0x58, + 0x0a, 0x10, 0x50, 0x65, 0x65, 0x72, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, + 0x74, 0x79, 0x12, 0x23, 0x0a, 0x0d, 0x6e, 0x6f, 0x74, 0x5f, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, + 0x62, 0x6c, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0c, 0x6e, 0x6f, 0x74, 0x41, 0x76, + 0x61, 0x69, 0x6c, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x1f, 0x0a, 0x0b, 0x75, 0x73, 0x65, 0x5f, 0x73, + 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x08, 0x52, 0x0a, 0x75, 0x73, + 0x65, 0x53, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x22, 0x51, 0x0a, 0x17, 0x47, 0x65, 0x74, 0x42, + 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x16, 0x0a, 0x06, 0x6f, - 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x6f, 0x66, 0x66, - 0x73, 0x65, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x18, 0x04, 0x20, - 0x01, 0x28, 0x03, 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x22, 0x68, 0x0a, 0x1b, 0x52, - 0x65, 0x61, 0x64, 0x41, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x65, 0x65, 0x6b, 0x61, 0x62, - 0x6c, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, - 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 
0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, 0x12, 0x35, - 0x0a, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x18, 0x02, - 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x50, 0x65, 0x65, 0x72, 0x41, 0x76, 0x61, 0x69, 0x6c, - 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x52, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, - 0x69, 0x6c, 0x69, 0x74, 0x79, 0x22, 0x4d, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, - 0x64, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, - 0x62, 0x75, 0x69, 0x6c, 0x64, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, - 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x69, 0x6c, 0x65, 0x5f, - 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, - 0x4e, 0x61, 0x6d, 0x65, 0x22, 0x61, 0x0a, 0x14, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, - 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x12, 0x0a, 0x04, - 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, 0x61, - 0x12, 0x35, 0x0a, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, - 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x50, 0x65, 0x65, 0x72, 0x41, 0x76, 0x61, - 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x52, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, - 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x32, 0xb9, 0x02, 0x0a, 0x0c, 0x43, 0x68, 0x75, 0x6e, - 0x6b, 0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x47, 0x0a, 0x10, 0x47, 0x65, 0x74, 0x42, - 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x18, 0x2e, 0x47, + 0x09, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x22, 0x70, 0x0a, 0x18, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x52, - 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x75, 
0x69, 0x6c, - 0x64, 0x46, 0x69, 0x6c, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x12, 0x4d, 0x0a, 0x12, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, - 0x65, 0x45, 0x78, 0x69, 0x73, 0x74, 0x73, 0x12, 0x1a, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, - 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x45, 0x78, 0x69, 0x73, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, - 0x65, 0x73, 0x74, 0x1a, 0x1b, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, + 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x1d, 0x0a, 0x0a, 0x74, 0x6f, 0x74, 0x61, 0x6c, + 0x5f, 0x73, 0x69, 0x7a, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x03, 0x52, 0x09, 0x74, 0x6f, 0x74, + 0x61, 0x6c, 0x53, 0x69, 0x7a, 0x65, 0x12, 0x35, 0x0a, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, + 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x50, + 0x65, 0x65, 0x72, 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x52, + 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x22, 0x53, 0x0a, + 0x19, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x45, 0x78, 0x69, + 0x73, 0x74, 0x73, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x62, 0x75, + 0x69, 0x6c, 0x64, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x62, 0x75, + 0x69, 0x6c, 0x64, 0x49, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x6e, 0x61, + 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x4e, 0x61, + 0x6d, 0x65, 0x22, 0x53, 0x0a, 0x1a, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x45, 0x78, 0x69, 0x73, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, - 0x12, 0x52, 0x0a, 0x13, 0x52, 0x65, 0x61, 0x64, 0x41, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, - 0x65, 0x65, 0x6b, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x1b, 0x2e, 0x52, 0x65, 0x61, 0x64, 0x41, 0x74, - 0x42, 0x75, 0x69, 0x6c, 
0x64, 0x53, 0x65, 0x65, 0x6b, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, 0x71, - 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1c, 0x2e, 0x52, 0x65, 0x61, 0x64, 0x41, 0x74, 0x42, 0x75, 0x69, - 0x6c, 0x64, 0x53, 0x65, 0x65, 0x6b, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, - 0x73, 0x65, 0x30, 0x01, 0x12, 0x3d, 0x0a, 0x0c, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, - 0x42, 0x6c, 0x6f, 0x62, 0x12, 0x14, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x42, - 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x15, 0x2e, 0x47, 0x65, 0x74, - 0x42, 0x75, 0x69, 0x6c, 0x64, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, - 0x65, 0x30, 0x01, 0x42, 0x2f, 0x5a, 0x2d, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, 0x2f, 0x2f, 0x67, - 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x65, 0x32, 0x62, 0x2d, 0x64, 0x65, - 0x76, 0x2f, 0x69, 0x6e, 0x66, 0x72, 0x61, 0x2f, 0x6f, 0x72, 0x63, 0x68, 0x65, 0x73, 0x74, 0x72, - 0x61, 0x74, 0x6f, 0x72, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x12, 0x35, 0x0a, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x50, 0x65, 0x65, 0x72, 0x41, 0x76, 0x61, + 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x52, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, + 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x22, 0x84, 0x01, 0x0a, 0x1a, 0x52, 0x65, 0x61, 0x64, + 0x41, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x65, 0x65, 0x6b, 0x61, 0x62, 0x6c, 0x65, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x19, 0x0a, 0x08, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x5f, + 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, + 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x69, 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x66, 0x69, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x12, 0x16, + 0x0a, 0x06, 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x18, 0x03, 0x20, 0x01, 0x28, 0x03, 
0x52, 0x06, + 0x6f, 0x66, 0x66, 0x73, 0x65, 0x74, 0x12, 0x16, 0x0a, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, + 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x06, 0x6c, 0x65, 0x6e, 0x67, 0x74, 0x68, 0x22, 0x68, + 0x0a, 0x1b, 0x52, 0x65, 0x61, 0x64, 0x41, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x65, 0x65, + 0x6b, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x12, 0x0a, + 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, 0x61, 0x74, + 0x61, 0x12, 0x35, 0x0a, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, + 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x50, 0x65, 0x65, 0x72, 0x41, 0x76, + 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x52, 0x0c, 0x61, 0x76, 0x61, 0x69, + 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x22, 0x4d, 0x0a, 0x13, 0x47, 0x65, 0x74, 0x42, + 0x75, 0x69, 0x6c, 0x64, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, + 0x19, 0x0a, 0x08, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x5f, 0x69, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x07, 0x62, 0x75, 0x69, 0x6c, 0x64, 0x49, 0x64, 0x12, 0x1b, 0x0a, 0x09, 0x66, 0x69, + 0x6c, 0x65, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x66, + 0x69, 0x6c, 0x65, 0x4e, 0x61, 0x6d, 0x65, 0x22, 0x61, 0x0a, 0x14, 0x47, 0x65, 0x74, 0x42, 0x75, + 0x69, 0x6c, 0x64, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, + 0x12, 0x0a, 0x04, 0x64, 0x61, 0x74, 0x61, 0x18, 0x01, 0x20, 0x01, 0x28, 0x0c, 0x52, 0x04, 0x64, + 0x61, 0x74, 0x61, 0x12, 0x35, 0x0a, 0x0c, 0x61, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, + 0x69, 0x74, 0x79, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x11, 0x2e, 0x50, 0x65, 0x65, 0x72, + 0x41, 0x76, 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x52, 0x0c, 0x61, 0x76, + 0x61, 0x69, 0x6c, 0x61, 0x62, 0x69, 0x6c, 0x69, 0x74, 0x79, 0x32, 0xb9, 0x02, 0x0a, 0x0c, 0x43, + 0x68, 0x75, 0x6e, 0x6b, 
0x53, 0x65, 0x72, 0x76, 0x69, 0x63, 0x65, 0x12, 0x47, 0x0a, 0x10, 0x47, + 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x12, + 0x18, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x53, 0x69, + 0x7a, 0x65, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x19, 0x2e, 0x47, 0x65, 0x74, 0x42, + 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x53, 0x69, 0x7a, 0x65, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x4d, 0x0a, 0x12, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, + 0x46, 0x69, 0x6c, 0x65, 0x45, 0x78, 0x69, 0x73, 0x74, 0x73, 0x12, 0x1a, 0x2e, 0x47, 0x65, 0x74, + 0x42, 0x75, 0x69, 0x6c, 0x64, 0x46, 0x69, 0x6c, 0x65, 0x45, 0x78, 0x69, 0x73, 0x74, 0x73, 0x52, + 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1b, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, + 0x64, 0x46, 0x69, 0x6c, 0x65, 0x45, 0x78, 0x69, 0x73, 0x74, 0x73, 0x52, 0x65, 0x73, 0x70, 0x6f, + 0x6e, 0x73, 0x65, 0x12, 0x52, 0x0a, 0x13, 0x52, 0x65, 0x61, 0x64, 0x41, 0x74, 0x42, 0x75, 0x69, + 0x6c, 0x64, 0x53, 0x65, 0x65, 0x6b, 0x61, 0x62, 0x6c, 0x65, 0x12, 0x1b, 0x2e, 0x52, 0x65, 0x61, + 0x64, 0x41, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x65, 0x65, 0x6b, 0x61, 0x62, 0x6c, 0x65, + 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x1c, 0x2e, 0x52, 0x65, 0x61, 0x64, 0x41, 0x74, + 0x42, 0x75, 0x69, 0x6c, 0x64, 0x53, 0x65, 0x65, 0x6b, 0x61, 0x62, 0x6c, 0x65, 0x52, 0x65, 0x73, + 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x12, 0x3d, 0x0a, 0x0c, 0x47, 0x65, 0x74, 0x42, 0x75, + 0x69, 0x6c, 0x64, 0x42, 0x6c, 0x6f, 0x62, 0x12, 0x14, 0x2e, 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, + 0x6c, 0x64, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x1a, 0x15, 0x2e, + 0x47, 0x65, 0x74, 0x42, 0x75, 0x69, 0x6c, 0x64, 0x42, 0x6c, 0x6f, 0x62, 0x52, 0x65, 0x73, 0x70, + 0x6f, 0x6e, 0x73, 0x65, 0x30, 0x01, 0x42, 0x2f, 0x5a, 0x2d, 0x68, 0x74, 0x74, 0x70, 0x73, 0x3a, + 0x2f, 0x2f, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 
0x6f, 0x6d, 0x2f, 0x65, 0x32, 0x62, + 0x2d, 0x64, 0x65, 0x76, 0x2f, 0x69, 0x6e, 0x66, 0x72, 0x61, 0x2f, 0x6f, 0x72, 0x63, 0x68, 0x65, + 0x73, 0x74, 0x72, 0x61, 0x74, 0x6f, 0x72, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( diff --git a/packages/shared/pkg/storage/header/header.go b/packages/shared/pkg/storage/header/header.go index 7607ec0daf..ecc30e39ca 100644 --- a/packages/shared/pkg/storage/header/header.go +++ b/packages/shared/pkg/storage/header/header.go @@ -18,7 +18,7 @@ import ( // BuildData holds per-build metadata stored in V4 headers. // Each layer's header carries a Builds map; child headers inherit parent -// entries for still-referenced build IDs via NewHeaderWithBuilds. +// entries for still-referenced build IDs via newDiffHeader. type BuildData struct { Size int64 // uncompressed file size Checksum [32]byte // SHA-256 of uncompressed data; zero value means unknown @@ -35,6 +35,13 @@ type Header struct { Builds map[uuid.UUID]BuildData Mapping []BuildMap + + // IncompletePendingUpload is set on diff headers produced by ToDiffHeader and + // cleared on the finalized headers swapped in by the upload pipeline. It is never + // persisted to storage (it travels only in the V4 flags byte sent to peers) and + // signals that the build's data has not yet reached object storage — readers must + // serve from the local cache and skip FrameTable lookups for the still-missing self entry. + IncompletePendingUpload bool } // CloneForUpload returns a clone with copied Mapping and Builds, safe to @@ -86,11 +93,7 @@ func NewHeader(metadata *Metadata, mapping []BuildMap) (*Header, error) { }, nil } -// NewHeaderWithBuilds creates a header and copies the subset of sourceBuilds -// referenced by the mappings. This propagates ancestor build metadata through -// the template chain (parent → child → grandchild). -// Returns nil Builds when sourceBuilds is nil (V3 / uncompressed).
-func NewHeaderWithBuilds(metadata *Metadata, mapping []BuildMap, sourceBuilds map[uuid.UUID]BuildData) (*Header, error) { +func newDiffHeader(metadata *Metadata, mapping []BuildMap, sourceBuilds map[uuid.UUID]BuildData) (*Header, error) { h, err := NewHeader(metadata, mapping) if err != nil { return nil, err @@ -110,6 +113,8 @@ func NewHeaderWithBuilds(metadata *Metadata, mapping []BuildMap, sourceBuilds ma } } + h.IncompletePendingUpload = true + return h, nil } diff --git a/packages/shared/pkg/storage/header/metadata.go b/packages/shared/pkg/storage/header/metadata.go index 38587378f7..ffd8732abe 100644 --- a/packages/shared/pkg/storage/header/metadata.go +++ b/packages/shared/pkg/storage/header/metadata.go @@ -151,7 +151,7 @@ func (d *DiffMetadata) ToDiffHeader( attribute.String("snapshot.metadata.base_build_id", metadata.BaseBuildId.String()), ) - header, err := NewHeaderWithBuilds(metadata, m, originalHeader.Builds) + header, err := newDiffHeader(metadata, m, originalHeader.Builds) if err != nil { return nil, fmt.Errorf("failed to create header: %w", err) } diff --git a/packages/shared/pkg/storage/header/serialization.go b/packages/shared/pkg/storage/header/serialization.go index 6ab7707a4d..c9c86fe0b9 100644 --- a/packages/shared/pkg/storage/header/serialization.go +++ b/packages/shared/pkg/storage/header/serialization.go @@ -10,13 +10,13 @@ import ( // SerializeHeader serializes a header, dispatching to the version-specific format. 
// // V3 (Version <= 3): [Metadata] [v3 mappings…] -// V4 (Version >= 4): [Metadata] [uint32 uncompressedSize] [LZ4( Builds + v4 mappings )] +// V4 (Version >= 4): [Metadata] [uint8 flags] [uint32 uncompressedSize] [LZ4( Builds + v4 mappings )] func SerializeHeader(h *Header) ([]byte, error) { if h.Metadata.Version <= 3 { return serializeV3(h.Metadata, h.Mapping) } - return serializeV4(h.Metadata, h.Builds, h.Mapping) + return serializeV4(h.Metadata, h.Builds, h.Mapping, h.IncompletePendingUpload) } // DeserializeBytes auto-detects the header version and deserializes accordingly. @@ -56,20 +56,25 @@ func LoadHeader(ctx context.Context, s storage.StorageProvider, path string) (*H return DeserializeBytes(data) } -// StoreHeader serializes a header and uploads it to storage. -// Inverse of LoadHeader. -func StoreHeader(ctx context.Context, s storage.StorageProvider, path string, h *Header) ([]byte, error) { +// StoreHeader serializes a header and uploads it to long-term storage. +// Refuses to persist a header still flagged as in-flight — the upload pipeline +// must clear IncompletePendingUpload before reaching here. +func StoreHeader(ctx context.Context, s storage.StorageProvider, path string, h *Header) error { + if h.IncompletePendingUpload { + return fmt.Errorf("refusing to persist incomplete header for %s", path) + } + data, err := SerializeHeader(h) if err != nil { - return nil, fmt.Errorf("serialize header: %w", err) + return fmt.Errorf("serialize header: %w", err) } blob, err := s.OpenBlob(ctx, path, storage.MetadataObjectType) if err != nil { - return nil, fmt.Errorf("open blob %s: %w", path, err) + return fmt.Errorf("open blob %s: %w", path, err) } - return data, blob.Put(ctx, data) + return blob.Put(ctx, data) } // Deserialize reads a header from a storage Blob (legacy API).
diff --git a/packages/shared/pkg/storage/header/serialization_v4.go b/packages/shared/pkg/storage/header/serialization_v4.go index 13f9acd025..a88a772ede 100644 --- a/packages/shared/pkg/storage/header/serialization_v4.go +++ b/packages/shared/pkg/storage/header/serialization_v4.go @@ -14,9 +14,18 @@ import ( ) // v4SizePrefixLen is the length of the uint32 size prefix that precedes the -// LZ4-compressed block in the v4 header layout: [metadata][uint32 size][LZ4 block]. +// LZ4-compressed block in the V4 header layout. const v4SizePrefixLen = 4 +// v4FlagsLen is the length of the V4 flags byte. Bit 0 = IncompletePendingUpload. +const v4FlagsLen = 1 + +// v4FlagIncomplete is bit 0 of the V4 flags byte: when set, the header +// describes a build whose upload has not yet finalized (an in-flight diff). +// StoreHeader refuses to persist headers carrying this flag; only the P2P +// peer-server path emits it. +const v4FlagIncomplete uint8 = 1 << 0 + type v4SerializableBuildMap struct { Offset uint64 Length uint64 @@ -32,9 +41,9 @@ type v4SerializableBuildInfo struct { Checksum [32]byte } -// serializeV4 writes [Metadata] [uint32 LZ4 size] [LZ4( Builds[] + Mappings[] )]. +// serializeV4 writes [Metadata] [uint8 flags] [uint32 LZ4 size] [LZ4( Builds[] + Mappings[] )]. // Frame tables are sparse-trimmed to only frames referenced by mappings. -func serializeV4(metadata *Metadata, builds map[uuid.UUID]BuildData, mappings []BuildMap) ([]byte, error) { +func serializeV4(metadata *Metadata, builds map[uuid.UUID]BuildData, mappings []BuildMap, incomplete bool) ([]byte, error) { var metaBuf bytes.Buffer if err := binary.Write(&metaBuf, binary.LittleEndian, metadata); err != nil { return nil, fmt.Errorf("failed to write metadata: %w", err) @@ -92,28 +101,36 @@ func serializeV4(metadata *Metadata, builds map[uuid.UUID]BuildData, mappings [] } } - // LZ4-compress the block and assemble: [metadata] [uint32 size] [compressed block]. 
+ // LZ4-compress the block and assemble: [metadata] [uint8 flags] [uint32 size] [compressed block]. blockBytes := block.Bytes() compressed, err := compressLZ4(blockBytes) if err != nil { return nil, fmt.Errorf("failed to LZ4-compress v4 header block: %w", err) } - result := make([]byte, metadataSize+v4SizePrefixLen+len(compressed)) + var flags uint8 + if incomplete { + flags |= v4FlagIncomplete + } + + result := make([]byte, metadataSize+v4FlagsLen+v4SizePrefixLen+len(compressed)) copy(result, metaBuf.Bytes()) - binary.LittleEndian.PutUint32(result[metadataSize:], uint32(len(blockBytes))) - copy(result[metadataSize+v4SizePrefixLen:], compressed) + result[metadataSize] = flags + binary.LittleEndian.PutUint32(result[metadataSize+v4FlagsLen:], uint32(len(blockBytes))) + copy(result[metadataSize+v4FlagsLen+v4SizePrefixLen:], compressed) return result, nil } // deserializeV4 decompresses and reads the V4 block. func deserializeV4(metadata *Metadata, blockData []byte) (*Header, error) { - if len(blockData) < v4SizePrefixLen { - return nil, fmt.Errorf("v4 header block too short for size prefix: %d bytes", len(blockData)) + if len(blockData) < v4FlagsLen+v4SizePrefixLen { + return nil, fmt.Errorf("v4 header block too short for flags + size prefix: %d bytes", len(blockData)) } - decompressed, err := decompressLZ4(blockData[v4SizePrefixLen:]) + flags := blockData[0] + + decompressed, err := decompressLZ4(blockData[v4FlagsLen+v4SizePrefixLen:]) if err != nil { return nil, fmt.Errorf("failed to LZ4-decompress v4 header block: %w", err) } @@ -178,6 +195,7 @@ func deserializeV4(metadata *Metadata, blockData []byte) (*Header, error) { return nil, err } h.Builds = builds + h.IncompletePendingUpload = flags&v4FlagIncomplete != 0 return h, nil } diff --git a/packages/shared/pkg/storage/paths.go b/packages/shared/pkg/storage/paths.go index 4edb250364..6a9a9df74d 100644 --- a/packages/shared/pkg/storage/paths.go +++ b/packages/shared/pkg/storage/paths.go @@ -34,7 +34,7 @@ func (p 
Paths) Memfile() string { } func (p Paths) MemfileHeader() string { - return fmt.Sprintf("%s/%s%s", p.BuildID, MemfileName, HeaderSuffix) + return p.HeaderFile(MemfileName) } func (p Paths) Rootfs() string { @@ -42,7 +42,7 @@ func (p Paths) Rootfs() string { } func (p Paths) RootfsHeader() string { - return fmt.Sprintf("%s/%s%s", p.BuildID, RootfsName, HeaderSuffix) + return p.HeaderFile(RootfsName) } func (p Paths) Snapfile() string { @@ -71,6 +71,12 @@ func (p Paths) DataFile(name string, ct CompressionType) string { return fmt.Sprintf("%s/%s%s", p.BuildID, name, ct.Suffix()) } +// HeaderFile returns the storage path for a header sidecar of a data file +// (e.g. "memfile" → "{buildID}/memfile.header"). +func (p Paths) HeaderFile(name string) string { + return fmt.Sprintf("%s/%s%s", p.BuildID, name, HeaderSuffix) +} + // SplitPath splits a storage path of the form "{buildID}/{fileName}" // back into its components. This is the inverse of the path methods. func SplitPath(path string) (buildID, fileName string) { diff --git a/packages/shared/pkg/storage/storage.go b/packages/shared/pkg/storage/storage.go index 219fd0ec0f..c34b6d4eea 100644 --- a/packages/shared/pkg/storage/storage.go +++ b/packages/shared/pkg/storage/storage.go @@ -140,14 +140,11 @@ func UploadBlob(ctx context.Context, provider StorageProvider, remotePath string } // PeerTransitionedError is returned by the peer Seekable when the GCS upload -// has completed and serialized V4 headers are available. -type PeerTransitionedError struct { - MemfileHeader []byte - RootfsHeader []byte -} +// has completed; the caller should re-load the V4 header from storage. +type PeerTransitionedError struct{} func (e *PeerTransitionedError) Error() string { - return "peer upload completed, headers available" + return "peer upload completed, reload header from storage" } // StorageConfig holds the configuration for creating a storage provider.
diff --git a/tests/integration/go.mod b/tests/integration/go.mod index 3033c74ede..6378c96e53 100644 --- a/tests/integration/go.mod +++ b/tests/integration/go.mod @@ -34,15 +34,48 @@ require ( ) require ( + cel.dev/expr v0.25.1 // indirect + cloud.google.com/go v0.123.0 // indirect + cloud.google.com/go/auth v0.18.1 // indirect + cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect + cloud.google.com/go/compute/metadata v0.9.0 // indirect + cloud.google.com/go/iam v1.5.3 // indirect + cloud.google.com/go/monitoring v1.24.3 // indirect + cloud.google.com/go/storage v1.59.2 // indirect dario.cat/mergo v1.0.2 // indirect github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect github.com/ClickHouse/ch-go v0.67.0 // indirect github.com/ClickHouse/clickhouse-go/v2 v2.40.1 // indirect github.com/DataDog/datadog-go/v5 v5.2.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect + github.com/RoaringBitmap/roaring/v2 v2.18.0 // indirect github.com/andybalholm/brotli v1.2.0 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect + github.com/aws/aws-sdk-go-v2 v1.41.6 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.9 // indirect + github.com/aws/aws-sdk-go-v2/config v1.32.6 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.19.6 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.16 // indirect + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.12 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.22 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.22 // indirect + github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect + 
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.23 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.8 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.14 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.22 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.22 // indirect + github.com/aws/aws-sdk-go-v2/service/s3 v1.100.0 // indirect + github.com/aws/aws-sdk-go-v2/service/signin v1.0.4 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.30.8 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.12 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.41.5 // indirect + github.com/aws/smithy-go v1.25.0 // indirect github.com/bitfield/gotestdox v0.2.2 // indirect + github.com/bits-and-blooms/bitset v1.24.4 // indirect github.com/bsm/redislock v0.9.4 // indirect github.com/bytedance/gopkg v0.1.4 // indirect github.com/bytedance/sonic v1.15.0 // indirect @@ -51,6 +84,7 @@ require ( github.com/cenkalti/backoff/v5 v5.0.3 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/cloudwego/base64x v0.1.6 // indirect + github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 // indirect github.com/containerd/errdefs v1.0.0 // indirect github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/log v0.1.0 // indirect @@ -65,6 +99,8 @@ require ( github.com/docker/go-units v0.5.0 // indirect github.com/dprotaso/go-yit v0.0.0-20220510233725-9ba8df137936 // indirect github.com/ebitengine/purego v0.10.0 // indirect + github.com/envoyproxy/go-control-plane/envoy v1.36.0 // indirect + github.com/envoyproxy/protoc-gen-validate v1.3.0 // indirect github.com/exaring/otelpgx v0.9.3 // indirect github.com/fatih/color v1.18.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -76,6 +112,7 @@ require ( github.com/gin-gonic/gin v1.12.0 // indirect github.com/go-faster/city v1.0.1 // indirect github.com/go-faster/errors 
v0.7.1 // indirect + github.com/go-jose/go-jose/v4 v4.1.4 // indirect github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/go-ole/go-ole v1.3.0 // indirect @@ -89,10 +126,16 @@ require ( github.com/golang-jwt/jwt/v5 v5.3.1 // indirect github.com/google/go-cmp v0.7.0 // indirect github.com/google/nftables v0.3.0 // indirect + github.com/google/s2a-go v0.1.9 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect + github.com/googleapis/enterprise-certificate-proxy v0.3.11 // indirect + github.com/googleapis/gax-go/v2 v2.17.0 // indirect github.com/gorilla/mux v1.8.1 // indirect + github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 // indirect github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect + github.com/hashicorp/go-cleanhttp v0.5.2 // indirect + github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/jackc/pgerrcode v0.0.0-20250907135507-afb5586c32a6 // indirect github.com/jackc/pgpassfile v1.0.0 // indirect github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect @@ -102,6 +145,14 @@ require ( github.com/json-iterator/go v1.1.12 // indirect github.com/klauspost/compress v1.18.5 // indirect github.com/klauspost/cpuid/v2 v2.3.0 // indirect + github.com/launchdarkly/ccache v1.1.0 // indirect + github.com/launchdarkly/eventsource v1.10.0 // indirect + github.com/launchdarkly/go-jsonstream/v3 v3.1.0 // indirect + github.com/launchdarkly/go-sdk-common/v3 v3.3.0 // indirect + github.com/launchdarkly/go-sdk-events/v3 v3.5.0 // indirect + github.com/launchdarkly/go-semver v1.0.3 // indirect + github.com/launchdarkly/go-server-sdk-evaluation/v3 v3.0.1 // indirect + github.com/launchdarkly/go-server-sdk/v7 v7.13.0 // indirect github.com/leodido/go-urn v1.4.0 // indirect github.com/lib/pq v1.11.2 // indirect github.com/lufia/plan9stats v0.0.0-20240909124753-873cd0166683 // indirect 
@@ -124,6 +175,7 @@ require ( github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee // indirect github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect + github.com/mschoch/smat v0.2.0 // indirect github.com/ngrok/firewall_toolkit v0.0.18 // indirect github.com/oapi-codegen/gin-middleware v1.0.2 // indirect github.com/oapi-codegen/oapi-codegen/v2 v2.6.0 // indirect @@ -132,10 +184,12 @@ require ( github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.1.1 // indirect github.com/orcaman/concurrent-map/v2 v2.0.1 // indirect + github.com/patrickmn/go-cache v2.1.0+incompatible // indirect github.com/paulmach/orb v0.11.1 // indirect github.com/pelletier/go-toml/v2 v2.3.0 // indirect github.com/perimeterx/marshmallow v1.1.5 // indirect github.com/pierrec/lz4/v4 v4.1.22 // indirect + github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect github.com/pressly/goose/v3 v3.26.0 // indirect @@ -150,6 +204,8 @@ require ( github.com/sirupsen/logrus v1.9.4 // indirect github.com/speakeasy-api/jsonpath v0.6.0 // indirect github.com/speakeasy-api/openapi-overlay v0.10.2 // indirect + github.com/spiffe/go-spiffe/v2 v2.6.0 // indirect + github.com/stretchr/objx v0.5.3 // indirect github.com/testcontainers/testcontainers-go v0.42.0 // indirect github.com/tklauser/go-sysconf v0.3.16 // indirect github.com/tklauser/numcpus v0.11.0 // indirect @@ -161,6 +217,7 @@ require ( go.mongodb.org/mongo-driver/v2 v2.5.0 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect go.opentelemetry.io/contrib/bridges/otelzap v0.14.0 // indirect + go.opentelemetry.io/contrib/detectors/gcp v1.39.0 // indirect 
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0 // indirect go.opentelemetry.io/contrib/instrumentation/runtime v0.66.0 // indirect @@ -180,12 +237,17 @@ require ( go.uber.org/zap v1.27.1 // indirect golang.org/x/arch v0.25.0 // indirect golang.org/x/crypto v0.50.0 // indirect + golang.org/x/exp v0.0.0-20260212183809-81e46e3db34a // indirect golang.org/x/mod v0.35.0 // indirect golang.org/x/net v0.53.0 // indirect + golang.org/x/oauth2 v0.36.0 // indirect golang.org/x/sys v0.43.0 // indirect golang.org/x/term v0.42.0 // indirect golang.org/x/text v0.36.0 // indirect + golang.org/x/time v0.14.0 // indirect golang.org/x/tools v0.44.0 // indirect + google.golang.org/api v0.267.0 // indirect + google.golang.org/genproto v0.0.0-20260128011058-8636f8732409 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect diff --git a/tests/integration/go.sum b/tests/integration/go.sum index 3dc530375c..d5f7d7e164 100644 --- a/tests/integration/go.sum +++ b/tests/integration/go.sum @@ -1,3 +1,25 @@ +cel.dev/expr v0.25.1 h1:1KrZg61W6TWSxuNZ37Xy49ps13NUovb66QLprthtwi4= +cel.dev/expr v0.25.1/go.mod h1:hrXvqGP6G6gyx8UAHSHJ5RGk//1Oj5nXQ2NI02Nrsg4= +cloud.google.com/go v0.123.0 h1:2NAUJwPR47q+E35uaJeYoNhuNEM9kM8SjgRgdeOJUSE= +cloud.google.com/go v0.123.0/go.mod h1:xBoMV08QcqUGuPW65Qfm1o9Y4zKZBpGS+7bImXLTAZU= +cloud.google.com/go/auth v0.18.1 h1:IwTEx92GFUo2pJ6Qea0EU3zYvKnTAeRCODxfA/G5UWs= +cloud.google.com/go/auth v0.18.1/go.mod h1:GfTYoS9G3CWpRA3Va9doKN9mjPGRS+v41jmZAhBzbrA= +cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= +cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= +cloud.google.com/go/compute/metadata 
v0.9.0 h1:pDUj4QMoPejqq20dK0Pg2N4yG9zIkYGdBtwLoEkH9Zs= +cloud.google.com/go/compute/metadata v0.9.0/go.mod h1:E0bWwX5wTnLPedCKqk3pJmVgCBSM6qQI1yTBdEb3C10= +cloud.google.com/go/iam v1.5.3 h1:+vMINPiDF2ognBJ97ABAYYwRgsaqxPbQDlMnbHMjolc= +cloud.google.com/go/iam v1.5.3/go.mod h1:MR3v9oLkZCTlaqljW6Eb2d3HGDGK5/bDv93jhfISFvU= +cloud.google.com/go/logging v1.13.1 h1:O7LvmO0kGLaHY/gq8cV7T0dyp6zJhYAOtZPX4TF3QtY= +cloud.google.com/go/logging v1.13.1/go.mod h1:XAQkfkMBxQRjQek96WLPNze7vsOmay9H5PqfsNYDqvw= +cloud.google.com/go/longrunning v0.8.0 h1:LiKK77J3bx5gDLi4SMViHixjD2ohlkwBi+mKA7EhfW8= +cloud.google.com/go/longrunning v0.8.0/go.mod h1:UmErU2Onzi+fKDg2gR7dusz11Pe26aknR4kHmJJqIfk= +cloud.google.com/go/monitoring v1.24.3 h1:dde+gMNc0UhPZD1Azu6at2e79bfdztVDS5lvhOdsgaE= +cloud.google.com/go/monitoring v1.24.3/go.mod h1:nYP6W0tm3N9H/bOw8am7t62YTzZY+zUeQ+Bi6+2eonI= +cloud.google.com/go/storage v1.59.2 h1:gmOAuG1opU8YvycMNpP+DvHfT9BfzzK5Cy+arP+Nocw= +cloud.google.com/go/storage v1.59.2/go.mod h1:cMWbtM+anpC74gn6qjLh+exqYcfmB9Hqe5z6adx+CLI= +cloud.google.com/go/trace v1.11.7 h1:kDNDX8JkaAG3R2nq1lIdkb7FCSi1rCmsEtKVsty7p+U= +cloud.google.com/go/trace v1.11.7/go.mod h1:TNn9d5V3fQVf6s4SCveVMIBS2LJUqo73GACmq/Tky0s= connectrpc.com/connect v1.18.1 h1:PAg7CjSAGvscaf6YZKUefjoih5Z/qYkyaTrBW8xvYPw= connectrpc.com/connect v1.18.1/go.mod h1:0292hj1rnx8oFrStN7cB4jjVBeqs+Yx5yDIC2prWDO8= dario.cat/mergo v1.0.2 h1:85+piFYR1tMbRrLcDwR18y4UKJ3aH1Tbzi24VRW1TK8= @@ -12,16 +34,68 @@ github.com/ClickHouse/clickhouse-go/v2 v2.40.1 h1:PbwsHBgqXRydU7jKULD1C8CHmifczf github.com/ClickHouse/clickhouse-go/v2 v2.40.1/go.mod h1:GDzSBLVhladVm8V01aEB36IoBOVLLICfyeuiIp/8Ezc= github.com/DataDog/datadog-go/v5 v5.2.0 h1:kSptqUGSNK67DgA+By3rwtFnAh6pTBxJ7Hn8JCLZcKY= github.com/DataDog/datadog-go/v5 v5.2.0/go.mod h1:XRDJk1pTc00gm+ZDiBKsjh7oOOtJfYfglVCmFb8C2+Q= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0 h1:DHa2U07rk8syqvCge0QIGMCE1WxGj9njT44GH7zNJLQ= 
+github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.31.0/go.mod h1:P4WPRUkOhJC13W//jWpyfJNDAIpvRbAUIYLX/4jtlE0= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0 h1:lhhYARPUu3LmHysQ/igznQphfzynnqI3D75oUyw1HXk= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.54.0/go.mod h1:l9rva3ApbBpEJxSNYnwT9N4CDLrWgtq3u8736C5hyJw= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.54.0 h1:xfK3bbi6F2RDtaZFtUdKO3osOBIhNb+xTs8lFW6yx9o= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.54.0/go.mod h1:vB2GH9GAYYJTO3mEn8oYwzEdhlayZIdQz6zdzgUIRvA= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0 h1:s0WlVbf9qpvkh1c/uDAPElam0WrL7fHRIidgZJ7UqZI= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.54.0/go.mod h1:Mf6O40IAyB9zR/1J8nGDDPirZQQPbYJni8Yisy7NTMc= github.com/Microsoft/go-winio v0.5.0/go.mod h1:JPGBdM1cNvN/6ISo+n8V5iA4v8pBzdOpzfwIujj1a84= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= +github.com/RoaringBitmap/roaring/v2 v2.18.0 h1:h7sS0VqCkfBMGgcHaudJFB4FE6Td71H6svRB2poRnGY= +github.com/RoaringBitmap/roaring/v2 v2.18.0/go.mod h1:eq4wdNXxtJIS/oikeCzdX1rBzek7ANzbth041hrU8Q4= github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ= github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= +github.com/aws/aws-sdk-go-v2 v1.41.6 h1:1AX0AthnBQzMx1vbmir3Y4WsnJgiydmnJjiLu+LvXOg= 
+github.com/aws/aws-sdk-go-v2 v1.41.6/go.mod h1:dy0UzBIfwSeot4grGvY1AqFWN5zgziMmWGzysDnHFcQ= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.9 h1:adBsCIIpLbLmYnkQU+nAChU5yhVTvu5PerROm+/Kq2A= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.9/go.mod h1:uOYhgfgThm/ZyAuJGNQ5YgNyOlYfqnGpTHXvk3cpykg= +github.com/aws/aws-sdk-go-v2/config v1.32.6 h1:hFLBGUKjmLAekvi1evLi5hVvFQtSo3GYwi+Bx4lpJf8= +github.com/aws/aws-sdk-go-v2/config v1.32.6/go.mod h1:lcUL/gcd8WyjCrMnxez5OXkO3/rwcNmvfno62tnXNcI= +github.com/aws/aws-sdk-go-v2/credentials v1.19.6 h1:F9vWao2TwjV2MyiyVS+duza0NIRtAslgLUM0vTA1ZaE= +github.com/aws/aws-sdk-go-v2/credentials v1.19.6/go.mod h1:SgHzKjEVsdQr6Opor0ihgWtkWdfRAIwxYzSJ8O85VHY= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.16 h1:80+uETIWS1BqjnN9uJ0dBUaETh+P1XwFy5vwHwK5r9k= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.16/go.mod h1:wOOsYuxYuB/7FlnVtzeBYRcjSRtQpAW0hCP7tIULMwo= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.12 h1:Zy6Tme1AA13kX8x3CnkHx5cqdGWGaj/anwOiWGnA0Xo= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.12/go.mod h1:ql4uXYKoTM9WUAUSmthY4AtPVrlTBZOvnBJTiCUdPxI= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.22 h1:GmLa5Kw1ESqtFpXsx5MmC84QWa/ZrLZvlJGa2y+4kcQ= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.22/go.mod h1:6sW9iWm9DK9YRpRGga/qzrzNLgKpT2cIxb7Vo2eNOp0= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.22 h1:dY4kWZiSaXIzxnKlj17nHnBcXXBfac6UlsAx2qL6XrU= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.22/go.mod h1:KIpEUx0JuRZLO7U6cbV204cWAEco2iC3l061IxlwLtI= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 h1:WKuaxf++XKWlHWu9ECbMlha8WOEGm0OUEZqm4K/Gcfk= +github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4/go.mod h1:ZWy7j6v1vWGmPReu0iSGvRiise4YI5SkR3OHKTZ6Wuc= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.23 h1:FPXsW9+gMuIeKmz7j6ENWcWtBGTe1kH8r9thNt5Uxx4= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.23/go.mod 
h1:7J8iGMdRKk6lw2C+cMIphgAnT8uTwBwNOsGkyOCm80U= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.8 h1:HtOTYcbVcGABLOVuPYaIihj6IlkqubBwFj10K5fxRek= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.8/go.mod h1:VsK9abqQeGlzPgUr+isNWzPlK2vKe9INMLWnY65f5Xs= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.14 h1:xnvDEnw+pnj5mctWiYuFbigrEzSm35x7k4KS/ZkCANg= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.14/go.mod h1:yS5rNogD8e0Wu9+l3MUwr6eENBzEeGejvINpN5PAYfY= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.22 h1:PUmZeJU6Y1Lbvt9WFuJ0ugUK2xn6hIWUBBbKuOWF30s= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.22/go.mod h1:nO6egFBoAaoXze24a2C0NjQCvdpk8OueRoYimvEB9jo= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.22 h1:SE+aQ4DEqG53RRCAIHlCf//B2ycxGH7jFkpnAh/kKPM= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.22/go.mod h1:ES3ynECd7fYeJIL6+oax+uIEljmfps0S70BaQzbMd/o= +github.com/aws/aws-sdk-go-v2/service/s3 v1.100.0 h1:7G26Sae6PMKn4kMcU5JzNfrm1YrKwyOhowXPYR2WiWY= +github.com/aws/aws-sdk-go-v2/service/s3 v1.100.0/go.mod h1:Fw9aqhJicIVee1VytBBjH+l+5ov6/PhbtIK/u3rt/ls= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.4 h1:HpI7aMmJ+mm1wkSHIA2t5EaFFv5EFYXePW30p1EIrbQ= +github.com/aws/aws-sdk-go-v2/service/signin v1.0.4/go.mod h1:C5RdGMYGlfM0gYq/tifqgn4EbyX99V15P2V3R+VHbQU= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.8 h1:aM/Q24rIlS3bRAhTyFurowU8A0SMyGDtEOY/l/s/1Uw= +github.com/aws/aws-sdk-go-v2/service/sso v1.30.8/go.mod h1:+fWt2UHSb4kS7Pu8y+BMBvJF0EWx+4H0hzNwtDNRTrg= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.12 h1:AHDr0DaHIAo8c9t1emrzAlVDFp+iMMKnPdYy6XO4MCE= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.12/go.mod h1:GQ73XawFFiWxyWXMHWfhiomvP3tXtdNar/fi8z18sx0= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.5 h1:SciGFVNZ4mHdm7gpD1dgZYnCuVdX1s+lFTg4+4DOy70= +github.com/aws/aws-sdk-go-v2/service/sts v1.41.5/go.mod 
h1:iW40X4QBmUxdP+fZNOpfmkdMZqsovezbAeO+Ubiv2pk= +github.com/aws/smithy-go v1.25.0 h1:Sz/XJ64rwuiKtB6j98nDIPyYrV1nVNJ4YU74gttcl5U= +github.com/aws/smithy-go v1.25.0/go.mod h1:YE2RhdIuDbA5E5bTdciG9KrW3+TiEONeUWCqxX9i1Fc= github.com/bitfield/gotestdox v0.2.2 h1:x6RcPAbBbErKLnapz1QeAlf3ospg8efBsedU93CDsnE= github.com/bitfield/gotestdox v0.2.2/go.mod h1:D+gwtS0urjBrzguAkTM2wodsTQYFHdpx8eqRJ3N+9pY= +github.com/bits-and-blooms/bitset v1.24.4 h1:95H15Og1clikBrKr/DuzMXkQzECs1M6hhoGXLwLQOZE= +github.com/bits-and-blooms/bitset v1.24.4/go.mod h1:7hO7Gc7Pp1vODcmWvKMRA9BNmbv6a/7QIWpPxHddWR8= github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= github.com/bsm/ginkgo/v2 v2.12.0 h1:Ny8MWAHyOepLGlLKYmXG4IEkioBysk6GpaRTLC8zwWs= github.com/bsm/ginkgo/v2 v2.12.0/go.mod h1:SwYbGRRDovPVboqFv0tPTcG1sN61LM1Z4ARdbAV9g4c= @@ -46,6 +120,8 @@ github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5P github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU= github.com/cloudwego/base64x v0.1.6 h1:t11wG9AECkCDk5fMSoxmufanudBtJ+/HemLstXDLI2M= github.com/cloudwego/base64x v0.1.6/go.mod h1:OFcloc187FXDaYHvrNIjxSe8ncn0OOM8gEHfghB2IPU= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5 h1:6xNmx7iTtyBRev0+D/Tv1FZd4SCg8axKApyNyRsAt/w= +github.com/cncf/xds/go v0.0.0-20251210132809-ee656c7534f5/go.mod h1:KdCmV+x/BuvyMxRnYBlmVaq4OLiKW6iRQfvC62cvdkI= github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= @@ -82,6 +158,14 @@ github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkp github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/ebitengine/purego v0.10.0 h1:QIw4xfpWT6GWTzaW5XEKy3HXoqrJGx1ijYHzTF0/ISU= 
github.com/ebitengine/purego v0.10.0/go.mod h1:iIjxzd6CiRiOG0UyXP+V1+jWqUXVjPKLAI0mRfJZTmQ= +github.com/envoyproxy/go-control-plane v0.14.0 h1:hbG2kr4RuFj222B6+7T83thSPqLjwBIfQawTkC++2HA= +github.com/envoyproxy/go-control-plane v0.14.0/go.mod h1:NcS5X47pLl/hfqxU70yPwL9ZMkUlwlKxtAohpi2wBEU= +github.com/envoyproxy/go-control-plane/envoy v1.36.0 h1:yg/JjO5E7ubRyKX3m07GF3reDNEnfOboJ0QySbH736g= +github.com/envoyproxy/go-control-plane/envoy v1.36.0/go.mod h1:ty89S1YCCVruQAm9OtKeEkQLTb+Lkz0k8v9W0Oxsv98= +github.com/envoyproxy/go-control-plane/ratelimit v0.1.0 h1:/G9QYbddjL25KvtKTv3an9lx6VBE2cnb8wp1vEGNYGI= +github.com/envoyproxy/go-control-plane/ratelimit v0.1.0/go.mod h1:Wk+tMFAFbCXaJPzVVHnPgRKdUdwW/KdbRt94AzgRee4= +github.com/envoyproxy/protoc-gen-validate v1.3.0 h1:TvGH1wof4H33rezVKWSpqKz5NXWg5VPuZ0uONDT6eb4= +github.com/envoyproxy/protoc-gen-validate v1.3.0/go.mod h1:HvYl7zwPa5mffgyeTUHA9zHIH36nmrm7oCbo4YKoSWA= github.com/exaring/otelpgx v0.9.3 h1:4yO02tXC7ZJZ+hcqcUkfxblYNCIFGVhpUWI0iw1TzPU= github.com/exaring/otelpgx v0.9.3/go.mod h1:R5/M5LWsPPBZc1SrRE5e0DiU48bI78C1/GPTWs6I66U= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= @@ -106,6 +190,8 @@ github.com/go-faster/city v1.0.1 h1:4WAxSZ3V2Ws4QRDrscLEDcibJY8uf41H6AhXDrNDcGw= github.com/go-faster/city v1.0.1/go.mod h1:jKcUJId49qdW3L1qKHH/3wPeUstCVpVSXTM6vO3VcTw= github.com/go-faster/errors v0.7.1 h1:MkJTnDoEdi9pDabt1dpWf7AA8/BaSYZqibYyhZ20AYg= github.com/go-faster/errors v0.7.1/go.mod h1:5ySTjWFiphBs07IKuiL69nxdfd5+fzh1u7FPGZP2quo= +github.com/go-jose/go-jose/v4 v4.1.4 h1:moDMcTHmvE6Groj34emNPLs/qtYXRVcd6S7NHbHz3kA= +github.com/go-jose/go-jose/v4 v4.1.4/go.mod h1:x4oUasVrzR7071A4TnHLGSPpNOm2a21K9Kf04k1rs08= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -161,19 +247,35 @@ 
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= +github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= github.com/google/nftables v0.3.0 h1:bkyZ0cbpVeMHXOrtlFc8ISmfVqq5gPJukoYieyVmITg= github.com/google/nftables v0.3.0/go.mod h1:BCp9FsrbF1Fn/Yu6CLUc9GGZFw/+hsxfluNXXmxBfRM= github.com/google/pprof v0.0.0-20210407192527-94a9f03dee38/go.mod h1:kpwsk12EmLew5upagYY7GY0pfYCcupk39gWOCRROcvE= +github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= +github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 h1:El6M4kTTCOh6aBiKaUGG7oYTSPP8MxqL4YI3kZKwcP4= github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510/go.mod h1:pupxD2MaaD3pAXIBCelhxNneeOaAeabZDe5s4K6zSpQ= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/googleapis/enterprise-certificate-proxy v0.3.11 h1:vAe81Msw+8tKUxi2Dqh/NZMz7475yUvmRIkXr4oN2ao= +github.com/googleapis/enterprise-certificate-proxy v0.3.11/go.mod h1:RFV7MUdlb7AgEq2v7FmMCfeSMCllAzWxFgRdusoGks8= +github.com/googleapis/gax-go/v2 v2.17.0 h1:RksgfBpxqff0EZkDWYuz9q/uWsTVz+kf43LsZ1J6SMc= +github.com/googleapis/gax-go/v2 v2.17.0/go.mod h1:mzaqghpQp4JDh3HvADwrat+6M3MOIDp5YKHhb9PAgDY= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= +github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79 
h1:+ngKgrYPPJrOjhax5N+uePQ0Fh1Z7PheYoUI/0nzkPA= +github.com/gregjones/httpcache v0.0.0-20190611155906-901d90724c79/go.mod h1:FecbI9+v66THATjSRHfNgh1IVFe/9kFxbXtjV0ctIMA= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2 h1:sGm2vDRFUrQJO/Veii4h4zG2vvqG6uWNkBHSTqXOZk0= github.com/grpc-ecosystem/go-grpc-middleware/v2 v2.3.2/go.mod h1:wd1YpapPLivG6nQgbf7ZkG1hhSOXDhhn4MLTknx2aAc= github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs= github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c= +github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= +github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= +github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= +github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= +github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= +github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/ianlancetaylor/demangle v0.0.0-20200824232613-28f6c0f3b639/go.mod h1:aSSvb/t6k1mPoxDqO4vJh6VOCGPwU4O0C2/Eqndh1Sc= github.com/jackc/pgerrcode v0.0.0-20250907135507-afb5586c32a6 h1:D/V0gu4zQ3cL2WKeVNVM4r2gLxGGf6McLwgXzRTo2RQ= @@ -193,6 +295,8 @@ github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFF github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= +github.com/karlseguin/expect v1.0.2-0.20190806010014-778a5f0c6003 h1:vJ0Snvo+SLMY72r5J4sEfkuE7AFbixEP2qRbEcum/wA= 
+github.com/karlseguin/expect v1.0.2-0.20190806010014-778a5f0c6003/go.mod h1:zNBxMY8P21owkeogJELCLeHIt+voOSduHYTFUbwRAV8= github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk= @@ -207,6 +311,24 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/launchdarkly/ccache v1.1.0 h1:voD1M+ZJXR3MREOKtBwgTF9hYHl1jg+vFKS/+VAkR2k= +github.com/launchdarkly/ccache v1.1.0/go.mod h1:TlxzrlnzvYeXiLHmesMuvoZetu4Z97cV1SsdqqBJi1Q= +github.com/launchdarkly/eventsource v1.10.0 h1:H9Tp6AfGu/G2qzBJC26iperrvwhzdbiA/gx7qE2nDFI= +github.com/launchdarkly/eventsource v1.10.0/go.mod h1:J3oa50bPvJesZqNAJtb5btSIo5N6roDWhiAS3IpsKck= +github.com/launchdarkly/go-jsonstream/v3 v3.1.0 h1:U/7/LplZO72XefBQ+FzHf6o4FwLHVqBE+4V58Ornu/E= +github.com/launchdarkly/go-jsonstream/v3 v3.1.0/go.mod h1:2Pt4BR5AwWgsuVTCcIpB6Os04JFIKWfoA+7faKkZB5E= +github.com/launchdarkly/go-sdk-common/v3 v3.3.0 h1:kkf78wcKX+DOXzNjG29i+py/P+XMIw8/mXS7eEWGQwU= +github.com/launchdarkly/go-sdk-common/v3 v3.3.0/go.mod h1:mXFmDGEh4ydK3QilRhrAyKuf9v44VZQWnINyhqbbOd0= +github.com/launchdarkly/go-sdk-events/v3 v3.5.0 h1:Yav8Thm70dZbO8U1foYwZPf3w60n/lNBRaYeeNM/qg4= +github.com/launchdarkly/go-sdk-events/v3 v3.5.0/go.mod h1:oepYWQ2RvvjfL2WxkE1uJJIuRsIMOP4WIVgUpXRPcNI= +github.com/launchdarkly/go-semver v1.0.3 h1:agIy/RN3SqeQDIfKkl+oFslEdeIs7pgsJBs3CdCcGQM= +github.com/launchdarkly/go-semver v1.0.3/go.mod h1:xFmMwXba5Mb+3h72Z+VeSs9ahCvKo2QFUTHRNHVqR28= +github.com/launchdarkly/go-server-sdk-evaluation/v3 v3.0.1 
h1:rTgcYAFraGFj7sBMB2b7JCYCm0b9kph4FaMX02t4osQ= +github.com/launchdarkly/go-server-sdk-evaluation/v3 v3.0.1/go.mod h1:fPS5d+zOsgFnMunj+Ki6jjlZtFvo4h9iNbtNXxzYn58= +github.com/launchdarkly/go-server-sdk/v7 v7.13.0 h1:ajiZOPBwmWVFFgP+EMdy3oS1Xl9wNDlEd/7Zn/0I2JU= +github.com/launchdarkly/go-server-sdk/v7 v7.13.0/go.mod h1:6krbDWp417H7lIg+3ehh/A/AW5xwHtiUFg06fvNYHAk= +github.com/launchdarkly/go-test-helpers/v3 v3.1.0 h1:E3bxJMzMoA+cJSF3xxtk2/chr1zshl1ZWa0/oR+8bvg= +github.com/launchdarkly/go-test-helpers/v3 v3.1.0/go.mod h1:Ake5+hZFS/DmIGKx/cizhn5W9pGA7pplcR7xCxWiLIo= github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= github.com/lib/pq v1.11.2 h1:x6gxUeu39V0BHZiugWe8LXZYZ+Utk7hSJGThs8sdzfs= @@ -257,6 +379,8 @@ github.com/modern-go/reflect2 v1.0.3-0.20250322232337-35a7c28c31ee/go.mod h1:yWu github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= github.com/montanaflynn/stats v0.0.0-20171201202039-1bf9dbcd8cbe/go.mod h1:wL8QJuTMNUDYhXwkmfOly8iTdp5TEcJFWZD2D7SIkUc= +github.com/mschoch/smat v0.2.0 h1:8imxQsjDm8yFEAVBe7azKmKSgzSkZXDuKkSq9374khM= +github.com/mschoch/smat v0.2.0/go.mod h1:kc9mz7DoBKqDyiRL7VZN8KvXQMWeTaVnttLRXOlotKw= github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/ngrok/firewall_toolkit v0.0.18 h1:/+Rx/5qXXO8FpOoKpPnyR2nw8Y3KumuulSNZa3XGZE8= @@ -294,6 +418,8 @@ github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJw github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M= github.com/orcaman/concurrent-map/v2 v2.0.1 h1:jOJ5Pg2w1oeB6PeDurIYf6k9PQ+aTITr/6lP/L/zp6c= 
github.com/orcaman/concurrent-map/v2 v2.0.1/go.mod h1:9Eq3TG2oBe5FirmYWQfYO5iH1q0Jv47PLaNK++uCdOM= +github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc= +github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ= github.com/paulmach/orb v0.11.1 h1:3koVegMC4X/WeiXYz9iswopaTwMem53NzTJuTF20JzU= github.com/paulmach/orb v0.11.1/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/EnuLaLU= github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY= @@ -304,6 +430,8 @@ github.com/perimeterx/marshmallow v1.1.5/go.mod h1:dsXbUu8CRzfYP5a87xpp0xq9S3u0V github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 h1:GFCKgmp0tecUJ0sJuv4pzYCqS9+RGSn52M3FUwPs+uo= +github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10/go.mod h1:t/avpk3KcrXxUnYOhZhMXJlSEyie6gQbtLq5NM3loB8= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -342,6 +470,8 @@ github.com/speakeasy-api/jsonpath v0.6.0 h1:IhtFOV9EbXplhyRqsVhHoBmmYjblIRh5D1/g github.com/speakeasy-api/jsonpath v0.6.0/go.mod h1:ymb2iSkyOycmzKwbEAYPJV/yi2rSmvBCLZJcyD+VVWw= github.com/speakeasy-api/openapi-overlay v0.10.2 h1:VOdQ03eGKeiHnpb1boZCGm7x8Haj6gST0P3SGTX95GU= github.com/speakeasy-api/openapi-overlay v0.10.2/go.mod h1:n0iOU7AqKpNFfEt6tq7qYITC4f0yzVVdFw0S7hukemg= +github.com/spiffe/go-spiffe/v2 v2.6.0 h1:l+DolpxNWYgruGQVV0xsfeya3CsC7m8iBzDnMpsbLuo= 
+github.com/spiffe/go-spiffe/v2 v2.6.0/go.mod h1:gm2SeUoMZEtpnzPNs2Csc0D/gX33k1xIx7lEzqblHEs= github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= @@ -379,6 +509,8 @@ github.com/vmware-labs/yaml-jsonpath v0.3.2 h1:/5QKeCBGdsInyDCyVNLbXyilb61MXGi9N github.com/vmware-labs/yaml-jsonpath v0.3.2/go.mod h1:U6whw1z03QyqgWdgXxvVnQ90zN1BWz5V+51Ewf8k+rQ= github.com/woodsbury/decimal128 v1.4.0 h1:xJATj7lLu4f2oObouMt2tgGiElE5gO6mSWUjQsBgUlc= github.com/woodsbury/decimal128 v1.4.0/go.mod h1:BP46FUrVjVhdTbKT+XuQh2xfQaGki9LMIRJSFuh6THU= +github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0 h1:3UeQBvD0TFrlVjOeLOBz+CPAI8dnbqNSVwUwRrkp7vQ= +github.com/wsxiaoys/terminal v0.0.0-20160513160801-0940f3fc43a0/go.mod h1:IXCdmsXIht47RaVFLEdVnh1t+pgYtTAhQGj73kz+2DM= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= @@ -397,6 +529,8 @@ go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/bridges/otelzap v0.14.0 h1:2nKw2ZXZOC0N8RBsBbYwGwfKR7kJWzzyCZ6QfUGW/es= go.opentelemetry.io/contrib/bridges/otelzap v0.14.0/go.mod h1:kvyVt0WEI5BB6XaIStXPIkCSQ2nSkyd8IZnAHLEXge4= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0 h1:kWRNZMsfBHZ+uHjiH4y7Etn2FK26LAGkNFw7RHv1DhE= +go.opentelemetry.io/contrib/detectors/gcp v1.39.0/go.mod h1:t/OGqzHBa5v6RHZwrDBJ2OirWc+4q/w2fTbLZwAKjTk= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0 h1:XmiuHzgJt067+a6kwyAzkhXooYVv3/TOw9cM2VfJgUM= 
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.65.0/go.mod h1:KDgtbWKTQs4bM+VPUr6WlL9m/WXcmkCcBlIzqxPGzmI= go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.64.0 h1:ssfIgGNANqpVFCndZvcuyKbl0g+UAVcbBcqGkG28H0Y= @@ -413,6 +547,8 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0 h1:88Y4s2C8oTui1LGM6bT go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.43.0/go.mod h1:Vl1/iaggsuRlrHf/hfPJPvVag77kKyvrLeD10kpMl+A= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.39.0 h1:in9O8ESIOlwJAEGTkkf34DesGRAc/Pn8qJ7k3r/42LM= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.39.0/go.mod h1:Rp0EXBm5tfnv0WL+ARyO/PHBEaEAT8UUHQ6AGJcSq6c= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0 h1:5gn2urDL/FBnK8OkCfD1j3/ER79rUuTYmCvlXBKeYL8= +go.opentelemetry.io/otel/exporters/stdout/stdoutmetric v1.39.0/go.mod h1:0fBG6ZJxhqByfFZDwSwpZGzJU671HkwpWaNe2t4VUPI= go.opentelemetry.io/otel/log v0.19.0 h1:KUZs/GOsw79TBBMfDWsXS+KZ4g2Ckzksd1ymzsIEbo4= go.opentelemetry.io/otel/log v0.19.0/go.mod h1:5DQYeGmxVIr4n0/BcJvF4upsraHjg6vudJJpnkL6Ipk= go.opentelemetry.io/otel/log/logtest v0.15.0 h1:porNFuxAjodl6LhePevOc3n7bo3Wi3JhGXNWe7KP8iU= @@ -468,6 +604,8 @@ golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qx golang.org/x/net v0.0.0-20220225172249-27dd8689420f/go.mod h1:CfG3xpIq0wQ8r1q4Su4UZFWDARRcnwPjda9FqA0JpMk= golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA= golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs= +golang.org/x/oauth2 v0.36.0 h1:peZ/1z27fi9hUOFCAZaHyrpWG5lwe0RJEEEeH0ThlIs= +golang.org/x/oauth2 v0.36.0/go.mod h1:YDBUJMTkDnJS+A4BP4eZBjCqtokkg1hODuPjwiGPO7Q= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync 
v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= @@ -512,6 +650,8 @@ golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= +golang.org/x/time v0.14.0 h1:MRx4UaLrDotUKUdCIqzPC48t1Y9hANFKIRpNx+Te8PI= +golang.org/x/time v0.14.0/go.mod h1:eL/Oa2bBBK0TkX57Fyni+NgnyQQN4LitPmob2Hjnqw4= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= @@ -526,6 +666,10 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= +google.golang.org/api v0.267.0 h1:w+vfWPMPYeRs8qH1aYYsFX68jMls5acWl/jocfLomwE= +google.golang.org/api v0.267.0/go.mod h1:Jzc0+ZfLnyvXma3UtaTl023TdhZu6OMBP9tJ+0EmFD0= +google.golang.org/genproto v0.0.0-20260128011058-8636f8732409 h1:VQZ/yAbAtjkHgH80teYd2em3xtIkkHd7ZhqfH2N9CsM= +google.golang.org/genproto v0.0.0-20260128011058-8636f8732409/go.mod h1:rxKD3IEILWEu3P44seeNOAwZN4SaoKaQ/2eTg4mM6EM= google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9/go.mod h1:7QBABkRtR8z+TEnmXTqIqwJLlzrZKVfAUm7tY3yGv0M= google.golang.org/genproto/googleapis/rpc 
v0.0.0-20260401024825-9d38bb4040a9 h1:m8qni9SQFH0tJc1X0vmnpw/0t+AImlSvp30sEupozUg= diff --git a/tests/integration/internal/tests/api/sandboxes/sandbox_rapid_pause_resume_test.go b/tests/integration/internal/tests/api/sandboxes/sandbox_rapid_pause_resume_test.go new file mode 100644 index 0000000000..0cdbd75eb9 --- /dev/null +++ b/tests/integration/internal/tests/api/sandboxes/sandbox_rapid_pause_resume_test.go @@ -0,0 +1,191 @@ +package sandboxes + +import ( + "context" + "crypto/sha256" + "io" + "net/http" + "os" + "testing" + "time" + + "github.com/google/uuid" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/e2b-dev/infra/packages/shared/pkg/storage" + "github.com/e2b-dev/infra/packages/shared/pkg/storage/header" + "github.com/e2b-dev/infra/tests/integration/internal/api" + "github.com/e2b-dev/infra/tests/integration/internal/setup" + "github.com/e2b-dev/infra/tests/integration/internal/utils" +) + +// TestSandboxRapidSnapshotForkChain builds a tree of snapshots in rapid +// succession, exercising the multi-layer upload coordination: +// +// A +// ├── B ── D +// └── C +// +// Each child snapshot is created (and a sandbox forked from it) before the +// parent's upload has finalized. The verifier reads each build's V4 header +// directly from object storage and checks (a) ancestor lineage in the Builds +// map, and (b) self's data file checksum against BuildData.Checksum. If the +// inter-uploader sync was wrong, ancestors would be missing or self's data +// would not match its recorded checksum. 
+func TestSandboxRapidSnapshotForkChain(t *testing.T) { + t.Parallel() + c := setup.GetAPIClient() + ctx := t.Context() + + rootSbx := utils.SetupSandboxWithCleanup(t, c, utils.WithAutoPause(false)) + + snapA := createSnapshotTemplateWithCleanup(t, c, rootSbx.SandboxID, nil) + buildA := defaultTagBuildID(t, ctx, c, snapA.SnapshotID) + + sbxB := utils.SetupSandboxWithCleanup(t, c, + utils.WithTemplateID(snapA.SnapshotID), + utils.WithAutoPause(false), + ) + snapB := createSnapshotTemplateWithCleanup(t, c, sbxB.SandboxID, nil) + buildB := defaultTagBuildID(t, ctx, c, snapB.SnapshotID) + + sbxC := utils.SetupSandboxWithCleanup(t, c, + utils.WithTemplateID(snapA.SnapshotID), + utils.WithAutoPause(false), + ) + snapC := createSnapshotTemplateWithCleanup(t, c, sbxC.SandboxID, nil) + buildC := defaultTagBuildID(t, ctx, c, snapC.SnapshotID) + + sbxD := utils.SetupSandboxWithCleanup(t, c, + utils.WithTemplateID(snapB.SnapshotID), + utils.WithAutoPause(false), + ) + snapD := createSnapshotTemplateWithCleanup(t, c, sbxD.SandboxID, nil) + buildD := defaultTagBuildID(t, ctx, c, snapD.SnapshotID) + + chain := []chainNode{ + {name: "A", templateID: snapA.SnapshotID, buildID: buildA, parent: ""}, + {name: "B", templateID: snapB.SnapshotID, buildID: buildB, parent: buildA}, + {name: "C", templateID: snapC.SnapshotID, buildID: buildC, parent: buildA}, + {name: "D", templateID: snapD.SnapshotID, buildID: buildD, parent: buildB}, + } + + verifyChainOnStorage(t, ctx, chain) +} + +type chainNode struct { + name string + templateID string + buildID string + parent string // empty for root +} + +// verifyChainOnStorage loads each build's V4 memfile/rootfs headers directly +// from the configured storage backend and asserts (a) ancestor lineage in +// the Builds map and (b) self's data file matches its recorded checksum. +// +// Skipped when TEMPLATE_BUCKET_NAME / STORAGE_PROVIDER aren't set. 
+func verifyChainOnStorage(t *testing.T, ctx context.Context, chain []chainNode) { + t.Helper() + + if os.Getenv("TEMPLATE_BUCKET_NAME") == "" && !storage.IsLocal() { + t.Log("storage env not configured (TEMPLATE_BUCKET_NAME / STORAGE_PROVIDER); skipping direct storage verification") + + return + } + + persistence, err := storage.GetStorageProvider(ctx, storage.TemplateStorageConfig) + require.NoError(t, err, "build storage provider") + + ancestors := make(map[string][]string, len(chain)) + for _, node := range chain { + var chainAncestors []string + if node.parent != "" { + chainAncestors = append(chainAncestors, ancestors[node.parent]...) + chainAncestors = append(chainAncestors, node.parent) + } + ancestors[node.buildID] = chainAncestors + + paths := storage.Paths{BuildID: node.buildID} + verifyHeader(t, ctx, persistence, node, paths, storage.MemfileName, paths.MemfileHeader(), storage.MemfileObjectType, chainAncestors) + verifyHeader(t, ctx, persistence, node, paths, storage.RootfsName, paths.RootfsHeader(), storage.RootFSObjectType, chainAncestors) + } +} + +func verifyHeader(t *testing.T, ctx context.Context, persistence storage.StorageProvider, node chainNode, paths storage.Paths, fileName, headerPath string, objType storage.SeekableObjectType, ancestors []string) { + t.Helper() + + h := loadHeaderWithPolling(t, ctx, persistence, headerPath, node.name, fileName) + require.NotNilf(t, h.Builds, "%s/%s: V4 header should carry Builds map", node.name, fileName) + + selfUUID := uuid.MustParse(node.buildID) + bd, ok := h.Builds[selfUUID] + require.Truef(t, ok, "%s/%s: Builds map missing self entry %s", node.name, fileName, node.buildID) + + for _, ancestor := range ancestors { + ancUUID := uuid.MustParse(ancestor) + _, ok := h.Builds[ancUUID] + assert.Truef(t, ok, "%s/%s: Builds map missing ancestor %s β€” child finalized before parent's SwapHeader", node.name, fileName, ancestor) + } + + verifyChecksum(t, ctx, persistence, node, paths, fileName, objType, bd) +} + 
+// verifyChecksum streams self's data file through SHA-256 and compares to +// BuildData.Checksum. For unchanged files (empty diff) the entry has zero +// values and this is a no-op. +func verifyChecksum(t *testing.T, ctx context.Context, persistence storage.StorageProvider, node chainNode, paths storage.Paths, fileName string, objType storage.SeekableObjectType, bd header.BuildData) { + t.Helper() + + if bd.Size == 0 { + return // no data uploaded for this file in this layer + } + + dataPath := paths.DataFile(fileName, bd.FrameData.CompressionType()) + + obj, err := persistence.OpenSeekable(ctx, dataPath, objType) + require.NoErrorf(t, err, "%s/%s: open data file %s", node.name, fileName, dataPath) + + rc, err := obj.OpenRangeReader(ctx, 0, bd.Size, bd.FrameData) + require.NoErrorf(t, err, "%s/%s: open range reader", node.name, fileName) + defer rc.Close() + + hasher := sha256.New() + n, err := io.Copy(hasher, rc) + require.NoErrorf(t, err, "%s/%s: stream data through hasher", node.name, fileName) + require.Equalf(t, bd.Size, n, "%s/%s: streamed bytes (%d) differ from BuildData.Size (%d)", node.name, fileName, n, bd.Size) + + var got [32]byte + copy(got[:], hasher.Sum(nil)) + require.Equalf(t, bd.Checksum, got, "%s/%s: data SHA-256 does not match BuildData.Checksum β€” upload corrupted or checksum stale", node.name, fileName) +} + +// loadHeaderWithPolling waits for the V4 header to appear in object storage β€” +// snapshot uploads are async, so the header may not be present immediately +// after the snapshot endpoint returns 201. 
+func loadHeaderWithPolling(t *testing.T, ctx context.Context, persistence storage.StorageProvider, path, name, fileLabel string) *header.Header { + t.Helper() + + var h *header.Header + require.Eventually(t, func() bool { + var err error + h, err = header.LoadHeader(ctx, persistence, path) + + return err == nil && h != nil + }, 2*time.Minute, 500*time.Millisecond, "%s/%s: %s never appeared in storage", name, fileLabel, path) + + return h +} + +func defaultTagBuildID(t *testing.T, ctx context.Context, c *api.ClientWithResponses, snapshotID string) string { + t.Helper() + + tagsResp, err := c.GetTemplatesTemplateIDTagsWithResponse(ctx, snapshotID, setup.WithAPIKey()) + require.NoError(t, err) + require.Equal(t, http.StatusOK, tagsResp.StatusCode()) + require.NotNil(t, tagsResp.JSON200) + require.NotEmpty(t, *tagsResp.JSON200) + + return findDefaultTagBuildID(t, *tagsResp.JSON200).String() +}