-
Notifications
You must be signed in to change notification settings - Fork 292
feat(api): add PATCH /sandboxes/{id}/metadata #2464
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,48 @@ | ||
| package handlers | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "net/http" | ||
|
|
||
| "github.com/gin-gonic/gin" | ||
|
|
||
| "github.com/e2b-dev/infra/packages/api/internal/api" | ||
| "github.com/e2b-dev/infra/packages/api/internal/utils" | ||
| "github.com/e2b-dev/infra/packages/auth/pkg/auth" | ||
| "github.com/e2b-dev/infra/packages/shared/pkg/ginutils" | ||
| "github.com/e2b-dev/infra/packages/shared/pkg/telemetry" | ||
| ) | ||
|
|
||
| func (a *APIStore) PatchSandboxesSandboxIDMetadata( | ||
| c *gin.Context, | ||
| sandboxID string, | ||
| ) { | ||
| ctx := c.Request.Context() | ||
|
|
||
| var err error | ||
| sandboxID, err = utils.ShortID(sandboxID) | ||
| if err != nil { | ||
| a.sendAPIStoreError(c, http.StatusBadRequest, "Invalid sandbox ID") | ||
|
|
||
| return | ||
| } | ||
|
|
||
| team := auth.MustGetTeamInfo(c) | ||
|
|
||
| body, err := ginutils.ParseBody[api.PatchSandboxesSandboxIDMetadataJSONRequestBody](ctx, c) | ||
| if err != nil { | ||
| a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", err)) | ||
| telemetry.ReportCriticalError(ctx, "error when parsing request", err) | ||
|
|
||
| return | ||
| } | ||
|
|
||
| if apiErr := a.orchestrator.PatchSandboxMetadata(ctx, team.ID, sandboxID, body); apiErr != nil { | ||
| telemetry.ReportErrorByCode(ctx, apiErr.Code, "error patching sandbox metadata", apiErr.Err) | ||
| a.sendAPIStoreError(c, apiErr.Code, apiErr.ClientMsg) | ||
|
|
||
| return | ||
| } | ||
|
|
||
| c.Status(http.StatusNoContent) | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,121 @@ | ||
| package orchestrator | ||
|
|
||
| import ( | ||
| "context" | ||
| "errors" | ||
| "fmt" | ||
| "maps" | ||
| "net/http" | ||
|
|
||
| "github.com/google/uuid" | ||
| "go.opentelemetry.io/otel/attribute" | ||
| "go.opentelemetry.io/otel/trace" | ||
| "google.golang.org/grpc/codes" | ||
| "google.golang.org/grpc/status" | ||
|
|
||
| "github.com/e2b-dev/infra/packages/api/internal/api" | ||
| "github.com/e2b-dev/infra/packages/api/internal/sandbox" | ||
| "github.com/e2b-dev/infra/packages/api/internal/utils" | ||
| orchestratorgrpc "github.com/e2b-dev/infra/packages/shared/pkg/grpc/orchestrator" | ||
| "github.com/e2b-dev/infra/packages/shared/pkg/telemetry" | ||
| ) | ||
|
|
||
| // PatchSandboxMetadata applies a JSON-Merge-Patch-style update: non-nil values | ||
| // upsert the key, a nil pointer (or empty string) removes it, and absent keys | ||
| // are left alone. | ||
//
// The merge is computed inside the callback handed to sandboxStore.Update and
// is refused unless the sandbox is in sandbox.StateRunning. After the store
// update succeeds, the merged map is sent to the node hosting the sandbox via
// patchSandboxMetadataOnNode.
//
// Returns nil on success. Otherwise returns an *api.APIError with:
//   - 409 Conflict when the sandbox is not running,
//   - 404 Not Found when the store reports sandbox.ErrNotFound,
//   - 500 Internal Server Error for any other store error,
//   - whatever patchSandboxMetadataOnNode returns for node-side failures.
//
// NOTE(review): the store is updated before the node call, and nothing in this
// function reverts the store when the node call fails, so the two can disagree
// after a failed PATCH.
| func (o *Orchestrator) PatchSandboxMetadata( | ||
| ctx context.Context, | ||
| teamID uuid.UUID, | ||
| sandboxID string, | ||
| patch map[string]*string, | ||
| ) *api.APIError { | ||
// merged is assigned by updateFunc so the patched map is still available
// after Update returns and can be forwarded to the node.
| var merged map[string]string | ||
|
|
||
| updateFunc := func(sbx sandbox.Sandbox) (sandbox.Sandbox, error) { | ||
| if sbx.State != sandbox.StateRunning { | ||
| return sbx, &sandbox.NotRunningError{SandboxID: sandboxID, State: sbx.State} | ||
| } | ||
|
|
||
| merged = applyMetadataPatch(sbx.Metadata, patch) | ||
| sbx.Metadata = merged | ||
|
|
||
| return sbx, nil | ||
| } | ||
|
|
||
// Target for the errors.As check below.
| var sbxNotRunningErr *sandbox.NotRunningError | ||
|
|
||
| sbx, err := o.sandboxStore.Update(ctx, teamID, sandboxID, updateFunc) | ||
| if err != nil { | ||
| switch { | ||
| case errors.As(err, &sbxNotRunningErr): | ||
| return &api.APIError{Code: http.StatusConflict, ClientMsg: utils.SandboxChangingStateMsg(sandboxID, sbxNotRunningErr.State), Err: err} | ||
| case errors.Is(err, sandbox.ErrNotFound): | ||
| return &api.APIError{Code: http.StatusNotFound, ClientMsg: utils.SandboxNotFoundMsg(sandboxID), Err: err} | ||
| default: | ||
| return &api.APIError{Code: http.StatusInternalServerError, ClientMsg: "Error patching sandbox metadata", Err: err} | ||
| } | ||
| } | ||
|
|
||
| return o.patchSandboxMetadataOnNode(ctx, sbx, merged) | ||
|
Check failure on line 59 in packages/api/internal/orchestrator/patch_metadata.go
|
||
|
Comment on lines
+47
to
+59
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔴 The two-phase write in Extended reasoning...What the bug is and how it manifests
Bug 1 – No rollback on gRPC failure (split-brain)
Bug 2 – Concurrent PATCH calls silently overwrite each other at the node The Redis-level Why existing code does not prevent this The orchestrator-side Step-by-step proof of Bug 1
Step-by-step proof of Bug 2 Initial metadata: How to fix Bug 1: Capture old metadata before calling |
||
| } | ||
|
|
||
// applyMetadataPatch returns a freshly allocated map containing current with
// patch layered on top; neither argument is modified. A patch entry whose
// value is nil or points to an empty string removes that key from the result,
// and every other entry sets the key to the pointed-to value. Keys of current
// that do not appear in patch are carried over unchanged.
func applyMetadataPatch(current map[string]string, patch map[string]*string) map[string]string {
	result := make(map[string]string, len(current)+len(patch))
	maps.Copy(result, current)

	for key, value := range patch {
		if value != nil && *value != "" {
			result[key] = *value

			continue
		}

		// nil or empty string: the key is dropped.
		delete(result, key)
	}

	return result
}
|
|
||
// patchSandboxMetadataOnNode sends metadata to the node identified by
// sbx.ClusterID and sbx.NodeID through the orchestrator gRPC Sandbox.Update
// call, inside a "patch-sandbox-metadata-on-node" span. The map is passed
// as-is in SandboxMetadataUpdate.Entries.
//
// Returns nil on success. Otherwise returns an *api.APIError with:
//   - 500 when getOrConnectNode returns no node,
//   - 404 when the node answers with gRPC codes.NotFound,
//   - 500 for any other gRPC failure, which is also reported as a critical
//     error.
| func (o *Orchestrator) patchSandboxMetadataOnNode( | ||
| ctx context.Context, | ||
| sbx sandbox.Sandbox, | ||
| metadata map[string]string, | ||
| ) *api.APIError { | ||
| ctx, span := tracer.Start(ctx, "patch-sandbox-metadata-on-node", | ||
| trace.WithAttributes( | ||
| attribute.String("instance.id", sbx.SandboxID), | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. telemetry.WithSandboxID |
||
| ), | ||
| ) | ||
| defer span.End() | ||
|
|
||
| node := o.getOrConnectNode(ctx, sbx.ClusterID, sbx.NodeID) | ||
| if node == nil { | ||
| return &api.APIError{ | ||
| Code: http.StatusInternalServerError, | ||
| ClientMsg: fmt.Sprintf("Node hosting sandbox '%s' not found", sbx.SandboxID), | ||
| Err: fmt.Errorf("node '%s' not found for cluster '%s'", sbx.NodeID, sbx.ClusterID), | ||
| } | ||
| } | ||
|
|
||
| client, ctx := node.GetClient(ctx) | ||
| _, err := client.Sandbox.Update(ctx, &orchestratorgrpc.SandboxUpdateRequest{ | ||
| SandboxId: sbx.SandboxID, | ||
| Metadata: &orchestratorgrpc.SandboxMetadataUpdate{Entries: metadata}, | ||
| }) | ||
| if err != nil { | ||
// A NotFound status from the node maps to 404 and is returned without
// being reported as a critical error.
| grpcErr, ok := status.FromError(err) | ||
| if ok && grpcErr.Code() == codes.NotFound { | ||
| return &api.APIError{Code: http.StatusNotFound, ClientMsg: utils.SandboxNotFoundMsg(sbx.SandboxID), Err: err} | ||
| } | ||
|
|
||
| err = utils.UnwrapGRPCError(err) | ||
| telemetry.ReportCriticalError(ctx, "failed to patch sandbox metadata on node", err) | ||
|
|
||
| return &api.APIError{ | ||
| Code: http.StatusInternalServerError, | ||
| ClientMsg: "Error applying metadata to sandbox", | ||
| Err: fmt.Errorf("failed to patch sandbox metadata on node: %w", err), | ||
| } | ||
| } | ||
|
|
||
| telemetry.ReportEvent(ctx, "Patched sandbox metadata on node") | ||
|
|
||
| return nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -101,6 +101,11 @@ message SandboxUpdateRequest { | |
| // All fields are optional — only set fields are applied. | ||
| optional google.protobuf.Timestamp end_time = 2; | ||
| optional SandboxNetworkEgressConfig egress = 3; | ||
| optional SandboxMetadataUpdate metadata = 4; | ||
| } | ||
|
|
||
// SandboxMetadataUpdate carries the metadata key/value entries for a sandbox
// update. It is the type of the optional SandboxUpdateRequest.metadata field.
| message SandboxMetadataUpdate { | ||
| map<string, string> entries = 1; | ||
| } | ||
|
|
||
| message SandboxDeleteRequest { | ||
|
|
@@ -127,6 +132,10 @@ message RunningSandbox { | |
|
|
||
| google.protobuf.Timestamp start_time = 3; | ||
| google.protobuf.Timestamp end_time = 4; | ||
|
|
||
| // Live user-facing metadata tags. Authoritative over config.metadata, which | ||
| // reflects only the create-time snapshot. | ||
| map<string, string> metadata = 5; | ||
|
Comment on lines
+135
to
+138
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do we need this change? |
||
| } | ||
|
|
||
| message SandboxListResponse { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -336,6 +336,17 @@ | |
| }) | ||
| } | ||
|
|
||
| if req.GetMetadata() != nil { | ||
|
levb marked this conversation as resolved.
|
||
| updates = append(updates, func(_ context.Context) (func(context.Context), error) { | ||
| oldMetadata := sbx.GetAPIMetadata() | ||
| sbx.SetAPIMetadata(req.GetMetadata().GetEntries()) | ||
|
|
||
| return func(_ context.Context) { | ||
| sbx.SetAPIMetadata(oldMetadata) | ||
| }, nil | ||
| }) | ||
| } | ||
|
|
||
| if err := utils.ApplyAllOrNone(ctx, updates); err != nil { | ||
| telemetry.ReportCriticalError(ctx, "failed to update sandbox", err) | ||
|
|
||
|
|
@@ -355,7 +366,6 @@ | |
| "allowed_domains": egress.GetAllowedDomains(), | ||
| } | ||
| } | ||
|
|
||
| go s.sbxEventsService.Publish( | ||
| context.WithoutCancel(ctx), | ||
| teamID, | ||
|
|
@@ -401,6 +411,7 @@ | |
| ClientId: s.info.ClientId, | ||
| StartTime: timestamppb.New(startedAt), | ||
| EndTime: timestamppb.New(sbx.GetEndAt()), | ||
| Metadata: sbx.GetAPIMetadata(), | ||
| }) | ||
| } | ||
|
|
||
|
|
@@ -629,12 +640,15 @@ | |
| ) | ||
| if err != nil { | ||
| telemetry.ReportCriticalError(ctx, "error resuming sandbox after checkpoint", err, telemetry.WithSandboxID(in.GetSandboxId())) | ||
|
|
||
| return nil, status.Errorf(codes.Internal, "error resuming sandbox after checkpoint: %s", err) | ||
| } | ||
| // ResumeSandbox seeds apiMetadata from the (immutable) APIStoredConfig | ||
| // snapshot — override with the live value so any PATCH carries over. | ||
| resumedSbx.SetAPIMetadata(sbx.GetAPIMetadata()) | ||
|
|
||
| // Collect prefetch data immediately after resume while it's most accurate | ||
| prefetchData, prefetchErr := resumedSbx.MemoryPrefetchData(ctx) | ||
|
Check failure on line 651 in packages/orchestrator/pkg/server/sandboxes.go
|
||
|
Comment on lines
643
to
651
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🔴 The Checkpoint handler calls Extended reasoning...What the bug is and how it manifests Inside The specific code path that triggers it
Why existing code does not prevent it The What the impact would be Consider:
Any subsequent How to fix it Option A (preferred — atomic): Pass the live metadata into Option B (surgical): Extract Step-by-step proof of divergence
|
||
| if prefetchErr != nil { | ||
| sbxlogger.I(resumedSbx).Warn(ctx, "failed to get prefetch data for checkpoint", zap.Error(prefetchErr)) | ||
| } | ||
|
|
@@ -692,13 +706,13 @@ | |
| } | ||
|
|
||
| buildId := "" | ||
| eventData := make(map[string]any) | ||
| if sbx.APIStoredConfig != nil { | ||
| buildId = sbx.APIStoredConfig.GetBuildId() | ||
| if sbx.APIStoredConfig.Metadata != nil { | ||
| // Copy the map to avoid race conditions | ||
| eventData["sandbox_metadata"] = utils.ShallowCopyMap(sbx.APIStoredConfig.GetMetadata()) | ||
| } | ||
| } | ||
|
|
||
| eventData := make(map[string]any) | ||
| if md := sbx.GetAPIMetadata(); len(md) > 0 { | ||
| eventData["sandbox_metadata"] = md | ||
| } | ||
|
|
||
| return teamID, buildId, eventData | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.