Skip to content
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
541 changes: 379 additions & 162 deletions packages/api/internal/api/api.gen.go

Large diffs are not rendered by default.

50 changes: 50 additions & 0 deletions packages/api/internal/handlers/sandbox_metadata_update.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
package handlers

import (
"fmt"
"net/http"

"github.com/gin-gonic/gin"

"github.com/e2b-dev/infra/packages/api/internal/api"
"github.com/e2b-dev/infra/packages/api/internal/utils"
"github.com/e2b-dev/infra/packages/auth/pkg/auth"
"github.com/e2b-dev/infra/packages/shared/pkg/ginutils"
"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
)

// PutSandboxesSandboxIDMetadata handles a metadata update request for a
// single sandbox. It normalizes the sandbox ID, reads the metadata map from
// the JSON request body and hands both to the orchestrator together with the
// ID of the authenticated team.
//
// It answers 400 when the sandbox ID or the body cannot be parsed, forwards
// the status code and client message of any error the orchestrator returns,
// and sets 204 No Content on success.
func (a *APIStore) PutSandboxesSandboxIDMetadata(
	c *gin.Context,
	sandboxID string,
) {
	ctx := c.Request.Context()

	shortID, idErr := utils.ShortID(sandboxID)
	if idErr != nil {
		a.sendAPIStoreError(c, http.StatusBadRequest, "Invalid sandbox ID")

		return
	}

	team := auth.MustGetTeamInfo(c)

	body, parseErr := ginutils.ParseBody[api.PutSandboxesSandboxIDMetadataJSONRequestBody](ctx, c)
	if parseErr != nil {
		a.sendAPIStoreError(c, http.StatusBadRequest, fmt.Sprintf("Error when parsing request: %s", parseErr))
		telemetry.ReportCriticalError(ctx, "error when parsing request", parseErr)

		return
	}

	// The generated request body type is converted to a plain string map for
	// the orchestrator call.
	apiErr := a.orchestrator.UpdateSandboxMetadata(ctx, team.ID, shortID, map[string]string(body))
	if apiErr != nil {
		telemetry.ReportErrorByCode(ctx, apiErr.Code, "error updating sandbox metadata", apiErr.Err)
		a.sendAPIStoreError(c, apiErr.Code, apiErr.ClientMsg)

		return
	}

	c.Status(http.StatusNoContent)
}
100 changes: 100 additions & 0 deletions packages/api/internal/orchestrator/update_metadata.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
package orchestrator

import (
"context"
"errors"
"fmt"
"net/http"

"github.com/google/uuid"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"

"github.com/e2b-dev/infra/packages/api/internal/api"
"github.com/e2b-dev/infra/packages/api/internal/sandbox"
"github.com/e2b-dev/infra/packages/api/internal/utils"
orchestratorgrpc "github.com/e2b-dev/infra/packages/shared/pkg/grpc/orchestrator"
"github.com/e2b-dev/infra/packages/shared/pkg/telemetry"
)

func (o *Orchestrator) UpdateSandboxMetadata(
ctx context.Context,
teamID uuid.UUID,
sandboxID string,
metadata map[string]string,
) *api.APIError {
updateFunc := func(sbx sandbox.Sandbox) (sandbox.Sandbox, error) {
if sbx.State != sandbox.StateRunning {
return sbx, &sandbox.NotRunningError{SandboxID: sandboxID, State: sbx.State}
}

sbx.Metadata = metadata

return sbx, nil
}

var sbxNotRunningErr *sandbox.NotRunningError

sbx, err := o.sandboxStore.Update(ctx, teamID, sandboxID, updateFunc)
if err != nil {
switch {
case errors.As(err, &sbxNotRunningErr):
return &api.APIError{Code: http.StatusConflict, ClientMsg: utils.SandboxChangingStateMsg(sandboxID, sbxNotRunningErr.State), Err: err}
case errors.Is(err, sandbox.ErrNotFound):
return &api.APIError{Code: http.StatusNotFound, ClientMsg: utils.SandboxNotFoundMsg(sandboxID), Err: err}
default:
return &api.APIError{Code: http.StatusInternalServerError, ClientMsg: "Error updating sandbox metadata", Err: err}
}
}

return o.updateSandboxMetadataOnNode(ctx, sbx, metadata)

Check failure on line 52 in packages/api/internal/orchestrator/update_metadata.go

View check run for this annotation

Claude / Claude Code Review

No rollback of API store when gRPC metadata node update fails

In UpdateSandboxMetadata, the API store is updated before the gRPC call to the node, but there is no rollback if the gRPC call fails — leaving the API store with new metadata while the running sandbox on the node retains the old metadata. Any subsequent GET /sandboxes call will return the new metadata while the actual sandbox environment has different values; since the API store is Redis-backed this divergence persists across restarts. Fix by saving the old metadata before sandboxStore.Update an
Comment thread
levb marked this conversation as resolved.
Outdated
}

// updateSandboxMetadataOnNode sends the given metadata to the node hosting
// sbx through the orchestrator gRPC Sandbox.Update call, inside its own trace
// span.
//
// It returns nil on success. It returns a 500 APIError when the node cannot
// be resolved or the call fails, except for a gRPC NotFound status, which is
// mapped to a 404 APIError.
func (o *Orchestrator) updateSandboxMetadataOnNode(
	ctx context.Context,
	sbx sandbox.Sandbox,
	metadata map[string]string,
) *api.APIError {
	spanAttrs := trace.WithAttributes(attribute.String("instance.id", sbx.SandboxID))
	ctx, span := tracer.Start(ctx, "update-sandbox-metadata-on-node", spanAttrs)
	defer span.End()

	node := o.getOrConnectNode(ctx, sbx.ClusterID, sbx.NodeID)
	if node == nil {
		return &api.APIError{
			Code:      http.StatusInternalServerError,
			ClientMsg: fmt.Sprintf("Node hosting sandbox '%s' not found", sbx.SandboxID),
			Err:       fmt.Errorf("node '%s' not found for cluster '%s'", sbx.NodeID, sbx.ClusterID),
		}
	}

	client, ctx := node.GetClient(ctx)

	request := &orchestratorgrpc.SandboxUpdateRequest{
		SandboxId: sbx.SandboxID,
		Metadata:  &orchestratorgrpc.SandboxMetadataUpdate{Entries: metadata},
	}

	_, updateErr := client.Sandbox.Update(ctx, request)
	if updateErr == nil {
		telemetry.ReportEvent(ctx, "Updated sandbox metadata on node")

		return nil
	}

	// A NotFound status from the node is surfaced to the client as 404 and is
	// not reported as a critical error here.
	if st, isStatus := status.FromError(updateErr); isStatus && st.Code() == codes.NotFound {
		return &api.APIError{Code: http.StatusNotFound, ClientMsg: utils.SandboxNotFoundMsg(sbx.SandboxID), Err: updateErr}
	}

	unwrapped := utils.UnwrapGRPCError(updateErr)
	telemetry.ReportCriticalError(ctx, "failed to update sandbox metadata on node", unwrapped)

	return &api.APIError{
		Code:      http.StatusInternalServerError,
		ClientMsg: "Error applying metadata to sandbox",
		Err:       fmt.Errorf("failed to update sandbox metadata on node: %w", unwrapped),
	}
}
5 changes: 5 additions & 0 deletions packages/orchestrator/orchestrator.proto
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,11 @@ message SandboxUpdateRequest {
// All fields are optional — only set fields are applied.
optional google.protobuf.Timestamp end_time = 2;
optional SandboxNetworkEgressConfig egress = 3;
optional SandboxMetadataUpdate metadata = 4;
}

// SandboxMetadataUpdate wraps the sandbox metadata entries carried by the
// optional `metadata` field of SandboxUpdateRequest. Because the map lives in
// its own message, a request that does not set `metadata` can be told apart
// from one that sets it with no entries.
message SandboxMetadataUpdate {
// Key/value metadata entries for the sandbox.
map<string, string> entries = 1;
}

message SandboxDeleteRequest {
Expand Down
28 changes: 27 additions & 1 deletion packages/orchestrator/pkg/sandbox/sandbox.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,9 +182,10 @@
Config *Config
Runtime RuntimeMetadata

rwmu sync.RWMutex // protects startedAt, endAt
rwmu sync.RWMutex // protects startedAt, endAt, metadata
startedAt time.Time
endAt time.Time
metadata map[string]string
}

// GetEndAt returns the sandbox end time in a thread-safe manner.
Expand All @@ -203,6 +204,29 @@
m.endAt = t
}

// GetMetadata returns the sandbox metadata in a thread-safe manner. The
// result is never nil: when no map has been stored yet (the field was left
// nil at construction), a new empty map is returned instead.
//
// The caller must not mutate the returned map. SetMetadata only replaces the
// slot, so a previously returned non-empty reference keeps its contents.
func (m *Metadata) GetMetadata() map[string]string {
	m.rwmu.RLock()
	defer m.rwmu.RUnlock()

	// The field can be nil when the struct was built without going through
	// SetMetadata; normalize here so callers never have to nil-check.
	if m.metadata == nil {
		return map[string]string{}
	}

	return m.metadata
}

// SetMetadata swaps in a new metadata map while holding the write lock. A nil
// argument is stored as an empty map, so a map written through this method is
// never nil. The map is stored by reference, not copied, so the caller must
// not mutate it after the call.
func (m *Metadata) SetMetadata(metadata map[string]string) {
	next := metadata
	if next == nil {
		next = make(map[string]string)
	}

	m.rwmu.Lock()
	m.metadata = next
	m.rwmu.Unlock()
}

Check failure on line 228 in packages/orchestrator/pkg/sandbox/sandbox.go

View check run for this annotation

Claude / Claude Code Review

SetMetadata does not sync APIStoredConfig, causing Checkpoint to resume with stale metadata

SetMetadata() in sandbox.go:219-228 updates only the live Metadata.metadata field but never syncs sbx.APIStoredConfig.Metadata, so the Checkpoint handler resumes sandboxes with stale (pre-update) metadata. The List RPC and memory-backend API recovery are similarly affected because both read from APIStoredConfig. Fix by also updating APIStoredConfig.Metadata inside SetMetadata(), or by reading sbx.GetMetadata() instead of forwarding APIStoredConfig verbatim in the Checkpoint path.
Comment thread
levb marked this conversation as resolved.
Outdated

type Sandbox struct {
*Resources
*Metadata
Expand Down Expand Up @@ -458,6 +482,7 @@

startedAt: time.Now(),
endAt: time.Now().Add(sandboxTimeout),
metadata: apiConfigToStore.GetMetadata(),
}

sbx := &Sandbox{
Expand Down Expand Up @@ -805,6 +830,7 @@

startedAt: startedAt,
endAt: endAt,
metadata: apiConfigToStore.GetMetadata(),
}

sbx := &Sandbox{
Expand Down
21 changes: 15 additions & 6 deletions packages/orchestrator/pkg/server/sandboxes.go
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,16 @@ func (s *Server) Update(ctx context.Context, req *orchestrator.SandboxUpdateRequ
})
}

if req.GetMetadata() != nil {
Comment thread
levb marked this conversation as resolved.
updates = append(updates, func(_ context.Context) (func(context.Context), error) {
oldMetadata := sbx.GetMetadata()

sbx.SetMetadata(req.GetMetadata().GetEntries())
Comment thread
levb marked this conversation as resolved.
Outdated
Comment thread
levb marked this conversation as resolved.
Outdated

return func(_ context.Context) { sbx.SetMetadata(oldMetadata) }, nil
})
}

if err := utils.ApplyAllOrNone(ctx, updates); err != nil {
telemetry.ReportCriticalError(ctx, "failed to update sandbox", err)

Expand All @@ -355,7 +365,6 @@ func (s *Server) Update(ctx context.Context, req *orchestrator.SandboxUpdateRequ
"allowed_domains": egress.GetAllowedDomains(),
}
}

go s.sbxEventsService.Publish(
context.WithoutCancel(ctx),
teamID,
Expand Down Expand Up @@ -692,13 +701,13 @@ func (s *Server) prepareSandboxEventData(ctx context.Context, sbx *sandbox.Sandb
}

buildId := ""
eventData := make(map[string]any)
if sbx.APIStoredConfig != nil {
buildId = sbx.APIStoredConfig.GetBuildId()
if sbx.APIStoredConfig.Metadata != nil {
// Copy the map to avoid race conditions
eventData["sandbox_metadata"] = utils.ShallowCopyMap(sbx.APIStoredConfig.GetMetadata())
}
}

eventData := make(map[string]any)
if md := sbx.GetMetadata(); len(md) > 0 {
eventData["sandbox_metadata"] = md
}

return teamID, buildId, eventData
Expand Down
Loading
Loading