Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion .github/workflows/pr-tests-arm64.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
name: ARM64 tests on PRs

on: [workflow_call]
on: [ workflow_call ]

permissions:
contents: read
Expand Down Expand Up @@ -108,6 +108,22 @@ jobs:
sudo apt-get update && sudo apt-get install -y bindfs
if: matrix.package == 'packages/envd'

# Diagnostic-only step: runs just for the packages/orchestrator matrix entry.
# Inside a "system info" log group it prints the runner's page size, the
# uname kernel/architecture strings, the first lines of /proc/cpuinfo and
# /proc/meminfo, and the entries under /sys/kernel/mm/hugepages/ (printing
# "(none)" when that listing fails).
- name: Diagnose runner system info
if: matrix.package == 'packages/orchestrator'
run: |
echo "::group::system info"
echo "PAGESIZE=$(getconf PAGESIZE)"
echo "uname -a: $(uname -a)"
echo "uname -m: $(uname -m)"
echo "uname -r: $(uname -r)"
echo "/proc/cpuinfo (first 20 lines):"
head -20 /proc/cpuinfo
echo "/proc/meminfo (first 10 lines):"
head -10 /proc/meminfo
echo "hugepages dir:"
ls /sys/kernel/mm/hugepages/ 2>/dev/null || echo "(none)"
echo "::endgroup::"

- name: Setup orchestrator tests
run: |
make -C packages/orchestrator fetch-busybox BUILD_ARCH=arm64
Expand Down
46 changes: 24 additions & 22 deletions packages/orchestrator/pkg/sandbox/uffd/userfaultfd/userfaultfd.go
Original file line number Diff line number Diff line change
Expand Up @@ -325,36 +325,38 @@ func (u *Userfaultfd) Serve(
return fmt.Errorf("failed to map: %w", err)
}

// State read happens before the worker takes settleRequests.RLock,
// so a REMOVE arriving in the parent's next iteration can race
// with an already-scheduled worker. Tracked as a known race; the fix is
// to re-read the state under RLock in the worker.
var source block.Slicer
switch state := u.pageTracker.get(addr); state {
case faulted:
// Already mapped (prefault or earlier fault in this batch).
// Only a UFFD_EVENT_REMOVE can transition out of `faulted`;
// the used pages must not be swappable for this to hold.
continue
case removed:
// Zero-fill: source stays nil.
case missing:
source = u.src
default:
return fmt.Errorf("unexpected pageState: %#v", state)
}

u.wg.Go(func() error {
if h := u.testFaultHook.Load(); h != nil {
(*h)(addr, faultPhaseBeforeRLock)
}

// RLock inside the goroutine so RUnlock runs via defer even on
// early return, and so it pairs with the prefetchTracker write
// below.
// RLock must be inside the goroutine so RUnlock runs via defer.
// The state read below MUST happen after RLock: the read+act+commit
// sequence (state lookup → faultPage → setState(faulted)) must be
// atomic with any concurrent REMOVE batch (settleRequests.Lock()).
// A state read in the parent loop would leave a window where a REMOVE
// lands after the read but before RLock, and the worker would
// overwrite `removed` with `faulted`.
u.settleRequests.RLock()
defer u.settleRequests.RUnlock()

var source block.Slicer

switch state := u.pageTracker.get(addr); state {
case faulted:
// Already mapped; only UFFD_EVENT_REMOVE transitions out of
// faulted. Pages must not be swappable for this to hold.
return nil
case removed:
// Zero-fill. The kernel still expects a UFFDIO_COPY/ZEROPAGE
// ack for the original MISSING fault, otherwise the faulting
// thread stays blocked.
case missing:
source = u.src
default:
return fmt.Errorf("unexpected pageState: %#v", state)
}

var accessType block.AccessType
if pf.flags&UFFD_PAGEFAULT_FLAG_WRITE == 0 {
accessType = block.Read
Expand Down
Loading