From 8b9b1c620a4e7c4dd9afd301cf85e8950d6ff26f Mon Sep 17 00:00:00 2001 From: Max Isbey <224885523+maxisbey@users.noreply.github.com> Date: Fri, 19 Jun 2026 13:39:15 +0000 Subject: [PATCH] ci(conformance): add 2026-07-28 carried-forward leg + bump harness to 0.2.0-alpha.4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds a third server step and second client step that run `--suite all --spec-version 2026-07-28` against a separate baseline file. The existing `--suite draft` step only runs scenarios introduced in 2026-07-28; this new leg re-runs every applicable scenario at the 2026 wire so regressions in 2025-era behaviour under the stateless protocol are caught. The 2026 leg needs its own expected-failures.2026-07-28.yml because baseline entries are keyed by scenario name only — a scenario that passes at its default version but fails when forced to 2026 cannot share a file with the 2025 legs (the passing leg flags it stale). Baseline derived empirically from a local alpha.4 build. The alpha.3 -> alpha.4 bump required no entry changes to the existing expected-failures.yml. client.py now reads MCP_CONFORMANCE_PROTOCOL_VERSION (set by the harness on every invocation) so handlers can branch on it once the stateless client path lands; today it is logged only. 
--- .github/actions/conformance/client.py | 11 ++ .../expected-failures.2026-07-28.yml | 149 ++++++++++++++++++ .../actions/conformance/expected-failures.yml | 5 +- .github/workflows/conformance.yml | 18 ++- 4 files changed, 179 insertions(+), 4 deletions(-) create mode 100644 .github/actions/conformance/expected-failures.2026-07-28.yml diff --git a/.github/actions/conformance/client.py b/.github/actions/conformance/client.py index 58f684f016..a438cb29ee 100644 --- a/.github/actions/conformance/client.py +++ b/.github/actions/conformance/client.py @@ -6,6 +6,9 @@ Contract: - MCP_CONFORMANCE_SCENARIO env var -> scenario name - MCP_CONFORMANCE_CONTEXT env var -> optional JSON (for client-credentials scenarios) + - MCP_CONFORMANCE_PROTOCOL_VERSION env var -> spec version the harness mock + server is speaking (e.g. "2025-11-25", "2026-07-28"). Always set; defaults + to the harness's LATEST_SPEC_VERSION when --spec-version is omitted. - Server URL as last CLI argument (sys.argv[1]) - Must exit 0 within 30 seconds @@ -50,6 +53,13 @@ ) logger = logging.getLogger(__name__) +#: Spec version the harness is running this scenario at (e.g. "2025-11-25", +#: "2026-07-28"). The harness always sets this (it falls back to its own +#: LATEST_SPEC_VERSION when --spec-version is omitted), so None means we were +#: invoked outside the harness. Handlers that need to take the stateless 2026 +#: path will branch on this once the SDK has one; today it is logged only. 
+PROTOCOL_VERSION: str | None = os.environ.get("MCP_CONFORMANCE_PROTOCOL_VERSION") + # Type for async scenario handler functions ScenarioHandler = Callable[[str], Coroutine[Any, None, None]] @@ -347,6 +357,7 @@ def main() -> None: server_url = sys.argv[1] scenario = os.environ.get("MCP_CONFORMANCE_SCENARIO") + logger.debug(f"Conformance protocol version: {PROTOCOL_VERSION!r}") if scenario: logger.debug(f"Running explicit scenario '{scenario}' against {server_url}") diff --git a/.github/actions/conformance/expected-failures.2026-07-28.yml b/.github/actions/conformance/expected-failures.2026-07-28.yml new file mode 100644 index 0000000000..76793dd635 --- /dev/null +++ b/.github/actions/conformance/expected-failures.2026-07-28.yml @@ -0,0 +1,149 @@ +# Expected failures for the carried-forward 2026-07-28 legs +# (`--suite all --spec-version 2026-07-28` for both server and client). +# +# This baseline is separate from expected-failures.yml because entries are +# keyed by scenario name only: a scenario that passes at its default version +# in the 2025 legs but fails when forced to 2026-07-28 (or vice versa) cannot +# be expressed in a shared file (the passing leg would flag the entry as +# stale). Like expected-failures.yml, this single file covers both +# directions: the client 2026 leg reads the `client:` section and the server +# 2026 leg reads the `server:` section. Both burn down independently of the +# 2025 legs. +# +# Baseline established against @modelcontextprotocol/conformance pinned in +# .github/workflows/conformance.yml (CONFORMANCE_VERSION = 0.2.0-alpha.4). +# New conformance releases are adopted by deliberately bumping that pin and +# reconciling both this file and expected-failures.yml in the same change. +# +# Entries are grouped by what unblocks them. As each gap closes the +# corresponding scenarios start passing and MUST be removed from this list +# (the runner fails on stale entries), so the baseline burns down per +# milestone. 
+ +client: + # --- No stateless client path on main yet --- + # client.py drives the 2025 stateful lifecycle (initialize handshake + + # session). The 2026-mode mock server is stateless, so the call sequence + # never reaches the assertion. Unblocks when client.py's is_modern_protocol() + # branch takes the per-request _meta path. + - tools_call + + # --- SEP-837 (application_type during DCR) --- + # The sep-837-application-type-present check only fires on 2026-version + # runs; the client omits application_type during Dynamic Client + # Registration, so every auth scenario that reaches DCR fails it on this + # leg (the same scenarios pass at their default version in the 2025 legs). + - auth/metadata-default + - auth/metadata-var1 + - auth/metadata-var2 + - auth/metadata-var3 + - auth/scope-from-www-authenticate + - auth/scope-from-scopes-supported + - auth/scope-omitted-when-undefined + - auth/token-endpoint-auth-basic + - auth/token-endpoint-auth-post + - auth/token-endpoint-auth-none + - auth/offline-access-not-supported + + # --- Auth scenarios cut short by the 2026 connection lifecycle --- + # The auth fixture flow drives the 2025 stateful lifecycle; the 2026-mode + # mock rejects the MCP POST before the scope-escalation behaviour these + # scenarios measure, so no authorization requests are observed. Unblocks + # when client.py's auth flow speaks the 2026 per-request lifecycle. + - auth/scope-step-up + - auth/scope-retry-limit + + # --- Same gaps as the 2025 baseline (fail identically when forced to 2026-07-28) --- + # SEP-2575 (request metadata / _meta envelope): client does not yet populate + # the _meta envelope or implement the MCP-Protocol-Version header semantics. + - request-metadata + # SEP-2322 (multi-round-trip requests): client does not echo requestState / + # handle IncompleteResult yet. + - sep-2322-client-request-state + # SEP-2243 (HTTP standardization): no fixture handler / client header support yet. 
+ - http-custom-headers + - http-invalid-tool-headers + # SEP-2106 (JSON Schema $ref handling): client still dereferences network $refs. + - json-schema-ref-no-deref + # SEP-2468 (authorization response iss parameter): not implemented in the client. + - auth/iss-supported + - auth/iss-not-advertised + - auth/iss-supported-missing + - auth/iss-wrong-issuer + - auth/iss-unexpected + - auth/iss-normalized + - auth/metadata-issuer-mismatch + # SEP-2352 (authorization server migration): client does not re-register when + # PRM authorization_servers changes. + - auth/authorization-server-migration + # auth/enterprise-managed-authorization (SEP-990) is in the 2025 baseline but + # NOT here: the harness skips it as inapplicable at --spec-version 2026-07-28 + # (it is an extension scenario not carried into the 2026 wire), so it is + # neither run nor evaluated on this leg. + +server: + # --- No stateless server path on main yet (carried-forward 2025-era scenarios) --- + # mcp-everything-server only runs in 2025 stateful mode. With + # --spec-version 2026-07-28 the harness sends stateless requests + # (MCP-Protocol-Version: 2026-07-28, _meta envelope, no initialize), which + # the server rejects before the handler runs. These scenarios all pass on + # the 2025 legs; they unblock once mcp-everything-server routes 2026 + # requests through a stateless path. 
+ - completion-complete + - tools-list + - tools-call-simple-text + - tools-call-image + - tools-call-audio + - tools-call-embedded-resource + - tools-call-mixed-content + - tools-call-error + - tools-call-with-progress + - server-sse-multiple-streams + - resources-list + - resources-read-text + - resources-read-binary + - resources-templates-read + - prompts-list + - prompts-get-simple + - prompts-get-with-args + - prompts-get-embedded-resource + - prompts-get-with-image + - dns-rebinding-protection + # SEP-2106 (JSON Schema 2020-12 in tool inputSchema): the fixture tool's + # schema has none of the 2020-12 keywords the scenario checks. The scenario + # is in `--suite all` but not `--suite active`, so this is the only leg that + # runs it; it fails identically at 2025-11-25 (not a 2026-path regression). + - json-schema-2020-12 + + # --- Draft scenarios (same failures and reasons as the `--suite draft` leg) --- + # SEP-2575 (stateless HTTP / _meta envelope): server has no stateless mode, + # _meta-derived capabilities, error-code mappings, or server/discover yet. + - server-stateless + # SEP-2322 (multi-round-trip requests / IncompleteResult): not implemented. + - input-required-result-basic-elicitation + - input-required-result-basic-sampling + - input-required-result-basic-list-roots + - input-required-result-request-state + - input-required-result-multiple-input-requests + - input-required-result-multi-round + - input-required-result-non-tool-request + - input-required-result-result-type + - input-required-result-tampered-state + - input-required-result-capability-check + # SEP-2549 (caching): no ttlMs/cacheScope support. + - caching + # SEP-2243 (HTTP header standardization): -32001 HeaderMismatch handling and + # case-insensitive/whitespace-trimmed header validation not implemented. 
+ - http-header-validation + - http-custom-header-server-validation + + # --- WARNING-only entries --- + # These scenarios emit no FAILURE checks, only SHOULD-level WARNINGs, but + # the expected-failures evaluator counts WARNINGs as failures. Same entries + # as the draft suite in expected-failures.yml. + # SEP-2164: server returns -32600 (not -32602) and omits error.data.uri. + - sep-2164-resource-not-found + # SEP-2322 SHOULD-level behaviours (re-request missing inputResponses, + # ignore unrecognized inputResponses keys). + - input-required-result-missing-input-response + - input-required-result-ignore-extra-params diff --git a/.github/actions/conformance/expected-failures.yml b/.github/actions/conformance/expected-failures.yml index b27a833067..78ef609c50 100644 --- a/.github/actions/conformance/expected-failures.yml +++ b/.github/actions/conformance/expected-failures.yml @@ -2,9 +2,10 @@ # CI exits 0 if only these fail, exits 1 on unexpected failures or stale entries. # # Baseline established against @modelcontextprotocol/conformance pinned in -# .github/workflows/conformance.yml (CONFORMANCE_VERSION = 0.2.0-alpha.3). +# .github/workflows/conformance.yml (CONFORMANCE_VERSION = 0.2.0-alpha.4). # New conformance releases are adopted by deliberately bumping that pin and -# reconciling this file in the same change. +# reconciling both this file and expected-failures.2026-07-28.yml in the same +# change. # # Entries are grouped by SEP. As each SEP lands in the SDK the corresponding # scenarios start passing and MUST be removed from this list (the runner fails diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml index be7682663d..24486151cb 100644 --- a/.github/workflows/conformance.yml +++ b/.github/workflows/conformance.yml @@ -15,8 +15,9 @@ permissions: env: # Pinned conformance harness version. Bump deliberately and reconcile - # .github/actions/conformance/expected-failures.yml in the same change. 
- CONFORMANCE_VERSION: "0.2.0-alpha.3" + # both .github/actions/conformance/expected-failures*.yml files in the + # same change. + CONFORMANCE_VERSION: "0.2.0-alpha.4" jobs: server-conformance: @@ -43,6 +44,12 @@ jobs: ./.github/actions/conformance/run-server.sh --suite draft --expected-failures ./.github/actions/conformance/expected-failures.yml + - name: Run server conformance (2026-07-28 wire, all suite) + run: >- + ./.github/actions/conformance/run-server.sh + --suite all + --spec-version 2026-07-28 + --expected-failures ./.github/actions/conformance/expected-failures.2026-07-28.yml client-conformance: runs-on: ubuntu-latest @@ -64,3 +71,10 @@ jobs: --command 'uv run --frozen python .github/actions/conformance/client.py' --suite all --expected-failures ./.github/actions/conformance/expected-failures.yml + - name: Run client conformance (2026-07-28 wire, all suite) + run: >- + npx --yes @modelcontextprotocol/conformance@"$CONFORMANCE_VERSION" client + --command 'uv run --frozen python .github/actions/conformance/client.py' + --suite all + --spec-version 2026-07-28 + --expected-failures ./.github/actions/conformance/expected-failures.2026-07-28.yml