Skip to content

Commit dd966b3

Browse files
authored
Add support for S3 Multi-Region Access Point (MRAP) URLs (#557)
* Add MRAP URL support
* test MRAP
* update history
* use walrus operator
* cache bucket parsing
* address pr review
* fix linter issues
* url escape `:` on win and cache
* handle Windows drive in test assertion
* make lint
1 parent 5124aa0 commit dd966b3

4 files changed

Lines changed: 152 additions & 8 deletions

File tree

HISTORY.md

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
# cloudpathlib Changelog
22

33
## UNRELEASED
4+
- Added support for S3 Multi-Region Access Point (MRAP) URLs in `S3Path` (Issue [#556](https://github.com/drivendataorg/cloudpathlib/issues/556), PR [#557](https://github.com/drivendataorg/cloudpathlib/pull/557))
45
- Added support for Pydantic serialization (Issue [#537](https://github.com/drivendataorg/cloudpathlib/issues/537), PR [#538](https://github.com/drivendataorg/cloudpathlib/pull/538))
56

67
## v0.23.0 (2025-10-07)

cloudpathlib/s3/s3path.py

Lines changed: 30 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,6 @@
11
import os
2+
import re
3+
import sys
24
from pathlib import Path
35
from tempfile import TemporaryDirectory
46
from typing import Any, Optional, TYPE_CHECKING
@@ -8,6 +10,10 @@
810
if TYPE_CHECKING:
911
from .s3client import S3Client
1012

13+
_MRAP_PATTERN = re.compile(
14+
r"^s3://(?P<arn>arn:aws:s3::\d{12}:accesspoint/[^/]+\.mrap)(?:/(?P<key>.*))?$"
15+
)
16+
1117

1218
@register_path_class("s3")
1319
class S3Path(CloudPath):
@@ -26,6 +32,8 @@ class S3Path(CloudPath):
2632

2733
cloud_prefix: str = "s3://"
2834
client: "S3Client"
35+
_bucket: str
36+
_local_path: Path
2937

3038
@property
3139
def drive(self) -> str:
@@ -74,7 +82,17 @@ def stat(self, follow_symlinks=True):
7482

7583
@property
7684
def bucket(self) -> str:
77-
return self._no_prefix.split("/", 1)[0]
85+
"""The bucket name, or the full MRAP ARN for MRAP paths.
86+
87+
:type: :class:`str`
88+
"""
89+
if hasattr(self, "_bucket"):
90+
return self._bucket
91+
if match := _MRAP_PATTERN.match(str(self)):
92+
self._bucket = match.group("arn")
93+
else:
94+
self._bucket = self._no_prefix.split("/", 1)[0]
95+
return self._bucket
7896

7997
@property
8098
def key(self) -> str:
@@ -90,3 +108,14 @@ def key(self) -> str:
90108
@property
91109
def etag(self):
92110
return self.client._get_metadata(self).get("etag")
111+
112+
@property
113+
def _local(self) -> Path:
114+
if hasattr(self, "_local_path"):
115+
return self._local_path
116+
no_prefix = self._no_prefix
117+
# `:` is invalid in Windows paths; percent-encode it for MRAP ARNs
118+
if sys.platform == "win32":
119+
no_prefix = no_prefix.replace(":", "%3A")
120+
self._local_path = self.client._local_cache_dir / no_prefix
121+
return self._local_path

tests/mock_clients/mock_s3.py

Lines changed: 4 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -220,14 +220,11 @@ def list_buckets(self):
220220
return {"Buckets": [{"Name": DEFAULT_S3_BUCKET_NAME}]}
221221

222222
def head_object(self, Bucket, Key, **kwargs):
223-
if (
224-
not (self.root / Key).exists()
225-
or (self.root / Key).is_dir()
226-
or Bucket != DEFAULT_S3_BUCKET_NAME
227-
):
223+
if not (self.root / Key).exists() or (self.root / Key).is_dir():
228224
raise ClientError({}, {})
229-
else:
230-
return {"key": Key}
225+
if Bucket != DEFAULT_S3_BUCKET_NAME and ".mrap" not in Bucket:
226+
raise ClientError({}, {})
227+
return {"key": Key}
231228

232229
def generate_presigned_url(self, op: str, Params: dict, ExpiresIn: int):
233230
mock_presigned_url = f"https://{Params['Bucket']}.s3.amazonaws.com/{Params['Key']}?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=TEST%2FTEST%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20240131T194721Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=TEST"

tests/test_s3_specific.py

Lines changed: 117 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -290,3 +290,120 @@ def test_as_url_presign(s3_rig):
290290
assert "Signature" in query_params
291291
else:
292292
assert False, "Unknown presigned URL format"
293+
294+
295+
_MRAP_ARN = "arn:aws:s3::123456789012:accesspoint/my-mrap.mrap"
296+
297+
298+
def test_mrap_bucket_and_key():
299+
"""MRAP paths return the full ARN as bucket and the path suffix as key."""
300+
# MRAP path without key
301+
p = S3Path(f"s3://{_MRAP_ARN}")
302+
assert p.bucket == _MRAP_ARN
303+
assert p.key == ""
304+
305+
# MRAP path with trailing slash
306+
p2 = S3Path(f"s3://{_MRAP_ARN}/")
307+
assert p2.bucket == _MRAP_ARN
308+
assert p2.key == ""
309+
310+
# MRAP path with a single key segment
311+
p3 = S3Path(f"s3://{_MRAP_ARN}/file.txt")
312+
assert p3.bucket == _MRAP_ARN
313+
assert p3.key == "file.txt"
314+
315+
# MRAP path with a nested key
316+
p4 = S3Path(f"s3://{_MRAP_ARN}/folder/sub/file.txt")
317+
assert p4.bucket == _MRAP_ARN
318+
assert p4.key == "folder/sub/file.txt"
319+
320+
# Regular S3 path is unaffected
321+
p5 = S3Path("s3://my-bucket/folder/file.txt")
322+
assert p5.bucket == "my-bucket"
323+
assert p5.key == "folder/file.txt"
324+
325+
# ARN-like strings that are NOT valid MRAPs fall back to normal bucket parsing
326+
# (wrong account ID length, missing .mrap suffix)
327+
p6 = S3Path("s3://arn:aws:s3::12345:accesspoint/x.mrap/key")
328+
assert p6.bucket == "arn:aws:s3::12345:accesspoint" # treated as normal bucket
329+
330+
p7 = S3Path("s3://arn:aws:s3::123456789012:accesspoint/notmrap/key")
331+
assert p7.bucket == "arn:aws:s3::123456789012:accesspoint" # treated as normal bucket
332+
333+
334+
def test_mrap_path_manipulation():
335+
"""MRAP paths support standard path manipulation operations."""
336+
base = S3Path(f"s3://{_MRAP_ARN}")
337+
338+
# Joining via /
339+
child = base / "folder" / "file.txt"
340+
assert str(child) == f"s3://{_MRAP_ARN}/folder/file.txt"
341+
assert child.bucket == _MRAP_ARN
342+
assert child.key == "folder/file.txt"
343+
344+
# name, stem, suffix
345+
assert child.name == "file.txt"
346+
assert child.stem == "file"
347+
assert child.suffix == ".txt"
348+
349+
# parent preserves the MRAP ARN as bucket
350+
parent = child.parent
351+
assert str(parent) == f"s3://{_MRAP_ARN}/folder"
352+
assert parent.bucket == _MRAP_ARN
353+
assert parent.key == "folder"
354+
355+
# with_name and with_suffix
356+
assert str(child.with_name("other.csv")) == f"s3://{_MRAP_ARN}/folder/other.csv"
357+
assert str(child.with_suffix(".csv")) == f"s3://{_MRAP_ARN}/folder/file.csv"
358+
359+
# str / repr round-trip
360+
url = f"s3://{_MRAP_ARN}/folder/file.txt"
361+
assert str(S3Path(url)) == url
362+
assert repr(S3Path(url)) == f"S3Path('{url}')"
363+
364+
365+
def test_mrap_file_operations(s3_rig):
366+
"""MRAP paths work end-to-end with the mock S3 backend."""
367+
client = s3_rig.client_class()
368+
base = f"s3://{_MRAP_ARN}/{s3_rig.test_dir}"
369+
370+
# seeded file from test assets
371+
existing = client.CloudPath(f"{base}/dir_0/file0_0.txt")
372+
assert existing.exists()
373+
assert existing.is_file()
374+
assert not existing.is_dir()
375+
assert client.CloudPath(f"{base}/dir_0").is_dir()
376+
377+
# iterdir on the test_dir level: expects dir_0 and dir_1
378+
top_level = list(client.CloudPath(base).iterdir())
379+
assert len(top_level) == 2
380+
assert all(p.is_dir() for p in top_level)
381+
assert {p.name for p in top_level} == {"dir_0", "dir_1"}
382+
383+
# iterdir on dir_0: expects 3 files
384+
dir0_contents = list(client.CloudPath(f"{base}/dir_0").iterdir())
385+
assert len(dir0_contents) == 3
386+
assert all(p.is_file() for p in dir0_contents)
387+
388+
# write / read / delete
389+
new_file = client.CloudPath(f"{base}/mrap_write_test.txt")
390+
assert not new_file.exists()
391+
new_file.write_text("hello from mrap")
392+
assert new_file.exists()
393+
assert new_file.read_text() == "hello from mrap"
394+
assert new_file.bucket == _MRAP_ARN
395+
new_file.unlink()
396+
assert not new_file.exists()
397+
398+
399+
def test_mrap_local_path_windows_encoding(monkeypatch, s3_rig):
400+
"""On Windows, colons in MRAP ARNs must be percent-encoded in the local cache path."""
401+
import cloudpathlib.s3.s3path as s3path_module
402+
403+
monkeypatch.setattr(s3path_module.sys, "platform", "win32")
404+
client = s3_rig.client_class()
405+
p = client.CloudPath(f"s3://{_MRAP_ARN}/some/key.txt")
406+
# strip drive (e.g. "C:") since it legitimately contains a colon on Windows
407+
local_no_drive = str(p._local)[len(p._local.drive) :]
408+
assert ":" not in local_no_drive, f"Colon found in local path on simulated Windows: {p._local}"
409+
assert "%3A" in local_no_drive

0 commit comments

Comments
 (0)