Skip to content

Commit 5f97f08

Browse files
authored
Update _s3_file_query to check if an object exists explicitly. (#168)
* Check file existence first so public AWS access is feasible * Use PurePosixPath in mock to support Windows * Revert unnecessary test changes * Remove import and write that are not needed * Changelog and release version * Remove cleanup now handled by the rig
1 parent ea3053f commit 5f97f08

5 files changed

Lines changed: 64 additions & 42 deletions

File tree

HISTORY.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
# cloudpathlib Changelog
22

3-
## v0.6.1 (unreleased)
3+
## v0.6.1 (2021-09-17)
44

55
- Fixed absolute documentation URLs to point to the new versioned documentation pages.
6+
- Fixed bug where `no_sign_request` couldn't be used to download files since our code required list permissions to the bucket to do so, which is tracked in issue [#169](https://github.com/drivendataorg/cloudpathlib/issues/169). PR [#168](https://github.com/drivendataorg/cloudpathlib/pull/168).
67

78
## v0.6.0 (2021-09-07)
89

cloudpathlib/s3/s3client.py

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
from pathlib import Path, PurePosixPath
33
from typing import Any, Dict, Iterable, Optional, Union
44

5+
from botocore.exceptions import ClientError
6+
57
from ..client import Client, register_client_class
68
from ..cloudpath import implementation_registry
79
from .s3path import S3Path
@@ -130,17 +132,26 @@ def _exists(self, cloud_path: S3Path) -> bool:
130132

131133
def _s3_file_query(self, cloud_path: S3Path):
132134
"""Boto3 query used for quick checks of existence and if path is file/dir"""
133-
return next(
134-
(
135-
obj
136-
for obj in (
137-
self.s3.Bucket(cloud_path.bucket)
138-
.objects.filter(Prefix=cloud_path.key)
139-
.limit(1)
140-
)
141-
),
142-
None,
143-
)
135+
# first check if this is an object that we can access directly
136+
137+
try:
138+
obj = self.s3.Object(cloud_path.bucket, cloud_path.key)
139+
obj.load()
140+
return obj
141+
142+
# else, confirm it is a dir by filtering to the first item under the prefix
143+
except ClientError:
144+
return next(
145+
(
146+
obj
147+
for obj in (
148+
self.s3.Bucket(cloud_path.bucket)
149+
.objects.filter(Prefix=cloud_path.key)
150+
.limit(1)
151+
)
152+
),
153+
None,
154+
)
144155

145156
def _list_dir(self, cloud_path: S3Path, recursive=False) -> Iterable[S3Path]:
146157
bucket = self.s3.Bucket(cloud_path.bucket)
@@ -200,12 +211,12 @@ def _remove(self, cloud_path: S3Path) -> None:
200211
obj = self.s3.Object(cloud_path.bucket, cloud_path.key)
201212

202213
# will throw if not a file
203-
obj.get()
214+
obj.load()
204215

205216
resp = obj.delete()
206217
assert resp.get("ResponseMetadata").get("HTTPStatusCode") == 204
207218

208-
except self.client.exceptions.NoSuchKey:
219+
except ClientError:
209220
# try to delete as a direcotry instead
210221
bucket = self.s3.Bucket(cloud_path.bucket)
211222

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,5 +59,5 @@ def load_requirements(path: Path):
5959
"Source Code": "https://github.com/drivendataorg/cloudpathlib",
6060
},
6161
url="https://github.com/drivendataorg/cloudpathlib",
62-
version="0.6.0",
62+
version="0.6.1",
6363
)

tests/mock_clients/mock_s3.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from tempfile import TemporaryDirectory
66

77
from boto3.session import Session
8+
from botocore.exceptions import ClientError
89

910
from .utils import delete_empty_parents_up_to_root
1011

@@ -64,7 +65,17 @@ def get(self):
6465
if not self.path.exists() or self.path.is_dir():
6566
raise NoSuchKey({}, {})
6667
else:
67-
return {"key": self.path}
68+
return {"key": str(PurePosixPath(self.path))}
69+
70+
def load(self):
71+
if not self.path.exists() or self.path.is_dir():
72+
raise ClientError({}, {})
73+
else:
74+
return {"key": str(PurePosixPath(self.path))}
75+
76+
@property
77+
def key(self):
78+
return str(PurePosixPath(self.path).relative_to(PurePosixPath(self.root)))
6879

6980
def copy_from(self, CopySource=None, Metadata=None, MetadataDirective=None):
7081
if CopySource["Key"] == str(self.path.relative_to(self.root)):

tests/test_s3_specific.py

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from concurrent.futures import ProcessPoolExecutor
22
from time import sleep
3+
34
import pytest
45

56
from boto3.s3.transfer import TransferConfig
@@ -30,6 +31,7 @@ def test_transfer_config(s3_rig, tmp_path):
3031
# download
3132
client.set_as_default_client()
3233
p = s3_rig.create_cloud_path("dir_0/file0_0.txt")
34+
3335
dl_dir = tmp_path
3436
assert not (dl_dir / p.name).exists()
3537
p.download_to(dl_dir)
@@ -54,37 +56,31 @@ def _download_with_threads(s3_rig, tmp_path, use_threads):
5456
"""Job used by tests to ensure Transfer config changes are
5557
actually passed through to boto3 and respected.
5658
"""
57-
try:
58-
sleep(1) # give test monitoring process time to start watching
59-
60-
transfer_config = TransferConfig(
61-
max_concurrency=100,
62-
use_threads=use_threads,
63-
multipart_chunksize=1 * 1024,
64-
multipart_threshold=10 * 1024,
65-
)
66-
client = s3_rig.client_class(boto3_transfer_config=transfer_config)
67-
p = client.CloudPath(f"s3://{s3_rig.drive}/dir_0/file0_to_download.txt")
68-
69-
assert not p.exists()
59+
sleep(1) # give test monitoring process time to start watching
60+
61+
transfer_config = TransferConfig(
62+
max_concurrency=100,
63+
use_threads=use_threads,
64+
multipart_chunksize=1 * 1024,
65+
multipart_threshold=10 * 1024,
66+
)
67+
client = s3_rig.client_class(boto3_transfer_config=transfer_config)
68+
p = client.CloudPath(f"s3://{s3_rig.drive}/{s3_rig.test_dir}/dir_0/file0_to_download.txt")
7069

71-
# file should be about 60KB
72-
text = "lalala" * 10_000
73-
p.write_text(text)
70+
assert not p.exists()
7471

75-
assert p.exists()
72+
# file should be about 60KB
73+
text = "lalala" * 10_000
74+
p.write_text(text)
7675

77-
# assert not (dl_dir / p.name).exists()
78-
p.download_to(tmp_path)
76+
assert p.exists()
7977

80-
p.unlink()
78+
# assert not (dl_dir / p.name).exists()
79+
p.download_to(tmp_path)
8180

82-
assert not p.exists()
81+
p.unlink()
8382

84-
finally:
85-
p = s3_rig.create_cloud_path("/dir_0/file0_0.txt")
86-
if p.exists():
87-
p.unlink()
83+
assert not p.exists()
8884

8985

9086
def test_transfer_config_live(s3_rig, tmp_path):
@@ -133,7 +129,7 @@ def _execute_on_subprocess_and_observe(use_threads):
133129
assert _execute_on_subprocess_and_observe(use_threads=True) > 10
134130

135131

136-
def test_no_sign_request(s3_rig):
132+
def test_no_sign_request(s3_rig, tmp_path):
137133
"""Tests that we can pass no_sign_request to the S3Client and we will
138134
be able to access public resources but not private ones.
139135
"""
@@ -146,6 +142,9 @@ def test_no_sign_request(s3_rig):
146142
)
147143
assert p.exists()
148144

145+
p.download_to(tmp_path)
146+
assert (tmp_path / p.name).read_bytes() == p.read_bytes()
147+
149148
# unsigned raises for private S3 file that exists
150149
p = client.CloudPath(f"s3://{s3_rig.drive}/dir_0/file0_to_download.txt")
151150
with pytest.raises(botocore.exceptions.ClientError):

0 commit comments

Comments
 (0)