From 300548610b944acc0aeb70f5640af3e7fdcd723d Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 14 Jul 2022 13:23:44 -0500 Subject: [PATCH 1/2] Support signing collections Adds support for signing collections, similar to items, by signing its assets. --- planetary_computer/sas.py | 19 +++++- tests/data-files/sample-collection.json | 84 +++++++++++++++++++++++++ tests/test_signing.py | 24 +++++++ 3 files changed, 126 insertions(+), 1 deletion(-) create mode 100644 tests/data-files/sample-collection.json diff --git a/planetary_computer/sas.py b/planetary_computer/sas.py index d3fc30c..0a2cfa2 100644 --- a/planetary_computer/sas.py +++ b/planetary_computer/sas.py @@ -9,7 +9,7 @@ from urllib.parse import urlparse, parse_qs import requests from pydantic import BaseModel, Field -from pystac import Asset, Item, ItemCollection, STACObjectType +from pystac import Asset, Item, ItemCollection, STACObjectType, Collection from pystac.utils import datetime_to_str from pystac.serialization.identify import identify_stac_object_type from pystac_client import ItemSearch @@ -131,6 +131,9 @@ def sign_url(url: str, copy: bool = True) -> str: parsed_url = urlparse(url.rstrip("/")) if not parsed_url.netloc.endswith(BLOB_STORAGE_DOMAIN): return url + elif parsed_url.netloc == "ai4edatasetspublicassets.blob.core.windows.net": + # special case for public assets storing thumbnails... 
+ return url parsed_qs = parse_qs(parsed_url.query) if set(parsed_qs) & {"st", "se", "sp"}: @@ -325,6 +328,20 @@ def _search_and_sign(search: ItemSearch, copy: bool = True) -> ItemCollection: return sign(search.get_all_items()) +@sign.register(Collection) +def sign_collection(collection: Collection, copy: bool = True) -> Collection: + if copy: + # https://github.com/stac-utils/pystac/pull/834 fixed asset dropping + assets = collection.assets + collection = collection.clone() + if assets and not collection.assets: + collection.assets = deepcopy(assets) + + for key in collection.assets: + _sign_asset_in_place(collection.assets[key]) + return collection + + @sign.register(collections.abc.Mapping) def sign_mapping(mapping: Mapping, copy: bool = True) -> Mapping: """ diff --git a/tests/data-files/sample-collection.json b/tests/data-files/sample-collection.json new file mode 100644 index 0000000..0162d4c --- /dev/null +++ b/tests/data-files/sample-collection.json @@ -0,0 +1,84 @@ +{ + "id": "daymet-daily-hi", + "type": "Collection", + "links": [], + "title": "Daymet Daily Hawaii", + "assets": { + "thumbnail": { + "href": "https://ai4edatasetspublicassets.blob.core.windows.net/assets/pc_thumbnails/daymet-daily-hi.png", + "type": "image/png", + "roles": [ + "thumbnail" + ], + "title": "Daymet daily Hawaii map thumbnail" + }, + "zarr-abfs": { + "href": "abfs://daymet-zarr/daily/hi.zarr", + "type": "application/vnd+zarr", + "roles": [ + "data", + "zarr", + "abfs" + ], + "title": "Daily Hawaii Daymet Azure Blob File System Zarr root", + "description": "Azure Blob File System of the daily Hawaii Daymet Zarr Group on Azure Blob Storage for use with adlfs.", + "xarray:open_kwargs": { + "chunks": {}, + "engine": "zarr", + "consolidated": true, + "storage_options": { + "account_name": "daymeteuwest" + } + } + } + }, + "extent": { + "spatial": { + "bbox": [ + [ + -160.3056, + 17.9539, + -154.772, + 23.5186 + ] + ] + }, + "temporal": { + "interval": [ + [ + "1980-01-01T12:00:00Z", + 
"2020-12-30T12:00:00Z" + ] + ] + } + }, + "license": "proprietary", + "sci:doi": "10.3334/ORNLDAAC/1840", + "keywords": [ + "Daymet", + "Hawaii", + "Temperature", + "Precipitation", + "Vapor Pressure", + "Weather" + ], + "providers": [ + { + "url": "https://doi.org/10.3334/ORNLDAAC/1840", + "name": "ORNL DAAC", + "roles": [ + "producer" + ] + } + ], + "description": "Gridded estimates of daily weather parameters. [Daymet](https://daymet.ornl.gov) Version 4 variables include the following parameters: minimum temperature, maximum temperature, precipitation, shortwave radiation, vapor pressure, snow water equivalent, and day length.\n\n[Daymet](https://daymet.ornl.gov/) provides measurements of near-surface meteorological conditions; the main purpose is to provide data estimates where no instrumentation exists. The dataset covers the period from January 1, 1980 to the present. Each year is processed individually at the close of a calendar year. Data are in a Lambert conformal conic projection for North America and are distributed in Zarr and NetCDF formats, compliant with the [Climate and Forecast (CF) metadata conventions (version 1.6)](http://cfconventions.org/).\n\nUse the DOI at [https://doi.org/10.3334/ORNLDAAC/1840](https://doi.org/10.3334/ORNLDAAC/1840) to cite your usage of the data.\n\nThis dataset provides coverage for Hawaii; North America and Puerto Rico are provided in [separate datasets](https://planetarycomputer.microsoft.com/dataset/group/daymet#daily).\n\n", + "sci:citation": "Thornton, M.M., R. Shrestha, P.E. Thornton, S. Kao, Y. Wei, and B.E. Wilson. 2021. Daymet Version 4 Monthly Latency: Daily Surface Weather Data. ORNL DAAC, Oak Ridge, Tennessee, USA. 
https://doi.org/10.3334/ORNLDAAC/1904", + "stac_version": "1.0.0", + "msft:group_id": "daymet", + "msft:container": "daymet-zarr", + "stac_extensions": [ + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json" + ], + "msft:storage_account": "daymeteuwest", + "msft:short_description": "Daily surface weather data on a 1-km grid for Hawaii" +} \ No newline at end of file diff --git a/tests/test_signing.py b/tests/test_signing.py index b648a4b..e4ccfba 100644 --- a/tests/test_signing.py +++ b/tests/test_signing.py @@ -72,6 +72,12 @@ def get_sample_references() -> dict: return references +def get_sample_collection() -> dict: + with open(os.fspath(HERE.joinpath("data-files/sample-collection.json"))) as f: + collection = json.load(f) + return collection + + class TestSigning(unittest.TestCase): def assertRootResolved(self, item: Item) -> None: root_link = item.get_root_link() @@ -284,6 +290,24 @@ def test_sign_item_collection_inplace(self) -> None: assert result is item_collection self.assertSigned(item_collection[0].assets["image"].href) + def test_sign_collection(self) -> None: + collection = pystac.Collection.from_dict(get_sample_collection()) + result = pc.sign(collection) + assert result is not collection + asset = result.assets["zarr-abfs"] + self.assertIn( + "credential", + asset.extra_fields["xarray:open_kwargs"]["storage_options"], + ) + + result = pc.sign(collection, copy=False) + assert result is collection + asset = result.assets["zarr-abfs"] + self.assertIn( + "credential", + asset.extra_fields["xarray:open_kwargs"]["storage_options"], + ) + class TestUtils(unittest.TestCase): def test_parse_adlfs_url(self) -> None: From 799ddf117229cfb682a8496fb9f9b458b9794a10 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 14 Jul 2022 13:47:12 -0500 Subject: [PATCH 2/2] Sign raw collections, changelog --- CHANGELOG.md | 4 +++- planetary_computer/sas.py | 5 ++--- tests/test_signing.py | 14 +++++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) 
diff --git a/CHANGELOG.md b/CHANGELOG.md index af3717d..a8c8dd2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,9 @@ ## New Features -* `sign` will now avoid signing URLs that have already been signed. +* `sign` now avoids re-signing URLs that have already been signed. +* `sign` now supports signing raw JSON objects, in addition to `pystac` objects. +* `sign` now supports signing `Collection` objects. # 0.4.6 diff --git a/planetary_computer/sas.py b/planetary_computer/sas.py index 0a2cfa2..6f35703 100644 --- a/planetary_computer/sas.py +++ b/planetary_computer/sas.py @@ -366,13 +366,12 @@ def sign_mapping(mapping: Mapping, copy: bool = True) -> Mapping: if copy: mapping = deepcopy(mapping) + types = (STACObjectType.ITEM, STACObjectType.COLLECTION) if all(k in mapping for k in ["version", "templates", "refs"]): for k, v in mapping["templates"].items(): mapping["templates"][k] = sign_url(v) - elif ( - identify_stac_object_type(cast(Dict[str, Any], mapping)) == STACObjectType.ITEM - ): + elif identify_stac_object_type(cast(Dict[str, Any], mapping)) in types: for k, v in mapping["assets"].items(): v["href"] = sign_url(v["href"]) _sign_fsspec_asset_in_place(v) diff --git a/tests/test_signing.py b/tests/test_signing.py index e4ccfba..edf8bb5 100644 --- a/tests/test_signing.py +++ b/tests/test_signing.py @@ -72,10 +72,10 @@ def get_sample_references() -> dict: return references -def get_sample_collection() -> dict: +def get_sample_collection() -> pystac.Collection: with open(os.fspath(HERE.joinpath("data-files/sample-collection.json"))) as f: collection = json.load(f) - return collection + return pystac.Collection.from_dict(collection) class TestSigning(unittest.TestCase): @@ -291,7 +291,7 @@ def test_sign_item_collection_inplace(self) -> None: self.assertSigned(item_collection[0].assets["image"].href) def test_sign_collection(self) -> None: - collection = pystac.Collection.from_dict(get_sample_collection()) + collection = get_sample_collection() result = 
pc.sign(collection) assert result is not collection asset = result.assets["zarr-abfs"] @@ -308,6 +308,14 @@ def test_sign_collection(self) -> None: asset.extra_fields["xarray:open_kwargs"]["storage_options"], ) + def test_sign_collection_dict(self) -> None: + collection_dict = get_sample_collection().to_dict() + result = pc.sign(collection_dict) + self.assertIn( + "credential", + result["assets"]["zarr-abfs"]["xarray:open_kwargs"]["storage_options"], + ) + class TestUtils(unittest.TestCase): def test_parse_adlfs_url(self) -> None: