-
Notifications
You must be signed in to change notification settings - Fork 76
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Support reading from S3 (#916)
* Add uproot.source.s3.S3Source * uproot.source.s3.S3Source: support S3_* environment variables (supported by ROOT) and other tools * style: pre-commit fixes * add tests/test_0916-read-from-s3.py [xfail] * S3Source: fix parsing of object key from URI urlparse includes the leading slash in the path name; however, the key does not include it. Apparently the leading slash was ignored by the presign request. However, presigning is not really a thing without the credentials. In boto3 one needs to explicitly provide `Config(signature_version=botocore.UNSIGNED)`; in minio-py, they build a URL manually: https://github.com/minio/minio-py/blob/4ee1892b2e238da4e3b1bd0cb43cb7e25015e574/minio/api.py#L2063-L2069 Having a key with an extra leading slash would result in an incorrect URL being built. That would result in a 404 error. * test_0916-read-from-s3.py: fix branch name * update docs * add :doc: --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Jim Pivarski <[email protected]>
- Loading branch information
1 parent
abc3736
commit 130c55b
Showing
12 changed files
with
155 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -58,6 +58,7 @@ dev = [ | |
] | ||
test = [ | ||
"lz4", | ||
"minio", | ||
"pytest>=6", | ||
"pytest-timeout", | ||
"pytest-rerunfailures", | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE | ||
|
||
""" | ||
This module defines a physical layer for remote files, accessed via S3. | ||
""" | ||
|
||
import os | ||
from urllib.parse import parse_qsl, urlparse | ||
|
||
import uproot.extras | ||
import uproot.source.http | ||
|
||
|
||
class S3Source(uproot.source.http.HTTPSource):
    """
    Args:
        file_path (str): A URL of the file to open, of the form
            ``s3://bucket/key``. The query parameters ``endpoint`` and
            ``region``, if present, override the arguments of the same name.
        endpoint: S3 endpoint (defaults to AWS)
        access_key: Access key of your S3 account. If None, it is read from
            the ``S3_ACCESS_KEY`` or ``AWS_ACCESS_KEY_ID`` environment
            variable (in that order of preference).
        secret_key: Secret key of your S3 account. If None, it is read from
            the ``S3_SECRET_KEY`` or ``AWS_SECRET_ACCESS_KEY`` environment
            variable (in that order of preference).
        session_token: Session token of your S3 account. If None, it is read
            from the ``S3_SESSION_TOKEN`` or ``AWS_SESSION_TOKEN``
            environment variable (in that order of preference).
        secure: Flag to enable use of TLS
        region: Region name passed to the S3 client. If None, it is read
            from the ``AWS_DEFAULT_REGION`` environment variable.
        http_client (urllib3.poolmanager.PoolManager): Instance of :doc:`urllib3.poolmanager.PoolManager`
        credentials (minio.credentials.Provider): Instance of :doc:`minio.credentials.Provider`
        options: See :doc:`uproot.source.http.HTTPSource.__init__`

    The bucket and object key are taken from ``file_path``, turned into a
    presigned ``GET`` URL by the Minio client, and that URL is handed to
    :doc:`uproot.source.http.HTTPSource`.

    Raises:
        ValueError: If the path component of ``file_path`` does not start
            with ``/`` (for example, when the object key is missing).
    """

    def __init__(
        self,
        file_path,
        endpoint="s3.amazonaws.com",
        access_key=None,
        secret_key=None,
        session_token=None,
        secure=True,
        region=None,
        http_client=None,
        credentials=None,
        **options,
    ):
        Minio = uproot.extras.Minio_client()

        # Explicit arguments win; otherwise fall back to the S3_* variables
        # and then to the AWS_* variables.
        env = os.environ
        if access_key is None:
            access_key = env.get("S3_ACCESS_KEY", env.get("AWS_ACCESS_KEY_ID"))
        if secret_key is None:
            secret_key = env.get("S3_SECRET_KEY", env.get("AWS_SECRET_ACCESS_KEY"))
        if session_token is None:
            session_token = env.get("S3_SESSION_TOKEN", env.get("AWS_SESSION_TOKEN"))
        if region is None:
            region = env.get("AWS_DEFAULT_REGION")

        parsed_url = urlparse(file_path)

        bucket_name = parsed_url.netloc
        # Validate explicitly instead of with ``assert``: asserts vanish under
        # ``python -O``, and indexing an empty path would raise a bare
        # IndexError for URLs such as "s3://bucket".
        if not parsed_url.path.startswith("/"):
            raise ValueError(
                f"S3 URL must have the form s3://bucket/key, got {file_path!r}"
            )
        # urlparse keeps the leading slash in the path, but the object key
        # must not include it.
        object_name = parsed_url.path[1:]

        parsed_query = dict(parse_qsl(parsed_url.query))
        # There is no standard scheme for s3:// URI query parameters,
        # but some are often introduced to support extra flexibility:
        if "endpoint" in parsed_query:
            endpoint = parsed_query["endpoint"]
        if "region" in parsed_query:
            region = parsed_query["region"]

        client = Minio(
            endpoint,
            access_key=access_key,
            secret_key=secret_key,
            session_token=session_token,
            secure=secure,
            region=region,
            http_client=http_client,
            credentials=credentials,
        )

        url = client.get_presigned_url("GET", bucket_name, object_name)

        super().__init__(url, **options)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# BSD 3-Clause License; see https://github.com/scikit-hep/uproot5/blob/main/LICENSE | ||
|
||
import pytest | ||
|
||
import uproot | ||
|
||
|
||
@pytest.mark.network
def test_s3_fail():
    """Reading a nonexistent S3 object through ``S3Source`` must raise."""
    # Import the submodule explicitly so that a wrong module path surfaces as
    # an import error here instead of being swallowed by ``pytest.raises``.
    import uproot.source.s3

    # NOTE(review): ``Exception`` is very broad; narrow it once the concrete
    # error type raised for a missing object is settled.
    with pytest.raises(Exception):
        with uproot.source.s3.S3Source(
            "s3://pivarski-princeton/does-not-exist", timeout=0.1
        ) as source:
            # Converting the chunk's data to bytes uses the builtin rather
            # than the previously undefined ``tobytes`` helper.
            bytes(source.chunk(0, 100).raw_data)
|
||
|
||
@pytest.mark.network
def test_read_s3():
    """Open a ROOT file via an ``s3://`` URL and check one array's length."""
    url = "s3://pivarski-princeton/pythia_ppZee_run17emb.picoDst.root:PicoDst"
    with uproot.open(url) as tree:
        event_ids = tree["Event/Event.mEventId"].array(library="np")
        assert len(event_ids) == 8004