Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ For the FileIO there are several configuration options available:
| s3.secret-access-key | password | Configure the static secret access key used to access the FileIO. |
| s3.session-token | AQoDYXdzEJr... | Configure the static session token used to access the FileIO. |
| s3.signer | bearer | Configure the signature version of the FileIO. |
| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.singer.uri>/v1/aws/s3/sign`. |
| s3.signer.uri | http://my.signer:8080/s3 | Configure the remote signing uri if it differs from the catalog uri. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. |
| s3.signer.endpoint | v1/main/s3-sign | Configure the remote signing endpoint. Remote signing is only implemented for `FsspecFileIO`. The final request is sent to `<s3.signer.uri>/<s3.signer.endpoint>`. Defaults to `v1/aws/s3/sign`. |
| s3.region | us-west-2 | Sets the region of the bucket. |
| s3.proxy-uri | http://my.proxy.com:8080 | Configure the proxy server to be used by the FileIO. |
| s3.connect-timeout | 60.0 | Configure socket connection timeout, in seconds. |
Expand Down
2 changes: 2 additions & 0 deletions pyiceberg/io/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@
S3_PROXY_URI = "s3.proxy-uri"
S3_CONNECT_TIMEOUT = "s3.connect-timeout"
S3_SIGNER_URI = "s3.signer.uri"
S3_SIGNER_ENDPOINT = "s3.signer.endpoint"
S3_SIGNER_ENDPOINT_DEFAULT = "v1/aws/s3/sign"
HDFS_HOST = "hdfs.host"
HDFS_PORT = "hdfs.port"
HDFS_USER = "hdfs.user"
Expand Down
12 changes: 8 additions & 4 deletions pyiceberg/io/fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,8 @@
S3_REGION,
S3_SECRET_ACCESS_KEY,
S3_SESSION_TOKEN,
S3_SIGNER_ENDPOINT,
S3_SIGNER_ENDPOINT_DEFAULT,
S3_SIGNER_URI,
ADLFS_ClIENT_SECRET,
FileIO,
Expand All @@ -86,6 +88,8 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A
raise SignError("Signer set, but token is not available")

signer_url = properties.get(S3_SIGNER_URI, properties["uri"]).rstrip("/")
signer_endpoint = properties.get(S3_SIGNER_ENDPOINT, S3_SIGNER_ENDPOINT_DEFAULT)

signer_headers = {"Authorization": f"Bearer {properties[TOKEN]}"}
signer_body = {
"method": request.method,
Expand All @@ -94,7 +98,7 @@ def s3v4_rest_signer(properties: Properties, request: AWSRequest, **_: Any) -> A
"headers": {key: [val] for key, val in request.headers.items()},
}

response = requests.post(f"{signer_url}/v1/aws/s3/sign", headers=signer_headers, json=signer_body)
response = requests.post(f"{signer_url}/{signer_endpoint.strip()}", headers=signer_headers, json=signer_body)
try:
response.raise_for_status()
response_json = response.json()
Expand Down Expand Up @@ -131,9 +135,9 @@ def _s3(properties: Properties) -> AbstractFileSystem:

if signer := properties.get("s3.signer"):
logger.info("Loading signer %s", signer)
if singer_func := SIGNERS.get(signer):
singer_func_with_properties = partial(singer_func, properties)
register_events["before-sign.s3"] = singer_func_with_properties
if signer_func := SIGNERS.get(signer):
signer_func_with_properties = partial(signer_func, properties)
register_events["before-sign.s3"] = signer_func_with_properties

# Disable the AWS Signer
config_kwargs["signature_version"] = UNSIGNED
Expand Down
53 changes: 53 additions & 0 deletions tests/io/test_fsspec.py
Original file line number Diff line number Diff line change
Expand Up @@ -727,6 +727,59 @@ def test_s3v4_rest_signer(requests_mock: Mocker) -> None:
}


def test_s3v4_rest_signer_endpoint(requests_mock: Mocker) -> None:
    """Sign a request through a custom ``s3.signer.endpoint``.

    The mocked signer is registered only at ``TEST_URI`` joined with the
    custom endpoint, and that same endpoint is handed to the signer via the
    ``s3.signer.endpoint`` property. The test then checks that the URL and
    headers returned by the mocked signer end up on the signed request.
    """
    object_path = "/metadata/snap-8048355899640248710-1-a5c8ea2d-aa1f-48e8-89f4-1fa69db8c742.avro"
    new_uri = f"https://other-bucket{object_path}"
    endpoint = "v1/main/s3-sign/foo.bar?e=e&b=b&k=k=k&s=s&w=w"
    user_agent = "Botocore/1.27.59 Python/3.10.7 Darwin/21.5.0"

    # Headers expected on the signed request, one plain string per header.
    expected_headers = {
        "Authorization": "AWS4-HMAC-SHA256 Credential=ASIAQPRZZYGHUT57DL3I/20221017/us-west-2/s3/aws4_request, SignedHeaders=host;x-amz-content-sha256;x-amz-date;x-amz-security-token, Signature=430582a17d61ab02c272896fa59195f277af4bdf2121c441685e589f044bbe02",
        "Host": "bucket.s3.us-west-2.amazonaws.com",
        "User-Agent": user_agent,
        "X-Amz-Date": "20221017T102940Z",
        "X-Amz-Security-Token": "YQoJb3JpZ2luX2VjEDoaCXVzLXdlc3QtMiJGMEQCID/fFxZP5oaEgQmcwP6XhZa0xSq9lmLSx8ffaWbySfUPAiAesa7sjd/WV4uwRTO0S03y/MWVtgpH+/NyZQ4bZgLVriqrAggTEAEaDDAzMzQwNzIyMjE1OSIMOeFOWhZIurMmAqjsKogCxMCqxX8ZjK0gacAkcDqBCyA7qTSLhdfKQIH/w7WpLBU1km+cRUWWCudan6gZsAq867DBaKEP7qI05DAWr9MChAkgUgyI8/G3Z23ET0gAedf3GsJbakB0F1kklx8jPmj4BPCht9RcTiXiJ5DxTS/cRCcalIQXmPFbaJSqpBusVG2EkWnm1v7VQrNPE2Os2b2P293vpbhwkyCEQiGRVva4Sw9D1sKvqSsK10QCRG+os6dFEOu1kARaXi6pStvR4OVmj7OYeAYjzaFchn7nz2CSae0M4IluiYQ01eQAywbfRo9DpKSmDM/DnPZWJnD/woLhaaaCrCxSSEaFsvGOHFhLd3Rknw1v0jADMILUtJoGOp4BpqKqyMz0CY3kpKL0jfR3ykTf/ge9wWVE0Alr7wRIkGCIURkhslGHqSyFRGoTqIXaxU+oPbwlw/0w/nYO7qQ6bTANOWye/wgw4h/NmJ6vU7wnZTXwREf1r6MF72++bE/fMk19LfVb8jN/qrUqAUXTc8gBAUxL5pgy8+oT/JnI2BkVrrLS4ilxEXP9Ahm+6GDUYXV4fBpqpZwdkzQ/5Gw=",
        "x-amz-content-sha256": "UNSIGNED-PAYLOAD",
    }

    # The mocked signer payload carries every header value as a one-element list.
    signer_payload = {
        "uri": new_uri,
        "headers": {name: [value] for name, value in expected_headers.items()},
        "extensions": {},
    }
    requests_mock.post(f"{TEST_URI}/{endpoint}", json=signer_payload, status_code=200)

    request = AWSRequest(
        method="HEAD",
        url=f"https://bucket{object_path}",
        headers={"User-Agent": user_agent},
        data=b"",
        params={},
        auth_path=object_path,
    )
    request.context = {
        "client_region": "us-west-2",
        "has_streaming_input": False,
        "auth_type": None,
        "signing": {"bucket": "bucket"},
        "retries": {"attempt": 1, "invocation-id": "75d143fb-0219-439b-872c-18213d1c8d54"},
    }

    signer_properties = {"token": "abc", "uri": TEST_URI, "s3.signer.endpoint": endpoint}
    signed_request = s3v4_rest_signer(signer_properties, request)

    assert signed_request.url == new_uri
    assert dict(signed_request.headers) == expected_headers


def test_s3v4_rest_signer_forbidden(requests_mock: Mocker) -> None:
requests_mock.post(
f"{TEST_URI}/v1/aws/s3/sign",
Expand Down