Skip to content

Commit

Permalink
Merge pull request #120 from lsst-sqre/tickets/DM-41550
Browse files Browse the repository at this point in the history
tickets/DM-41550: Allow empty credentials/Workload Identity
  • Loading branch information
rufuspollock authored Nov 22, 2023
2 parents c0935ce + 63cd715 commit 903aacb
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 6 deletions.
6 changes: 6 additions & 0 deletions docs/source/storage-backends.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,12 @@ TRANSFER_ADAPTERS:
bucket_name: git-lfs
account_key_base64: S0m3B4se64RandomStuff.....ThatI5Redac7edHeReF0rRead4b1lity==
```

If you have Workload Identity configured, you can omit the account key
entirely. In that case you must instead supply `serviceaccount_email`,
which defines the Google service account to bind to. That service
account must have permission to issue tokens, because tokens are needed
to generate signed URLs.
### Amazon S3 Storage

#### `giftless.storage.amazon_s3:AmazonS3Storage`
Expand Down
48 changes: 42 additions & 6 deletions giftless/storage/google_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from datetime import timedelta
from typing import Any, BinaryIO, Dict, Optional

import google.auth
from google.auth import impersonated_credentials
from google.cloud import storage # type: ignore
from google.oauth2 import service_account # type: ignore

Expand All @@ -18,12 +20,25 @@ class GoogleCloudStorage(StreamingStorage, ExternalStorage):
transfers.
"""

# Constructor for the Google Cloud Storage backend.
# NOTE: this is a diff rendering. The two-line signature immediately below
# is the pre-change form; the multi-line signature that follows it is the
# replacement, which adds the optional `serviceaccount_email` parameter.
def __init__(self, project_name: str, bucket_name: str, account_key_file: Optional[str] = None,
account_key_base64: Optional[str] = None, path_prefix: Optional[str] = None, **_):
def __init__(self,
project_name: str,
bucket_name: str,
account_key_file: Optional[str] = None,
account_key_base64: Optional[str] = None,
path_prefix: Optional[str] = None,
serviceaccount_email: Optional[str] = None,
**_):
self.bucket_name = bucket_name
self.path_prefix = path_prefix
# The next two lines are the pre-change and post-change forms of the same
# assignment. The new form annotates the attribute as Optional because
# _load_credentials returns None when no account key is configured.
self.credentials = self._load_credentials(account_key_file, account_key_base64)
self.credentials: Optional[service_account.Credentials] = self._load_credentials(account_key_file, account_key_base64)
# self.credentials (possibly None) is handed to the client as-is.
self.storage_client = storage.Client(project=project_name, credentials=self.credentials)
# With no explicit key, a service account email is mandatory: it is the
# principal that _get_workload_identity_credentials impersonates when
# signing URLs.
if not self.credentials:
if not serviceaccount_email:
raise ValueError(
"If no account key is given, serviceaccount_email must "
"be set in order to use workload identity."
)
self._serviceaccount_email=serviceaccount_email

def get(self, prefix: str, oid: str) -> BinaryIO:
bucket = self.storage_client.bucket(self.bucket_name)
Expand Down Expand Up @@ -94,19 +109,22 @@ def _get_blob_path(self, prefix: str, oid: str) -> str:

# Build a V4 signed URL for the blob addressed by (prefix, oid). The URL is
# limited to `http_method` and expires after `expires_in` seconds.
def _get_signed_url(self, prefix: str, oid: str, expires_in: int, http_method: str = 'GET',
filename: Optional[str] = None, disposition: Optional[str] = None) -> str:
# Use the explicitly configured key credentials when present; otherwise
# obtain impersonated Workload Identity credentials for signing.
creds = self.credentials
if creds is None:
creds = self._get_workload_identity_credentials(expires_in)
bucket = self.storage_client.bucket(self.bucket_name)
blob = bucket.blob(self._get_blob_path(prefix, oid))
# NOTE(review): the `disposition` argument is overwritten on the next line,
# so a caller-supplied value is never used. When `filename` is set, the two
# lines after it append a second `filename=` to the value just built.
# Confirm whether this is intended.
disposition = f'attachment; filename={filename}' if filename else None
if filename and disposition:
disposition = f'{disposition}; filename="{filename}"'

# Diff rendering: of the two continuation lines below, the first
# (credentials=self.credentials) is the pre-change form and the second
# (credentials=creds) is its replacement.
url: str = blob.generate_signed_url(expiration=timedelta(seconds=expires_in), method=http_method, version='v4',
response_disposition=disposition, credentials=self.credentials)
response_disposition=disposition, credentials=creds)
return url

@staticmethod
def _load_credentials(account_key_file: Optional[str], account_key_base64: Optional[str]) \
-> service_account.Credentials:
-> Optional[service_account.Credentials]:
"""Load Google Cloud credentials from passed configuration
"""
if account_key_file and account_key_base64:
Expand All @@ -117,4 +135,22 @@ def _load_credentials(account_key_file: Optional[str], account_key_base64: Optio
account_info = json.loads(base64.b64decode(account_key_base64))
return service_account.Credentials.from_service_account_info(account_info)
else:
raise ValueError('You must provide either account_key_file or account_key_base64')
return None # Will use Workload Identity if available

def _get_workload_identity_credentials(
    self, expires_in: int
) -> "impersonated_credentials.Credentials":
    """Return impersonated credentials usable for signing URLs.

    The ambient credentials from ``google.auth.default()`` are used as the
    source, and the service account stored in ``self._serviceaccount_email``
    is impersonated so that a temporary signing token can be granted.

    :param expires_in: Requested lifetime in seconds. Values above 3600
        are clamped to 3600.
    :return: Impersonated credentials carrying the read-only and
        read-write devstorage scopes.
    """
    # Signing credentials are good for one hour max
    lifetime = min(expires_in, 3600)
    source_creds, _ = google.auth.default()
    # Do the switcheroo: impersonate ourselves with an account that can
    # grant a temporary signing token
    return impersonated_credentials.Credentials(
        source_credentials=source_creds,
        target_principal=self._serviceaccount_email,
        target_scopes=(
            "https://www.googleapis.com/auth/devstorage.read_only",
            "https://www.googleapis.com/auth/devstorage.read_write",
        ),
        lifetime=lifetime,
    )

0 comments on commit 903aacb

Please sign in to comment.