Skip to content

Commit

Permalink
feat(ingest/s3): support path_specs of different S3 buckets in the same recipe (#7514)
Browse files Browse the repository at this point in the history
  • Loading branch information
harsha-mandadi-4026 authored Mar 15, 2023
1 parent 756a4f3 commit bf36c93
Show file tree
Hide file tree
Showing 15 changed files with 2,547 additions and 43 deletions.
1 change: 0 additions & 1 deletion gradle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,3 @@ org.gradle.internal.repository.initial.backoff=1000

# Needed to publish to Nexus from a sub-module
gnsp.disableApplyOnlyOnRootProjectEnforcement=true

Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ source:
sink:
type: "datahub-rest"
config:
server: "http://localhost:8080"
server: "http://localhost:8080"
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ source:
sink:
type: "datahub-rest"
config:
server: "http://localhost:8080"
server: "http://localhost:8080"
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ source:
sink:
type: "datahub-rest"
config:
server: "http://localhost:8080"
server: "http://localhost:8080"
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@ source:
sink:
type: "datahub-rest"
config:
server: "http://localhost:8080"
server: "http://localhost:8080"
1 change: 0 additions & 1 deletion metadata-ingestion/examples/recipes/s3_to_file.dhub.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,3 @@ sink:
type: "file"
config:
filename: "./s3_data_lake_mces.json"

Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ source:
email_domain: mycompany.com

classification:
enabled: True
enabled: True
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@ source:
sink:
type: "datahub-rest"
config:
server: "http://localhost:8080"
server: "http://localhost:8080"
1 change: 0 additions & 1 deletion metadata-ingestion/scripts/datahub_preflight.sh
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,3 @@ fi


printf "\n\e[38;2;0;255;0m✅ Preflight was successful\e[38;2;255;255;255m\n"

11 changes: 0 additions & 11 deletions metadata-ingestion/src/datahub/ingestion/source/s3/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from datahub.configuration.validate_field_rename import pydantic_renamed_field
from datahub.ingestion.source.aws.aws_common import AwsConnectionConfig
from datahub.ingestion.source.aws.path_spec import PathSpec
from datahub.ingestion.source.aws.s3_util import get_bucket_name
from datahub.ingestion.source.s3.profiling import DataLakeProfilerConfig

# hide annoying debug errors from py4j
Expand Down Expand Up @@ -92,16 +91,6 @@ def check_path_specs_and_infer_platform(
)
guessed_platform = guessed_platforms.pop()

# If platform is s3, check that they're all the same bucket.
if guessed_platform == "s3":
bucket_names = set(
get_bucket_name(path_spec.include) for path_spec in path_specs
)
if len(bucket_names) > 1:
raise ValueError(
f"All path_specs should reference the same s3 bucket. Got {bucket_names}"
)

# Ensure s3 configs aren't used for file sources.
if guessed_platform != "s3" and (
values.get("use_s3_object_tags") or values.get("use_s3_bucket_tags")
Expand Down
Loading

0 comments on commit bf36c93

Please sign in to comment.