Skip to content

Commit

Permalink
config(rest-api): enable authentication and api authorization by defa…
Browse files Browse the repository at this point in the history
…ult (#11484)
  • Loading branch information
david-leifker authored Sep 27, 2024
1 parent 6a58493 commit 4296373
Show file tree
Hide file tree
Showing 76 changed files with 1,864 additions and 1,950 deletions.
2 changes: 1 addition & 1 deletion datahub-frontend/run/frontend.env
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ ELASTIC_CLIENT_PORT=9200
# AUTH_JAAS_ENABLED=false

# Change to disable Metadata Service Authentication
METADATA_SERVICE_AUTH_ENABLED=true
# METADATA_SERVICE_AUTH_ENABLED=false

# Change to override max header count defaults
DATAHUB_AKKA_MAX_HEADER_COUNT=64
Expand Down
4 changes: 2 additions & 2 deletions docker/datahub-frontend/env/docker.env
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ JAVA_OPTS=-Xms512m -Xmx512m -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf
# SSL_TRUSTSTORE_TYPE=jks
# SSL_TRUSTSTORE_PASSWORD=MyTruststorePassword

# Uncomment to enable Metadata Service Authentication
# METADATA_SERVICE_AUTH_ENABLED=true
# Uncomment to disable Metadata Service Authentication
# METADATA_SERVICE_AUTH_ENABLED=false

# Uncomment & populate these configs to enable OIDC SSO in React application.
# Required OIDC configs
Expand Down
4 changes: 2 additions & 2 deletions docker/datahub-gms/env/docker.env
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ MCE_CONSUMER_ENABLED=true
PE_CONSUMER_ENABLED=true
UI_INGESTION_ENABLED=true

# Uncomment to enable Metadata Service Authentication
METADATA_SERVICE_AUTH_ENABLED=false
# Uncomment to disable Metadata Service Authentication
# METADATA_SERVICE_AUTH_ENABLED=false

# Uncomment to disable persistence of client-side analytics events
# DATAHUB_ANALYTICS_ENABLED=false
Expand Down
2 changes: 2 additions & 0 deletions docker/docker-compose-with-cassandra.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ services:
context: ../
dockerfile: docker/datahub-gms/Dockerfile
env_file: ./datahub-gms/env/docker.cassandra.env
environment:
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
healthcheck:
test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health
start_period: 20s
Expand Down
1 change: 1 addition & 0 deletions docker/docker-compose-without-neo4j.override.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ services:
environment:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
depends_on:
datahub-upgrade:
condition: service_completed_successfully
Expand Down
1 change: 1 addition & 0 deletions docker/docker-compose-without-neo4j.postgres.override.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ services:
environment:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
depends_on:
datahub-upgrade:
condition: service_completed_successfully
Expand Down
1 change: 1 addition & 0 deletions docker/docker-compose-without-neo4j.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ services:
env_file: datahub-gms/env/docker-without-neo4j.env
environment:
- KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
healthcheck:
test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health
start_period: 90s
Expand Down
2 changes: 1 addition & 1 deletion docker/docker-compose.dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ services:
- SKIP_ELASTICSEARCH_CHECK=false
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-dev}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- METADATA_SERVICE_AUTH_ENABLED=false
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
- JAVA_TOOL_OPTIONS=-agentlib:jdwp=transport=dt_socket,server=y,suspend=n,address=*:5001
- BOOTSTRAP_SYSTEM_UPDATE_WAIT_FOR_SYSTEM_UPDATE=false
- SEARCH_SERVICE_ENABLE_CACHE=false
Expand Down
1 change: 1 addition & 0 deletions docker/docker-compose.override.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ services:
- DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
- DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-true}
- GRAPH_SERVICE_IMPL=${GRAPH_SERVICE_IMPL:-elasticsearch}
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
volumes:
- ${HOME}/.datahub/plugins:/etc/datahub/plugins
datahub-upgrade:
Expand Down
1 change: 1 addition & 0 deletions docker/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ services:
image: ${DATAHUB_GMS_IMAGE:-acryldata/datahub-gms}:${DATAHUB_VERSION:-head}
environment:
- KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
ports:
- ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
build:
Expand Down
1 change: 1 addition & 0 deletions docker/profiles/docker-compose.gms.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ x-datahub-gms-service: &datahub-gms-service
environment: &datahub-gms-env
<<: [*primary-datastore-mysql-env, *graph-datastore-search-env, *search-datastore-env, *datahub-quickstart-telemetry-env, *kafka-env]
ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE: ${ELASTICSEARCH_QUERY_CUSTOM_CONFIG_FILE:-search_config.yaml}
METADATA_SERVICE_AUTH_ENABLED: ${METADATA_SERVICE_AUTH_ENABLED:-true}
healthcheck:
test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health
start_period: 90s
Expand Down
2 changes: 1 addition & 1 deletion docker/quickstart/docker-compose-m1.quickstart.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ services:
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
- METADATA_SERVICE_AUTH_ENABLED=false
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
- NEO4J_HOST=http://neo4j:7474
- NEO4J_PASSWORD=datahub
- NEO4J_URI=bolt://neo4j
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ services:
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
- PE_CONSUMER_ENABLED=true
- UI_INGESTION_ENABLED=true
healthcheck:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@ services:
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
- PE_CONSUMER_ENABLED=true
- UI_INGESTION_ENABLED=true
healthcheck:
Expand Down
2 changes: 1 addition & 1 deletion docker/quickstart/docker-compose.quickstart.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ services:
- KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
- MAE_CONSUMER_ENABLED=true
- MCE_CONSUMER_ENABLED=true
- METADATA_SERVICE_AUTH_ENABLED=false
- METADATA_SERVICE_AUTH_ENABLED=${METADATA_SERVICE_AUTH_ENABLED:-false}
- NEO4J_HOST=http://neo4j:7474
- NEO4J_PASSWORD=datahub
- NEO4J_URI=bolt://neo4j
Expand Down
3 changes: 3 additions & 0 deletions docs/how/updating-datahub.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ This file documents any backwards-incompatible changes in DataHub and assists pe

### Breaking Changes

- Metadata service authentication is now enabled by default. To restore the previous behavior, set `METADATA_SERVICE_AUTH_ENABLED=false`.
- REST API authorization is now enabled by default. To restore the previous behavior, set `REST_API_AUTHORIZATION_ENABLED=false`.

### Potential Downtime

### Deprecations
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,60 +94,59 @@ def urn_must_be_present(cls, v, values):
return v

@staticmethod
def create(file: str) -> None:
emitter: DataHubGraph

with get_default_graph() as emitter:
with open(file) as fp:
structuredproperties: List[dict] = yaml.safe_load(fp)
for structuredproperty_raw in structuredproperties:
structuredproperty = StructuredProperties.parse_obj(
structuredproperty_raw
def create(file: str, graph: Optional[DataHubGraph] = None) -> None:
emitter: DataHubGraph = graph if graph else get_default_graph()

with open(file) as fp:
structuredproperties: List[dict] = yaml.safe_load(fp)
for structuredproperty_raw in structuredproperties:
structuredproperty = StructuredProperties.parse_obj(
structuredproperty_raw
)
if not structuredproperty.type.islower():
structuredproperty.type = structuredproperty.type.lower()
logger.warn(
f"Structured property type should be lowercase. Updated to {structuredproperty.type}"
)
if not structuredproperty.type.islower():
structuredproperty.type = structuredproperty.type.lower()
logger.warn(
f"Structured property type should be lowercase. Updated to {structuredproperty.type}"
)
if not AllowedTypes.check_allowed_type(structuredproperty.type):
raise ValueError(
f"Type {structuredproperty.type} is not allowed. Allowed types are {AllowedTypes.values()}"
)
mcp = MetadataChangeProposalWrapper(
entityUrn=structuredproperty.urn,
aspect=StructuredPropertyDefinitionClass(
qualifiedName=structuredproperty.fqn,
valueType=Urn.make_data_type_urn(structuredproperty.type),
displayName=structuredproperty.display_name,
description=structuredproperty.description,
entityTypes=[
Urn.make_entity_type_urn(entity_type)
for entity_type in structuredproperty.entity_types or []
],
cardinality=structuredproperty.cardinality,
immutable=structuredproperty.immutable,
allowedValues=(
[
PropertyValueClass(
value=v.value, description=v.description
)
for v in structuredproperty.allowed_values
]
if structuredproperty.allowed_values
else None
),
typeQualifier=(
{
"allowedTypes": structuredproperty.type_qualifier.allowed_types
}
if structuredproperty.type_qualifier
else None
),
),
if not AllowedTypes.check_allowed_type(structuredproperty.type):
raise ValueError(
f"Type {structuredproperty.type} is not allowed. Allowed types are {AllowedTypes.values()}"
)
emitter.emit_mcp(mcp)
mcp = MetadataChangeProposalWrapper(
entityUrn=structuredproperty.urn,
aspect=StructuredPropertyDefinitionClass(
qualifiedName=structuredproperty.fqn,
valueType=Urn.make_data_type_urn(structuredproperty.type),
displayName=structuredproperty.display_name,
description=structuredproperty.description,
entityTypes=[
Urn.make_entity_type_urn(entity_type)
for entity_type in structuredproperty.entity_types or []
],
cardinality=structuredproperty.cardinality,
immutable=structuredproperty.immutable,
allowedValues=(
[
PropertyValueClass(
value=v.value, description=v.description
)
for v in structuredproperty.allowed_values
]
if structuredproperty.allowed_values
else None
),
typeQualifier=(
{
"allowedTypes": structuredproperty.type_qualifier.allowed_types
}
if structuredproperty.type_qualifier
else None
),
),
)
emitter.emit_mcp(mcp)

logger.info(f"Created structured property {structuredproperty.urn}")
logger.info(f"Created structured property {structuredproperty.urn}")

@classmethod
def from_datahub(cls, graph: DataHubGraph, urn: str) -> "StructuredProperties":
Expand Down
5 changes: 3 additions & 2 deletions metadata-ingestion/src/datahub/cli/timeline_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from termcolor import colored

from datahub.emitter.mce_builder import dataset_urn_to_key, schema_field_urn_to_key
from datahub.ingestion.graph.client import get_default_graph
from datahub.ingestion.graph.client import DataHubGraph, get_default_graph
from datahub.telemetry import telemetry
from datahub.upgrade import upgrade
from datahub.utilities.urns.urn import Urn
Expand Down Expand Up @@ -62,8 +62,9 @@ def get_timeline(
start_time: Optional[int],
end_time: Optional[int],
diff: bool,
graph: Optional[DataHubGraph] = None,
) -> Any:
client = get_default_graph()
client = graph if graph else get_default_graph()
session = client._session
host = client.config.server
if urn.startswith("urn%3A"):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.when;
import static org.testng.AssertJUnit.*;
import static org.testng.Assert.assertNotNull;
import static org.testng.Assert.assertTrue;

import com.linkedin.metadata.entity.EntityService;
import com.linkedin.metadata.entity.restoreindices.RestoreIndicesResult;
Expand All @@ -19,7 +20,8 @@
@ActiveProfiles("test")
@SpringBootTest(
webEnvironment = SpringBootTest.WebEnvironment.RANDOM_PORT,
classes = {MceConsumerApplication.class, MceConsumerApplicationTestConfiguration.class})
classes = {MceConsumerApplication.class, MceConsumerApplicationTestConfiguration.class},
properties = {"authentication.enabled=false"})
public class MceConsumerApplicationTest extends AbstractTestNGSpringContextTests {

@Autowired private TestRestTemplate restTemplate;
Expand All @@ -38,7 +40,7 @@ public void testRestliServletConfig() {
String response =
this.restTemplate.postForObject(
"/gms/aspects?action=restoreIndices", "{\"urn\":\"\"}", String.class);
assertTrue(response.contains(mockResult.toString()));
assertTrue(response.contains(mockResult.toString()), String.format("Found: %s", response));
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ baseUrl: ${DATAHUB_BASE_URL:http://localhost:9002}

# App Layer
authentication:
# Enable if you want all requests to the Metadata Service to be authenticated. Disabled by default.
enabled: ${METADATA_SERVICE_AUTH_ENABLED:false}
# Enable if you want all requests to the Metadata Service to be authenticated.
enabled: ${METADATA_SERVICE_AUTH_ENABLED:true}

# Required if enabled is true! A configurable chain of Authenticators
authenticators:
Expand Down Expand Up @@ -43,8 +43,8 @@ authorization:
enabled: ${AUTH_POLICIES_ENABLED:true}
cacheRefreshIntervalSecs: ${POLICY_CACHE_REFRESH_INTERVAL_SECONDS:120}
cachePolicyFetchSize: ${POLICY_CACHE_FETCH_SIZE:1000}
# Enables authorization of reads, writes, and deletes on REST APIs. Defaults to false for backwards compatibility, but should become true down the road
restApiAuthorization: ${REST_API_AUTHORIZATION_ENABLED:false}
# Enables authorization of reads, writes, and deletes on REST APIs.
restApiAuthorization: ${REST_API_AUTHORIZATION_ENABLED:true}
view:
enabled: ${VIEW_AUTHORIZATION_ENABLED:false}
recommendations:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import com.linkedin.metadata.dao.throttle.APIThrottleException;
import io.datahubproject.openapi.exception.InvalidUrnException;
import io.datahubproject.openapi.exception.UnauthorizedException;
import java.util.Map;
import lombok.extern.slf4j.Slf4j;
import org.springframework.beans.ConversionNotSupportedException;
Expand Down Expand Up @@ -45,4 +46,10 @@ public static ResponseEntity<Map<String, String>> handleThrottleException(
return new ResponseEntity<>(
Map.of("error", e.getMessage()), headers, HttpStatus.TOO_MANY_REQUESTS);
}

@ExceptionHandler(UnauthorizedException.class)
public static ResponseEntity<Map<String, String>> handleUnauthorizedException(
UnauthorizedException e) {
return new ResponseEntity<>(Map.of("error", e.getMessage()), HttpStatus.FORBIDDEN);
}
}
39 changes: 39 additions & 0 deletions smoke-test/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
import os

import pytest
import requests
from datahub.ingestion.graph.client import DatahubClientConfig, DataHubGraph

from tests.test_result_msg import send_message
from tests.utils import (
TestSessionWrapper,
get_frontend_session,
wait_for_healthcheck_util,
)

# Disable telemetry
os.environ["DATAHUB_TELEMETRY_ENABLED"] = "false"


@pytest.fixture(scope="session")
def auth_session():
    """Session-scoped authenticated frontend session for smoke tests.

    Blocks until the deployment reports healthy, then logs into the
    frontend and wraps the session so downstream fixtures can read the
    GMS URL/token. The wrapper is destroyed when the test session ends.
    """
    wait_for_healthcheck_util(requests)
    auth_session = TestSessionWrapper(get_frontend_session())
    try:
        yield auth_session
    finally:
        # Guarantee teardown (token/session cleanup) even if an error is
        # raised while dependent fixtures are being finalized.
        auth_session.destroy()


@pytest.fixture(scope="session")
def graph_client(auth_session) -> DataHubGraph:
    """Session-scoped DataHubGraph client authenticated against GMS.

    Builds the graph client from the server URL and token exposed by the
    authenticated frontend session wrapper.
    """
    # NOTE(review): removed leftover debug `print(auth_session.cookies)`,
    # which leaked authentication cookies into test logs.
    graph: DataHubGraph = DataHubGraph(
        config=DatahubClientConfig(
            server=auth_session.gms_url(), token=auth_session.gms_token()
        )
    )
    return graph


def pytest_sessionfinish(session, exitstatus):
    """pytest hook: called once after the whole test run finishes.

    Forwards the final exit status to the test-result reporting channel.
    `session` is unused but required by the pytest hook signature.
    """
    send_message(exitstatus)
4 changes: 0 additions & 4 deletions smoke-test/requests_wrapper/__init__.py

This file was deleted.

Empty file.
Loading

0 comments on commit 4296373

Please sign in to comment.