Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingestion/tableau): verify role assignment to user in test_connection. #12042

Merged
merged 21 commits into from
Dec 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
a5de5f4
test case for site role check
sid-acryl Dec 5, 2024
1e3af1c
Merge branch 'master' into cus3116-failing-cli-0.14.1
sid-acryl Dec 5, 2024
9bc4bc7
Merge branch 'master' into cus3116-failing-cli-0.14.1
sid-acryl Dec 5, 2024
7f7ed4c
enhance reporting
sid-acryl Dec 6, 2024
aef9813
test case
sid-acryl Dec 6, 2024
67eacde
Merge branch 'master' into cus3116-failing-cli-0.14.1
sid-acryl Dec 6, 2024
9b8d394
address review comments
sid-acryl Dec 9, 2024
ab5a98f
Merge branch 'cus3116-failing-cli-0.14.1' of https://github.com/sid-a…
sid-acryl Dec 9, 2024
b7ca573
resolve conflict
sid-acryl Dec 9, 2024
c62007b
Update metadata-ingestion/src/datahub/ingestion/source/tableau/tablea…
sid-acryl Dec 10, 2024
c268772
Update metadata-ingestion/src/datahub/ingestion/source/tableau/tablea…
sid-acryl Dec 10, 2024
5e9b876
Update metadata-ingestion/src/datahub/ingestion/source/tableau/tablea…
sid-acryl Dec 10, 2024
e6cc1eb
Address review comments
sid-acryl Dec 10, 2024
18a884d
restructured the test
sid-acryl Dec 10, 2024
d504594
Merge branch 'master' into cus3116-failing-cli-0.14.1
sid-acryl Dec 10, 2024
f7129af
Merge branch 'master' into cus3116-failing-cli-0.14.1
sid-acryl Dec 11, 2024
5975131
Merge branch 'master' into cus3116-failing-cli-0.14.1
sid-acryl Dec 11, 2024
8434e0a
Update metadata-ingestion/src/datahub/ingestion/source/tableau/tablea…
sid-acryl Dec 12, 2024
9f489f5
Merge branch 'master' into cus3116-failing-cli-0.14.1
sid-acryl Dec 12, 2024
3adbf28
address review comments
sid-acryl Dec 12, 2024
36aad78
Merge branch 'master' into cus3116-failing-cli-0.14.1
sid-acryl Dec 16, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 42 additions & 2 deletions metadata-ingestion/src/datahub/ingestion/source/tableau/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@
tableau_field_to_schema_field,
workbook_graphql_query,
)
from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo
from datahub.ingestion.source.tableau.tableau_validation import check_user_role
from datahub.metadata.com.linkedin.pegasus2avro.common import (
AuditStamp,
ChangeAuditStamps,
Expand Down Expand Up @@ -167,7 +169,7 @@

try:
# On earlier versions of the tableauserverclient, the NonXMLResponseError
# was thrown when reauthentication was needed. We'll keep both exceptions
# was thrown when reauthentication was necessary. We'll keep both exceptions
# around for now, but can remove this in the future.
from tableauserverclient.server.endpoint.exceptions import ( # type: ignore
NotSignedInError,
Expand Down Expand Up @@ -632,6 +634,33 @@ class TableauSourceReport(StaleEntityRemovalSourceReport):
num_upstream_table_lineage_failed_parse_sql: int = 0
num_upstream_fine_grained_lineage_failed_parse_sql: int = 0
num_hidden_assets_skipped: int = 0
logged_in_user: List[UserInfo] = []


def report_user_role(report: TableauSourceReport, server: Server) -> None:
title: str = "Insufficient Permissions"
message: str = "The user must have the `Site Administrator Explorer` role to perform metadata ingestion."
try:
# TableauSiteSource instance is per site, so each time we need to find-out user detail
# the site-role might be different on another site
logged_in_user: UserInfo = UserInfo.from_server(server=server)

if not logged_in_user.is_site_administrator_explorer():
report.warning(
title=title,
message=message,
context=f"user-name={logged_in_user.user_name}, role={logged_in_user.site_role}, site_id={logged_in_user.site_id}",
)

report.logged_in_user.append(logged_in_user)

except Exception as e:
report.warning(
title=title,
message="Failed to verify the user's role. The user must have `Site Administrator Explorer` role.",
context=f"{e}",
exc=e,
)


@platform_name("Tableau")
Expand Down Expand Up @@ -676,6 +705,7 @@ def _authenticate(self, site_content_url: str) -> None:
try:
logger.info(f"Authenticated to Tableau site: '{site_content_url}'")
self.server = self.config.make_tableau_client(site_content_url)
report_user_role(report=self.report, server=self.server)
# Note that we're not catching ConfigurationError, since we want that to throw.
except ValueError as e:
self.report.failure(
Expand All @@ -689,9 +719,17 @@ def test_connection(config_dict: dict) -> TestConnectionReport:
test_report = TestConnectionReport()
try:
source_config = TableauConfig.parse_obj_allow_extras(config_dict)
source_config.make_tableau_client(source_config.site)

server = source_config.make_tableau_client(source_config.site)

test_report.basic_connectivity = CapabilityReport(capable=True)

test_report.capability_report = check_user_role(
logged_in_user=UserInfo.from_server(server=server)
)

except Exception as e:
logger.warning(f"{e}", exc_info=e)
test_report.basic_connectivity = CapabilityReport(
capable=False, failure_reason=str(e)
)
Expand Down Expand Up @@ -831,6 +869,8 @@ def __init__(
# when emitting custom SQL data sources.
self.custom_sql_ids_being_used: List[str] = []

report_user_role(report=report, server=server)

@property
def no_env_browse_prefix(self) -> str:
# Prefix to use with browse path (v1)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,3 +81,5 @@
PROJECT = "Project"
SITE = "Site"
IS_UNSUPPORTED_CUSTOM_SQL = "isUnsupportedCustomSql"
SITE_PERMISSION = "sitePermission"
SITE_ROLE = "SiteAdministratorExplorer"
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from dataclasses import dataclass

from tableauserverclient import Server, UserItem

from datahub.ingestion.source.tableau import tableau_constant as c


@dataclass
class UserInfo:
user_name: str
site_role: str
site_id: str

def is_site_administrator_explorer(self):
return self.site_role == c.SITE_ROLE

@staticmethod
def from_server(server: Server) -> "UserInfo":
assert server.user_id, "make the connection with tableau"

user: UserItem = server.users.get_by_id(server.user_id)

assert user.site_role, "site_role is not available" # to silent the lint

assert user.name, "user name is not available" # to silent the lint

assert server.site_id, "site identifier is not available" # to silent the lint

return UserInfo(
user_name=user.name,
site_role=user.site_role,
site_id=server.site_id,
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
import logging
from typing import Dict, Union

from datahub.ingestion.api.source import CapabilityReport, SourceCapability
from datahub.ingestion.source.tableau import tableau_constant as c
from datahub.ingestion.source.tableau.tableau_server_wrapper import UserInfo

logger = logging.getLogger(__name__)


def check_user_role(
logged_in_user: UserInfo,
) -> Dict[Union[SourceCapability, str], CapabilityReport]:
capability_dict: Dict[Union[SourceCapability, str], CapabilityReport] = {
c.SITE_PERMISSION: CapabilityReport(
capable=True,
)
}

failure_reason: str = (
"The user does not have the `Site Administrator Explorer` role."
)

mitigation_message_prefix: str = (
"Assign `Site Administrator Explorer` role to the user"
)
mitigation_message_suffix: str = "Refer to the setup guide: https://datahubproject.io/docs/quick-ingestion-guides/tableau/setup"

try:
# TODO: Add check for `Enable Derived Permissions`
if not logged_in_user.is_site_administrator_explorer():
capability_dict[c.SITE_PERMISSION] = CapabilityReport(
capable=False,
failure_reason=f"{failure_reason} Their current role is {logged_in_user.site_role}.",
mitigation_message=f"{mitigation_message_prefix} `{logged_in_user.user_name}`. {mitigation_message_suffix}",
)

return capability_dict

except Exception as e:
logger.warning(msg=e, exc_info=e)
capability_dict[c.SITE_PERMISSION] = CapabilityReport(
capable=False,
failure_reason="Failed to verify user role.",
mitigation_message=f"{mitigation_message_prefix}. {mitigation_message_suffix}", # user is unknown
)

return capability_dict
147 changes: 102 additions & 45 deletions metadata-ingestion/tests/integration/tableau/test_tableau_ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import pytest
from freezegun import freeze_time
from pydantic import ValidationError
from requests.adapters import ConnectionError
from tableauserverclient import PermissionsRule, Server
from tableauserverclient.models import (
Expand All @@ -21,7 +22,9 @@

from datahub.emitter.mce_builder import DEFAULT_ENV, make_schema_field_urn
from datahub.emitter.mcp import MetadataChangeProposalWrapper
from datahub.ingestion.run.pipeline import Pipeline, PipelineContext, PipelineInitError
from datahub.ingestion.api.source import TestConnectionReport
from datahub.ingestion.run.pipeline import Pipeline, PipelineContext
from datahub.ingestion.source.tableau import tableau_constant as c
from datahub.ingestion.source.tableau.tableau import (
TableauConfig,
TableauSiteSource,
Expand Down Expand Up @@ -571,52 +574,28 @@ def test_extract_all_project(pytestconfig, tmp_path, mock_datahub_graph):
def test_value_error_projects_and_project_pattern(
pytestconfig, tmp_path, mock_datahub_graph
):
# Ingestion should raise ValueError
output_file_name: str = "tableau_project_pattern_precedence_mces.json"
golden_file_name: str = "tableau_project_pattern_precedence_mces_golden.json"

new_config = config_source_default.copy()
new_config["projects"] = ["default"]
new_config["project_pattern"] = {"allow": ["^Samples$"]}

with pytest.raises(
PipelineInitError,
ValidationError,
match=r".*projects is deprecated. Please use project_path_pattern only.*",
):
tableau_ingest_common(
pytestconfig,
tmp_path,
mock_data(),
golden_file_name,
output_file_name,
mock_datahub_graph,
pipeline_config=new_config,
)
TableauConfig.parse_obj(new_config)


def test_project_pattern_deprecation(pytestconfig, tmp_path, mock_datahub_graph):
# Ingestion should raise ValueError
output_file_name: str = "tableau_project_pattern_deprecation_mces.json"
golden_file_name: str = "tableau_project_pattern_deprecation_mces_golden.json"

new_config = config_source_default.copy()
del new_config["projects"]
new_config["project_pattern"] = {"allow": ["^Samples$"]}
new_config["project_path_pattern"] = {"allow": ["^Samples$"]}

with pytest.raises(
PipelineInitError,
ValidationError,
match=r".*project_pattern is deprecated. Please use project_path_pattern only*",
):
tableau_ingest_common(
pytestconfig,
tmp_path,
mock_data(),
golden_file_name,
output_file_name,
mock_datahub_graph,
pipeline_config=new_config,
)
TableauConfig.parse_obj(new_config)


def test_project_path_pattern_allow(pytestconfig, tmp_path, mock_datahub_graph):
Expand Down Expand Up @@ -1296,31 +1275,21 @@ def test_hidden_asset_tags(pytestconfig, tmp_path, mock_datahub_graph):
@pytest.mark.integration
def test_hidden_assets_without_ingest_tags(pytestconfig, tmp_path, mock_datahub_graph):
enable_logging()
output_file_name: str = "tableau_hidden_asset_tags_error_mces.json"
golden_file_name: str = "tableau_hidden_asset_tags_error_mces_golden.json"

new_config = config_source_default.copy()
new_config["tags_for_hidden_assets"] = ["hidden", "private"]
new_config["ingest_tags"] = False

with pytest.raises(
PipelineInitError,
ValidationError,
match=r".*tags_for_hidden_assets is only allowed with ingest_tags enabled.*",
):
tableau_ingest_common(
pytestconfig,
tmp_path,
mock_data(),
golden_file_name,
output_file_name,
mock_datahub_graph,
pipeline_config=new_config,
)
TableauConfig.parse_obj(new_config)


@freeze_time(FROZEN_TIME)
@pytest.mark.integration
def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_graph):
def test_permission_warning(pytestconfig, tmp_path, mock_datahub_graph):
with mock.patch(
"datahub.ingestion.source.state_provider.datahub_ingestion_checkpointing_provider.DataHubGraph",
mock_datahub_graph,
Expand Down Expand Up @@ -1357,11 +1326,99 @@ def test_permission_mode_switched_error(pytestconfig, tmp_path, mock_datahub_gra

warnings = list(reporter.warnings)

assert len(warnings) == 1
assert len(warnings) == 2

assert warnings[0].title == "Insufficient Permissions"

assert warnings[0].title == "Derived Permission Error"
assert warnings[1].title == "Derived Permission Error"

assert warnings[0].message == (
assert warnings[1].message == (
"Turn on your derived permissions. See for details "
"https://community.tableau.com/s/question/0D54T00000QnjHbSAJ/how-to-fix-the-permissionsmodeswitched-error"
)


@freeze_time(FROZEN_TIME)
@pytest.mark.integration
def test_connection_report_test(requests_mock):
server_info_response = """
<tsResponse xmlns:t="http://tableau.com/api">
<t:serverInfo>
<t:productVersion build="build-number">foo</t:productVersion>
<t:restApiVersion>2.4</t:restApiVersion>
</t:serverInfo>
</tsResponse>

"""

requests_mock.register_uri(
"GET",
"https://do-not-connect/api/2.4/serverInfo",
text=server_info_response,
status_code=200,
headers={"Content-Type": "application/xml"},
)

signin_response = """
<tsResponse xmlns:t="http://tableau.com/api">
<t:credentials token="fake_token">
<t:site id="fake_site_luid" contentUrl="fake_site_content_url"/>
<t:user id="fake_user_id"/>
</t:credentials>
</tsResponse>
"""

requests_mock.register_uri(
"POST",
"https://do-not-connect/api/2.4/auth/signin",
text=signin_response,
status_code=200,
headers={"Content-Type": "application/xml"},
)

user_by_id_response = """
<tsResponse xmlns:t="http://tableau.com/api">
<t:user id="user-id" name="[email protected]" siteRole="SiteAdministratorExplorer" />
</tsResponse>
"""

requests_mock.register_uri(
"GET",
"https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id",
text=user_by_id_response,
status_code=200,
headers={"Content-Type": "application/xml"},
)

report: TestConnectionReport = TableauSource.test_connection(config_source_default)

assert report
assert report.capability_report
assert report.capability_report.get(c.SITE_PERMISSION)
assert report.capability_report[c.SITE_PERMISSION].capable

# Role other than SiteAdministratorExplorer
user_by_id_response = """
<tsResponse xmlns:t="http://tableau.com/api">
<t:user id="user-id" name="[email protected]" siteRole="Explorer" />
</tsResponse>
"""

requests_mock.register_uri(
"GET",
"https://do-not-connect/api/2.4/sites/fake_site_luid/users/fake_user_id",
text=user_by_id_response,
status_code=200,
headers={"Content-Type": "application/xml"},
)

report = TableauSource.test_connection(config_source_default)

assert report
assert report.capability_report
assert report.capability_report.get(c.SITE_PERMISSION)
assert report.capability_report[c.SITE_PERMISSION].capable is False
assert (
report.capability_report[c.SITE_PERMISSION].failure_reason
== "The user does not have the `Site Administrator Explorer` role. Their current role is Explorer."
)
Loading