Skip to content

Commit

Permalink
fix(cli/lite): fix datahub lite serve command (#7089)
Browse files Browse the repository at this point in the history
  • Loading branch information
hsheth2 authored Jan 20, 2023
1 parent d53b3f4 commit 13cc16f
Show file tree
Hide file tree
Showing 6 changed files with 34 additions and 29 deletions.
4 changes: 2 additions & 2 deletions docs/datahub_lite.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,10 @@ The following features are **NOT** supported:

## Prerequisites

There are no prerequisites for DataHub Lite other than a Python 3.7+ environment and [`acryl-datahub`](https://pypi.org/project/acryl-datahub/) > 0.9.6. Install the `datahub` Python CLI using the [instructions](./cli.md#using-pip).
To use `datahub lite` commands, you need to install [`acryl-datahub`](https://pypi.org/project/acryl-datahub/) > 0.9.6 ([install instructions](./cli.md#using-pip)) and the `datahub-lite` plugin.

```shell
pip install acryl-datahub
pip install 'acryl-datahub[datahub-lite]'
```

## Importing Metadata
Expand Down
5 changes: 4 additions & 1 deletion metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,10 @@ def get_long_description():
# Sink plugins.
"datahub-kafka": kafka_common,
"datahub-rest": rest_common,
"datahub-lite": set(),
"datahub-lite": {
"fastapi",
"uvicorn",
},
# Integrations.
"airflow": {
"apache-airflow >= 2.0.2",
Expand Down
5 changes: 2 additions & 3 deletions metadata-ingestion/src/datahub/cli/lite_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,10 @@ def serve(port: int) -> None:
uvicorn.run(app, port=port)


@lite.command(context_settings=dict(allow_extra_args=True))
@lite.command()
@click.argument("path", required=False, type=CompleteablePath())
@click.pass_context
@telemetry.with_telemetry
def ls(ctx: click.Context, path: Optional[str]) -> None:
def ls(path: Optional[str]) -> None:
"""List at a path"""

start_time = time.time()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -245,7 +245,7 @@ class SQLAlchemyConfig(StatefulIngestionConfigBase):
)
profile_pattern: AllowDenyPattern = Field(
default=AllowDenyPattern.allow_all(),
description="Regex patterns to filter tables for profiling during ingestion. Allowed by the `table_pattern`.",
description="Regex patterns to filter tables (or specific columns) for profiling during ingestion. Note that only tables allowed by the `table_pattern` will be considered.",
)
domain: Dict[str, AllowDenyPattern] = Field(
default=dict(),
Expand Down
3 changes: 3 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/source/tableau.py
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,10 @@ def _populate_usage_stat_registry(self):
if self.server is None:
return

view: TSC.ViewItem
for view in TSC.Pager(self.server.views, usage=True):
if not view.id:
continue
self.tableau_stat_registry[view.id] = UsageStat(view_count=view.total_views)
logger.debug("Tableau stats %s", self.tableau_stat_registry)

Expand Down
44 changes: 22 additions & 22 deletions metadata-ingestion/src/datahub/lite/lite_server.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
from typing import Dict, Iterable, List, Optional, Union
from typing import Dict, List, Optional, Union

from fastapi import Depends, FastAPI, Query
from fastapi import Depends, FastAPI, HTTPException, Query
from fastapi.responses import RedirectResponse

from datahub.lite.lite_local import (
Expand All @@ -10,7 +10,6 @@
Searchable,
SearchFlavor,
)
from datahub.metadata.schema_classes import SystemMetadataClass, _Aspect

app = FastAPI()
logger = logging.getLogger(__name__)
Expand All @@ -22,8 +21,8 @@ def redirect_to_docs():
return RedirectResponse(app.docs_url)


@app.get("/ping") # type: ignore
def ping() -> dict: # type: ignore
@app.get("/ping")
def ping() -> dict:
return {"ping": "pong"}


Expand All @@ -34,44 +33,45 @@ def lite() -> DataHubLiteLocal:
return lite


@app.get("/entities") # type: ignore
def entities_list(lite: DataHubLiteLocal = Depends(lite)) -> Iterable[str]: # type: ignore
@app.get("/entities")
def entities_list(lite: DataHubLiteLocal = Depends(lite)) -> List[str]:
# TODO add some filtering capabilities
return lite.list_ids()
return list(lite.list_ids())


@app.get("/entities/{id}") # type: ignore
def entities_get( # type: ignore
@app.get("/entities/{id}")
def entities_get(
id: str,
aspects: Optional[List[str]] = Query(None),
lite: DataHubLiteLocal = Depends(lite),
) -> Optional[
Dict[str, Union[str, Dict[str, Union[dict, _Aspect, SystemMetadataClass]]]]
]:
) -> Dict[str, Union[str, Dict[str, dict]]]:
# Queried as GET /entities/<url-encoded urn>?aspects=aspect1&aspects=aspect2&...
logger.warning(f"get {id} aspects={aspects}")
return lite.get(id, aspects=aspects)
entities = lite.get(id, aspects=aspects, typed=False)
if not entities:
raise HTTPException(status_code=404, detail="Entity not found")
return entities # type: ignore


@app.get("/browse") # type: ignore
def browse( # type: ignore
@app.get("/browse")
def browse(
path: str = Query("/"),
catalog: DataHubLiteLocal = Depends(lite),
) -> Iterable[Browseable]:
) -> List[Browseable]:
# Queried as GET /browse/?path=<url-encoded-path>
logger.info(f"browse {path}")
return catalog.ls(path)
return list(catalog.ls(path))


@app.get("/search") # type: ignore
def search( # type: ignore
@app.get("/search")
def search(
query: str = Query("*"),
flavor: SearchFlavor = Query(SearchFlavor.FREE_TEXT),
lite: DataHubLiteLocal = Depends(lite),
) -> Iterable[Searchable]:
) -> List[Searchable]:
# Queried as GET /search/?query=<url-encoded-query>
logger.info(f"search {query}")
return lite.search(query=query, flavor=flavor)
return list(lite.search(query=query, flavor=flavor))


# TODO put command

0 comments on commit 13cc16f

Please sign in to comment.