Skip to content

Commit d593351

Browse files
authored
chore: Add project metadata to registry (#2938)
* chore: Add project metadata to registry Signed-off-by: Danny Chiao <[email protected]> * lint Signed-off-by: Danny Chiao <[email protected]> * add to file registry Signed-off-by: Danny Chiao <[email protected]> * add to sql registry Signed-off-by: Danny Chiao <[email protected]> * fixes Signed-off-by: Danny Chiao <[email protected]> * fixes Signed-off-by: Danny Chiao <[email protected]> * fixes Signed-off-by: Danny Chiao <[email protected]> * fix Signed-off-by: Danny Chiao <[email protected]> * lint Signed-off-by: Danny Chiao <[email protected]> * fix test Signed-off-by: Danny Chiao <[email protected]> * add tests Signed-off-by: Danny Chiao <[email protected]> * make backwards compatible Signed-off-by: Danny Chiao <[email protected]> * update makefile for passing local tests Signed-off-by: Danny Chiao <[email protected]> * update makefile for passing local tests Signed-off-by: Danny Chiao <[email protected]> * update makefile for passing local tests Signed-off-by: Danny Chiao <[email protected]> * update makefile for passing local tests Signed-off-by: Danny Chiao <[email protected]>
1 parent 476fccd commit d593351

File tree

18 files changed

+450
-68
lines changed

18 files changed

+450
-68
lines changed

.github/workflows/pr_local_integration_tests.yml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,23 @@ jobs:
6767
sudo apt install -y -V libarrow-dev
6868
- name: Install dependencies
6969
run: make install-python-ci-dependencies
70+
- name: Set up gcloud SDK # TODO(adchia): remove this dependency
71+
uses: google-github-actions/setup-gcloud@v0
72+
with:
73+
project_id: ${{ secrets.GCP_PROJECT_ID }}
74+
service_account_key: ${{ secrets.GCP_SA_KEY }}
75+
export_default_credentials: true
76+
- name: Use gcloud CLI
77+
run: gcloud info
7078
- name: Test local integration tests
7179
if: ${{ always() }} # this will guarantee that step won't be canceled and resources won't leak
7280
run: make test-python-integration-local
81+
- name: Upload coverage to Codecov
82+
uses: codecov/codecov-action@v1
83+
with:
84+
token: ${{ secrets.CODECOV_TOKEN }}
85+
files: ./coverage.xml
86+
flags: localintegrationtests
87+
env_vars: OS,PYTHON
88+
fail_ci_if_error: true
89+
verbose: true

Makefile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,8 @@ test-python-integration-local:
7777
python -m pytest -n 8 --integration \
7878
-k "not test_apply_entity_integration and \
7979
not test_apply_feature_view_integration and \
80-
not test_apply_data_source_integration" \
80+
not test_apply_data_source_integration and \
81+
not test_lambda_materialization" \
8182
sdk/python/tests \
8283
) || echo "This script uses Docker, and it isn't running - please start the Docker Daemon and try again!";
8384

protos/feast/core/Registry.proto

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ import "feast/core/SavedDataset.proto";
3434
import "feast/core/ValidationProfile.proto";
3535
import "google/protobuf/timestamp.proto";
3636

37-
// Next id: 15
37+
// Next id: 16
3838
message Registry {
3939
repeated Entity entities = 1;
4040
repeated FeatureTable feature_tables = 2;
@@ -47,9 +47,15 @@ message Registry {
4747
repeated SavedDataset saved_datasets = 11;
4848
repeated ValidationReference validation_references = 13;
4949
Infra infra = 10;
50+
// Tracking metadata of Feast by project
51+
repeated ProjectMetadata project_metadata = 15;
5052

5153
string registry_schema_version = 3; // to support migrations; incremented when schema is changed
5254
string version_id = 4; // version id, random string generated on each update of the data; now used only for debugging purposes
5355
google.protobuf.Timestamp last_updated = 5;
56+
}
5457

58+
message ProjectMetadata {
59+
string project = 1;
60+
string project_uuid = 2;
5561
}
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
feast.infra.materialization package
2+
===================================
3+
4+
Submodules
5+
----------
6+
7+
feast.infra.materialization.batch\_materialization\_engine module
8+
-----------------------------------------------------------------
9+
10+
.. automodule:: feast.infra.materialization.batch_materialization_engine
11+
:members:
12+
:undoc-members:
13+
:show-inheritance:
14+
15+
feast.infra.materialization.local\_engine module
16+
------------------------------------------------
17+
18+
.. automodule:: feast.infra.materialization.local_engine
19+
:members:
20+
:undoc-members:
21+
:show-inheritance:
22+
23+
Module contents
24+
---------------
25+
26+
.. automodule:: feast.infra.materialization
27+
:members:
28+
:undoc-members:
29+
:show-inheritance:

sdk/python/docs/source/feast.infra.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ Subpackages
77
.. toctree::
88
:maxdepth: 4
99

10+
feast.infra.materialization
1011
feast.infra.offline_stores
1112
feast.infra.online_stores
1213
feast.infra.registry_stores

sdk/python/docs/source/feast.rst

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,14 @@ feast.online\_response module
225225
:undoc-members:
226226
:show-inheritance:
227227

228+
feast.project\_metadata module
229+
------------------------------
230+
231+
.. automodule:: feast.project_metadata
232+
:members:
233+
:undoc-members:
234+
:show-inheritance:
235+
228236
feast.proto\_json module
229237
------------------------
230238

@@ -273,6 +281,14 @@ feast.repo\_operations module
273281
:undoc-members:
274282
:show-inheritance:
275283

284+
feast.repo\_upgrade module
285+
--------------------------
286+
287+
.. automodule:: feast.repo_upgrade
288+
:members:
289+
:undoc-members:
290+
:show-inheritance:
291+
276292
feast.request\_feature\_view module
277293
-----------------------------------
278294

sdk/python/feast/driver_test_data.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ def create_driver_hourly_stats_df(drivers, start_date, end_date) -> pd.DataFrame
9898
"event_timestamp": [
9999
pd.Timestamp(dt, unit="ms", tz="UTC").round("ms")
100100
for dt in pd.date_range(
101-
start=start_date, end=end_date, freq="1H", closed="left"
101+
start=start_date, end=end_date, freq="1H", inclusive="left"
102102
)
103103
]
104104
# include a fixed timestamp for get_historical_features in the quickstart

sdk/python/feast/feature_store.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def __init__(
142142
self._registry = SqlRegistry(registry_config, None)
143143
else:
144144
r = Registry(registry_config, repo_path=self.repo_path)
145-
r._initialize_registry()
145+
r._initialize_registry(self.config.project)
146146
self._registry = r
147147
self._provider = get_provider(self.config, self.repo_path)
148148
self._go_server = None
@@ -183,7 +183,7 @@ def refresh_registry(self):
183183
"""
184184
registry_config = self.config.get_registry_config()
185185
registry = Registry(registry_config, repo_path=self.repo_path)
186-
registry.refresh()
186+
registry.refresh(self.config.project)
187187

188188
self._registry = registry
189189

@@ -704,7 +704,7 @@ def plan(
704704

705705
# Compute the desired difference between the current infra, as stored in the registry,
706706
# and the desired infra.
707-
self._registry.refresh()
707+
self._registry.refresh(self.project)
708708
current_infra_proto = self._registry.proto().infra.__deepcopy__()
709709
desired_registry_proto = desired_repo_contents.to_registry_proto()
710710
new_infra = self._provider.plan_infra(self.config, desired_registry_proto)

sdk/python/feast/infra/registry_stores/sql.py

Lines changed: 66 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1+
import uuid
12
from datetime import datetime
3+
from enum import Enum
24
from pathlib import Path
35
from typing import Any, List, Optional, Set, Union
46

@@ -17,6 +19,7 @@
1719
)
1820
from sqlalchemy.engine import Engine
1921

22+
from feast import usage
2023
from feast.base_feature_view import BaseFeatureView
2124
from feast.data_source import DataSource
2225
from feast.entity import Entity
@@ -32,6 +35,7 @@
3235
from feast.feature_view import FeatureView
3336
from feast.infra.infra_object import Infra
3437
from feast.on_demand_feature_view import OnDemandFeatureView
38+
from feast.project_metadata import ProjectMetadata
3539
from feast.protos.feast.core.DataSource_pb2 import DataSource as DataSourceProto
3640
from feast.protos.feast.core.Entity_pb2 import Entity as EntityProto
3741
from feast.protos.feast.core.FeatureService_pb2 import (
@@ -156,6 +160,12 @@
156160
Column("infra_proto", LargeBinary, nullable=False),
157161
)
158162

163+
164+
class FeastMetadataKeys(Enum):
165+
LAST_UPDATED_TIMESTAMP = "last_updated_timestamp"
166+
PROJECT_UUID = "project_uuid"
167+
168+
159169
feast_metadata = Table(
160170
"feast_metadata",
161171
metadata,
@@ -189,7 +199,7 @@ def teardown(self):
189199
stmt = delete(t)
190200
conn.execute(stmt)
191201

192-
def refresh(self):
202+
def refresh(self, project: Optional[str]):
193203
# This method is a no-op since we're always reading the latest values from the db.
194204
pass
195205

@@ -459,6 +469,22 @@ def list_on_demand_feature_views(
459469
"feature_view_proto",
460470
)
461471

472+
def list_project_metadata(
473+
self, project: str, allow_cache: bool = False
474+
) -> List[ProjectMetadata]:
475+
with self.engine.connect() as conn:
476+
stmt = select(feast_metadata).where(feast_metadata.c.project_id == project,)
477+
rows = conn.execute(stmt).all()
478+
if rows:
479+
project_metadata = ProjectMetadata(project_name=project)
480+
for row in rows:
481+
if row["metadata_key"] == FeastMetadataKeys.PROJECT_UUID.value:
482+
project_metadata.project_uuid = row["metadata_value"]
483+
break
484+
# TODO(adchia): Add other project metadata in a structured way
485+
return [project_metadata]
486+
return []
487+
462488
def apply_saved_dataset(
463489
self, saved_dataset: SavedDataset, project: str, commit: bool = True,
464490
):
@@ -629,6 +655,7 @@ def proto(self) -> RegistryProto:
629655
(self.list_feature_services, r.feature_services),
630656
(self.list_saved_datasets, r.saved_datasets),
631657
(self.list_validation_references, r.validation_references),
658+
(self.list_project_metadata, r.project_metadata),
632659
]:
633660
objs: List[Any] = lister(project) # type: ignore
634661
if objs:
@@ -651,14 +678,16 @@ def commit(self):
651678
def _apply_object(
652679
self, table, project: str, id_field_name, obj, proto_field_name, name=None
653680
):
681+
self._maybe_init_project_metadata(project)
682+
654683
name = name or obj.name
655684
with self.engine.connect() as conn:
685+
update_datetime = datetime.utcnow()
686+
update_time = int(update_datetime.timestamp())
656687
stmt = select(table).where(
657688
getattr(table.c, id_field_name) == name, table.c.project_id == project
658689
)
659690
row = conn.execute(stmt).first()
660-
update_datetime = datetime.utcnow()
661-
update_time = int(update_datetime.timestamp())
662691
if hasattr(obj, "last_updated_timestamp"):
663692
obj.last_updated_timestamp = update_datetime
664693

@@ -685,6 +714,30 @@ def _apply_object(
685714

686715
self._set_last_updated_metadata(update_datetime, project)
687716

717+
def _maybe_init_project_metadata(self, project):
718+
# Initialize project metadata if needed
719+
with self.engine.connect() as conn:
720+
update_datetime = datetime.utcnow()
721+
update_time = int(update_datetime.timestamp())
722+
stmt = select(feast_metadata).where(
723+
feast_metadata.c.metadata_key == FeastMetadataKeys.PROJECT_UUID.value,
724+
feast_metadata.c.project_id == project,
725+
)
726+
row = conn.execute(stmt).first()
727+
if row:
728+
usage.set_current_project_uuid(row["metadata_value"])
729+
else:
730+
new_project_uuid = f"{uuid.uuid4()}"
731+
values = {
732+
"metadata_key": FeastMetadataKeys.PROJECT_UUID.value,
733+
"metadata_value": new_project_uuid,
734+
"last_updated_timestamp": update_time,
735+
"project_id": project,
736+
}
737+
insert_stmt = insert(feast_metadata).values(values)
738+
conn.execute(insert_stmt)
739+
usage.set_current_project_uuid(new_project_uuid)
740+
688741
def _delete_object(self, table, name, project, id_field_name, not_found_exception):
689742
with self.engine.connect() as conn:
690743
stmt = delete(table).where(
@@ -708,6 +761,8 @@ def _get_object(
708761
proto_field_name,
709762
not_found_exception,
710763
):
764+
self._maybe_init_project_metadata(project)
765+
711766
with self.engine.connect() as conn:
712767
stmt = select(table).where(
713768
getattr(table.c, id_field_name) == name, table.c.project_id == project
@@ -721,6 +776,7 @@ def _get_object(
721776
def _list_objects(
722777
self, table, project, proto_class, python_class, proto_field_name
723778
):
779+
self._maybe_init_project_metadata(project)
724780
with self.engine.connect() as conn:
725781
stmt = select(table).where(table.c.project_id == project)
726782
rows = conn.execute(stmt).all()
@@ -736,15 +792,16 @@ def _list_objects(
736792
def _set_last_updated_metadata(self, last_updated: datetime, project: str):
737793
with self.engine.connect() as conn:
738794
stmt = select(feast_metadata).where(
739-
feast_metadata.c.metadata_key == "last_updated_timestamp",
795+
feast_metadata.c.metadata_key
796+
== FeastMetadataKeys.LAST_UPDATED_TIMESTAMP.value,
740797
feast_metadata.c.project_id == project,
741798
)
742799
row = conn.execute(stmt).first()
743800

744801
update_time = int(last_updated.timestamp())
745802

746803
values = {
747-
"metadata_key": "last_updated_timestamp",
804+
"metadata_key": FeastMetadataKeys.LAST_UPDATED_TIMESTAMP.value,
748805
"metadata_value": f"{update_time}",
749806
"last_updated_timestamp": update_time,
750807
"project_id": project,
@@ -753,7 +810,8 @@ def _set_last_updated_metadata(self, last_updated: datetime, project: str):
753810
update_stmt = (
754811
update(feast_metadata)
755812
.where(
756-
feast_metadata.c.metadata_key == "last_updated_timestamp",
813+
feast_metadata.c.metadata_key
814+
== FeastMetadataKeys.LAST_UPDATED_TIMESTAMP.value,
757815
feast_metadata.c.project_id == project,
758816
)
759817
.values(values)
@@ -766,7 +824,8 @@ def _set_last_updated_metadata(self, last_updated: datetime, project: str):
766824
def _get_last_updated_metadata(self, project: str):
767825
with self.engine.connect() as conn:
768826
stmt = select(feast_metadata).where(
769-
feast_metadata.c.metadata_key == "last_updated_timestamp",
827+
feast_metadata.c.metadata_key
828+
== FeastMetadataKeys.LAST_UPDATED_TIMESTAMP.value,
770829
feast_metadata.c.project_id == project,
771830
)
772831
row = conn.execute(stmt).first()

0 commit comments

Comments
 (0)