Skip to content

Commit bc825a1

Browse files
committed
update init
Signed-off-by: HaoXuAI <[email protected]>
1 parent 778d19e commit bc825a1

File tree

2 files changed

+69
-22
lines changed

2 files changed

+69
-22
lines changed

sdk/python/feast/dataframe.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,20 @@ def __init__(
4343
"""
4444
self.data = data
4545
self.metadata = metadata or {}
46-
self._engine = engine or self._detect_engine()
46+
47+
# Detect the actual engine from the data
48+
detected_engine = self._detect_engine()
49+
50+
if engine is not None:
51+
# Validate that the provided engine matches the detected engine
52+
if engine != detected_engine:
53+
raise ValueError(
54+
f"Provided engine '{engine}' does not match detected engine '{detected_engine}' "
55+
f"for data type {type(data).__name__}"
56+
)
57+
self._engine = engine
58+
else:
59+
self._engine = detected_engine
4760

4861
def _detect_engine(self) -> DataFrameEngine:
4962
"""Auto-detect the DataFrame engine based on type."""

sdk/python/tests/unit/test_dataframe.py

Lines changed: 55 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@ def test_arrow_detection(self):
2929
assert isinstance(feast_df.data, pa.Table)
3030

3131
def test_explicit_engine(self):
32-
"""Test explicit engine specification."""
32+
"""Test explicit engine specification with unknown data."""
3333
data = {"mock": "data"}
34-
feast_df = FeastDataFrame(data, engine=DataFrameEngine.SPARK)
34+
feast_df = FeastDataFrame(data, engine=DataFrameEngine.UNKNOWN)
3535

36-
assert feast_df.engine == DataFrameEngine.SPARK
37-
assert feast_df.is_lazy
36+
assert feast_df.engine == DataFrameEngine.UNKNOWN
37+
assert not feast_df.is_lazy
3838

3939
def test_unknown_engine(self):
4040
"""Test handling of unknown DataFrame types."""
@@ -62,34 +62,68 @@ def test_repr(self):
6262
assert "engine=pandas" in repr_str
6363
assert "DataFrame" in repr_str
6464

65-
@pytest.mark.parametrize(
66-
"engine,expected_lazy",
67-
[
68-
(DataFrameEngine.PANDAS, False),
69-
(DataFrameEngine.ARROW, False),
70-
(DataFrameEngine.POLARS, False),
71-
(DataFrameEngine.SPARK, True),
72-
(DataFrameEngine.DASK, True),
73-
(DataFrameEngine.RAY, True),
74-
(DataFrameEngine.UNKNOWN, False),
75-
],
76-
)
77-
def test_is_lazy_property(self, engine, expected_lazy):
65+
def test_is_lazy_property(self):
7866
"""Test is_lazy property for different engines."""
79-
feast_df = FeastDataFrame({"mock": "data"}, engine=engine)
80-
assert feast_df.is_lazy == expected_lazy
67+
# Test with pandas DataFrame (not lazy)
68+
df = pd.DataFrame({"a": [1, 2, 3]})
69+
feast_df = FeastDataFrame(df)
70+
assert not feast_df.is_lazy
71+
72+
# Test with Arrow table (not lazy)
73+
table = pa.table({"a": [1, 2, 3]})
74+
feast_df = FeastDataFrame(table)
75+
assert not feast_df.is_lazy
76+
77+
# Test with unknown data type (not lazy)
78+
unknown_data = {"mock": "data"}
79+
feast_df = FeastDataFrame(unknown_data)
80+
assert not feast_df.is_lazy
81+
82+
# Test lazy engines: construct with unknown data and engine=UNKNOWN so validation passes, then override _engine
83+
for lazy_engine in [DataFrameEngine.SPARK, DataFrameEngine.DASK, DataFrameEngine.RAY]:
84+
feast_df = FeastDataFrame(unknown_data, engine=DataFrameEngine.UNKNOWN)
85+
feast_df._engine = lazy_engine # Override for testing
86+
assert feast_df.is_lazy
8187

8288
def test_polars_detection(self):
8389
"""Test detection of polars DataFrame (using mock)."""
8490

8591
# Mock polars DataFrame
8692
class MockPolarsDF:
93+
__module__ = "polars.dataframe.frame"
94+
8795
def __init__(self):
88-
self.__module__ = "polars.dataframe.frame"
89-
self.__class__.__name__ = "DataFrame"
96+
pass
9097

9198
polars_df = MockPolarsDF()
9299
feast_df = FeastDataFrame(polars_df)
93100

94101
assert feast_df.engine == DataFrameEngine.POLARS
95102
assert not feast_df.is_lazy
103+
104+
def test_engine_validation_valid(self):
105+
"""Test that providing a correct engine passes validation."""
106+
df = pd.DataFrame({"a": [1, 2, 3]})
107+
feast_df = FeastDataFrame(df, engine=DataFrameEngine.PANDAS)
108+
109+
assert feast_df.engine == DataFrameEngine.PANDAS
110+
assert isinstance(feast_df.data, pd.DataFrame)
111+
112+
def test_engine_validation_invalid(self):
113+
"""Test that providing an incorrect engine raises ValueError."""
114+
df = pd.DataFrame({"a": [1, 2, 3]})
115+
116+
with pytest.raises(ValueError, match="Provided engine 'spark' does not match detected engine 'pandas'"):
117+
FeastDataFrame(df, engine=DataFrameEngine.SPARK)
118+
119+
def test_engine_validation_arrow(self):
120+
"""Test engine validation with Arrow table."""
121+
table = pa.table({"a": [1, 2, 3]})
122+
123+
# Valid case
124+
feast_df = FeastDataFrame(table, engine=DataFrameEngine.ARROW)
125+
assert feast_df.engine == DataFrameEngine.ARROW
126+
127+
# Invalid case
128+
with pytest.raises(ValueError, match="Provided engine 'pandas' does not match detected engine 'arrow'"):
129+
FeastDataFrame(table, engine=DataFrameEngine.PANDAS)

0 commit comments

Comments
 (0)