Skip to content

Commit fdb4dea

Browse files
authored
Optional IngestionJob parameters passed by Spark Launcher (#1130)
1 parent 620e3e4 commit fdb4dea

File tree

22 files changed

+300
-159
lines changed

22 files changed

+300
-159
lines changed

.github/workflows/complete.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ jobs:
77
runs-on: [self-hosted]
88
strategy:
99
matrix:
10-
component: [core, serving, jobcontroller, jupyter]
10+
component: [core, serving, jobservice, jupyter]
1111
env:
1212
GITHUB_PR_SHA: ${{ github.event.pull_request.head.sha }}
1313
REGISTRY: gcr.io/kf-feast

Makefile

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,17 +121,13 @@ build-push-docker:
121121
@$(MAKE) push-core-docker registry=$(REGISTRY) version=$(VERSION)
122122
@$(MAKE) push-serving-docker registry=$(REGISTRY) version=$(VERSION)
123123
@$(MAKE) push-ci-docker registry=$(REGISTRY) version=$(VERSION)
124-
@$(MAKE) push-jobcontroller-docker registry=$(REGISTRY) version=$(VERSION)
125124
@$(MAKE) push-jobservice-docker registry=$(REGISTRY) version=$(VERSION)
126125

127-
build-docker: build-core-docker build-serving-docker build-ci-docker build-jobcontroller-docker build-jobservice-docker
126+
build-docker: build-core-docker build-serving-docker build-ci-docker build-jobservice-docker
128127

129128
push-core-docker:
130129
docker push $(REGISTRY)/feast-core:$(VERSION)
131130

132-
push-jobcontroller-docker:
133-
docker push $(REGISTRY)/feast-jobcontroller:$(VERSION)
134-
135131
push-jobservice-docker:
136132
docker push $(REGISTRY)/feast-jobservice:$(VERSION)
137133

@@ -150,9 +146,6 @@ build-core-docker:
150146
build-jobservice-docker:
151147
docker build -t $(REGISTRY)/feast-jobservice:$(VERSION) -f infra/docker/jobservice/Dockerfile .
152148

153-
build-jobcontroller-docker:
154-
docker build -t $(REGISTRY)/feast-jobcontroller:$(VERSION) -f infra/docker/jobcontroller/Dockerfile .
155-
156149
build-serving-docker:
157150
docker build -t $(REGISTRY)/feast-serving:$(VERSION) -f infra/docker/serving/Dockerfile .
158151

docs/coverage/java/pom.xml

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -59,12 +59,6 @@
5959
<version>${project.version}</version>
6060
</dependency>
6161

62-
<dependency>
63-
<groupId>dev.feast</groupId>
64-
<artifactId>feast-ingestion</artifactId>
65-
<version>${project.version}</version>
66-
</dependency>
67-
6862
<dependency>
6963
<groupId>dev.feast</groupId>
7064
<artifactId>feast-core</artifactId>

infra/charts/feast/charts/feast-jobservice/templates/deployment.yaml

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -37,40 +37,32 @@ spec:
3737
{{- toYaml . | nindent 8 }}
3838
{{- end }}
3939

40-
{{- if .Values.gcpServiceAccount.enabled }}
40+
{{- if Values.secrets }}
4141
volumes:
42-
- name: {{ template "feast-jobservice.fullname" . }}-gcp-service-account
42+
{{ - range $secret := .Values.secrets }}
43+
- name: {{ $secret }}
4344
secret:
44-
secretName: {{ .Values.gcpServiceAccount.existingSecret.name }}
45+
secretName: {{ $secret }}
46+
{{ - end }}
4547
{{- end }}
4648

4749
containers:
4850
- name: {{ .Chart.Name }}
4951
image: {{ .Values.image.repository }}:{{ .Values.image.tag }}
5052
imagePullPolicy: {{ .Values.image.pullPolicy }}
5153

52-
{{- if .Values.gcpServiceAccount.enabled }}
54+
{{- if .Values.secrets }}
5355
volumeMounts:
54-
- name: {{ template "feast-jobservice.fullname" . }}-gcp-service-account
55-
mountPath: /etc/secrets/google
56+
{{ - range $secret := .Values.secrets }}
57+
- name: {{ $secret }}
58+
mountPath: "/etc/secrets/{{ $secret }}"
5659
readOnly: true
60+
{{ - end }}
5761
{{- end }}
5862

5963
env:
6064
- name: FEAST_CORE_URL
6165
value: "{{ .Release.Name }}-feast-core:6565"
62-
- name: FEAST_HISTORICAL_SERVING_URL
63-
value: "{{ .Release.Name }}-feast-batch-serving:6566"
64-
65-
{{- if .Values.gcpServiceAccount.enabled }}
66-
- name: GOOGLE_APPLICATION_CREDENTIALS
67-
value: /etc/secrets/google/{{ .Values.gcpServiceAccount.existingSecret.key }}
68-
{{- end }}
69-
70-
{{- if .Values.gcpProjectId }}
71-
- name: GOOGLE_CLOUD_PROJECT
72-
value: {{ .Values.gcpProjectId | quote }}
73-
{{- end }}
7466

7567
{{- range $key, $value := .Values.envOverrides }}
7668
- name: {{ printf "%s" $key | replace "." "_" | upper | quote }}

infra/docker-compose/docker-compose.yml

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,30 @@ services:
1818
- /opt/feast/feast-core.jar
1919
- --spring.config.location=classpath:/application.yml,file:/etc/feast/application.yml
2020

21+
jobservice:
22+
image: gcr.io/kf-feast/feast-jobservice:${FEAST_VERSION}
23+
depends_on:
24+
- core
25+
ports:
26+
- 6568:6568
27+
volumes:
28+
- $PWD:/shared
29+
environment:
30+
FEAST_CORE_URL: core:6565
31+
FEAST_SERVING_URL: online_serving:6566
32+
FEAST_SPARK_LAUNCHER: standalone
33+
FEAST_SPARK_STANDALONE_MASTER: local
34+
FEAST_SPARK_HOME: /usr/local/spark
35+
FEAST_SPARK_STAGING_LOCATION: file:///shared/staging
36+
FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION: file:///shared/historical_feature_output
37+
FEAST_HISTORICAL_FEATURE_OUTPUT_FORMAT: parquet
38+
FEAST_REDIS_HOST: redis
39+
2140
jupyter:
2241
image: gcr.io/kf-feast/feast-jupyter:${FEAST_VERSION}
2342
volumes:
2443
- ${GCP_SERVICE_ACCOUNT}:/etc/gcloud/service-accounts/key.json
44+
- $PWD:/shared
2545
depends_on:
2646
- core
2747
environment:
@@ -30,8 +50,8 @@ services:
3050
FEAST_SPARK_LAUNCHER: standalone
3151
FEAST_SPARK_STANDALONE_MASTER: local
3252
FEAST_SPARK_HOME: /usr/local/spark
33-
FEAST_SPARK_STAGING_LOCATION: file:///tmp/staging
34-
FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION: file:///tmp/historical_feature_output
53+
FEAST_SPARK_STAGING_LOCATION: file:///shared/staging
54+
FEAST_HISTORICAL_FEATURE_OUTPUT_LOCATION: file:///shared/historical_feature_output
3555
FEAST_HISTORICAL_FEATURE_OUTPUT_FORMAT: parquet
3656
FEAST_REDIS_HOST: redis
3757
GOOGLE_APPLICATION_CREDENTIALS: /etc/gcloud/service-accounts/key.json

infra/docker/jobservice/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ RUN make compile-protos-python
1616
# Install Feast SDK
1717
COPY .git .git
1818
COPY README.md README.md
19-
RUN pip install -e sdk/python -U
19+
RUN pip install -U -e sdk/python
2020
RUN pip install "s3fs" "boto3" "urllib3>=1.25.4"
2121

2222
#
@@ -27,4 +27,4 @@ RUN wget -q https://github.com/grpc-ecosystem/grpc-health-probe/releases/downloa
2727
-O /usr/bin/grpc-health-probe && \
2828
chmod +x /usr/bin/grpc-health-probe
2929

30-
CMD ["feast", "server"]
30+
CMD ["python", "-m", "feast.cli", "server"]

infra/scripts/test-docker-compose.sh

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,6 @@ export JUPYTER_DOCKER_CONTAINER_IP_ADDRESS=$(docker inspect -f '{{range .Network
3939

4040
# Print Jupyter container information
4141
docker inspect feast_jupyter_1
42-
docker logs feast_jupyter_1
4342

4443
# Wait for Jupyter Notebook Container to come online
4544
${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${JUPYTER_DOCKER_CONTAINER_IP_ADDRESS}:8888 --timeout=60
@@ -56,10 +55,18 @@ export FEAST_ONLINE_SERVING_CONTAINER_IP_ADDRESS=$(docker inspect -f '{{range .N
5655
# Wait for Feast Online Serving to be ready
5756
${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${FEAST_ONLINE_SERVING_CONTAINER_IP_ADDRESS}:6566 --timeout=120
5857

58+
59+
# Get Feast Job Service container IP address
60+
export FEAST_JOB_SERVICE_CONTAINER_IP_ADDRESS=$(docker inspect -f '{{range .NetworkSettings.Networks}}{{.IPAddress}}{{end}}' feast_jobservice_1)
61+
62+
# Wait for Feast Job Service to be ready
63+
${PROJECT_ROOT_DIR}/infra/scripts/wait-for-it.sh ${FEAST_JOB_SERVICE_CONTAINER_IP_ADDRESS}:6568 --timeout=120
64+
5965
# Run e2e tests for Redis
6066
docker exec \
6167
-e FEAST_VERSION=${FEAST_VERSION} \
6268
-e DISABLE_SERVICE_FIXTURES=true \
6369
-e DISABLE_FEAST_SERVICE_FIXTURES=true \
70+
--user root \
6471
feast_jupyter_1 bash \
65-
-c 'cd /feast/tests && python -m pip install -r requirements.txt && pytest e2e/ -m "not bq" --ingestion-jar gs://feast-jobs/spark/ingestion/feast-ingestion-spark-${FEAST_VERSION}.jar --redis-url redis:6379 --core-url core:6565 --serving-url online_serving:6566 --kafka-brokers kafka:9092'
72+
-c 'cd /feast/tests && python -m pip install -r requirements.txt && pytest e2e/ --ingestion-jar https://storage.googleapis.com/feast-jobs/spark/ingestion/feast-ingestion-spark-${FEAST_VERSION}.jar --redis-url redis:6379 --core-url core:6565 --serving-url online_serving:6566 --job-service-url jobservice:6568 --staging-path file:///shared/staging/ --kafka-brokers kafka:9092'

infra/scripts/test-end-to-end-gcp.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,5 +13,7 @@ python -m pip install -qr tests/requirements.txt
1313
su -p postgres -c "PATH=$PATH HOME=/tmp pytest -v tests/e2e/ \
1414
--feast-version develop --env=gcloud --dataproc-cluster-name feast-e2e \
1515
--dataproc-project kf-feast --dataproc-region us-central1 \
16+
--staging-path gs://feast-templocation-kf-feast/ \
17+
--with-job-service \
1618
--redis-url 10.128.0.105:6379 --redis-cluster --kafka-brokers 10.128.0.103:9094 \
1719
--bq-project kf-feast"

pom.xml

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,11 @@
3131
<module>datatypes/java</module>
3232
<module>storage/api</module>
3333
<module>storage/connectors</module>
34-
<module>ingestion</module>
3534
<module>core</module>
3635
<module>serving</module>
3736
<module>sdk/java</module>
3837
<module>docs/coverage/java</module>
3938
<module>common</module>
40-
<module>job-controller</module>
4139
<module>common-test</module>
4240
<module>spark/ingestion</module>
4341
</modules>

sdk/python/feast/client.py

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -387,7 +387,7 @@ def version(self):
387387
return result
388388

389389
@property
390-
def project(self) -> Union[str, None]:
390+
def project(self) -> str:
391391
"""
392392
Retrieve currently active project
393393
@@ -949,6 +949,7 @@ def get_historical_features(
949949
>>> output_file_uri = feature_retrieval_job.get_output_file_uri()
950950
"gs://some-bucket/output/
951951
"""
952+
project = project or FEAST_DEFAULT_OPTIONS[CONFIG_PROJECT_KEY]
952953
feature_tables = self._get_feature_tables_from_feature_refs(
953954
feature_refs, project
954955
)
@@ -1001,7 +1002,12 @@ def get_historical_features(
10011002
)
10021003
else:
10031004
return start_historical_feature_retrieval_job(
1004-
self, entity_source, feature_tables, output_format, output_location,
1005+
client=self,
1006+
project=self.project,
1007+
entity_source=entity_source,
1008+
feature_tables=feature_tables,
1009+
output_format=output_format,
1010+
output_path=output_location,
10051011
)
10061012

10071013
def get_historical_features_df(
@@ -1043,7 +1049,10 @@ def get_historical_features_df(
10431049
feature_refs, project
10441050
)
10451051
return start_historical_feature_retrieval_spark_session(
1046-
self, entity_source, feature_tables
1052+
client=self,
1053+
project=self.project,
1054+
entity_source=entity_source,
1055+
feature_tables=feature_tables,
10471056
)
10481057

10491058
def _get_feature_tables_from_feature_refs(
@@ -1079,7 +1088,13 @@ def start_offline_to_online_ingestion(
10791088
:return: Spark Job Proxy object
10801089
"""
10811090
if not self._use_job_service:
1082-
return start_offline_to_online_ingestion(feature_table, start, end, self)
1091+
return start_offline_to_online_ingestion(
1092+
client=self,
1093+
project=self.project,
1094+
feature_table=feature_table,
1095+
start=start,
1096+
end=end,
1097+
)
10831098
else:
10841099
request = StartOfflineToOnlineIngestionJobRequest(
10851100
project=self.project, table_name=feature_table.name,
@@ -1096,7 +1111,10 @@ def start_stream_to_online_ingestion(
10961111
) -> SparkJob:
10971112
if not self._use_job_service:
10981113
return start_stream_to_online_ingestion(
1099-
feature_table, extra_jars or [], self
1114+
client=self,
1115+
project=self.project,
1116+
feature_table=feature_table,
1117+
extra_jars=extra_jars or [],
11001118
)
11011119
else:
11021120
request = StartStreamToOnlineIngestionJobRequest(

0 commit comments

Comments
 (0)