Skip to content

Commit 1d53f45

Browse files
authored
Add Azure end-to-end test (#1241)
* azure e2e test Signed-off-by: Jacob Klegar <[email protected]> * move functions + poll for gcr images + build feast-ci on PRs Signed-off-by: Jacob Klegar <[email protected]> * pin kubectl and helm versions Signed-off-by: Jacob Klegar <[email protected]>
1 parent d65fb6c commit 1d53f45

File tree

4 files changed

+177
-2
lines changed

4 files changed

+177
-2
lines changed

.github/workflows/complete.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ jobs:
77
runs-on: [self-hosted]
88
strategy:
99
matrix:
10-
component: [core, serving, jobservice, jupyter]
10+
component: [core, serving, jobservice, jupyter, ci]
1111
env:
1212
GITHUB_PR_SHA: ${{ github.event.pull_request.head.sha }}
1313
REGISTRY: gcr.io/kf-feast

infra/docker/ci/Dockerfile

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,4 +71,18 @@ RUN PROTOC_ZIP=protoc-${PROTOC_VERSION}-linux-x86_64.zip && \
7171
go get gopkg.in/russross/blackfriday.v2 && \
7272
git clone https://github.com/istio/tools/ && \
7373
cd tools/cmd/protoc-gen-docs && \
74-
go build && mkdir -p $HOME/bin && cp protoc-gen-docs $HOME/bin
74+
go build && mkdir -p $HOME/bin && cp protoc-gen-docs $HOME/bin
75+
76+
# Install AZ CLI
77+
RUN curl -sL https://aka.ms/InstallAzureCLIDeb | bash
78+
79+
# Install kubectl
80+
RUN apt-get install -y kubectl=1.20.0-00
81+
82+
# Install helm
83+
RUN curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 && \
84+
chmod 700 get_helm.sh && \
85+
./get_helm.sh --version v3.4.2
86+
87+
# Install jq
88+
RUN apt-get install -y jq

infra/scripts/azure-runner.sh

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,58 @@
11
#!/bin/bash
#
# Runs the Feast end-to-end test suite against an Azure AKS cluster.
#
# Must be run from the root of the feast repository: the helper functions are
# sourced through a relative path.
#
# Required environment variables:
#   AZ_SERVICE_PRINCIPAL_ID, AZ_SERVICE_PRINCIPAL_PASS,
#   AZ_SERVICE_PRINCIPAL_TENANT_ID   passed to `az login --service-principal`
#   RESOURCE_GROUP, AKS_CLUSTER_NAME passed to `az aks get-credentials`
#   DOCKER_REPOSITORY                repository holding the feast images tagged
#                                    with the current commit SHA
#   STAGING_PATH                     forwarded to the test runner pod

set -euo pipefail

STEP_BREADCRUMB='~~~~~~~~'
SECONDS=0
TIMEFORMAT="${STEP_BREADCRUMB} took %R seconds"

# Fail fast on missing configuration, before anything in the cluster is touched.
# Without this check `set -u` would only trip at the first use of each variable,
# i.e. possibly after the previous release has already been deleted.
for REQUIRED_VAR in \
    AZ_SERVICE_PRINCIPAL_ID \
    AZ_SERVICE_PRINCIPAL_PASS \
    AZ_SERVICE_PRINCIPAL_TENANT_ID \
    RESOURCE_GROUP \
    AKS_CLUSTER_NAME \
    DOCKER_REPOSITORY \
    STAGING_PATH; do
    if [[ -z "${!REQUIRED_VAR:-}" ]]; then
        echo "Required environment variable ${REQUIRED_VAR} is not set" >&2
        exit 1
    fi
done

# Note requires running in root feast directory
source infra/scripts/runner-helper.sh

GIT_TAG=$(git rev-parse HEAD)
GIT_REMOTE_URL=$(git config --get remote.origin.url)

echo "########## Starting e2e tests for ${GIT_REMOTE_URL} ${GIT_TAG} ###########"

# Workaround for COPY command in core docker image that pulls local maven repo into the image
# itself. Best effort: a failure here must not abort the run.
mkdir -p .m2 || true

# Log into k8s.
echo "${STEP_BREADCRUMB} Updating kubeconfig"
az login --service-principal -u "$AZ_SERVICE_PRINCIPAL_ID" -p "$AZ_SERVICE_PRINCIPAL_PASS" --tenant "$AZ_SERVICE_PRINCIPAL_TENANT_ID"
az aks get-credentials --resource-group "$RESOURCE_GROUP" --name "$AKS_CLUSTER_NAME"

# Sanity check that kubectl is working.
echo "${STEP_BREADCRUMB} k8s sanity check"
kubectl get pods

# e2e test - runs in sparkop namespace for consistency with AWS sparkop test.
NAMESPACE=sparkop
RELEASE=sparkop

# Delete old helm release and PVCs
k8s_cleanup "$RELEASE" "$NAMESPACE"

# Helm install everything in a namespace
helm_install "$RELEASE" "${DOCKER_REPOSITORY}" "${GIT_TAG}" --namespace "$NAMESPACE"

# Delete old test running pod if it exists
kubectl delete pod -n "$NAMESPACE" ci-test-runner 2>/dev/null || true

# Delete all sparkapplication resources that may be left over from the previous test runs.
kubectl delete sparkapplication --all -n "$NAMESPACE" || true

# Make sure the test pod has permissions to create sparkapplication resources
setup_sparkop_role

# The remote URL is interpolated into a command line that is evaluated by the
# shell inside the pod, so shell-quote it first.
printf -v GIT_REMOTE_URL_QUOTED '%q' "$GIT_REMOTE_URL"

# Run the test suite as a one-off pod.
# The repository is cloned into an explicit "feast" directory so that the
# following `cd feast` also works when the remote repository has another name.
echo "${STEP_BREADCRUMB} Running the test suite"
time kubectl run --rm -n "$NAMESPACE" -i ci-test-runner \
    --restart=Never \
    --image="${DOCKER_REPOSITORY}/feast-ci:${GIT_TAG}" \
    --env="STAGING_PATH=$STAGING_PATH" \
    -- \
    bash -c "mkdir src && cd src && git clone ${GIT_REMOTE_URL_QUOTED} feast && cd feast && git config remote.origin.fetch '+refs/pull/*:refs/remotes/origin/pull/*' && git fetch -q && git checkout $GIT_TAG && ./infra/scripts/setup-e2e-env-sparkop.sh && ./infra/scripts/test-end-to-end-sparkop.sh"

echo "########## e2e tests took $SECONDS seconds ###########"

infra/scripts/runner-helper.sh

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#!/bin/bash
2+
3+
k8s_cleanup() {
    # Reset a namespace so that a fresh Feast release can be installed into it.
    # Usage: k8s_cleanup $RELEASE $NAMESPACE
    # Args:
    #   $RELEASE   helm release to uninstall (it is fine if it does not exist)
    #   $NAMESPACE namespace to clean up (created when missing)
    # Globals:
    #   STEP_BREADCRUMB (read)  prefix for progress messages
    #   PG_PASSWORD (written)   freshly generated postgres password
    local release_name=$1
    local ns=$2

    # The namespace may already exist; that is not an error.
    kubectl create namespace "$ns" || :

    # Drop whatever release is left over from an earlier run.
    helm uninstall "$release_name" -n "$ns" || :

    # PVCs survive `helm uninstall`, so they have to be removed explicitly.
    time kubectl delete pvc --all -n "$ns" || :

    kubectl get service -n "$ns"

    # Generate a new postgres password. The postgres instance cannot be reached
    # from outside the k8s cluster, so it does not need to be super secure.
    printf '%s\n' "${STEP_BREADCRUMB} Setting PG password"
    PG_PASSWORD=$(head -c 59 /dev/urandom | md5sum)
    PG_PASSWORD=${PG_PASSWORD:0:16}

    # Recreate the secret from scratch with the new password.
    kubectl delete secret feast-postgresql -n "$ns" || :
    kubectl create secret generic feast-postgresql \
        --from-literal=postgresql-password="$PG_PASSWORD" \
        -n "$ns"
}
25+
26+
function helm_install {
    # helm install Feast into k8s cluster and display a nice error if it fails.
    # Usage: helm_install $RELEASE $DOCKER_REPOSITORY $GIT_TAG ...
    # Args:
    #   $RELEASE is helm release name
    #   $DOCKER_REPOSITORY is the docker repo containing feast images tagged with $GIT_TAG
    #   ... you can pass additional args to this function that are passed on to helm install
    # Globals:
    #   STEP_BREADCRUMB (read)      prefix for progress messages
    #   NAMESPACE (read, optional)  namespace used for failure diagnostics when the
    #                               extra args carry no --namespace / -n option
    # Exits the calling shell with status 1 when `helm install` fails.

    local RELEASE=$1
    local DOCKER_REPOSITORY=$2
    local GIT_TAG=$3

    shift 3

    # Work out which namespace to inspect if the install fails: prefer the one
    # handed to helm through the extra args, fall back to the caller's NAMESPACE.
    local diag_namespace=${NAMESPACE:-}
    local previous_arg="" current_arg
    for current_arg in "$@"; do
        case "$previous_arg" in
            -n|--namespace) diag_namespace=$current_arg ;;
        esac
        case "$current_arg" in
            --namespace=*) diag_namespace=${current_arg#--namespace=} ;;
        esac
        previous_arg=$current_arg
    done
    local -a kubectl_ns=()
    if [[ -n "$diag_namespace" ]]; then
        kubectl_ns=(-n "$diag_namespace")
    fi

    # Wait for images to be available in the docker repository; ci is the last image built.
    # The loop runs in a child bash (needed by `timeout`), which cannot see this
    # function's non-exported variables, so repository and tag are passed as
    # positional arguments. The tag goes to jq via --arg and is matched exactly.
    # shellcheck disable=SC2016 # expansion is meant to happen in the child shell
    timeout 15m bash -c '
        repository=$1
        tag=$2
        until gcloud container images list-tags "${repository}/feast-ci" --format=json \
                | jq -e --arg tag "$tag" ".[] | select(.tags[] == \$tag)" > /dev/null; do
            sleep 10s
        done
    ' wait_for_image "$DOCKER_REPOSITORY" "$GIT_TAG"

    # We skip statsd exporter and other metrics stuff since we're not using it anyway, and it
    # has some issues with unbound PVCs (that cause kubectl delete pvc to hang).
    echo "${STEP_BREADCRUMB} Helm installing feast"

    if ! time helm install --wait "$RELEASE" infra/charts/feast \
        --timeout 15m \
        --set "feast-jupyter.image.repository=${DOCKER_REPOSITORY}/feast-jupyter" \
        --set "feast-jupyter.image.tag=${GIT_TAG}" \
        --set "feast-online-serving.image.repository=${DOCKER_REPOSITORY}/feast-serving" \
        --set "feast-online-serving.image.tag=${GIT_TAG}" \
        --set "feast-jobservice.image.repository=${DOCKER_REPOSITORY}/feast-jobservice" \
        --set "feast-jobservice.image.tag=${GIT_TAG}" \
        --set "feast-core.image.repository=${DOCKER_REPOSITORY}/feast-core" \
        --set "feast-core.image.tag=${GIT_TAG}" \
        --set "prometheus-statsd-exporter.enabled=false" \
        --set "prometheus.enabled=false" \
        --set "grafana.enabled=false" \
        --set "feast-jobservice.enabled=false" \
        "$@" ; then

        echo "Error during helm install. "
        # Diagnostics are best effort: a failing kubectl call must not hide the rest.
        kubectl ${kubectl_ns[@]+"${kubectl_ns[@]}"} get pods || true

        # Pods of this release whose READY column is 0/1 are treated as crashed.
        local -a CRASHED_PODS=()
        readarray -t CRASHED_PODS < <(
            kubectl ${kubectl_ns[@]+"${kubectl_ns[@]}"} get pods --no-headers=true \
                | awk -v release="$RELEASE" 'index($0, release) && $2 == "0/1" { print $1 }'
        )
        echo "Crashed pods: ${CRASHED_PODS[*]-}"

        local POD
        if (( ${#CRASHED_PODS[@]} > 0 )); then
            for POD in "${CRASHED_PODS[@]}"; do
                echo "Logs from pod error $POD:"
                # A pod that never restarted has no previous container; show its
                # current logs instead and keep going with the remaining pods.
                kubectl ${kubectl_ns[@]+"${kubectl_ns[@]}"} logs "$POD" --previous \
                    || kubectl ${kubectl_ns[@]+"${kubectl_ns[@]}"} logs "$POD" \
                    || true
            done
        fi

        exit 1
    fi
}
77+
78+
function setup_sparkop_role {
    # Set up permissions for the default service account so that Feast SDK can manage
    # sparkapplication resources from the test runner pod.
    # Usage: setup_sparkop_role [NAMESPACE]
    # Args:
    #   NAMESPACE (optional) namespace to create the Role and RoleBinding in. When
    #             omitted, kubectl applies them to the namespace of the current
    #             kube context.
    #
    # Uses rbac.authorization.k8s.io/v1: the v1beta1 RBAC API is no longer served
    # as of Kubernetes 1.22.

    local -a namespace_args=()
    if [[ -n "${1:-}" ]]; then
        namespace_args=(-n "$1")
    fi

    kubectl apply ${namespace_args[@]+"${namespace_args[@]}"} -f - <<'EOF'
kind: Role
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: use-spark-operator
rules:
- apiGroups: ["sparkoperator.k8s.io"]
  resources: ["sparkapplications"]
  verbs: ["create", "delete", "deletecollection", "get", "list", "update", "watch", "patch"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: use-spark-operator
roleRef:
  kind: Role
  name: use-spark-operator
  apiGroup: rbac.authorization.k8s.io
subjects:
- kind: ServiceAccount
  name: default
EOF
}

0 commit comments

Comments
 (0)