feat(ci): add spark smoke test (#4158)
Commit ede31c4 (1 parent: 78cb194)
Showing 50 changed files with 2,761 additions and 2 deletions.
New workflow file (55 additions):

```yaml
name: spark smoke test
on:
  # Allows you to run this workflow manually from the Actions tab
  workflow_dispatch:
  push:
    branches:
      - master
    paths:
      - "metadata_models/**"
      - "metadata-integration/java/datahub-client/**"
      - "metadata-integration/java/spark-lineage"
  pull_request:
    branches:
      - master
    paths:
      - "metadata_models/**"
      - "metadata-integration/java/datahub-client/**"
      - "metadata-integration/java/spark-lineage"
  release:
    types: [published, edited]

jobs:
  spark-smoke-test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up JDK 1.8
        uses: actions/setup-java@v1
        with:
          java-version: 1.8
      - uses: actions/setup-python@v2
        with:
          python-version: "3.6"
      - name: Install dependencies
        run: ./metadata-ingestion/scripts/install_deps.sh
      - name: Gradle build
        run: ./gradlew build -x check -x docs-website:build -x test
      - name: Smoke test
        run: ./gradlew metadata-integration:java:spark-lineage:integrationTest
      - uses: actions/upload-artifact@v2
        if: always()
        with:
          name: Test Results (smoke tests)
          path: |
            **/build/reports/tests/test/**
            **/build/test-results/test/**
            **/junit.*.xml
      - name: Slack failure notification
        if: failure() && github.event_name == 'push'
        uses: kpritam/slack-job-status-action@v1
        with:
          job-status: ${{ job.status }}
          slack-bot-token: ${{ secrets.SLACK_BOT_TOKEN }}
          channel: github-activities
```
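For debugging outside CI, the job's three build steps can be replayed on a local checkout (a sketch; assumes JDK 8 and Python 3.6 are installed, matching what the workflow provisions):

```sh
# Mirror the workflow's build and test steps on a local clone.
./metadata-ingestion/scripts/install_deps.sh                       # Install dependencies
./gradlew build -x check -x docs-website:build -x test             # Gradle build
./gradlew metadata-integration:java:spark-lineage:integrationTest  # Smoke test
```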
metadata-integration/java/spark-lineage/spark-smoke-test/docker/.dockerignore (3 additions):

```
**/metastore_db/
**/derby.log
spark-warehouse
```
metadata-integration/java/spark-lineage/spark-smoke-test/docker/SparkBase.Dockerfile (30 additions):

```dockerfile
FROM rappdw/docker-java-python:openjdk1.8.0_171-python3.6.6

ARG shared_workspace=/opt/workspace

ENV SHARED_WORKSPACE=${shared_workspace}

# -- Layer: Apache Spark

ARG spark_version=2.4.8
ARG hadoop_version=2.7

RUN apt-get update -y && \
    apt-get install -y curl && \
    curl https://archive.apache.org/dist/spark/spark-${spark_version}/spark-${spark_version}-bin-hadoop${hadoop_version}.tgz -o spark.tgz && \
    tar -xf spark.tgz && \
    mv spark-${spark_version}-bin-hadoop${hadoop_version} /usr/bin/ && \
    mkdir /usr/bin/spark-${spark_version}-bin-hadoop${hadoop_version}/logs && \
    rm spark.tgz

ENV SPARK_HOME /usr/bin/spark-${spark_version}-bin-hadoop${hadoop_version}
ENV SPARK_MASTER_HOST spark-master
ENV SPARK_MASTER_PORT 7077
ENV PYSPARK_PYTHON python2.7
ENV PATH=$PATH:$SPARK_HOME/bin

COPY workspace $SHARED_WORKSPACE

WORKDIR ${SPARK_HOME}
```
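Once build_images.sh (further below) has staged the `workspace/` directory and built this image, the bundled Spark install can be sanity-checked in isolation (a sketch, assuming the `spark-base` tag exists locally):

```sh
# spark-submit resolves via PATH because the Dockerfile appends
# $SPARK_HOME/bin; this just prints the bundled Spark version.
docker run --rm spark-base spark-submit --version
```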
metadata-integration/java/spark-lineage/spark-smoke-test/docker/SparkMaster.Dockerfile (8 additions):

```dockerfile
FROM spark-base

# -- Runtime

ARG spark_master_web_ui=8080

EXPOSE ${spark_master_web_ui} ${SPARK_MASTER_PORT}
ENTRYPOINT bin/spark-class org.apache.spark.deploy.master.Master >> logs/spark-master.out
```
metadata-integration/java/spark-lineage/spark-smoke-test/docker/SparkSlave.Dockerfile (8 additions):

```dockerfile
FROM spark-base

# -- Runtime

ARG spark_worker_web_ui=8081

EXPOSE ${spark_worker_web_ui}
ENTRYPOINT bin/spark-class org.apache.spark.deploy.worker.Worker spark://${SPARK_MASTER_HOST}:${SPARK_MASTER_PORT} >> logs/spark-worker.out
```
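Because the entrypoint is in shell form, the master address is resolved from the environment at container start, so a worker could presumably be pointed at a different standalone master by overriding the variables baked into spark-base (a hypothetical override; `other-master` is a placeholder hostname):

```sh
# Re-point a worker by overriding the env vars the ENTRYPOINT
# interpolates at runtime (placeholder hostname, default port).
docker run --rm -e SPARK_MASTER_HOST=other-master -e SPARK_MASTER_PORT=7077 spark-slave
```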
metadata-integration/java/spark-lineage/spark-smoke-test/docker/SparkSubmit.Dockerfile (13 additions):

```dockerfile
FROM spark-base

# -- Runtime

WORKDIR ${SHARED_WORKSPACE}

ENTRYPOINT sleep 30 && \
    cd python-spark-lineage-test && \
    ./python_test_run.sh $SPARK_HOME ../spark-docker.conf && \
    cd ../java-spark-lineage-test && ./java_test_run.sh $SPARK_HOME ../spark-docker.conf
```
metadata-integration/java/spark-lineage/spark-smoke-test/docker/build_images.sh (26 additions):

```sh
#Remove old configuration
rm -rf workspace

#Copy needed files
mkdir workspace

ls ../../

cp ../../build/libs/datahub-spark-lineage* workspace/
cp ../spark-docker.conf workspace/
cp -a ../python-spark-lineage-test workspace/
mkdir workspace/java-spark-lineage-test
cp ../test-spark-lineage/java_test_run.sh workspace/java-spark-lineage-test/

mkdir -p workspace/java-spark-lineage-test/build/libs/
cp ../test-spark-lineage/build/libs/test-spark-lineage.jar workspace/java-spark-lineage-test/build/libs/

cp -a ../resources workspace

# create docker images
docker build -f SparkBase.Dockerfile -t spark-base .
docker build -f SparkMaster.Dockerfile -t spark-master .
docker build -f SparkSlave.Dockerfile -t spark-slave .
docker build -f SparkSubmit.Dockerfile -t spark-submit .
```
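The relative paths (`../../build/libs`, `../spark-docker.conf`) suggest the script expects to run from this `docker/` directory after the Gradle build has produced the lineage jars; a hypothetical invocation:

```sh
# Run from the docker/ directory once ./gradlew build has produced
# the datahub-spark-lineage and test-spark-lineage jars (assumption).
cd metadata-integration/java/spark-lineage/spark-smoke-test/docker
./build_images.sh
```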
metadata-integration/java/spark-lineage/spark-smoke-test/docker/spark-docker-compose.yml (29 additions):

```yaml
version: "3.6"

services:
  spark-master:
    image: spark-master
    container_name: spark-master
    ports:
      - 8090:8080
      - 7077:7077
  spark-worker-1:
    image: spark-slave
    container_name: spark-worker-1
    ports:
      - 8091:8081
    depends_on:
      - spark-master
  spark-worker-2:
    image: spark-slave
    container_name: spark-worker-2
    ports:
      - 8092:8081
    depends_on:
      - spark-master

networks:
  default:
    external: true
    name: datahub_network
```
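Since the default network is declared external, `datahub_network` must already exist (for example, from a running DataHub quickstart) before the cluster starts; a minimal sketch using the images built above:

```sh
# Create the external network if it is missing, then start the
# master and both workers in the background.
docker network inspect datahub_network >/dev/null 2>&1 || \
  docker network create datahub_network
docker-compose -f spark-docker-compose.yml up -d
```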
metadata-integration/java/spark-lineage/spark-smoke-test/golden_json/JavaHdfsIn2HdfsOut1.json (91 additions):

```json
{
  "urn:li:dataFlow:(spark,JavaHdfsIn2HdfsOut1,spark_spark-master_7077)": {
    "value": {
      "com.linkedin.metadata.snapshot.DataFlowSnapshot": {
        "urn": "urn:li:dataFlow:(spark,JavaHdfsIn2HdfsOut1,spark_spark-master_7077)",
        "aspects": [
          {
            "com.linkedin.metadata.key.DataFlowKey": {
              "orchestrator": "spark",
              "cluster": "spark_spark-master_7077",
              "flowId": "JavaHdfsIn2HdfsOut1"
            }
          },
          {
            "com.linkedin.datajob.DataFlowInfo": {
              "name": "JavaHdfsIn2HdfsOut1",
              "customProperties": {
                "sparkUser": "root",
                "appName": "JavaHdfsIn2HdfsOut1"
              }
            }
          },
          {
            "com.linkedin.common.DataPlatformInstance": {
              "platform": "urn:li:dataPlatform:spark"
            }
          },
          {
            "com.linkedin.common.BrowsePaths": {
              "paths": [
                "/spark/spark_spark-master_7077/javahdfsin2hdfsout1"
              ]
            }
          }
        ]
      }
    }
  },
  "urn:li:dataJob:(urn:li:dataFlow:(spark,JavaHdfsIn2HdfsOut1,spark_spark-master_7077),QueryExecId_4)": {
    "value": {
      "com.linkedin.metadata.snapshot.DataJobSnapshot": {
        "urn": "urn:li:dataJob:(urn:li:dataFlow:(spark,JavaHdfsIn2HdfsOut1,spark_spark-master_7077),QueryExecId_4)",
        "aspects": [
          {
            "com.linkedin.metadata.key.DataJobKey": {
              "jobId": "QueryExecId_4",
              "flow": "urn:li:dataFlow:(spark,JavaHdfsIn2HdfsOut1,spark_spark-master_7077)"
            }
          },
          {
            "com.linkedin.common.DataPlatformInstance": {
              "platform": "urn:li:dataPlatform:spark"
            }
          },
          {
            "com.linkedin.datajob.DataJobInfo": {
              "name": "csv at HdfsIn2HdfsOut1.java:27",
              "type": {
                "string": "sparkJob"
              },
              "customProperties": {
                "SQLQueryId": "4",
                "appName": "JavaHdfsIn2HdfsOut1",
                "description": "csv at HdfsIn2HdfsOut1.java:27",
                "queryPlan": "InsertIntoHadoopFsRelationCommand file:/opt/workspace/resources/data/JavaHdfsIn2HdfsOut1/out.csv, false, CSV, Map(path -> ../resources/data/JavaHdfsIn2HdfsOut1/out.csv), Overwrite, [a, b, c, d]\n+- Project [c1#11 AS a#32, c2#12 AS b#33, c1#27 AS c#34, c2#28 AS d#35]\n +- Join Inner, (id#10 = id#26)\n :- Filter isnotnull(id#10)\n : +- Relation[id#10,c1#11,c2#12] csv\n +- Filter isnotnull(id#26)\n +- Relation[id#26,c1#27,c2#28] csv\n"
              }
            }
          },
          {
            "com.linkedin.common.BrowsePaths": {
              "paths": [
                "/spark/javahdfsin2hdfsout1/queryexecid_4"
              ]
            }
          },
          {
            "com.linkedin.datajob.DataJobInputOutput": {
              "inputDatasets": [
                "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in1.csv,PROD)",
                "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/in2.csv,PROD)"
              ],
              "outputDatasets": [
                "urn:li:dataset:(urn:li:dataPlatform:hdfs,file:/opt/workspace/resources/data/JavaHdfsIn2HdfsOut1/out.csv,PROD)"
              ]
            }
          }
        ]
      }
    }
  }
}
```
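This golden file pins the DataFlow and DataJob snapshots the smoke test expects the lineage listener to emit for the `JavaHdfsIn2HdfsOut1` job. Comparing a captured run against it could look like this (a sketch; `actual.json` is a hypothetical dump of the emitted metadata, and jq is assumed to be installed):

```sh
# Sort keys on both sides so the diff reflects content, not ordering.
jq -S . golden_json/JavaHdfsIn2HdfsOut1.json > /tmp/expected.json
jq -S . actual.json > /tmp/actual.json
diff /tmp/expected.json /tmp/actual.json && echo "matches golden file"
```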