Skip to content

Commit

Permalink
Merge pull request datahub-project#9 from dpoe/DPOE-22788-deploy-datahub
Browse files Browse the repository at this point in the history
[DPOE-22788] deploy datahub in AWS
  • Loading branch information
Jeff Xu authored and GitHub Enterprise committed Mar 25, 2024
2 parents a30c15d + 24e4dd8 commit cd35be2
Show file tree
Hide file tree
Showing 2 changed files with 257 additions and 0 deletions.
41 changes: 41 additions & 0 deletions goku/aws/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# All targets in this file are commands, not files.
# (Fixed: the list previously ended with a dangling "\" that continued the
# logical line into the following blank line.)
.PHONY: check-artifactory-credential artifactory-login \
	start-datahub stop-datahub start-datahub-v0-13-0

# Artifactory registry hosting the DataHub images.
# NOTE(review): intentionally commented out, so DOCKER_REGISTRY_URL must be
# supplied by the environment; uncomment to pin a default registry.
#DOCKER_REGISTRY_URL=docker-dev-artifactory.workday.com

# DataHub release tag used by the versioned start target.
DATAHUB_V_0_13_0 := v0.13.0


# Default entry point: delegates to whichever version target is current.
start-datahub: start-datahub-v0-13-0
	@echo ""

# Tear down the "datahub" compose project (containers and default network).
stop-datahub:
	docker-compose -f datahub-rds-opensearch.yml -p datahub down

# Launch the pinned DataHub stack against AWS RDS/OpenSearch.
# The export chain and the compose invocation are joined with "\" so they run
# in ONE shell: the exported variables feed docker-compose's ${...}
# interpolation inside datahub-rds-opensearch.yml.
# (Fixed: the banner now echoes $(DATAHUB_V_0_13_0) instead of a hardcoded
# copy of the version string — same output, single source of truth.)
start-datahub-v0-13-0: artifactory-login
	@echo "Launching datahub $(DATAHUB_V_0_13_0) ..."
	export DATAHUB_VERSION=${DATAHUB_V_0_13_0}; \
	export DATAHUB_FRONTEND_IMAGE=${DOCKER_REGISTRY_URL}/linkedin/datahub-frontend-react; \
	export DATAHUB_GMS_IMAGE=${DOCKER_REGISTRY_URL}/linkedin/datahub-gms; \
	export DATAHUB_UPGRADE_IMAGE=${DOCKER_REGISTRY_URL}/acryldata/datahub-upgrade; \
	export DATAHUB_ELASTIC_SETUP_IMAGE=${DOCKER_REGISTRY_URL}/linkedin/datahub-elasticsearch-setup; \
	export DATAHUB_KAFKA_SETUP_IMAGE=${DOCKER_REGISTRY_URL}/linkedin/datahub-kafka-setup; \
	export DATAHUB_MYSQL_SETUP_IMAGE=${DOCKER_REGISTRY_URL}/acryldata/datahub-mysql-setup; \
	export KAFKA_IMAGE=${DOCKER_REGISTRY_URL}/confluentinc/cp-kafka; \
	export KAFKA_VERSION=7.4.0; \
	export SCHEMA_REGISTRY_IMAGE=${DOCKER_REGISTRY_URL}/confluentinc/cp-schema-registry; \
	export SCHEMA_REGISTRY_VERSION=7.5.3; \
	export ZOOKEEPER_IMAGE=${DOCKER_REGISTRY_URL}/confluentinc/cp-zookeeper; \
	export ZOOKEEPER_VERSION=7.4.0; \
	docker-compose -f datahub-rds-opensearch.yml -p datahub up -d
	@echo
	@echo "Go to http://localhost:9002/ ..."
	@echo


# Fail fast unless both Artifactory credentials are set (non-empty) in the
# environment. (Fixed: the binary `test A -a B` form is marked obsolescent by
# POSIX and is ambiguous for some operand values; two explicit -n tests joined
# with && are equivalent for the intended non-empty check.)
check-artifactory-credential:
	@test -n "$(AF_USER)" && test -n "$(AF_PASSWORD)" \
		|| (echo 'AF_USER and AF_PASSWORD must be defined' && exit 1)

# Authenticate docker against the Artifactory registry. The password is piped
# on stdin (--password-stdin) so it never appears in the process list.
# Variable references normalized to the $(VAR) form used elsewhere in the echo
# line; $(VAR) and ${VAR} are interchangeable in make.
artifactory-login: check-artifactory-credential
	@echo "Login to $(DOCKER_REGISTRY_URL) ..."
	@echo $(AF_PASSWORD) | docker login $(DOCKER_REGISTRY_URL) --username $(AF_USER) --password-stdin
216 changes: 216 additions & 0 deletions goku/aws/datahub-rds-opensearch.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
# Docker Compose stack: DataHub backed by externally-managed MySQL (RDS) and
# Elasticsearch/OpenSearch endpoints supplied via environment variables; only
# Kafka, ZooKeeper and the schema registry run locally in this file.
networks:
  default:
    name: datahub_network
services:
  # Single-node Kafka broker used by DataHub's event streams.
  broker:
    depends_on:
      zookeeper:
        condition: service_healthy
    environment:
      - KAFKA_BROKER_ID=1
      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
      - KAFKA_LISTENER_SECURITY_PROTOCOL_MAP=PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      # 29092 for in-network clients (other services), 9092 mapped to the host.
      - KAFKA_ADVERTISED_LISTENERS=PLAINTEXT://broker:29092,PLAINTEXT_HOST://localhost:9092
      # Replication factor 1: single-broker setup.
      - KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR=1
      - KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS=0
      - KAFKA_HEAP_OPTS=-Xms256m -Xmx256m
      - KAFKA_CONFLUENT_SUPPORT_METRICS_ENABLE=false
      # 5242880 = 5 MiB max message size.
      - KAFKA_MESSAGE_MAX_BYTES=5242880
      - KAFKA_MAX_MESSAGE_BYTES=5242880
    healthcheck:
      interval: 1s
      retries: 5
      start_period: 60s
      # "$$" defers ${...} expansion to the container shell, not compose.
      test: nc -z broker $${DATAHUB_KAFKA_BROKER_PORT:-9092}
      timeout: 5s
    hostname: broker
    image: ${KAFKA_IMAGE:-confluentinc/cp-kafka}:${KAFKA_VERSION:-7.4.0}
    ports:
      - ${DATAHUB_MAPPED_KAFKA_BROKER_PORT:-9092}:9092
    volumes:
      - broker:/var/lib/kafka/data/
  # DataHub web UI; talks to GMS over HTTP and writes usage events to Kafka.
  datahub-frontend-react:
    depends_on:
      datahub-gms:
        condition: service_healthy
    environment:
      - DATAHUB_GMS_HOST=datahub-gms
      - DATAHUB_GMS_PORT=8080
      # Session-signing secret; must come from the host environment.
      - DATAHUB_SECRET=${DATAHUB_SECRET}
      - DATAHUB_APP_VERSION=1.0
      - DATAHUB_PLAY_MEM_BUFFER_SIZE=10MB
      - JAVA_OPTS=-Xms1g -Xmx3g -Dhttp.port=9002 -Dconfig.file=datahub-frontend/conf/application.conf -Djava.security.auth.login.config=datahub-frontend/conf/jaas.conf -Dlogback.configurationFile=datahub-frontend/conf/logback.xml -Dlogback.debug=false -Dpidfile.path=/dev/null
      - KAFKA_BOOTSTRAP_SERVER=broker:29092
      - DATAHUB_TRACKING_TOPIC=DataHubUsageEvent_v1
      # External search endpoint over TLS on 443 (credentials from host env).
      - ELASTICSEARCH_HOST=${ELASTICSEARCH_HOST}
      - ELASTICSEARCH_PORT=443
      - ELASTICSEARCH_USE_SSL=true
      - ELASTICSEARCH_USERNAME=${ELASTICSEARCH_USERNAME}
      - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD}
    hostname: datahub-frontend-react
    image: ${DATAHUB_FRONTEND_IMAGE:-linkedin/datahub-frontend-react}:${DATAHUB_VERSION:-head}
    ports:
      - ${DATAHUB_MAPPED_FRONTEND_PORT:-9002}:9002
    volumes:
      - ${HOME}/.datahub/plugins:/etc/datahub/plugins
  # DataHub metadata service (GMS): the core API server. Waits for the
  # one-shot upgrade job to finish before starting.
  datahub-gms:
    depends_on:
      datahub-upgrade:
        condition: service_completed_successfully
    environment:
      - DATAHUB_SERVER_TYPE=${DATAHUB_SERVER_TYPE:-quickstart}
      - DATAHUB_TELEMETRY_ENABLED=${DATAHUB_TELEMETRY_ENABLED:-false}
      - DATAHUB_UPGRADE_HISTORY_KAFKA_CONSUMER_GROUP_ID=generic-duhe-consumer-job-client-gms
      # Primary store: external MySQL (RDS) reached over TLS-capable JDBC.
      - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
      - EBEAN_DATASOURCE_HOST=${MYSQL_HOST}:3306
      - EBEAN_DATASOURCE_PASSWORD=${MYSQL_PASSWORD}
      - EBEAN_DATASOURCE_URL=jdbc:mysql://${MYSQL_HOST}:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
      - EBEAN_DATASOURCE_USERNAME=${MYSQL_USERNAME}
      # External search endpoint over TLS on 443.
      - ELASTICSEARCH_HOST=${ELASTICSEARCH_HOST}
      - ELASTICSEARCH_PORT=443
      - ELASTICSEARCH_USE_SSL=true
      - ELASTICSEARCH_USERNAME=${ELASTICSEARCH_USERNAME}
      - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD}
      - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
      - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
      - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
      - ENTITY_SERVICE_ENABLE_RETENTION=true
      - ES_BULK_REFRESH_POLICY=WAIT_UNTIL
      - GRAPH_SERVICE_DIFF_MODE_ENABLED=true
      # Graph queries are served from the search index (no neo4j here).
      - GRAPH_SERVICE_IMPL=elasticsearch
      - JAVA_OPTS=-Xms2g -Xmx4g
      - KAFKA_BOOTSTRAP_SERVER=broker:29092
      - KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR=${KAFKA_CONSUMER_STOP_ON_DESERIALIZATION_ERROR:-true}
      - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
      # MAE/MCE/PE consumers run embedded in GMS (no standalone consumers).
      - MAE_CONSUMER_ENABLED=true
      - MCE_CONSUMER_ENABLED=true
      - PE_CONSUMER_ENABLED=true
      - UI_INGESTION_ENABLED=true
    healthcheck:
      interval: 1s
      retries: 3
      start_period: 90s
      # Single "$": interpolated by compose from the HOST env, default 8080.
      test: curl -sS --fail http://datahub-gms:${DATAHUB_GMS_PORT:-8080}/health
      timeout: 5s
    hostname: datahub-gms
    image: ${DATAHUB_GMS_IMAGE:-linkedin/datahub-gms}:${DATAHUB_VERSION:-head}
    ports:
      - ${DATAHUB_MAPPED_GMS_PORT:-8080}:8080
    volumes:
      - ${HOME}/.datahub/plugins:/etc/datahub/plugins
  # One-shot job: runs "datahub-upgrade -u SystemUpdate" to migrate storage
  # and indices after the three setup jobs complete; GMS waits on its success.
  datahub-upgrade:
    command:
      - -u
      - SystemUpdate
    depends_on:
      elasticsearch-setup:
        condition: service_completed_successfully
      kafka-setup:
        condition: service_completed_successfully
      mysql-setup:
        condition: service_completed_successfully
    environment:
      # Same external MySQL/search endpoints as datahub-gms.
      - EBEAN_DATASOURCE_USERNAME=${MYSQL_USERNAME}
      - EBEAN_DATASOURCE_PASSWORD=${MYSQL_PASSWORD}
      - EBEAN_DATASOURCE_HOST=${MYSQL_HOST}:3306
      - EBEAN_DATASOURCE_URL=jdbc:mysql://${MYSQL_HOST}:3306/datahub?verifyServerCertificate=false&useSSL=true&useUnicode=yes&characterEncoding=UTF-8
      - EBEAN_DATASOURCE_DRIVER=com.mysql.jdbc.Driver
      - KAFKA_BOOTSTRAP_SERVER=broker:29092
      - KAFKA_SCHEMAREGISTRY_URL=http://schema-registry:8081
      - ELASTICSEARCH_HOST=${ELASTICSEARCH_HOST}
      - ELASTICSEARCH_PORT=443
      - ELASTICSEARCH_USE_SSL=true
      - ELASTICSEARCH_USERNAME=${ELASTICSEARCH_USERNAME}
      - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD}
      - ELASTICSEARCH_INDEX_BUILDER_MAPPINGS_REINDEX=true
      - ELASTICSEARCH_INDEX_BUILDER_SETTINGS_REINDEX=true
      - ELASTICSEARCH_BUILD_INDICES_CLONE_INDICES=false
      - GRAPH_SERVICE_IMPL=elasticsearch
      - DATAHUB_GMS_HOST=datahub-gms
      - DATAHUB_GMS_PORT=8080
      - ENTITY_REGISTRY_CONFIG_PATH=/datahub/datahub-gms/resources/entity-registry.yml
      - BACKFILL_BROWSE_PATHS_V2=true
      - REPROCESS_DEFAULT_BROWSE_PATHS_V2=false
    hostname: datahub-upgrade
    image: ${DATAHUB_UPGRADE_IMAGE:-acryldata/datahub-upgrade}:${DATAHUB_VERSION:-head}
    labels:
      # NOTE(review): the compose spec expects label values to be strings; a
      # bare YAML boolean may warn or be coerced depending on compose version
      # — confirm, or quote as "true".
      datahub_setup_job: true
  # One-shot job: creates DataHub's index templates on the external
  # AWS OpenSearch/Elasticsearch endpoint, then exits.
  elasticsearch-setup:
    environment:
      # Enables the AWS-managed-ES code path in the setup scripts.
      - USE_AWS_ELASTICSEARCH=true
      - ELASTICSEARCH_USERNAME=${ELASTICSEARCH_USERNAME}
      - ELASTICSEARCH_PASSWORD=${ELASTICSEARCH_PASSWORD}
      - ELASTICSEARCH_HOST=${ELASTICSEARCH_HOST}
      - ELASTICSEARCH_PORT=443
      - ELASTICSEARCH_PROTOCOL=https
      - ELASTICSEARCH_USE_SSL=true
    hostname: elasticsearch-setup
    image: ${DATAHUB_ELASTIC_SETUP_IMAGE:-linkedin/datahub-elasticsearch-setup}:${DATAHUB_VERSION:-head}
    labels:
      datahub_setup_job: true
  # One-shot job: creates DataHub's Kafka topics once the broker and schema
  # registry are healthy, then exits.
  kafka-setup:
    depends_on:
      broker:
        condition: service_healthy
      schema-registry:
        condition: service_healthy
    environment:
      - DATAHUB_PRECREATE_TOPICS=${DATAHUB_PRECREATE_TOPICS:-false}
      - KAFKA_BOOTSTRAP_SERVER=broker:29092
      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
      - USE_CONFLUENT_SCHEMA_REGISTRY=TRUE
    hostname: kafka-setup
    image: ${DATAHUB_KAFKA_SETUP_IMAGE:-linkedin/datahub-kafka-setup}:${DATAHUB_VERSION:-head}
    labels:
      datahub_setup_job: true
  # One-shot job: creates the "datahub" database/tables on the external
  # MySQL (RDS) instance, then exits.
  mysql-setup:
    environment:
      - MYSQL_HOST=${MYSQL_HOST}
      - MYSQL_PORT=3306
      - MYSQL_USERNAME=${MYSQL_USERNAME}
      - MYSQL_PASSWORD=${MYSQL_PASSWORD}
      - DATAHUB_DB_NAME=datahub
    hostname: mysql-setup
    image: ${DATAHUB_MYSQL_SETUP_IMAGE:-acryldata/datahub-mysql-setup}:${DATAHUB_VERSION:-head}
    labels:
      datahub_setup_job: true
  # Confluent schema registry used by GMS and the setup jobs.
  schema-registry:
    depends_on:
      broker:
        condition: service_healthy
    environment:
      # NOTE(review): advertised host name "schemaregistry" differs from the
      # container hostname "schema-registry" below — looks inherited from the
      # upstream quickstart; confirm it is intentional.
      - SCHEMA_REGISTRY_HOST_NAME=schemaregistry
      - SCHEMA_REGISTRY_KAFKASTORE_SECURITY_PROTOCOL=PLAINTEXT
      - SCHEMA_REGISTRY_KAFKASTORE_BOOTSTRAP_SERVERS=broker:29092
    healthcheck:
      interval: 1s
      retries: 3
      start_period: 60s
      # NOTE(review): single "$" here is interpolated by compose from the
      # host env (default 8081), unlike broker's "$$" which defers to the
      # container shell — inconsistent but both resolve to 8081 by default.
      test: nc -z schema-registry ${DATAHUB_SCHEMA_REGISTRY_PORT:-8081}
      timeout: 5s
    hostname: schema-registry
    image: ${SCHEMA_REGISTRY_IMAGE:-confluentinc/cp-schema-registry}:${SCHEMA_REGISTRY_VERSION:-7.4.0}
    ports:
      - ${DATAHUB_MAPPED_SCHEMA_REGISTRY_PORT:-8081}:8081
  # ZooKeeper backing the single Kafka broker.
  zookeeper:
    environment:
      - ZOOKEEPER_CLIENT_PORT=2181
      - ZOOKEEPER_TICK_TIME=2000
    healthcheck:
      interval: 5s
      retries: 3
      start_period: 30s
      # "srvr" four-letter command; "$$" defers expansion to the container shell.
      test: echo srvr | nc zookeeper $${DATAHUB_ZK_PORT:-2181}
      timeout: 5s
    hostname: zookeeper
    image: ${ZOOKEEPER_IMAGE:-confluentinc/cp-zookeeper}:${ZOOKEEPER_VERSION:-7.4.0}
    ports:
      - ${DATAHUB_MAPPED_ZK_PORT:-2181}:2181
    volumes:
      - zkdata:/var/lib/zookeeper/data
      - zklogs:/var/lib/zookeeper/log
# NOTE(review): "version" is obsolete/ignored (with a warning) under
# Compose v2, but the Makefile invokes legacy "docker-compose", which
# still reads it — keep until the CLI is migrated.
version: '3.9'
# Named volumes persisting Kafka and ZooKeeper state across restarts.
volumes:
  broker: null
  zkdata: null
  zklogs: null

0 comments on commit cd35be2

Please sign in to comment.