diff --git a/docker/compose/docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml b/docker/compose/docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml
new file mode 100644
index 0000000000000..857180cfbee20
--- /dev/null
+++ b/docker/compose/docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml
@@ -0,0 +1,259 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+version: "3.3"
+
+services:
+
+  namenode:
+    image: apachehudi/hudi-hadoop_2.8.4-namenode:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: namenode
+    container_name: namenode
+    environment:
+      - CLUSTER_NAME=hudi_hadoop284_hive232_spark244
+    ports:
+      - "50070:50070"
+      - "8020:8020"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    env_file:
+      - ./hadoop.env
+    healthcheck:
+      test: [ "CMD", "curl", "-f", "http://namenode:50070" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+
+  datanode1:
+    image: apachehudi/hudi-hadoop_2.8.4-datanode:linux-arm64-0.10.1
+    platform: linux/arm64
+    container_name: datanode1
+    hostname: datanode1
+    environment:
+      - CLUSTER_NAME=hudi_hadoop284_hive232_spark244
+    env_file:
+      - ./hadoop.env
+    ports:
+      - "50075:50075"
+      - "50010:50010"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    links:
+      - "namenode"
+      - "historyserver"
+    healthcheck:
+      test: [ "CMD", "curl", "-f", "http://datanode1:50075" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    depends_on:
+      - namenode
+
+  historyserver:
+    image: apachehudi/hudi-hadoop_2.8.4-history:linux-arm64-0.10.1
+    hostname: historyserver
+    container_name: historyserver
+    environment:
+      - CLUSTER_NAME=hudi_hadoop284_hive232_spark244
+    depends_on:
+      - "namenode"
+    links:
+      - "namenode"
+    ports:
+      - "58188:8188"
+    healthcheck:
+      test: [ "CMD", "curl", "-f", "http://historyserver:8188" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    env_file:
+      - ./hadoop.env
+    volumes:
+      - historyserver:/hadoop/yarn/timeline
+
+  hive-metastore-postgresql:
+    image: menorah84/hive-metastore-postgresql:2.3.0
+    platform: linux/arm64
+    environment:
+      - POSTGRES_HOST_AUTH_METHOD=trust
+    volumes:
+      - hive-metastore-postgresql:/var/lib/postgresql
+    hostname: hive-metastore-postgresql
+    container_name: hive-metastore-postgresql
+
+  hivemetastore:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: hivemetastore
+    container_name: hivemetastore
+    links:
+      - "hive-metastore-postgresql"
+      - "namenode"
+    env_file:
+      - ./hadoop.env
+    command: /opt/hive/bin/hive --service metastore
+    environment:
+      SERVICE_PRECONDITION: "namenode:50070 hive-metastore-postgresql:5432"
+    ports:
+      - "9083:9083"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    healthcheck:
+      test: [ "CMD", "nc", "-z", "hivemetastore", "9083" ]
+      interval: 30s
+      timeout: 10s
+      retries: 3
+    depends_on:
+      - "hive-metastore-postgresql"
+      - "namenode"
+
+  hiveserver:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: hiveserver
+    container_name: hiveserver
+    env_file:
+      - ./hadoop.env
+    environment:
+      SERVICE_PRECONDITION: "hivemetastore:9083"
+    ports:
+      - "10000:10000"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    depends_on:
+      - "hivemetastore"
+    links:
+      - "hivemetastore"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    volumes:
+      - ${HUDI_WS}:/var/hoodie/ws
+
+  sparkmaster:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkmaster_2.4.4:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: sparkmaster
+    container_name: sparkmaster
+    env_file:
+      - ./hadoop.env
+    ports:
+      - "8080:8080"
+      - "7077:7077"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    environment:
+      - INIT_DAEMON_STEP=setup_spark
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+
+  spark-worker-1:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkworker_2.4.4:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: spark-worker-1
+    container_name: spark-worker-1
+    env_file:
+      - ./hadoop.env
+    depends_on:
+      - sparkmaster
+    ports:
+      - "8081:8081"
+      # JVM debugging port (will be mapped to a random port on host)
+      - "5005"
+    environment:
+      - "SPARK_MASTER=spark://sparkmaster:7077"
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+
+  zookeeper:
+    image: 'arm64v8/zookeeper:3.4.12'
+    platform: linux/arm64
+    hostname: zookeeper
+    container_name: zookeeper
+    ports:
+      - "2181:2181"
+    environment:
+      - ALLOW_ANONYMOUS_LOGIN=yes
+
+  kafka:
+    image: 'wurstmeister/kafka:2.12-2.0.1'
+    platform: linux/arm64
+    hostname: kafkabroker
+    container_name: kafkabroker
+    ports:
+      - "9092:9092"
+    environment:
+      - KAFKA_ZOOKEEPER_CONNECT=zookeeper:2181
+      - ALLOW_PLAINTEXT_LISTENER=yes
+      - KAFKA_ADVERTISED_HOST_NAME=kafkabroker
+
+  adhoc-1:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: adhoc-1
+    container_name: adhoc-1
+    env_file:
+      - ./hadoop.env
+    depends_on:
+      - sparkmaster
+    ports:
+      - '4040:4040'
+      # JVM debugging port (mapped to 5006 on the host)
+      - "5006:5005"
+    environment:
+      - "SPARK_MASTER=spark://sparkmaster:7077"
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    volumes:
+      - ${HUDI_WS}:/var/hoodie/ws
+
+  adhoc-2:
+    image: apachehudi/hudi-hadoop_2.8.4-hive_2.3.3-sparkadhoc_2.4.4:linux-arm64-0.10.1
+    platform: linux/arm64
+    hostname: adhoc-2
+    container_name: adhoc-2
+    env_file:
+      - ./hadoop.env
+    ports:
+      # JVM debugging port (mapped to 5005 on the host)
+      - "5005:5005"
+    depends_on:
+      - sparkmaster
+    environment:
+      - "SPARK_MASTER=spark://sparkmaster:7077"
+    links:
+      - "hivemetastore"
+      - "hiveserver"
+      - "hive-metastore-postgresql"
+      - "namenode"
+    volumes:
+      - ${HUDI_WS}:/var/hoodie/ws
+
+volumes:
+  namenode:
+  historyserver:
+  hive-metastore-postgresql:
+
+networks:
+  default:
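The compose file above exposes a JVM debug port on every service: most publish container port 5005 with no host port, so Docker assigns a free host port on each `up` (only adhoc-1 and adhoc-2 pin 5006 and 5005). A minimal sketch of driving this file by hand and locating the assigned port — the container names come from the compose file above, the workspace path is a placeholder to substitute with your own checkout:

# Start the aarch64 stack directly (setup_demo.sh below does the same thing);
# HUDI_WS must point at the Hudi workspace root so hiveserver/adhoc-* can mount it.
HUDI_WS=/path/to/hudi docker-compose -f compose/docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml up -d

# Look up the randomly assigned host port behind the namenode's debug port 5005.
docker port namenode 5005

# Check a healthcheck defined above until the service reports "healthy".
docker inspect --format '{{.State.Health.Status}}' namenode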
COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml" +fi # restart cluster -HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml down +HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/${COMPOSE_FILE_NAME} down if [ "$HUDI_DEMO_ENV" != "dev" ]; then echo "Pulling docker demo images ..." - HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml pull + HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/${COMPOSE_FILE_NAME} pull fi sleep 5 -HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml up -d +HUDI_WS=${WS_ROOT} docker-compose --verbose -f ${SCRIPT_PATH}/compose/${COMPOSE_FILE_NAME} up -d sleep 15 docker exec -it adhoc-1 /bin/bash /var/hoodie/ws/docker/demo/setup_demo_container.sh diff --git a/docker/stop_demo.sh b/docker/stop_demo.sh index 83b8a2c1ef5c0..32a0e70c37919 100755 --- a/docker/stop_demo.sh +++ b/docker/stop_demo.sh @@ -17,10 +17,15 @@ # limitations under the License. SCRIPT_PATH=$(cd `dirname $0`; pwd) +HUDI_DEMO_ENV=$1 # set up root directory WS_ROOT=`dirname $SCRIPT_PATH` +COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244.yml" +if [ "$HUDI_DEMO_ENV" = "--mac-aarch64" ]; then + COMPOSE_FILE_NAME="docker-compose_hadoop284_hive233_spark244_mac_aarch64.yml" +fi # shut down cluster -HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/docker-compose_hadoop284_hive233_spark244.yml down +HUDI_WS=${WS_ROOT} docker-compose -f ${SCRIPT_PATH}/compose/${COMPOSE_FILE_NAME} down # remove houst mount directory rm -rf /tmp/hadoop_data