diff --git a/docker-files/hadoop/docker-compose-with-spark.yaml b/docker-files/hadoop/docker-compose-with-spark.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..4e2ba1b92ec99ee57b95f36383b9e36fcb079f07
--- /dev/null
+++ b/docker-files/hadoop/docker-compose-with-spark.yaml
@@ -0,0 +1,195 @@
+services:
+  namenode:
+    image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
+    networks:
+      - hadoop
+    container_name: namenode
+    volumes:
+      - hadoop_namenode:/hadoop/dfs/name
+      - ../../files:/data/hdfs/files
+    environment:
+      - CLUSTER_NAME=project
+    env_file:
+      - ./hadoop.env
+    deploy:
+      mode: replicated
+      replicas: 1
+      placement:
+        constraints:
+          - node.hostname == akswnc4.aksw.uni-leipzig.de
+    ports:
+      - 9870:9870
+      - 9000:9000
+
+  datanode1:
+    image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
+    container_name: datanode1
+    networks:
+      - hadoop
+    volumes:
+      - hadoop_datanode_1:/hadoop/dfs/data
+    env_file:
+      - ./hadoop.env
+    environment:
+      SERVICE_PRECONDITION: "namenode:9870"
+    deploy:
+      mode: global
+  datanode2:
+    image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
+    container_name: datanode2
+    networks:
+      - hadoop
+    volumes:
+      - hadoop_datanode_2:/hadoop/dfs/data
+    env_file:
+      - ./hadoop.env
+    environment:
+      SERVICE_PRECONDITION: "namenode:9870"
+    deploy:
+      mode: global
+
+
+  resourcemanager:
+    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.2.1-java8
+    container_name: resourcemanager
+    networks:
+      - hadoop
+    environment:
+      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 datanode2:9864"
+    env_file:
+      - ./hadoop.env
+    volumes:
+      - ../../files:/data/yarn/files
+    deploy:
+      mode: replicated
+      replicas: 1
+      placement:
+        constraints:
+          - node.hostname == akswnc4.aksw.uni-leipzig.de
+    healthcheck:
+      disable: true
+  nodemanager1:
+    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.2.1-java8
+    container_name: nodemanager1
+    networks:
+      - hadoop
+    environment:
+      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 resourcemanager:8088"
+    env_file:
+      - ./hadoop.env
+    deploy:
+      mode: global
+  nodemanager2:
+    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.2.1-java8
+    container_name: nodemanager2
+    networks:
+      - hadoop
+    environment:
+      SERVICE_PRECONDITION: "namenode:9870 datanode2:9864 resourcemanager:8088"
+    env_file:
+      - ./hadoop.env
+    deploy:
+      mode: global
+  historyserver:
+    image: bde2020/hadoop-historyserver:2.0.0-hadoop3.2.1-java8
+    container_name: historyserver
+    networks:
+      - hadoop
+    volumes:
+      - hadoop_historyserver:/hadoop/yarn/timeline
+    environment:
+      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 datanode2:9864 resourcemanager:8088"
+    env_file:
+      - ./hadoop.env
+    deploy:
+      mode: replicated
+      replicas: 1
+      placement:
+        constraints:
+          - node.hostname == akswnc4.aksw.uni-leipzig.de
+    ports:
+      - 28188:8188
+
+  hive-server:
+    image: bde2020/hive:2.3.2-postgresql-metastore
+    container_name: hive-server
+    env_file:
+      - ./hadoop-hive.env
+    volumes:
+      - ../../files:/data/hive/files
+    environment:
+      HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
+      SERVICE_PRECONDITION: "hive-metastore:9083"
+    ports:
+      - "10000:10000"
+    networks:
+      - hadoop
+
+  hive-metastore:
+    image: bde2020/hive:2.3.2-postgresql-metastore
+    container_name: hive-metastore
+    env_file:
+      - ./hadoop-hive.env
+    command: /opt/hive/bin/hive --service metastore
+    environment:
+      SERVICE_PRECONDITION: "namenode:9000 namenode:9870 datanode1:9864 datanode2:9864 hive-metastore-postgresql:5432"
+    ports:
+      - "9083:9083"
+    networks:
+      - hadoop
+
+  hive-metastore-postgresql:
+    image: bde2020/hive-metastore-postgresql:2.3.0
+    container_name: hive-metastore-postgresql
+    env_file:
+      - ./hadoop-hive.env
+    networks:
+      - hadoop
+    ports:
+      - "5432:5432"
+
+  spark-master:
+    image: bde2020/spark-master:3.1.1-hadoop3.2
+    container_name: spark-master
+    volumes:
+      - ../../files:/data/spark/files
+      - ../../python:/data/spark/python
+    ports:
+      - "28083:8080"
+      - "7077:7077"
+    environment:
+      - INIT_DAEMON_STEP=setup_spark
+    networks:
+      - hadoop
+  spark-worker-1:
+    image: bde2020/spark-worker:3.1.1-hadoop3.2
+    container_name: spark-worker-1
+    depends_on:
+      - spark-master
+    ports:
+      - "28081:8081"
+    environment:
+      - "SPARK_MASTER=spark://spark-master:7077"
+    networks:
+      - hadoop
+  spark-worker-2:
+    image: bde2020/spark-worker:3.1.1-hadoop3.2
+    container_name: spark-worker-2
+    depends_on:
+      - spark-master
+    ports:
+      - "28082:8081"
+    environment:
+      - "SPARK_MASTER=spark://spark-master:7077"
+    networks:
+      - hadoop
+
+volumes:
+  hadoop_datanode_1:
+  hadoop_datanode_2:
+  hadoop_namenode:
+  hadoop_historyserver:
+
+networks:
+  hadoop:
+
diff --git a/docker-files/hadoop/docker-compose.yml b/docker-files/hadoop/docker-compose.yml
index 4e2ba1b92ec99ee57b95f36383b9e36fcb079f07..4f77a64969002b7a9e50fa3dda17871931fe1dfb 100644
--- a/docker-files/hadoop/docker-compose.yml
+++ b/docker-files/hadoop/docker-compose.yml
@@ -48,7 +48,6 @@ services:
     deploy:
       mode: global
 
-
   resourcemanager:
     image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.2.1-java8
     container_name: resourcemanager
@@ -148,42 +147,6 @@ services:
     ports:
       - "5432:5432"
 
-  spark-master:
-    image: bde2020/spark-master:3.1.1-hadoop3.2
-    container_name: spark-master
-    volumes:
-      - ../../files:/data/spark/files
-      - ../../python:/data/spark/python
-    ports:
-      - "28083:8080"
-      - "7077:7077"
-    environment:
-      - INIT_DAEMON_STEP=setup_spark
-    networks:
-      - hadoop
-  spark-worker-1:
-    image: bde2020/spark-worker:3.1.1-hadoop3.2
-    container_name: spark-worker-1
-    depends_on:
-      - spark-master
-    ports:
-      - "28081:8081"
-    environment:
-      - "SPARK_MASTER=spark://spark-master:7077"
-    networks:
-      - hadoop
-  spark-worker-2:
-    image: bde2020/spark-worker:3.1.1-hadoop3.2
-    container_name: spark-worker-2
-    depends_on:
-      - spark-master
-    ports:
-      - "28082:8081"
-    environment:
-      - "SPARK_MASTER=spark://spark-master:7077"
-    networks:
-      - hadoop
-
 volumes:
   hadoop_datanode_1:
   hadoop_datanode_2:
diff --git a/docker-files/hadoop/start-hadoop.sh b/docker-files/hadoop/start-hadoop.sh
index f83ee127acb4dd173ea7ba539334b4a865bb7d12..9ad7c8df45c10139e2529b4b90f777d0de1a3821 100755
--- a/docker-files/hadoop/start-hadoop.sh
+++ b/docker-files/hadoop/start-hadoop.sh
@@ -19,7 +19,7 @@ docker compose -f $DOCKER_COMPOSE_PATH up -d namenode hive-metastore-postgresql
 docker compose -f $DOCKER_COMPOSE_PATH up -d datanode1 datanode2
 docker compose -f $DOCKER_COMPOSE_PATH up -d resourcemanager nodemanager1 nodemanager2 historyserver
 docker compose -f $DOCKER_COMPOSE_PATH up -d hive-server hive-metastore
-docker compose -f $DOCKER_COMPOSE_PATH up -d spark-master spark-worker-1 spark-worker-2
+#docker compose -f $DOCKER_COMPOSE_PATH up -d spark-master spark-worker-1 spark-worker-2
 
 my_ip=`ip route get 1 | awk '{ for (i=1;i<=NF;i++) { if ( $i == "src" ) { print $(i+1) ; exit } } }'`
 echo "Namenode: (HDFS Filebrowser) http://${my_ip}:9870"
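
Usage note (a sketch, not part of the patch): with the Spark line commented out, start-hadoop.sh now brings up only the Hadoop and Hive services; the Spark master and workers live solely in the new docker-compose-with-spark.yaml. The Spark-enabled stack could be started explicitly like this, assuming the commands run from the repository root and mirroring the staged ordering in start-hadoop.sh (in the bde2020 images, SERVICE_PRECONDITION is read by each container's entrypoint, which waits for the listed host:port pairs to respond before starting the daemon, so the stages come up in dependency order):

    docker compose -f docker-files/hadoop/docker-compose-with-spark.yaml up -d namenode hive-metastore-postgresql
    docker compose -f docker-files/hadoop/docker-compose-with-spark.yaml up -d datanode1 datanode2
    docker compose -f docker-files/hadoop/docker-compose-with-spark.yaml up -d resourcemanager nodemanager1 nodemanager2 historyserver
    docker compose -f docker-files/hadoop/docker-compose-with-spark.yaml up -d hive-server hive-metastore
    docker compose -f docker-files/hadoop/docker-compose-with-spark.yaml up -d spark-master spark-worker-1 spark-worker-2

Per the ports mappings above, the Spark master UI is then published on host port 28083 and the worker UIs on 28081 and 28082.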