From e234a937d7a6d7bbe71002fc634b6f54dc7c10ab Mon Sep 17 00:00:00 2001
From: karnas <xavier.deweerd@ensiie.eu>
Date: Mon, 20 May 2024 18:38:25 +0200
Subject: [PATCH] fixed hadoop paths in docker-compose and scripts

---
 {hadoop => docker-files/hadoop}/.env          |   2 +-
 .../hadoop}/docker-compose-que-hadoop.yml     |   2 +-
 .../hadoop/docker-compose-v3.yml              |  15 +-
 .../hadoop/hadoop-16goRAM.env                 |   0
 .../hadoop}/hadoop-8goRAM.env                 |   0
 .../hadoop}/hadoop-hive.env                   |   0
 {hadoop => docker-files/hadoop}/hadoop.env    |   0
 {hadoop => docker-files/hadoop}/init.sql      |   0
 .../hadoop}/overrides/namenode/entrypoint.sh  |   0
 .../overrides/nodemanagers/entrypoint.sh      |   0
 docker-files/hadoop/purge-hadoop-ensiie.sh    |   8 +
 docker-files/hadoop/start-hadoop-ensiie.sh    |  27 +++
 docker-files/hadoop/stop-hadoop-ensiie.sh     |   7 +
 download_dataset.sh                           |   6 +-
 files/dataset/.gitkeep                        |   0
 hadoop/docker-compose.yml                     | 196 ------------------
 hadoop/purge-hadoop-ensiie.sh                 |   6 -
 hadoop/start-hadoop-ensiie.sh                 |  21 --
 hadoop/stop-hadoop-ensiie.sh                  |   4 -
 19 files changed, 55 insertions(+), 239 deletions(-)
 rename {hadoop => docker-files/hadoop}/.env (86%)
 rename {hadoop => docker-files/hadoop}/docker-compose-que-hadoop.yml (96%)
 rename hadoop/docker-compose-ensiie-v3.yml => docker-files/hadoop/docker-compose-v3.yml (93%)
 rename hadoop/hadoop-16oRAM.env => docker-files/hadoop/hadoop-16goRAM.env (100%)
 rename {hadoop => docker-files/hadoop}/hadoop-8goRAM.env (100%)
 rename {hadoop => docker-files/hadoop}/hadoop-hive.env (100%)
 rename {hadoop => docker-files/hadoop}/hadoop.env (100%)
 rename {hadoop => docker-files/hadoop}/init.sql (100%)
 rename {hadoop => docker-files/hadoop}/overrides/namenode/entrypoint.sh (100%)
 rename {hadoop => docker-files/hadoop}/overrides/nodemanagers/entrypoint.sh (100%)
 create mode 100755 docker-files/hadoop/purge-hadoop-ensiie.sh
 create mode 100755 docker-files/hadoop/start-hadoop-ensiie.sh
 create mode 100755 docker-files/hadoop/stop-hadoop-ensiie.sh
 create mode 100644 files/dataset/.gitkeep
 delete mode 100644 hadoop/docker-compose.yml
 delete mode 100755 hadoop/purge-hadoop-ensiie.sh
 delete mode 100755 hadoop/start-hadoop-ensiie.sh
 delete mode 100755 hadoop/stop-hadoop-ensiie.sh

diff --git a/hadoop/.env b/docker-files/hadoop/.env
similarity index 86%
rename from hadoop/.env
rename to docker-files/hadoop/.env
index 192c1af..91f29f7 100644
--- a/hadoop/.env
+++ b/docker-files/hadoop/.env
@@ -1,4 +1,4 @@
-CLUSTER_NAME=ensiie
+CLUSTER_NAME=project
 ADMIN_NAME=centos
 ADMIN_PASSWORD=ensiie
 INSTALL_PYTHON=true # whether you want python or not (to run hadoop streaming)
diff --git a/hadoop/docker-compose-que-hadoop.yml b/docker-files/hadoop/docker-compose-que-hadoop.yml
similarity index 96%
rename from hadoop/docker-compose-que-hadoop.yml
rename to docker-files/hadoop/docker-compose-que-hadoop.yml
index 7c2e71e..65e4d24 100644
--- a/hadoop/docker-compose-que-hadoop.yml
+++ b/docker-files/hadoop/docker-compose-que-hadoop.yml
@@ -8,7 +8,7 @@ services:
     container_name: namenode
     volumes:
       - hadoop_namenode:/hadoop/dfs/name
-      - ~/ensiie/exo/files:/data/hdfs/formation
+      - ../../files:/data/hdfs/files
     environment:
       - CLUSTER_NAME=test
     env_file:
diff --git a/hadoop/docker-compose-ensiie-v3.yml b/docker-files/hadoop/docker-compose-v3.yml
similarity index 93%
rename from hadoop/docker-compose-ensiie-v3.yml
rename to docker-files/hadoop/docker-compose-v3.yml
index 3620de8..fab7536 100644
--- a/hadoop/docker-compose-ensiie-v3.yml
+++ b/docker-files/hadoop/docker-compose-v3.yml
@@ -1,3 +1,5 @@
+version: "3"
+
 services:
   namenode:
     image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
@@ -6,10 +8,9 @@ services:
     container_name: namenode
     volumes:
       - hadoop_namenode:/hadoop/dfs/name
-      - ~/ensiie/exo/files:/data/hdfs/formation
-      - ~/ensiie/exo/opensearch/bano-data:/data/hdfs/bano-data
+      - ../../files:/data/hdfs/files
     environment:
-      - CLUSTER_NAME=test
+      - CLUSTER_NAME=project
     env_file:
       - ./hadoop.env
     deploy:
@@ -60,7 +61,7 @@ services:
     env_file:
       - ./hadoop.env
     volumes:
-      - ~/ensiie/exo/files:/data/yarn/formation
+      - ../../files:/data/yarn/files
     deploy:
       mode: replicated
       replicas: 1
@@ -117,7 +118,7 @@ services:
     env_file:
       - ./hadoop-hive.env
     volumes:
-      - ~/ensiie/exo/files:/data/hive/formation
+      - ../../files:/data/hive/files
     environment:
       HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
       SERVICE_PRECONDITION: "hive-metastore:9083"
@@ -153,8 +154,8 @@ services:
     image: bde2020/spark-master:3.1.1-hadoop3.2
     container_name: spark-master
     volumes:
-      - ~/ensiie/exo/files:/data/spark/files
-      - ~/ensiie/exo/python:/data/spark/python
+      - ../../files:/data/spark/files
+      - ../../python:/data/spark/python
     ports:
       - "28083:8080"
       - "7077:7077"
diff --git a/hadoop/hadoop-16oRAM.env b/docker-files/hadoop/hadoop-16goRAM.env
similarity index 100%
rename from hadoop/hadoop-16oRAM.env
rename to docker-files/hadoop/hadoop-16goRAM.env
diff --git a/hadoop/hadoop-8goRAM.env b/docker-files/hadoop/hadoop-8goRAM.env
similarity index 100%
rename from hadoop/hadoop-8goRAM.env
rename to docker-files/hadoop/hadoop-8goRAM.env
diff --git a/hadoop/hadoop-hive.env b/docker-files/hadoop/hadoop-hive.env
similarity index 100%
rename from hadoop/hadoop-hive.env
rename to docker-files/hadoop/hadoop-hive.env
diff --git a/hadoop/hadoop.env b/docker-files/hadoop/hadoop.env
similarity index 100%
rename from hadoop/hadoop.env
rename to docker-files/hadoop/hadoop.env
diff --git a/hadoop/init.sql b/docker-files/hadoop/init.sql
similarity index 100%
rename from hadoop/init.sql
rename to docker-files/hadoop/init.sql
diff --git a/hadoop/overrides/namenode/entrypoint.sh b/docker-files/hadoop/overrides/namenode/entrypoint.sh
similarity index 100%
rename from hadoop/overrides/namenode/entrypoint.sh
rename to docker-files/hadoop/overrides/namenode/entrypoint.sh
diff --git a/hadoop/overrides/nodemanagers/entrypoint.sh b/docker-files/hadoop/overrides/nodemanagers/entrypoint.sh
similarity index 100%
rename from hadoop/overrides/nodemanagers/entrypoint.sh
rename to docker-files/hadoop/overrides/nodemanagers/entrypoint.sh
diff --git a/docker-files/hadoop/purge-hadoop-ensiie.sh b/docker-files/hadoop/purge-hadoop-ensiie.sh
new file mode 100755
index 0000000..cfdefdf
--- /dev/null
+++ b/docker-files/hadoop/purge-hadoop-ensiie.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+SCRIPT_PATH=$(dirname $(realpath $0))
+DOCKER_COMPOSE_FILE="docker-compose-v3.yml"
+DOCKER_COMPOSE_PATH=$SCRIPT_PATH/$DOCKER_COMPOSE_FILE
+
+docker compose -f $DOCKER_COMPOSE_PATH down -v
+docker volume list | grep hadoop | awk '{ print $2 }' | xargs docker volume rm --force
diff --git a/docker-files/hadoop/start-hadoop-ensiie.sh b/docker-files/hadoop/start-hadoop-ensiie.sh
new file mode 100755
index 0000000..24d95d3
--- /dev/null
+++ b/docker-files/hadoop/start-hadoop-ensiie.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# 2024 change: check the available RAM and use the matching hadoop.env file
+# 2024-05-20 Karnas: added the path variables
+
+SCRIPT_PATH=$(dirname $(realpath $0))
+DOCKER_COMPOSE_FILE="docker-compose-v3.yml"
+DOCKER_COMPOSE_PATH=$SCRIPT_PATH/$DOCKER_COMPOSE_FILE
+
+if [ $(free --giga | grep "^Mem" | awk '{ print $2 }') -lt 8 ]
+then
+  cp hadoop-8goRAM.env hadoop.env
+elif [ $(free --giga | grep "^Mem" | awk '{ print $2 }') -lt 16 ]
+then
+  cp hadoop-16goRAM.env hadoop.env
+fi
+
+docker network create hbase 2>/dev/null
+docker compose -f $DOCKER_COMPOSE_PATH up -d namenode hive-metastore-postgresql
+docker compose -f $DOCKER_COMPOSE_PATH up -d datanode1 datanode2
+docker compose -f $DOCKER_COMPOSE_PATH up -d resourcemanager nodemanager1 nodemanager2 historyserver
+docker compose -f $DOCKER_COMPOSE_PATH up -d hive-server hive-metastore
+docker compose -f $DOCKER_COMPOSE_PATH up -d spark-master spark-worker-1 spark-worker-2
+
+my_ip=`ip route get 1 | awk '{ for (i=1;i<=NF;i++) { if ( $i == "src" ) { print $(i+1) ; exit } } }'`
+echo "Namenode: (HDFS Filebrowser) http://${my_ip}:9870"
+echo "Spark-master: http://${my_ip}:28083"
+echo "History Server: http://${my_ip}:28188"
diff --git a/docker-files/hadoop/stop-hadoop-ensiie.sh b/docker-files/hadoop/stop-hadoop-ensiie.sh
new file mode 100755
index 0000000..a42b0e6
--- /dev/null
+++ b/docker-files/hadoop/stop-hadoop-ensiie.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+SCRIPT_PATH=$(dirname $(realpath $0))
+DOCKER_COMPOSE_FILE="docker-compose-v3.yml"
+DOCKER_COMPOSE_PATH=$SCRIPT_PATH/$DOCKER_COMPOSE_FILE
+
+docker compose -f $DOCKER_COMPOSE_PATH stop
diff --git a/download_dataset.sh b/download_dataset.sh
index 2c1c5d1..ac74abd 100755
--- a/download_dataset.sh
+++ b/download_dataset.sh
@@ -1,3 +1,3 @@
-wget "https://storage.googleapis.com/kaggle-data-sets/3384322/6207733/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240519%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240519T145233Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=1ac8f9216a239f62f3aa19666ce2b09c188d1d34d5199cf254a3677292e1b893eb10d0e2280baf0cbfb1f21d38a2b99f55e3e080beaa4a376d07326750503e15f35e123e2efd21c2c300a82c5bc06c787528bbe5e0d6b7be5a31bc0e6fb458b9a59456233fb852c658827d1dd547ca683890de508dd88940526568357bdd28611409ed5db0e479abf7b6f98855cd942d0cebfae55d463f288640c594bce7e11cd9f460e941cec80a7713e7faa54e69e3e9c4e9e3cd87b11bc35aa74439f96f80c2d592c6a97519353ca099d62e7276bec190a99e9327aee45ab9531d86f8f6be65fb3931148dbd4342712849494a71adcfe0b4eb54051582393fe8a98ebf68bc" -c -O 'dataset.zip'
-mkdir dataset
-unzip dataset.zip -d dataset
+mkdir -p files/dataset
+wget "https://storage.googleapis.com/kaggle-data-sets/3384322/6207733/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240519%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240519T145233Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=1ac8f9216a239f62f3aa19666ce2b09c188d1d34d5199cf254a3677292e1b893eb10d0e2280baf0cbfb1f21d38a2b99f55e3e080beaa4a376d07326750503e15f35e123e2efd21c2c300a82c5bc06c787528bbe5e0d6b7be5a31bc0e6fb458b9a59456233fb852c658827d1dd547ca683890de508dd88940526568357bdd28611409ed5db0e479abf7b6f98855cd942d0cebfae55d463f288640c594bce7e11cd9f460e941cec80a7713e7faa54e69e3e9c4e9e3cd87b11bc35aa74439f96f80c2d592c6a97519353ca099d62e7276bec190a99e9327aee45ab9531d86f8f6be65fb3931148dbd4342712849494a71adcfe0b4eb54051582393fe8a98ebf68bc" -c -O 'files/dataset.zip'
+unzip files/dataset.zip -d files/dataset
diff --git a/files/dataset/.gitkeep b/files/dataset/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/hadoop/docker-compose.yml b/hadoop/docker-compose.yml
deleted file mode 100644
index 3620de8..0000000
--- a/hadoop/docker-compose.yml
+++ /dev/null
@@ -1,196 +0,0 @@
-services:
-  namenode:
-    image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
-    networks:
-      - hadoop
-    container_name: namenode
-    volumes:
-      - hadoop_namenode:/hadoop/dfs/name
-      - ~/ensiie/exo/files:/data/hdfs/formation
-      - ~/ensiie/exo/opensearch/bano-data:/data/hdfs/bano-data
-    environment:
-      - CLUSTER_NAME=test
-    env_file:
-      - ./hadoop.env
-    deploy:
-      mode: replicated
-      replicas: 1
-      placement:
-        constraints:
-          - node.hostname == akswnc4.aksw.uni-leipzig.de
-    ports:
-      - 9870:9870
-      - 9000:9000
-
-  datanode1:
-    image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
-    container_name: datanode1
-    networks:
-      - hadoop
-    volumes:
-      - hadoop_datanode_1:/hadoop/dfs/data
-    env_file:
-      - ./hadoop.env
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870"
-    deploy:
-      mode: global
-  datanode2:
-    image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
-    container_name: datanode2
-    networks:
-      - hadoop
-    volumes:
-      - hadoop_datanode_2:/hadoop/dfs/data
-    env_file:
-      - ./hadoop.env
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870"
-    deploy:
-      mode: global
-
-
-  resourcemanager:
-    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.2.1-java8
-    container_name: resourcemanager
-    networks:
-      - hadoop
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 datanode2:9864"
-    env_file:
-      - ./hadoop.env
-    volumes:
-      - ~/ensiie/exo/files:/data/yarn/formation
-    deploy:
-      mode: replicated
-      replicas: 1
-      placement:
-        constraints:
-          - node.hostname == akswnc4.aksw.uni-leipzig.de
-    healthcheck:
-      disable: true
-  nodemanager1:
-    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.2.1-java8
-    container_name: nodemanager1
-    networks:
-      - hadoop
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 resourcemanager:8088"
-    env_file:
-      - ./hadoop.env
-    deploy:
-      mode: global
-  nodemanager2:
-    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.2.1-java8
-    container_name: nodemanager2
-    networks:
-      - hadoop
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870 datanode2:9864 resourcemanager:8088"
-    env_file:
-      - ./hadoop.env
-    deploy:
-      mode: global
-  historyserver:
-    image: bde2020/hadoop-historyserver:2.0.0-hadoop3.2.1-java8
-    container_name: historyserver
-    networks:
-      - hadoop
-    volumes:
-      - hadoop_historyserver:/hadoop/yarn/timeline
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 datanode2:9864 resourcemanager:8088"
-    env_file:
-      - ./hadoop.env
-    deploy:
-      mode: replicated
-      replicas: 1
-      placement:
-        constraints:
-          - node.hostname == akswnc4.aksw.uni-leipzig.de
-    ports:
-      - 28188:8188
-
-  hive-server:
-    image: bde2020/hive:2.3.2-postgresql-metastore
-    container_name: hive-server
-    env_file:
-      - ./hadoop-hive.env
-    volumes:
-      - ~/ensiie/exo/files:/data/hive/formation
-    environment:
-      HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
-      SERVICE_PRECONDITION: "hive-metastore:9083"
-    ports:
-      - "10000:10000"
-    networks:
-      - hadoop
-
-  hive-metastore:
-    image: bde2020/hive:2.3.2-postgresql-metastore
-    container_name: hive-metastore
-    env_file:
-      - ./hadoop-hive.env
-    command: /opt/hive/bin/hive --service metastore
-    environment:
-      SERVICE_PRECONDITION: "namenode:9000 namenode:9870 datanode1:9864 datanode2:9864 hive-metastore-postgresql:5432"
-    ports:
-      - "9083:9083"
-    networks:
-      - hadoop
-
-  hive-metastore-postgresql:
-    image: bde2020/hive-metastore-postgresql:2.3.0
-    container_name: hive-metastore-postgresql
-    env_file:
-      - ./hadoop-hive.env
-    networks:
-      - hadoop
-    ports:
-      - "5432:5432"
-
-  spark-master:
-    image: bde2020/spark-master:3.1.1-hadoop3.2
-    container_name: spark-master
-    volumes:
-      - ~/ensiie/exo/files:/data/spark/files
-      - ~/ensiie/exo/python:/data/spark/python
-    ports:
-      - "28083:8080"
-      - "7077:7077"
-    environment:
-      - INIT_DAEMON_STEP=setup_spark
-    networks:
-      - hadoop
-  spark-worker-1:
-    image: bde2020/spark-worker:3.1.1-hadoop3.2
-    container_name: spark-worker-1
-    depends_on:
-      - spark-master
-    ports:
-      - "28081:8081"
-    environment:
-      - "SPARK_MASTER=spark://spark-master:7077"
-    networks:
-      - hadoop
-  spark-worker-2:
-    image: bde2020/spark-worker:3.1.1-hadoop3.2
-    container_name: spark-worker-2
-    depends_on:
-      - spark-master
-    ports:
-      - "28082:8081"
-    environment:
-      - "SPARK_MASTER=spark://spark-master:7077"
-    networks:
-      - hadoop
-
-volumes:
-  hadoop_datanode_1:
-  hadoop_datanode_2:
-  hadoop_namenode:
-  hadoop_historyserver:
-
-networks:
-  hadoop:
-
diff --git a/hadoop/purge-hadoop-ensiie.sh b/hadoop/purge-hadoop-ensiie.sh
deleted file mode 100755
index b2c9986..0000000
--- a/hadoop/purge-hadoop-ensiie.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-#
-#
-
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml down -v
-docker volume list | grep hadoop | awk '{ print $2 }' | xargs docker volume rm --force
diff --git a/hadoop/start-hadoop-ensiie.sh b/hadoop/start-hadoop-ensiie.sh
deleted file mode 100755
index 6a9da5b..0000000
--- a/hadoop/start-hadoop-ensiie.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#
-#
-# 2024 change: check the available RAM and use the matching hadoop.env file
-#
-
-if [ $(free --giga | grep "^Mem" | awk '{ print $2 }') -lt 8 ]
-then cp hadoop-8goRAM.env hadoop.env
-fi
-
-docker network create hbase 2>/dev/null
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml up -d namenode hive-metastore-postgresql
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml up -d datanode1 datanode2
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml up -d resourcemanager nodemanager1 nodemanager2 historyserver
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml up -d hive-server hive-metastore
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml up -d spark-master spark-worker-1 spark-worker-2
-
-my_ip=`ip route get 1 | awk '{ for (i=1;i<=NF;i++) { if ( $i == "src" ) { print $(i+1) ; exit } } }'`
-echo "Namenode: (HDFS Filebrowser) http://${my_ip}:9870"
-echo "Spark-master: http://${my_ip}:28083"
-echo "History Server: http://${my_ip}:28188"
diff --git a/hadoop/stop-hadoop-ensiie.sh b/hadoop/stop-hadoop-ensiie.sh
deleted file mode 100755
index 1dab2a9..0000000
--- a/hadoop/stop-hadoop-ensiie.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-#
-#
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml stop
--
GitLab