diff --git a/hadoop/.env b/docker-files/hadoop/.env
similarity index 86%
rename from hadoop/.env
rename to docker-files/hadoop/.env
index 192c1afc92f7778a76f2a1d3cc349d99b907d029..91f29f7023d94f67ccbfde8657e749bcbaccf585 100644
--- a/hadoop/.env
+++ b/docker-files/hadoop/.env
@@ -1,4 +1,4 @@
-CLUSTER_NAME=ensiie
+CLUSTER_NAME=project
 ADMIN_NAME=centos
 ADMIN_PASSWORD=ensiie
 INSTALL_PYTHON=true # whether you want python or not (to run hadoop streaming)
diff --git a/hadoop/docker-compose-que-hadoop.yml b/docker-files/hadoop/docker-compose-que-hadoop.yml
similarity index 96%
rename from hadoop/docker-compose-que-hadoop.yml
rename to docker-files/hadoop/docker-compose-que-hadoop.yml
index 7c2e71e0da0d7b86c47f664c14d68c4c6510b07e..65e4d24d41ae0e12cd560926c2a589f801c1941c 100644
--- a/hadoop/docker-compose-que-hadoop.yml
+++ b/docker-files/hadoop/docker-compose-que-hadoop.yml
@@ -8,7 +8,7 @@ services:
     container_name: namenode
     volumes:
       - hadoop_namenode:/hadoop/dfs/name
-      - ~/ensiie/exo/files:/data/hdfs/formation
+      - ../../files:/data/hdfs/files
     environment:
       - CLUSTER_NAME=test
     env_file:
diff --git a/hadoop/docker-compose-ensiie-v3.yml b/docker-files/hadoop/docker-compose-v3.yml
similarity index 93%
rename from hadoop/docker-compose-ensiie-v3.yml
rename to docker-files/hadoop/docker-compose-v3.yml
index 3620de8a121005ae4612763313c2092e73977ce6..fab7536691c76b3b86cd8246ed47bec5646575fb 100644
--- a/hadoop/docker-compose-ensiie-v3.yml
+++ b/docker-files/hadoop/docker-compose-v3.yml
@@ -1,3 +1,5 @@
+version: "3"
+
 services:
   namenode:
     image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
@@ -6,10 +8,9 @@ services:
     container_name: namenode
     volumes:
       - hadoop_namenode:/hadoop/dfs/name
-      - ~/ensiie/exo/files:/data/hdfs/formation
-      - ~/ensiie/exo/opensearch/bano-data:/data/hdfs/bano-data
+      - ../../files:/data/hdfs/files
     environment:
-      - CLUSTER_NAME=test
+      - CLUSTER_NAME=project
     env_file:
       - ./hadoop.env
     deploy:
@@ -60,7 +61,7 @@ services:
     env_file:
       - ./hadoop.env
     volumes:
-      - ~/ensiie/exo/files:/data/yarn/formation
+      - ../../files:/data/yarn/files
     deploy:
       mode: replicated
       replicas: 1
@@ -117,7 +118,7 @@ services:
     env_file:
       - ./hadoop-hive.env
     volumes:
-      - ~/ensiie/exo/files:/data/hive/formation
+      - ../../files:/data/hive/files
     environment:
       HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
       SERVICE_PRECONDITION: "hive-metastore:9083"
@@ -153,8 +154,8 @@ services:
     image: bde2020/spark-master:3.1.1-hadoop3.2
     container_name: spark-master
     volumes:
-      - ~/ensiie/exo/files:/data/spark/files
-      - ~/ensiie/exo/python:/data/spark/python
+      - ../../files:/data/spark/files
+      - ../../python:/data/spark/python
     ports:
       - "28083:8080"
       - "7077:7077"
diff --git a/hadoop/hadoop-16oRAM.env b/docker-files/hadoop/hadoop-16goRAM.env
similarity index 100%
rename from hadoop/hadoop-16oRAM.env
rename to docker-files/hadoop/hadoop-16goRAM.env
diff --git a/hadoop/hadoop-8goRAM.env b/docker-files/hadoop/hadoop-8goRAM.env
similarity index 100%
rename from hadoop/hadoop-8goRAM.env
rename to docker-files/hadoop/hadoop-8goRAM.env
diff --git a/hadoop/hadoop-hive.env b/docker-files/hadoop/hadoop-hive.env
similarity index 100%
rename from hadoop/hadoop-hive.env
rename to docker-files/hadoop/hadoop-hive.env
diff --git a/hadoop/hadoop.env b/docker-files/hadoop/hadoop.env
similarity index 100%
rename from hadoop/hadoop.env
rename to docker-files/hadoop/hadoop.env
diff --git a/hadoop/init.sql b/docker-files/hadoop/init.sql
similarity index 100%
rename from hadoop/init.sql
rename to docker-files/hadoop/init.sql
diff --git a/hadoop/overrides/namenode/entrypoint.sh b/docker-files/hadoop/overrides/namenode/entrypoint.sh
similarity index 100%
rename from hadoop/overrides/namenode/entrypoint.sh
rename to docker-files/hadoop/overrides/namenode/entrypoint.sh
diff --git a/hadoop/overrides/nodemanagers/entrypoint.sh b/docker-files/hadoop/overrides/nodemanagers/entrypoint.sh
similarity index 100%
rename from hadoop/overrides/nodemanagers/entrypoint.sh
rename to docker-files/hadoop/overrides/nodemanagers/entrypoint.sh
diff --git a/docker-files/hadoop/purge-hadoop-ensiie.sh b/docker-files/hadoop/purge-hadoop-ensiie.sh
new file mode 100755
index 0000000000000000000000000000000000000000..cfdefdf862f1460a7a20f05b6caf5cd07fd89bd2
--- /dev/null
+++ b/docker-files/hadoop/purge-hadoop-ensiie.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+SCRIPT_PATH=$(dirname $(realpath $0))
+DOCKER_COMPOSE_FILE="docker-compose-v3.yml"
+DOCKER_COMPOSE_PATH=$SCRIPT_PATH/$DOCKER_COMPOSE_FILE
+
+docker compose -f $DOCKER_COMPOSE_PATH down -v
+docker volume list | grep hadoop | awk '{ print $2 }' | xargs docker volume rm --force
diff --git a/docker-files/hadoop/start-hadoop-ensiie.sh b/docker-files/hadoop/start-hadoop-ensiie.sh
new file mode 100755
index 0000000000000000000000000000000000000000..24d95d3c8bb5e107756e106d4b6a0bf7c60ed8f5
--- /dev/null
+++ b/docker-files/hadoop/start-hadoop-ensiie.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# 2024 change: check the available RAM and pick the matching hadoop.env file
+# 2024-05-20 Karnas: added the path variables
+
+SCRIPT_PATH=$(dirname $(realpath $0))
+DOCKER_COMPOSE_FILE="docker-compose-v3.yml"
+DOCKER_COMPOSE_PATH=$SCRIPT_PATH/$DOCKER_COMPOSE_FILE
+
+if [ $(free --giga | grep "^Mem" | awk '{ print $2 }') -lt 8 ]
+then
+  cp $SCRIPT_PATH/hadoop-8goRAM.env $SCRIPT_PATH/hadoop.env
+elif [ $(free --giga | grep "^Mem" | awk '{ print $2 }') -lt 16 ]
+then
+  cp $SCRIPT_PATH/hadoop-16goRAM.env $SCRIPT_PATH/hadoop.env
+fi
+
+docker network create hbase 2>/dev/null
+docker compose -f $DOCKER_COMPOSE_PATH up -d namenode hive-metastore-postgresql
+docker compose -f $DOCKER_COMPOSE_PATH up -d datanode1 datanode2
+docker compose -f $DOCKER_COMPOSE_PATH up -d resourcemanager nodemanager1 nodemanager2 historyserver
+docker compose -f $DOCKER_COMPOSE_PATH up -d hive-server hive-metastore
+docker compose -f $DOCKER_COMPOSE_PATH up -d spark-master spark-worker-1 spark-worker-2
+
+my_ip=`ip route get 1 | awk '{ for (i=1;i<=NF;i++) { if ( $i == "src" ) { print $(i+1) ; exit } } }'`
+echo "Namenode: (HDFS Filebrowser) http://${my_ip}:9870"
+echo "Spark-master: http://${my_ip}:28083"
+echo "History Server: http://${my_ip}:28188"
diff --git a/docker-files/hadoop/stop-hadoop-ensiie.sh b/docker-files/hadoop/stop-hadoop-ensiie.sh
new file mode 100755
index 0000000000000000000000000000000000000000..a42b0e66c3dd05a81ec0c0975312de8da20718c8
--- /dev/null
+++ b/docker-files/hadoop/stop-hadoop-ensiie.sh
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+SCRIPT_PATH=$(dirname $(realpath $0))
+DOCKER_COMPOSE_FILE="docker-compose-v3.yml"
+DOCKER_COMPOSE_PATH=$SCRIPT_PATH/$DOCKER_COMPOSE_FILE
+
+docker compose -f $DOCKER_COMPOSE_PATH stop
diff --git a/download_dataset.sh b/download_dataset.sh
index 2c1c5d1f149092831986227fc41927e9fa8042cd..ac74abdd9d945f5ec15fcad684534c68ed52c180 100755
--- a/download_dataset.sh
+++ b/download_dataset.sh
@@ -1,3 +1,3 @@
-wget "https://storage.googleapis.com/kaggle-data-sets/3384322/6207733/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240519%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240519T145233Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=1ac8f9216a239f62f3aa19666ce2b09c188d1d34d5199cf254a3677292e1b893eb10d0e2280baf0cbfb1f21d38a2b99f55e3e080beaa4a376d07326750503e15f35e123e2efd21c2c300a82c5bc06c787528bbe5e0d6b7be5a31bc0e6fb458b9a59456233fb852c658827d1dd547ca683890de508dd88940526568357bdd28611409ed5db0e479abf7b6f98855cd942d0cebfae55d463f288640c594bce7e11cd9f460e941cec80a7713e7faa54e69e3e9c4e9e3cd87b11bc35aa74439f96f80c2d592c6a97519353ca099d62e7276bec190a99e9327aee45ab9531d86f8f6be65fb3931148dbd4342712849494a71adcfe0b4eb54051582393fe8a98ebf68bc" -c -O 'dataset.zip'
-mkdir dataset
-unzip dataset.zip -d dataset
+mkdir -p files/dataset
+wget "https://storage.googleapis.com/kaggle-data-sets/3384322/6207733/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240519%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240519T145233Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=1ac8f9216a239f62f3aa19666ce2b09c188d1d34d5199cf254a3677292e1b893eb10d0e2280baf0cbfb1f21d38a2b99f55e3e080beaa4a376d07326750503e15f35e123e2efd21c2c300a82c5bc06c787528bbe5e0d6b7be5a31bc0e6fb458b9a59456233fb852c658827d1dd547ca683890de508dd88940526568357bdd28611409ed5db0e479abf7b6f98855cd942d0cebfae55d463f288640c594bce7e11cd9f460e941cec80a7713e7faa54e69e3e9c4e9e3cd87b11bc35aa74439f96f80c2d592c6a97519353ca099d62e7276bec190a99e9327aee45ab9531d86f8f6be65fb3931148dbd4342712849494a71adcfe0b4eb54051582393fe8a98ebf68bc" -c -O 'files/dataset.zip'
+unzip files/dataset.zip -d files/dataset
diff --git a/files/dataset/.gitkeep b/files/dataset/.gitkeep
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/hadoop/docker-compose.yml b/hadoop/docker-compose.yml
deleted file mode 100644
index 3620de8a121005ae4612763313c2092e73977ce6..0000000000000000000000000000000000000000
--- a/hadoop/docker-compose.yml
+++ /dev/null
@@ -1,196 +0,0 @@
-services:
-  namenode:
-    image: bde2020/hadoop-namenode:2.0.0-hadoop3.2.1-java8
-    networks:
-      - hadoop
-    container_name: namenode
-    volumes:
-      - hadoop_namenode:/hadoop/dfs/name
-      - ~/ensiie/exo/files:/data/hdfs/formation
-      - ~/ensiie/exo/opensearch/bano-data:/data/hdfs/bano-data
-    environment:
-      - CLUSTER_NAME=test
-    env_file:
-      - ./hadoop.env
-    deploy:
-      mode: replicated
-      replicas: 1
-      placement:
-        constraints:
-          - node.hostname == akswnc4.aksw.uni-leipzig.de
-    ports:
-      - 9870:9870
-      - 9000:9000
-
-  datanode1:
-    image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
-    container_name: datanode1
-    networks:
-      - hadoop
-    volumes:
-      - hadoop_datanode_1:/hadoop/dfs/data
-    env_file:
-      - ./hadoop.env
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870"
-    deploy:
-      mode: global
-  datanode2:
-    image: bde2020/hadoop-datanode:2.0.0-hadoop3.2.1-java8
-    container_name: datanode2
-    networks:
-      - hadoop
-    volumes:
-      - hadoop_datanode_2:/hadoop/dfs/data
-    env_file:
-      - ./hadoop.env
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870"
-    deploy:
-      mode: global
-
-
-  resourcemanager:
-    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.2.1-java8
-    container_name: resourcemanager
-    networks:
-      - hadoop
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 datanode2:9864"
-    env_file:
-      - ./hadoop.env
-    volumes:
-      - ~/ensiie/exo/files:/data/yarn/formation
-    deploy:
-      mode: replicated
-      replicas: 1
-      placement:
-        constraints:
-          - node.hostname == akswnc4.aksw.uni-leipzig.de
-    healthcheck:
-      disable: true
-  nodemanager1:
-    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.2.1-java8
-    container_name: nodemanager1
-    networks:
-      - hadoop
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 resourcemanager:8088"
-    env_file:
-      - ./hadoop.env
-    deploy:
-      mode: global
-  nodemanager2:
-    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.2.1-java8
-    container_name: nodemanager2
-    networks:
-      - hadoop
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870 datanode2:9864 resourcemanager:8088"
-    env_file:
-      - ./hadoop.env
-    deploy:
-      mode: global
-  historyserver:
-    image: bde2020/hadoop-historyserver:2.0.0-hadoop3.2.1-java8
-    container_name: historyserver
-    networks:
-      - hadoop
-    volumes:
-      - hadoop_historyserver:/hadoop/yarn/timeline
-    environment:
-      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 datanode2:9864 resourcemanager:8088"
-    env_file:
-      - ./hadoop.env
-    deploy:
-      mode: replicated
-      replicas: 1
-      placement:
-        constraints:
-          - node.hostname == akswnc4.aksw.uni-leipzig.de
-    ports:
-      - 28188:8188
-
-  hive-server:
-    image: bde2020/hive:2.3.2-postgresql-metastore
-    container_name: hive-server
-    env_file:
-      - ./hadoop-hive.env
-    volumes:
-      - ~/ensiie/exo/files:/data/hive/formation
-    environment:
-      HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
-      SERVICE_PRECONDITION: "hive-metastore:9083"
-    ports:
-      - "10000:10000"
-    networks:
-      - hadoop
-
-  hive-metastore:
-    image: bde2020/hive:2.3.2-postgresql-metastore
-    container_name: hive-metastore
-    env_file:
-      - ./hadoop-hive.env
-    command: /opt/hive/bin/hive --service metastore
-    environment:
-      SERVICE_PRECONDITION: "namenode:9000 namenode:9870 datanode1:9864 datanode2:9864 hive-metastore-postgresql:5432"
-    ports:
-      - "9083:9083"
-    networks:
-      - hadoop
-
-  hive-metastore-postgresql:
-    image: bde2020/hive-metastore-postgresql:2.3.0
-    container_name: hive-metastore-postgresql
-    env_file:
-      - ./hadoop-hive.env
-    networks:
-      - hadoop
-    ports:
-      - "5432:5432"
-
-  spark-master:
-    image: bde2020/spark-master:3.1.1-hadoop3.2
-    container_name: spark-master
-    volumes:
-      - ~/ensiie/exo/files:/data/spark/files
-      - ~/ensiie/exo/python:/data/spark/python
-    ports:
-      - "28083:8080"
-      - "7077:7077"
-    environment:
-      - INIT_DAEMON_STEP=setup_spark
-    networks:
-      - hadoop
-  spark-worker-1:
-    image: bde2020/spark-worker:3.1.1-hadoop3.2
-    container_name: spark-worker-1
-    depends_on:
-      - spark-master
-    ports:
-      - "28081:8081"
-    environment:
-      - "SPARK_MASTER=spark://spark-master:7077"
-    networks:
-      - hadoop
-  spark-worker-2:
-    image: bde2020/spark-worker:3.1.1-hadoop3.2
-    container_name: spark-worker-2
-    depends_on:
-      - spark-master
-    ports:
-      - "28082:8081"
-    environment:
-      - "SPARK_MASTER=spark://spark-master:7077"
-    networks:
-      - hadoop
-
-volumes:
-  hadoop_datanode_1:
-  hadoop_datanode_2:
-  hadoop_namenode:
-  hadoop_historyserver:
-
-networks:
-  hadoop:
-
diff --git a/hadoop/purge-hadoop-ensiie.sh b/hadoop/purge-hadoop-ensiie.sh
deleted file mode 100755
index b2c9986d5777c1e9ec285b0f2444049233e9ac36..0000000000000000000000000000000000000000
--- a/hadoop/purge-hadoop-ensiie.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-#!/bin/bash
-#
-#
-
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml down -v
-docker volume list | grep hadoop | awk '{ print $2 }' | xargs docker volume rm --force
diff --git a/hadoop/start-hadoop-ensiie.sh b/hadoop/start-hadoop-ensiie.sh
deleted file mode 100755
index 6a9da5bedd037bbc85f784acf6f68936a5596355..0000000000000000000000000000000000000000
--- a/hadoop/start-hadoop-ensiie.sh
+++ /dev/null
@@ -1,21 +0,0 @@
-#!/bin/bash
-#
-#
-# modification 2024 : test de la ram dispo et utilisation de tel ou tel fichier hadoop.env
-#
-
-if [ $(free --giga | grep "^Mem" | awk '{ print $2 }') -lt 8 ]
-then cp hadoop-8goRAM.env hadoop.env
-fi
-
-docker network create hbase 2>/dev/null
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml up -d namenode hive-metastore-postgresql
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml up -d datanode1 datanode2
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml up -d resourcemanager nodemanager1 nodemanager2 historyserver
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml up -d hive-server hive-metastore
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml up -d spark-master spark-worker-1 spark-worker-2
-
-my_ip=`ip route get 1 | awk '{ for (i=1;i<=NF;i++) { if ( $i == "src" ) { print $(i+1) ; exit } } }'`
-echo "Namenode: (HDFS Filebrowser) http://${my_ip}:9870"
-echo "Spark-master: http://${my_ip}:28083"
-echo "History Server: http://${my_ip}:28188"
diff --git a/hadoop/stop-hadoop-ensiie.sh b/hadoop/stop-hadoop-ensiie.sh
deleted file mode 100755
index 1dab2a9c2dec2aa5a409489999990ee05d3ff387..0000000000000000000000000000000000000000
--- a/hadoop/stop-hadoop-ensiie.sh
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/bin/bash
-#
-#
-docker compose -f ~/ensiie/exo/hadoop/docker-compose-ensiie-v3.yml stop
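Usage note: the sketch below shows one way the relocated scripts fit together after this change; it assumes they are run from the repository root, that Docker Compose v2 is installed, and that the host has the RAM the start script checks for. Also note that the signed Kaggle URL in download_dataset.sh carries X-Goog-Date=20240519 and X-Goog-Expires=259200, so the download will stop working once the signature expires.

    # download the Kaggle archive and unpack it into files/dataset/
    ./download_dataset.sh
    # start HDFS, YARN, Hive and Spark; the script prints the NameNode, Spark master and history server URLs
    ./docker-files/hadoop/start-hadoop-ensiie.sh
    # stop the containers without deleting HDFS data
    ./docker-files/hadoop/stop-hadoop-ensiie.sh
    # tear everything down, including the hadoop_* volumes
    ./docker-files/hadoop/purge-hadoop-ensiie.sh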