diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..c76ddb39a2a58768e8b27d16f7e4ca0b08cce0fe
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+dataset.zip
+dataset/*
+
diff --git a/download_dataset.sh b/download_dataset.sh
new file mode 100755
index 0000000000000000000000000000000000000000..2c1c5d1f149092831986227fc41927e9fa8042cd
--- /dev/null
+++ b/download_dataset.sh
@@ -0,0 +1,16 @@
+#!/usr/bin/env bash
+# Download the Kaggle dataset archive and extract it into ./dataset.
+#
+# NOTE(review): the URL below is a pre-signed link. Its query string carries
+# X-Goog-Date=20240519T145233Z and X-Goog-Expires=259200, so the download is
+# expected to be rejected once that validity window has elapsed; the link must
+# then be regenerated from the dataset page.
+
+# Stop at the first failing command so a failed download is never unzipped.
+set -euo pipefail
+
+wget "https://storage.googleapis.com/kaggle-data-sets/3384322/6207733/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20240519%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20240519T145233Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=1ac8f9216a239f62f3aa19666ce2b09c188d1d34d5199cf254a3677292e1b893eb10d0e2280baf0cbfb1f21d38a2b99f55e3e080beaa4a376d07326750503e15f35e123e2efd21c2c300a82c5bc06c787528bbe5e0d6b7be5a31bc0e6fb458b9a59456233fb852c658827d1dd547ca683890de508dd88940526568357bdd28611409ed5db0e479abf7b6f98855cd942d0cebfae55d463f288640c594bce7e11cd9f460e941cec80a7713e7faa54e69e3e9c4e9e3cd87b11bc35aa74439f96f80c2d592c6a97519353ca099d62e7276bec190a99e9327aee45ab9531d86f8f6be65fb3931148dbd4342712849494a71adcfe0b4eb54051582393fe8a98ebf68bc" -c -O 'dataset.zip'
+
+# -p: do not fail when the directory already exists, so the script can be re-run.
+mkdir -p dataset
+unzip dataset.zip -d dataset
diff --git a/rapport.txt b/rapport.txt
deleted file mode 100644
index b13e01b9400a0961a64fd593130f37407deeb98f..0000000000000000000000000000000000000000
--- a/rapport.txt
+++ /dev/null
@@ -1,122 +0,0 @@
-rapport : 
-
-
-Partie 1 : Hadoop
-
-cd ensiie/exo/opensearch
-vi recup-bano-opensearch
- 
--> en dessous du commentaire, i in {91..93}
-on vérifie le contenu de bano-data : i y a bien les 3 départements demandés
-
-
-nouveau terminal 
-cd ~/ensiie/exo/hadoop/
-./start-hadoop-ensiie.sh
-•Attendre environ 20 secondes et le relancer une seconde fois : 
-./start-hadoop-ensiie.sh
-
-
-docker run -it -v /home/tessa/ensiie/exo/opensearch/bano-data:/ensiie/tpnote namenode bash  -> ça marche pas pck déjà un volume
-
-
-cp -r ~/ensiie/exo/opensearch/bano-data ~/ensiie/exo/files pck montage déjà fait (sinon docker cp)
-
-docker exec -it namenode bash
-
-cd /data/hdfs/formation/bano-data
-cf copie écran
-
-hdfs dfs -put /data/hdfs/formation/bano-data /ensiie/tpnote/
-cf screen -> pb bizarre avec le /, ça trouvait pas mes fichiers sur hive
-
-
-
-partie 2 : Hive
-
-docker exec -it hive-server bash
-(une fois dans le conteneur)
-/opt/hive/bin/beeline -u jdbc:hive2://hive-server:10000
-
-1)
-hive>
-CREATE DATABASE IF NOT EXISTS ensiie;
-USE ensiie;
-
-2)
-CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.] table_name
-[(col_name data_type [COMMENT col_comment], ...)]
-[COMMENT table_comment]
-[ROW FORMAT row_format]
-[STORED AS file_format]
-
-cf site bano
-
-CREATE EXTERNAL TABLE IF NOT EXISTS bano
-( id STRING,
-  numero STRING, 
-  voie STRING,
-code_postal INT,
-nom_commune STRING,
-source STRING,
-latitude DOUBLE,
-longitude DOUBLE)
-ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
-WITH SERDEPROPERTIES (
-   "separatorChar" = ",",
-   "quoteChar"     = "\""
-)
-STORED AS textfile
-LOCATION '/ensiie/tpnote/bano-data';
-  
-
-SELECT * FROM bano LIMIT 10;
-
-cf screen
-
-
-3)
-CREATE TABLE IF NOT EXISTS banorc
-( id STRING,
-  numero STRING, 
-  voie STRING,
-code_postal INT,
-nom_commune STRING,
-source STRING,
-latitude DOUBLE,
-longitude DOUBLE)
-STORED AS ORC;
-INSERT INTO TABLE banorc
-SELECT * FROM bano;
-
-4) 
-select count (*) from bano ; 
-select count (*) from banorc ;
-
-756087 
-1,364s VS 0,11s
-
-5)
-SELECT SUBSTR(id, 1, 2) AS departement, COUNT(DISTINCT nom_commune) AS nombre_de_communes
-FROM bano
-GROUP BY SUBSTR(id, 1, 2)
-ORDER BY departement;
-
-select count(distinct nom_commune) from bano;
-
-
-Spark : 
-cd /spark/bin
-./spark-shell --driver-memory 2G 
-
-val lines = sc.textFile("hdfs://namenode:9000/ensiie/tpnote/bano-data/bano-91.csv")
-val words = lines.flatMap(_.split("\\s+"))
-val wc = words.map(w => (w, 1)).reduceByKey(_ + _)
-val sorted_words = wc.sortBy(-_._2).take(30)
-
-
-3) OpenSearch Dashboard
-
-cd ensiie/exo/hadoop/
-./stop-hadoop-ensiie.sh
-cd ../opensearch