From 4131f7723876ea76939bf1664a7909296bcea7dd Mon Sep 17 00:00:00 2001
From: "tessa.depaoli" <tessa@Tessaordi>
Date: Tue, 21 May 2024 01:45:32 +0200
Subject: [PATCH] gp

---
 DB_commands.txt | 91 +++++++++++++++++++++++++++++++++++++++++++++++++
 sake.txt        | 28 ++++++++++++++-
 2 files changed, 118 insertions(+), 1 deletion(-)
 create mode 100644 DB_commands.txt

diff --git a/DB_commands.txt b/DB_commands.txt
new file mode 100644
index 0000000..edb505a
--- /dev/null
+++ b/DB_commands.txt
@@ -0,0 +1,91 @@
+CREATE DATABASE IF NOT EXISTS myanimelist_db;
+-- Make myanimelist_db the current database so that every table created below is registered in it.
+USE myanimelist_db;
+
+-- anime: external table over comma-delimited, newline-terminated text files; columns are matched to fields by position.
+-- NOTE(review): no LOCATION is given, and a free-text field such as synopsis could contain ',' and shift later columns — confirm against the data files.
+CREATE EXTERNAL TABLE IF NOT EXISTS anime (
+    anime_id INT,
+    name STRING,
+    english_name STRING,
+    other_name STRING,
+    score FLOAT,
+    genres STRING,
+    synopsis STRING,
+    type STRING,
+    episodes FLOAT,
+    aired STRING,
+    premiered STRING,
+    status STRING,
+    producers STRING,
+    licensors STRING,
+    studios STRING,
+    source STRING,
+    duration STRING,
+    rating STRING,
+    rank FLOAT,
+    popularity INT,
+    favorites INT,
+    scored_by FLOAT,
+    members INT,
+    image_url STRING
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+-- users: external table over comma-delimited text files (profile fields and watch-list counters). NOTE(review): birthday/joined only parse as TIMESTAMP if the files use Hive's 'yyyy-MM-dd HH:mm:ss' text form — confirm.
+CREATE EXTERNAL TABLE IF NOT EXISTS users (
+    mal_id INT,
+    username STRING,
+    gender STRING,
+    birthday TIMESTAMP,
+    location STRING,
+    joined TIMESTAMP,
+    days_watched FLOAT,
+    mean_score FLOAT,
+    watching FLOAT,
+    completed FLOAT,
+    on_hold FLOAT,
+    dropped FLOAT,
+    plan_to_watch FLOAT,
+    total_entries FLOAT,
+    rewatched FLOAT,
+    episodes_watched FLOAT
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+
+-- score: external table over comma-delimited text files with one (user_id, anime_id, rating) triple per line.
+CREATE EXTERNAL TABLE IF NOT EXISTS score (
+    user_id INT,
+    anime_id INT,
+    rating INT
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+-- users_score_2023: managed (non-EXTERNAL) table holding the score columns plus the matching username and anime title,
+-- stored as comma-delimited text. NOTE(review): presumably filled from users/score/anime — confirm how it is loaded.
+CREATE TABLE IF NOT EXISTS users_score_2023 (
+    user_id INT,
+    username STRING,
+    anime_id INT,
+    anime_title STRING,
+    rating INT
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+
+-- One row per rating, enriched with the user's name and the anime's title (same column layout as users_score_2023).
+-- users is keyed by mal_id and anime_id lives on score. NOTE(review): assumes score.user_id holds users.mal_id values — confirm.
+SELECT u.mal_id AS user_id, u.username, s.anime_id, a.name AS anime_title, s.rating
+FROM users u
+JOIN score s ON u.mal_id = s.user_id
+JOIN anime a ON s.anime_id = a.anime_id;
+
diff --git a/sake.txt b/sake.txt
index 19a6e35..508daaa 100644
--- a/sake.txt
+++ b/sake.txt
@@ -155,9 +155,35 @@ vérification ici :
 http://162.19.124.170:9870/
 
 
-Une fois que les conteneurs sont up et que la config hdfs est faite (en gros qu'on a accès aux fichiers), on passe sur hive
+Une fois que les conteneurs sont up et que la config hdfs est faite (en gros qu'on a accès aux fichiers), on passe à la partie sur hive
+
+
 	C- Construction (aka le vif du sujet)
 
+On passe sur hive :
+
+docker exec -it hive-server bash 
+
+On passe sur beeline :
+/opt/hive/bin/beeline -u jdbc:hive2://hive-server:10000
+
+En cas de problème de connexion :
+On vérifie si HiveServer2 est en cours d'exécution, et on le relance si besoin :
+ps aux | grep HiveServer2
+/opt/hive/bin/hiveserver2 &
+
+Ça ne fonctionne toujours pas.
+On vérifie que le port 10000 de hive-server est joignable :
+nc -zv hive-server 10000
+
+Au final, il fallait utiliser comme hôte l'identifiant du conteneur (celui affiché dans notre nom d'hôte), à savoir : 3e00f5903a89
+
+Nouvelle commande :
+/opt/hive/bin/beeline -u jdbc:hive2://3e00f5903a89:10000
+
+Il faut donc maintenant construire la BD -> cf DB_commands
+
+
 
 
 V- Analyse représentatives
-- 
GitLab