From 4131f7723876ea76939bf1664a7909296bcea7dd Mon Sep 17 00:00:00 2001
From: "tessa.depaoli" <tessa@Tessaordi>
Date: Tue, 21 May 2024 01:45:32 +0200
Subject: [PATCH] gp

---
 DB_commands.txt | 91 +++++++++++++++++++++++++++++++++++++++++++++++++
 sake.txt        | 28 ++++++++++++++-
 2 files changed, 118 insertions(+), 1 deletion(-)
 create mode 100644 DB_commands.txt

diff --git a/DB_commands.txt b/DB_commands.txt
new file mode 100644
index 0000000..edb505a
--- /dev/null
+++ b/DB_commands.txt
@@ -0,0 +1,91 @@
+CREATE DATABASE IF NOT EXISTS myanimelist_db;
+
+USE myanimelist_db;
+
+-- Raw tables: EXTERNAL, plain text, fields split on ',' and one record per line.
+-- NOTE(review): no LOCATION clause and no quoted-field handling - confirm both are intended.
+CREATE EXTERNAL TABLE IF NOT EXISTS anime (
+    anime_id INT,
+    name STRING,
+    english_name STRING,
+    other_name STRING,
+    score FLOAT,
+    genres STRING,
+    synopsis STRING,
+    type STRING,
+    episodes FLOAT,
+    aired STRING,
+    premiered STRING,
+    status STRING,
+    producers STRING,
+    licensors STRING,
+    studios STRING,
+    source STRING,
+    duration STRING,
+    rating STRING,
+    rank FLOAT,
+    popularity INT,
+    favorites INT,
+    scored_by FLOAT,
+    members INT,
+    image_url STRING
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+-- users: account profile fields plus watch-count statistics.
+CREATE EXTERNAL TABLE IF NOT EXISTS users (
+    mal_id INT,
+    username STRING,
+    gender STRING,
+    birthday TIMESTAMP,
+    location STRING,
+    joined TIMESTAMP,
+    days_watched FLOAT,
+    mean_score FLOAT,
+    watching FLOAT,
+    completed FLOAT,
+    on_hold FLOAT,
+    dropped FLOAT,
+    plan_to_watch FLOAT,
+    total_entries FLOAT,
+    rewatched FLOAT,
+    episodes_watched FLOAT
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+
+-- score: (user_id, anime_id, rating) rows.
+CREATE EXTERNAL TABLE IF NOT EXISTS score (
+    user_id INT,
+    anime_id INT,
+    rating INT
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+-- ------------------------------------------------------------------
+-- users_score_2023: managed (non-EXTERNAL) table with user, anime title and rating columns.
+CREATE TABLE IF NOT EXISTS users_score_2023 (
+    user_id INT,
+    username STRING,
+    anime_id INT,
+    anime_title STRING,
+    rating INT
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+
+-- ------------------------------------------------------------------
+-- Join users -> score -> anime; assumes users.mal_id is the id stored in score.user_id - TODO confirm.
+SELECT u.mal_id AS user_id, u.username, s.anime_id, a.name AS anime_title, s.rating
+FROM users u
+JOIN score s ON u.mal_id = s.user_id
+JOIN anime a ON s.anime_id = a.anime_id;
+
diff --git a/sake.txt b/sake.txt
index 19a6e35..508daaa 100644
--- a/sake.txt
+++ b/sake.txt
@@ -155,9 +155,35 @@ vérification ici : http://162.19.124.170:9870/
 
 
 
-Une fois que les conteneurs sont up et que la config hdfs est faite (en gros qu'on a accès aux fichiers), on passe sur hive
+Une fois que les conteneurs sont up et que la config hdfs est faite (en gros qu'on a accès aux fichiers), on passe à la partie sur hive
+
+
 
 C- Construction (aka le vif du sujet)
+On passe sur hive :
+
+docker exec -it hive-server bash
+
+On passe sur beeline
+/opt/hive/bin/beeline -u jdbc:hive2://hive-server:10000
+
+SI PB DE CO :
+On vérifie si HiveServer2 est en cours d'exécution :
+ps aux | grep HiveServer2
+/opt/hive/bin/hiveserver2 &
+
+ça ne fonctionne pas
+On regarde notre numéro de port :
+nc -zv hive-server 10000
+
+au final il fallait prendre celui spécifié dans notre nom, à savoir : 3e00f5903a89
+
+Nouvelle commande :
+/opt/hive/bin/beeline -u jdbc:hive2://hive-server:3e00f5903a89
+
+Il faut donc maintenant construire la BD -> cf DB_commands
+
+
 
 
 V- Analyse représentatives
-- 
GitLab