diff --git a/DB_commands.txt b/DB_commands.txt
new file mode 100644
index 0000000000000000000000000000000000000000..edb505a36c866651b1d62468c2d37b7296a222e2
--- /dev/null
+++ b/DB_commands.txt
@@ -0,0 +1,91 @@
+CREATE DATABASE IF NOT EXISTS myanimelist_db;
+
+USE myanimelist_db;
+
+-- Anime metadata, declared as comma-delimited text.
+-- NOTE(review): FIELDS TERMINATED BY ',' splits on every comma, so free-text fields containing commas would shift columns; confirm the input or switch to a CSV SerDe.
+CREATE EXTERNAL TABLE IF NOT EXISTS anime (
+    anime_id INT,
+    name STRING,
+    english_name STRING,
+    other_name STRING,
+    score FLOAT,
+    genres STRING,
+    synopsis STRING,
+    type STRING,
+    episodes FLOAT,
+    aired STRING,
+    premiered STRING,
+    status STRING,
+    producers STRING,
+    licensors STRING,
+    studios STRING,
+    source STRING,
+    duration STRING,
+    rating STRING,
+    rank FLOAT,
+    popularity INT,
+    favorites INT,
+    scored_by FLOAT,
+    members INT,
+    image_url STRING
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+-- User profiles. NOTE(review): birthday/joined load as NULL unless the text is in Hive's 'yyyy-MM-dd HH:mm:ss' timestamp format; confirm the source format.
+CREATE EXTERNAL TABLE IF NOT EXISTS users (
+    mal_id INT,
+    username STRING,
+    gender STRING,
+    birthday TIMESTAMP,
+    location STRING,
+    joined TIMESTAMP,
+    days_watched FLOAT,
+    mean_score FLOAT,
+    watching FLOAT,
+    completed FLOAT,
+    on_hold FLOAT,
+    dropped FLOAT,
+    plan_to_watch FLOAT,
+    total_entries FLOAT,
+    rewatched FLOAT,
+    episodes_watched FLOAT
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+
+-- Ratings: one (user_id, anime_id, rating) triple per row.
+CREATE EXTERNAL TABLE IF NOT EXISTS score (
+    user_id INT,
+    anime_id INT,
+    rating INT
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+
+-- Managed (non-external) table: the score columns plus username and anime_title.
+CREATE TABLE IF NOT EXISTS users_score_2023 (
+    user_id INT,
+    username STRING,
+    anime_id INT,
+    anime_title STRING,
+    rating INT
+)
+ROW FORMAT DELIMITED
+FIELDS TERMINATED BY ','
+LINES TERMINATED BY '\n'
+STORED AS TEXTFILE;
+
+-- Rating rows enriched with username and anime title, in users_score_2023 column order.
+-- Assumes users.mal_id is the id that score.user_id refers to (users has no user_id column); TODO confirm.
+SELECT s.user_id, u.username, s.anime_id, a.name AS anime_title, s.rating
+FROM users u
+INNER JOIN score s ON u.mal_id = s.user_id
+INNER JOIN anime a ON s.anime_id = a.anime_id;
+
diff --git a/sake.txt b/sake.txt index 
19a6e356ea703f4e54cdbd02395aacabcb2f2373..508daaafbe39d876d6864ccb6435eab250a76771 100644 --- a/sake.txt +++ b/sake.txt @@ -155,9 +155,35 @@ vérification ici : http://162.19.124.170:9870/ -Une fois que les conteneurs sont up et que la config hdfs est faite (en gros qu'on a accès aux fichiers), on passe sur hive +Une fois que les conteneurs sont up et que la config hdfs est faite (en gros qu'on a accès aux fichiers), on passe à la partie sur hive + + C- Construction (aka le vif du sujet) +On passe sur hive : + +docker exec -it hive-server bash + +On passe sur beeline : +/opt/hive/bin/beeline -u jdbc:hive2://hive-server:10000 + +EN CAS DE PROBLÈME DE CONNEXION : +On vérifie si HiveServer2 est en cours d'exécution : +ps aux | grep HiveServer2 +/opt/hive/bin/hiveserver2 & + +Ça ne fonctionne pas. +On vérifie que le port est joignable : +nc -zv hive-server 10000 + +Au final, il fallait prendre celui spécifié dans notre nom, à savoir : 3e00f5903a89 (NB : cela ressemble à un identifiant de conteneur plutôt qu'à un numéro de port — à vérifier) + +Nouvelle commande : +/opt/hive/bin/beeline -u jdbc:hive2://hive-server:3e00f5903a89 + +Il faut donc maintenant construire la BD -> cf. DB_commands.txt + + V- Analyse représentatives