From 3b927540ece970be09073bcf55f4aa3cf6e24922 Mon Sep 17 00:00:00 2001 From: Thomas MESLIN <th.meslin@gmail.com> Date: Tue, 1 Apr 2025 23:06:09 +0200 Subject: [PATCH] =?UTF-8?q?module=20Csv=20comment=C3=A9=20et=20document?= =?UTF-8?q?=C3=A9=20!?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- script/src/csv.ml | 223 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 223 insertions(+) diff --git a/script/src/csv.ml b/script/src/csv.ml index 8d60388..71aa54a 100644 --- a/script/src/csv.ml +++ b/script/src/csv.ml @@ -1,4 +1,22 @@ +(*Retrieve all data from a file, line by line +get_data : string -> string list +Ex : +nom_fichier : +anfrzvirz qvirù qvr +rqv erp,be rq,b r +qb oepv k,q, vkerq +reo qpv,reoq,beripkn q +bro, pq, erp,bqer p + +get_data nom_fichier = [ +anfrzvirz qvirù qvr; +rqv erp,be rq,b r; +qb oepv k,q, vkerq; +reo qpv,reoq,beripkn q; +bro, pq, erp,bqer p; +] +*) let get_data name = begin let ic = open_in name in @@ -19,6 +37,10 @@ let get_data name = begin type csv = {column_name : string list;data : string array list} +(* +Open a file as a csv file +open_as_csv : string -> csv +*) let open_as_csv name = let data = get_data name in let column_name = List.hd data |> String.split_on_char ',' in @@ -26,11 +48,39 @@ let open_as_csv name = {column_name = column_name; data = data} +(* +renvoie l'indice de l'element elt dans la liste l +trouve_indice : string list -> string -> int + +Ex : +trouve_indice ["miaou";"olala";"b"] "olala" = 1 +trouve_indice ["miaou";"olala";"b"] "" = Exception : failwith "Indice non trouvé !" +*) let rec trouve_indice l elt = match l with |[] -> failwith "Indice non trouvé !" 
|x :: tl -> if String.equal x elt then 0 else 1 + (trouve_indice tl elt) +(* +Fusionne deux colonne col_name1 et col_name2 en new_col_name a l'aide de la fonction f (modifie aussi le tableau de départ par effet de bord): + +merge_column : csv -> string -> string -> string -> (string -> string -> string) -> csv +Ex : +let moncsv = +col1, col2, col3, col4 +a0 , b0 , c0 , d0 +a1 , b1 , c1 , d1 +a2 , b2 , c2 , d2 +a3 , b3 , c3 , d3 + +merge_column moncsv "col1" "col2" "new_col" (fun x y -> x ^ y) = +new_col, "", col3, col4 +a0b0 , , c0 , d0 +a1b1 , , c1 , d1 +a2b2 , , c2 , d2 +a3b3 , , c3 , d3 + +*) let merge_column rand_csv col_name1 col_name2 new_col_name f = let idx1 = trouve_indice rand_csv.column_name col_name1 in let idx2 = trouve_indice rand_csv.column_name col_name2 in @@ -51,11 +101,50 @@ let merge_column rand_csv col_name1 col_name2 new_col_name f = let new_col_name = replace rand_csv.column_name in {column_name = new_col_name;data = new_data} +(* +Supprime le contenu d'une colonne (modifie aussi le tableau de départ par effet de bord): + +del_column : csv -> string -> csv +Ex : +let moncsv = +col1, col2, col3, col4 +a0 , b0 , c0 , d0 +a1 , b1 , c1 , d1 +a2 , b2 , c2 , d2 +a3 , b3 , c3 , d3 + +del_column moncsv "col1" = +"", col2, col3, col4 + , b0 , c0 , d0 + , b1 , c1 , d1 + , b2 , c2 , d2 + , b3 , c3 , d3 +*) let del_column csv nom_col = merge_column csv nom_col nom_col "" (fun a b -> "") +(* +Applique la fonction f a une colonne (modifie aussi le tableau de départ par effet de bord) : + +map : csv -> string -> (string -> string) -> csv + +Ex : +let moncsv = +col1, col2, col3, col4 +a0 , b0 , c0 , d0 +a1 , b1 , c1 , d1 +a2 , b2 , c2 , d2 +a3 , b3 , c3 , d3 + +map moncsv "col1" (fun x -> x^x) = +col1, col2, col3, col4 +a0a0 , b0 , c0 , d0 +a1a1 , b1 , c1 , d1 +a2a2 , b2 , c2 , d2 +a3a3 , b3 , c3 , d3 +*) let map rand_csv col_name f = begin let idx = trouve_indice rand_csv.column_name col_name in let new_data = List.map @@ -65,6 +154,26 @@ let map rand_csv 
col_name f = begin {column_name = rand_csv.column_name;data = new_data} end +(* +Applique la fonction f a chaque ligne (représenté par une table de hashage), la ligne est ensuite reconstruite : + +map_line : csv -> ((string, string) Hashtbl.t -> 'a) -> csv + +Ex : +let moncsv = +col1, col2, col3, col4 +a0 , b0 , c0 , d0 +a1 , b1 , c1 , d1 +a2 , b2 , c2 , d2 +a3 , b3 , c3 , d3 + +map_line moncsv (fun htab -> Hashtbl.replace htab "col1" (Hashtbl.find htab "col2" ^ (Hashtbl.find htab "col3"))) = +col1, col2, col3, col4 +b0c0 , b0 , c0 , d0 +b1c1 , b1 , c1 , d1 +b2c2 , b2 , c2 , d2 +b3c3 , b3 , c3 , d3 +*) let map_line rand_csv f = let htab = Hashtbl.create (List.length rand_csv.column_name) in let n = List.length rand_csv.column_name in @@ -84,13 +193,37 @@ let map_line rand_csv f = {column_name = rand_csv.column_name;data = new_data} +(* +Applique la fonction f a une colonne mais ne modifie rien: + +iter : csv -> string -> (string -> unit) -> unit +Ex : +let moncsv = +col1, col2, col3, col4 +a0 , b0 , c0 , d0 +a1 , b1 , c1 , d1 +a2 , b2 , c2 , d2 +a3 , b3 , c3 , d3 + +iter moncsv "col1" print_endline = () +(*Dans la console*) +a0 +a1 +a2 +a3 +*) let iter rand_csv col_name f = let idx = trouve_indice rand_csv.column_name col_name in List.iter (fun a -> f a.(idx)) rand_csv.data + +(* +Sauvegarde le csv au chemin file_name au format csv. 
+(,) comme séparateur +save : csv -> string -> unit +*) let save rand_csv file_name = begin let oc = open_out file_name in let first_line = rand_csv.column_name in @@ -108,15 +241,63 @@ let save rand_csv file_name = begin close_out oc end +(* +Créer un csv qui a comme nom de colonne les elements de l +create : string list -> csv +Ex : +create ["col0";"col1";"col2"] = +col0, col1, col2 +(*contenu vide*) +*) let create l = {column_name = l;data = []} +(* +Créer un csv qui ajoute une ligne au csv si les dimensions correspondent +add_line : csv -> string array -> csv +Ex : +let moncsv = +col1, col2, col3, col4 +a0 , b0 , c0 , d0 +a1 , b1 , c1 , d1 +a2 , b2 , c2 , d2 +a3 , b3 , c3 , d3 + +add_line moncsv [|"a4";"b4";"c4";"d4"|] = +col1, col2, col3, col4 +a0 , b0 , c0 , d0 +a1 , b1 , c1 , d1 +a2 , b2 , c2 , d2 +a3 , b3 , c3 , d3 +a4 , b4 , c4 , d4 + +add_line moncsv [|"a4";"d4"|] = Exception : failwith "Missmatching dimension" +*) let add_line csv arr = if List.length csv.column_name = Array.length arr then {column_name = csv.column_name; data = arr :: csv.data} else failwith "Missmatching dimension" +(* +Ajoute une ligne au csv a partir d'une liste sous le format (nom_col, val_col) +create_line : csv -> (string*string) list -> csv +Ex : +let moncsv = +col1, col2, col3, col4 +a0 , b0 , c0 , d0 +a1 , b1 , c1 , d1 +a2 , b2 , c2 , d2 +a3 , b3 , c3 , d3 + +create_line moncsv [("col4","d4");("col1","a4")] = +col1, col2, col3, col4 +a0 , b0 , c0 , d0 +a1 , b1 , c1 , d1 +a2 , b2 , c2 , d2 +a3 , b3 , c3 , d3 +a4 , , , d4 +*) let create_line csv l = let arr = Array.make (List.length csv.column_name) "" in let rec set_arg l = match l with @@ -126,6 +307,48 @@ let create_line csv l = set_arg l; add_line csv arr + +(* +Convertit le csv en un graphe non_oriente, en utilisant la colonne col_name_label comme nom des noeuds du graphe +Le poids d'une arete est la valeur entre nom de la colonne et label de la colonne. 
+Renvoie 2 csv -> les aretes et les noeuds du graphes +to_graph : csv -> string -> csv * csv +Ex : +let moncsv = +col1, col2, col3, col4 +a0 , b0 , c0 , d0 +a1 , b1 , c1 , d1 +a2 , b2 , c2 , d2 +a3 , b3 , c3 , d3 + +to_graph moncsv "col2" = +( +Source, Target, Type , Id, Weight +6 , 2 , Undirected, 11, d3 +6 , 1 , Undirected, 10, c3 +6 , 0 , Undirected, 9 , a3 +5 , 2 , Undirected, 8 , d2 +5 , 1 , Undirected, 7 , c2 +5 , 0 , Undirected, 6 , a2 +4 , 2 , Undirected, 5 , d1 +4 , 1 , Undirected, 4 , c1 +4 , 0 , Undirected, 3 , a1 +3 , 2 , Undirected, 2 , d0 +3 , 1 , Undirected, 1 , c0 +3 , 0 , Undirected, 0 , a0 +) +, +( +Id, Label +6 , b3 +5 , b2 +4 , b1 +3 , b0 +2 , col4 +1 , col3 +0 , col1 +) +*) let to_graph csv col_name_label = let nodes = create ["Id";"Label"] in let edges = create ["Source";"Target";"Type";"Id";"Weight"] in -- GitLab