Sélectionner une révision Git
csv.ml 13,30 Kio
(*
  Read every line of a text file, in file order.
  get_data : string -> string list
  Each element of the result is one line, without its end-of-line character.
  The channel is closed before returning, and also when reading fails
  part-way, so no file descriptor is leaked.
  Raises Sys_error if the file cannot be opened.
  Ex :
  if the file nom_fichier contains
    col1,col2,col3
    a0,b0,c0
    a1,b1,c1
  then
  get_data nom_fichier = ["col1,col2,col3"; "a0,b0,c0"; "a1,b1,c1"]
*)
let get_data name =
  let ic = open_in name in
  (* Accumulate lines in reverse, then restore file order at end of file *)
  let rec read_lines acc =
    match input_line ic with
    | line -> read_lines (line :: acc)
    | exception End_of_file -> List.rev acc
  in
  match read_lines [] with
  | lines ->
    close_in ic;
    lines
  | exception e ->
    (* Reading failed: release the channel before propagating the error *)
    close_in_noerr ic;
    raise e
(*
  In-memory table handled by the functions of this file.
  column_name : the header, one name per column, in column order.
  data : the rows; each row is an array of cells. The functions of this file
  read a cell by indexing the row with the position of a name in column_name.
*)
type csv = {column_name : string list;data : string array list}
(*
  Load a comma-separated file into a csv record.
  open_as_csv : string -> csv
  The first line of the file gives the column names; every following line
  becomes one row. Lines are split on every ',' (there is no quoting or
  escaping) and the fields are kept verbatim, without trimming.
  Raises Failure if the file is empty, since there is no header line to read.
*)
let open_as_csv name =
  let split_fields line = String.split_on_char ',' line in
  match get_data name with
  | [] -> failwith ("open_as_csv : empty file, no header line : " ^ name)
  | header :: rows ->
    {
      column_name = split_fields header;
      data = List.map (fun row -> Array.of_list (split_fields row)) rows;
    }
(*
  Return the index of the first element of l equal to elt.
  trouve_indice : string list -> string -> int
  Raises Failure ("Indice non trouvé : " ^ elt) when elt is not in l.
  Ex :
  trouve_indice ["miaou";"olala";"b"] "olala" = 1
  trouve_indice ["miaou";"olala";"b"] "" raises Failure "Indice non trouvé : "
*)
let trouve_indice l elt =
  (* Tail-recursive scan: i is the position of the head of the remaining list *)
  let rec search i = function
    | [] -> failwith ("Indice non trouvé : " ^ elt)
    | x :: tl -> if String.equal x elt then i else search (i + 1) tl
  in
  search 0 l
(*
  Merge two columns into one with the function f.
  merge_column : csv -> string -> string -> string -> (string -> string -> string) -> csv
  In every row, the cell of col_name1 receives f applied to the cells of
  col_name1 and col_name2, and the cell of col_name2 becomes "".
  In the header, col_name1 is renamed new_col_name and col_name2 becomes "".
  The row arrays of rand_csv are updated in place, so the csv given as
  argument sees the new cell values as well (its header is left unchanged).
  Raises Failure (from trouve_indice) if one of the two names is not a column.
  Ex :
  let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
  merge_column moncsv "col1" "col2" "new_col" (fun x y -> x ^ y) =
    new_col, "", col3, col4
    a0b0   ,   , c0  , d0
    a1b1   ,   , c1  , d1
*)
let merge_column rand_csv col_name1 col_name2 new_col_name f =
  let idx1 = trouve_indice rand_csv.column_name col_name1 in
  let idx2 = trouve_indice rand_csv.column_name col_name2 in
  (* Overwrite both cells of one row and hand the same array back *)
  let merge_row row =
    let merged = f row.(idx1) row.(idx2) in
    row.(idx1) <- merged;
    row.(idx2) <- "";
    row
  in
  let merged_rows = List.map merge_row rand_csv.data in
  let rename name =
    if String.equal name col_name1 then new_col_name
    else if String.equal name col_name2 then ""
    else name
  in
  { column_name = List.map rename rand_csv.column_name; data = merged_rows }
(*
  Remove a column: its name leaves the header and its cell leaves every row.
  del_column : csv -> string -> csv
  Only the first column named nom_col is removed, from the header and from
  the rows alike, so both stay aligned even when several columns share the
  same name (for instance several "" columns).
  Fresh row arrays are built; the csv given as argument is not modified.
  Raises Failure (from trouve_indice) if nom_col is not a column.
  Raises Invalid_argument if a row has fewer cells than the header.
  Ex :
  let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
  del_column moncsv "col1" =
    col2, col3, col4
    b0  , c0  , d0
    b1  , c1  , d1
*)
let del_column csv nom_col =
  let idx = trouve_indice csv.column_name nom_col in
  let n = List.length csv.column_name in
  (* Copy a row while skipping the cell at position idx *)
  let drop_cell arr =
    Array.init (n - 1) (fun i -> if i < idx then arr.(i) else arr.(i + 1))
  in
  let new_data = List.map drop_cell csv.data in
  (* Drop the header entry by position, exactly like the cells, instead of
     filtering by name (which would remove every homonymous column) *)
  let new_l =
    csv.column_name
    |> List.mapi (fun i name -> (i, name))
    |> List.filter (fun (i, _) -> i <> idx)
    |> List.map snd
  in
  { column_name = new_l; data = new_data }
(*
  Apply f to every cell of one column.
  map : csv -> string -> (string -> string) -> csv
  The row arrays of rand_csv are updated in place, so the csv given as
  argument sees the new cell values as well.
  Raises Failure (from trouve_indice) if col_name is not a column.
  Ex :
  let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
  map moncsv "col1" (fun x -> x ^ x) =
    col1, col2, col3, col4
    a0a0, b0  , c0  , d0
    a1a1, b1  , c1  , d1
*)
let map rand_csv col_name f =
  let idx = trouve_indice rand_csv.column_name col_name in
  (* Rewrite the target cell and return the very same array *)
  let apply_to_cell row =
    row.(idx) <- f row.(idx);
    row
  in
  { rand_csv with data = List.map apply_to_cell rand_csv.data }
(*
  Apply f to every row, the row being presented as a hash table that maps
  each column name to the cell of that column. After f has run, the row is
  rebuilt by reading the table back in header order.
  map_line : csv -> ((string, string) Hashtbl.t -> 'a) -> csv
  A single table is reused for all the rows. The rebuilt rows are fresh
  arrays; the arrays of rand_csv are not written to.
  Raises Not_found if f removes the binding of a column name.
  Ex :
  let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
  map_line moncsv (fun htab -> Hashtbl.replace htab "col1" (Hashtbl.find htab "col2" ^ (Hashtbl.find htab "col3"))) =
    col1, col2, col3, col4
    b0c0, b0  , c0  , d0
    b1c1, b1  , c1  , d1
*)
let map_line rand_csv f =
  let names = rand_csv.column_name in
  let width = List.length names in
  let htab = Hashtbl.create width in
  let names_arr = Array.of_list names in
  let rebuild row =
    (* Load the row into the shared table, keyed by column name *)
    List.iteri (fun i name -> Hashtbl.replace htab name row.(i)) names;
    f htab;
    (* Read the cells back in header order *)
    Array.init width (fun i -> Hashtbl.find htab names_arr.(i))
  in
  { column_name = names; data = List.map rebuild rand_csv.data }
(*
  Keep only the rows for which f answers true. Each row is presented to f as
  a hash table that maps each column name to the cell of that column.
  filter_line : csv -> ((string, string) Hashtbl.t -> bool) -> csv
  A single table is reused for all the rows. The kept rows are the same
  arrays as in rand_csv (they are not copied).
  Ex :
  let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
    a2  , b2  , c2  , d2
    a3  , b3  , c3  , d3
  filter_line moncsv (fun htab -> Hashtbl.find htab "col1" = "a0" || Hashtbl.find htab "col2" = "b1") =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
*)
let filter_line rand_csv f =
  let names = rand_csv.column_name in
  let htab = Hashtbl.create (List.length names) in
  let keep row =
    (* Load the row into the shared table before asking f *)
    List.iteri (fun i name -> Hashtbl.replace htab name row.(i)) names;
    f htab
  in
  { rand_csv with data = List.filter keep rand_csv.data }
(*
  Call f on every cell of one column, from the first row to the last.
  Nothing is modified and nothing is returned.
  iter : csv -> string -> (string -> unit) -> unit
  Raises Failure (from trouve_indice) if col_name is not a column.
  Ex :
  let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
  iter moncsv "col1" print_endline = ()
  (* printed on the console *)
  a0
  a1
*)
let iter rand_csv col_name f =
  let idx = trouve_indice rand_csv.column_name col_name in
  let visit row = f row.(idx) in
  List.iter visit rand_csv.data
(*
  Write the csv to the file file_name, with ',' as separator.
  save : csv -> string -> unit
  The header is written first, then one line per row, in list order.
  Cells are written verbatim: a cell that contains a comma or a newline is
  not quoted or escaped.
  An empty header or an empty row writes nothing at all (no empty line).
  The channel is closed on every exit path, including when writing fails.
  Raises Sys_error if the file cannot be opened or written.
*)
let save rand_csv file_name =
  let oc = open_out file_name in
  (* Write one comma-joined line; an empty list of fields writes nothing *)
  let write_fields fields =
    match fields with
    | [] -> ()
    | _ ->
      output_string oc (String.concat "," fields);
      output_string oc "\n"
  in
  let write_all () =
    write_fields rand_csv.column_name;
    List.iter (fun row -> write_fields (Array.to_list row)) rand_csv.data
  in
  match write_all () with
  | () -> close_out oc
  | exception e ->
    (* Writing failed: release the channel before propagating the error *)
    close_out_noerr oc;
    raise e
(*
  Build a csv without any row, whose column names are the elements of l.
  create : string list -> csv
  Ex :
  create ["col0";"col1";"col2"] =
    col0, col1, col2
    (* no row *)
*)
let create l =
  {
    column_name = l;
    data = [];
  }
(*
  Return a csv with one more row, provided the row has exactly one cell per
  column.
  add_line : csv -> string array -> csv
  The new row is put at the head of the row list, so it comes before the
  rows already present. The csv given as argument is not modified.
  Raises Failure "Missmatching dimension" when the number of cells differs
  from the number of columns.
  Ex :
  let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
  add_line moncsv [|"a4";"b4";"c4";"d4"|] =
    col1, col2, col3, col4
    a4  , b4  , c4  , d4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
  add_line moncsv [|"a4";"d4"|] raises Failure "Missmatching dimension"
*)
let add_line csv arr =
  let expected = List.length csv.column_name in
  if Array.length arr <> expected then failwith "Missmatching dimension"
  else { csv with data = arr :: csv.data }
(*
  Add a row described by a list of (column name, cell value) pairs.
  create_line : csv -> (string * string) list -> csv
  Columns that are not mentioned in l get the empty string. When a column is
  mentioned several times, the last pair wins.
  The row is added with add_line, hence at the head of the row list.
  Raises Failure (from trouve_indice) if a name of l is not a column.
  Ex :
  let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
  create_line moncsv [("col4","d4");("col1","a4")] =
    col1, col2, col3, col4
    a4  ,     ,     , d4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
*)
let create_line csv l =
  (* Start from a row of empty cells, then fill the requested columns *)
  let row = Array.make (List.length csv.column_name) "" in
  List.iter
    (fun (nom_arg, val_arg) ->
       row.(trouve_indice csv.column_name nom_arg) <- val_arg)
    l;
  add_line csv row
(*
  Convert the csv into an undirected graph, returned as two csv values:
  the edges and the nodes.
  to_graph : csv -> string -> csv * csv
  Nodes :
  - one node of category "Asso" per column name that is neither "" nor
    col_name_label, numbered from 0 in header order;
  - then one node of category "Eleve" per row, labelled with the cell of the
    column col_name_label, numbered after the column nodes in row order.
  Edges : for every row and every "Asso" column whose cell is not "", one
  edge from the node of the row to the node of the column; its weight is the
  cell and its id counts up from 0.
  Rows are added with add_line, so both tables list the most recent entry
  first.
  Raises Failure (from trouve_indice) if col_name_label is not a column.
  Ex :
  let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
  to_graph moncsv "col2" =
  (
    Source, Target, Type      , Id, Weight
    4     , 2     , Undirected, 5 , d1
    4     , 1     , Undirected, 4 , c1
    4     , 0     , Undirected, 3 , a1
    3     , 2     , Undirected, 2 , d0
    3     , 1     , Undirected, 1 , c0
    3     , 0     , Undirected, 0 , a0
  )
  ,
  (
    Id, Label, Categorie
    4 , b1   , Eleve
    3 , b0   , Eleve
    2 , col4 , Asso
    1 , col3 , Asso
    0 , col1 , Asso
  )
*)
let to_graph csv col_name_label =
  let nodes = create ["Id"; "Label"; "Categorie"] in
  let edges = create ["Source"; "Target"; "Type"; "Id"; "Weight"] in
  (* Maps a node label to its id, already rendered as a string *)
  let hash_node = Hashtbl.create (List.length csv.column_name) in
  (* A column gives a node unless it is unnamed or is the label column *)
  let is_assoc_column name =
    name <> "" && not (String.equal name col_name_label)
  in
  (* Record the id of label and add the matching line to the node table *)
  let add_node (nodes, next_id) label category =
    let id = string_of_int next_id in
    Hashtbl.add hash_node label id;
    (add_line nodes [| id; label; category |], next_id + 1)
  in
  let nodes, first_label_id =
    List.fold_left
      (fun acc name ->
         if is_assoc_column name then add_node acc name "Asso" else acc)
      (nodes, 0) csv.column_name
  in
  let label_idx = trouve_indice csv.column_name col_name_label in
  let nodes, _ =
    List.fold_left
      (fun acc row -> add_node acc row.(label_idx) "Eleve")
      (nodes, first_label_id) csv.data
  in
  (* Add the edges of one row, threading the next free edge id *)
  let add_row_edges (edges, first_edge_id) row =
    let source = Hashtbl.find hash_node row.(label_idx) in
    let edges, next_edge_id, _ =
      List.fold_left
        (fun (edges, edge_id, i) name ->
           if row.(i) = "" || not (is_assoc_column name) then
             (edges, edge_id, i + 1)
           else
             let edge =
               [| source; Hashtbl.find hash_node name; "Undirected";
                  string_of_int edge_id; row.(i) |]
             in
             (add_line edges edge, edge_id + 1, i + 1))
        (edges, first_edge_id, 0) csv.column_name
    in
    (edges, next_edge_id)
  in
  let edges, _ = List.fold_left add_row_edges (edges, 0) csv.data in
  (edges, nodes)
(*
  Duplicate a csv so that in-place updates of the rows of the copy do not
  reach the original.
  copy : csv -> csv
  Every row array is copied; the header list is shared between both values.
*)
let copy csv = { csv with data = List.map Array.copy csv.data }
(*
  Return the header of a csv.
  get_column : csv -> string list
*)
let get_column { column_name; _ } = column_name
(*
  Return the rows of a csv. The arrays are the ones stored in the csv, not
  copies.
  get_data : csv -> string array list
  From this point on, this definition shadows the get_data function defined
  earlier in this file, the one that reads the lines of a file.
*)
let get_data { data; _ } = data