Skip to content
Extraits de code Groupes Projets
Sélectionner une révision Git
  • 8357361f12978cb0f4448f44d47ef0d5bf5b5365
  • main par défaut protégée
2 résultats

csv.ml

Blame
  • csv.ml 13,30 Kio
    
    (*
    Read a whole text file and return its lines, in file order, without
    their end-of-line terminator.
    get_data : string -> string list

    The channel is closed before returning, including when reading fails
    with an exception other than End_of_file (the exception is re-raised).
    Raises Sys_error if the file cannot be opened.

    Ex :
    if the file nom_fichier contains
    a,b,c
    d,e,f

    get_data nom_fichier = ["a,b,c"; "d,e,f"]
    *)
    let get_data name =
      let ic = open_in name in
      (* Lines are accumulated in reverse so the loop stays tail-recursive. *)
      let rec read_lines acc =
        match input_line ic with
        | line -> read_lines (line :: acc)
        | exception End_of_file -> List.rev acc
      in
      match read_lines [] with
      | lines ->
        close_in ic;
        lines
      | exception e ->
        (* Do not leak the channel when reading fails midway. *)
        close_in_noerr ic;
        raise e
    
    
    (* In-memory CSV table: column_name holds the header cells in order,
       data holds one string array per row. *)
    type csv = {column_name : string list;data : string array list}
    
    
    (*
    Load the file at path name as a csv value.
    open_as_csv : string -> csv

    The first line provides the column names, every following line becomes
    one row. Cells are obtained by splitting each line on commas only:
    quoting and escaping are not interpreted and cells are not trimmed.

    Raises Failure with an explicit message if the file contains no line.
    *)
    let open_as_csv name =
      let split_cells line = String.split_on_char ',' line in
      match get_data name with
      | [] ->
        (* Previously surfaced as the uninformative Failure "hd". *)
        failwith ("open_as_csv: empty file: " ^ name)
      | header :: rows ->
        {
          column_name = split_cells header;
          data = List.map (fun row -> Array.of_list (split_cells row)) rows;
        }
    
    
    (*
    Return the 0-based position of the first element of l equal to elt.
    trouve_indice : string list -> string -> int

    Raises Failure "Indice non trouvé : <elt>" when elt does not occur in l.

    Ex :
    trouve_indice ["miaou";"olala";"b"] "olala" = 1
    trouve_indice ["miaou";"olala";"b"] "" raises Failure "Indice non trouvé : "
    *)
    let trouve_indice l elt =
      (* The position is carried as an accumulator so the walk is
         tail-recursive. *)
      let rec search pos = function
        | [] -> failwith ("Indice non trouvé : " ^ elt)
        | x :: tl -> if String.equal x elt then pos else search (pos + 1) tl
      in
      search 0 l
            
    (*
    Merge two columns into one using f.
    merge_column : csv -> string -> string -> string -> (string -> string -> string) -> csv

    For every row, the cell of col_name1 is overwritten with f cell1 cell2
    and the cell of col_name2 is overwritten with "". In the header,
    col_name1 is renamed to new_col_name and col_name2 is renamed to "".
    The row arrays of rand_csv are modified in place and are shared with
    the returned csv.

    Raises Failure (from trouve_indice) if a column name is not in the header.

    Ex :
    let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
    a2  , b2  , c2  , d2
    a3  , b3  , c3  , d3

    merge_column moncsv "col1" "col2" "new_col" (fun x y -> x ^ y) =
    new_col, "", col3, col4
    a0b0  ,      , c0  , d0
    a1b1  ,      , c1  , d1
    a2b2  ,      , c2  , d2
    a3b3  ,      , c3  , d3
    *)
    let merge_column rand_csv col_name1 col_name2 new_col_name f =
      let idx1 = trouve_indice rand_csv.column_name col_name1 in
      let idx2 = trouve_indice rand_csv.column_name col_name2 in
      (* Mutates the row and hands the same array back. *)
      let merge_row row =
        let merged = f row.(idx1) row.(idx2) in
        row.(idx1) <- merged;
        row.(idx2) <- "";
        row
      in
      let merged_rows = List.map merge_row rand_csv.data in
      let rename name =
        if String.equal name col_name1 then new_col_name
        else if String.equal name col_name2 then ""
        else name
      in
      { column_name = List.map rename rand_csv.column_name; data = merged_rows }
    
    (*
    Remove a column from the header and from every row.
    del_column : csv -> string -> csv

    The column removed is the first one whose name equals nom_col. New row
    arrays are built; the arrays of the input csv are left untouched.
    Every row is rebuilt with (number of header cells - 1) cells, so a row
    shorter than the header raises Invalid_argument.

    Raises Failure (from trouve_indice) if nom_col is not in the header.

    Ex :
    let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
    a2  , b2  , c2  , d2
    a3  , b3  , c3  , d3

    del_column moncsv "col1" =
    col2, col3, col4
    b0  , c0  , d0
    b1  , c1  , d1
    b2  , c2  , d2
    b3  , c3  , d3
    *)
    let del_column csv nom_col =
      let idx = trouve_indice csv.column_name nom_col in
      let n = List.length csv.column_name in
      let drop_cell row =
        Array.init (n - 1) (fun i -> if i < idx then row.(i) else row.(i + 1))
      in
      let new_data = List.map drop_cell csv.data in
      (* Drop exactly the header cell at position idx. Filtering by name
         would remove every column called nom_col while the rows only lose
         one cell, leaving header and rows out of step. *)
      let _, kept_rev =
        List.fold_left
          (fun (i, acc) name -> (i + 1, if i = idx then acc else name :: acc))
          (0, []) csv.column_name
      in
      { column_name = List.rev kept_rev; data = new_data }
            
    
    
    (*
    Apply f to every cell of one column.
    map : csv -> string -> (string -> string) -> csv

    The row arrays of rand_csv are modified in place and are shared with
    the returned csv.

    Raises Failure (from trouve_indice) if col_name is not in the header.

    Ex :
    let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
    a2  , b2  , c2  , d2
    a3  , b3  , c3  , d3

    map moncsv "col1" (fun x -> x^x) =
    col1, col2, col3, col4
    a0a0  , b0  , c0  , d0
    a1a1  , b1  , c1  , d1
    a2a2  , b2  , c2  , d2
    a3a3  , b3  , c3  , d3
    *)
    let map rand_csv col_name f =
      let idx = trouve_indice rand_csv.column_name col_name in
      (* Overwrites the target cell and hands the same array back. *)
      let rewrite_cell row =
        row.(idx) <- f row.(idx);
        row
      in
      let rewritten = List.map rewrite_cell rand_csv.data in
      { column_name = rand_csv.column_name; data = rewritten }
    
    (*
    Apply f to every row, each row being presented as a hash table that
    maps a column name to the cell value. After f returns, the row is
    rebuilt from the table by looking up each column name in header order.
    The result of f is discarded.

    map_line : csv -> ((string, string) Hashtbl.t -> 'a) -> csv

    A single table is reused for all rows: the column keys are overwritten
    before each call to f, any other key added by f stays in the table.
    New row arrays are built for the result.

    Ex :
    let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
    a2  , b2  , c2  , d2
    a3  , b3  , c3  , d3

    map_line moncsv (fun htab -> Hashtbl.replace htab "col1" (Hashtbl.find htab "col2" ^ (Hashtbl.find htab "col3"))) =
    col1, col2, col3, col4
    b0c0  , b0  , c0  , d0
    b1c1  , b1  , c1  , d1
    b2c2  , b2  , c2  , d2
    b3c3  , b3  , c3  , d3
    *)
    let map_line rand_csv f =
      let columns = Array.of_list rand_csv.column_name in
      let htab = Hashtbl.create (Array.length columns) in
      let rebuild_row row =
        (* Load the current row into the shared table, keyed by column name. *)
        List.iteri
          (fun i name -> Hashtbl.replace htab name row.(i))
          rand_csv.column_name;
        ignore (f htab);
        (* Read the cells back in header order. *)
        Array.map (fun name -> Hashtbl.find htab name) columns
      in
      let rebuilt = List.map rebuild_row rand_csv.data in
      { column_name = rand_csv.column_name; data = rebuilt }
    
    
    
    (*
    Keep only the rows for which f returns true. Each row is presented to f
    as a hash table that maps a column name to the cell value.

    filter_line : csv -> ((string, string) Hashtbl.t -> bool) -> csv

    A single table is reused for all rows; the column keys are overwritten
    before each call to f. The kept row arrays are shared with the input.

    Ex :
    let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
    a2  , b2  , c2  , d2
    a3  , b3  , c3  , d3

    filter_line moncsv (fun htab -> Hashtbl.find htab "col1" = "a0" || Hashtbl.find htab "col2" = "b1") =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
    *)
    let filter_line rand_csv f =
      let htab = Hashtbl.create (List.length rand_csv.column_name) in
      let keep row =
        (* Load the current row into the shared table, keyed by column name. *)
        List.iteri
          (fun i name -> Hashtbl.replace htab name row.(i))
          rand_csv.column_name;
        f htab
      in
      { column_name = rand_csv.column_name; data = List.filter keep rand_csv.data }
    
    
    
    (*
    Call f on every cell of one column, from the first row to the last.
    The csv itself is not modified by this function.

    iter : csv -> string -> (string -> unit) -> unit

    Raises Failure (from trouve_indice) if col_name is not in the header.

    Ex :
    let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
    a2  , b2  , c2  , d2
    a3  , b3  , c3  , d3

    iter moncsv "col1" print_endline = ()
    (*On the console*)
    a0
    a1
    a2
    a3
    *)
    let iter rand_csv col_name f =
      let idx = trouve_indice rand_csv.column_name col_name in
      let visit row = f row.(idx) in
      List.iter visit rand_csv.data
    
    
    (*
    Write the csv to the file file_name, one line for the header followed
    by one line per row, with a comma as separator.
    save : csv -> string -> unit

    Cells are written verbatim: nothing is quoted or escaped. An empty
    header or an empty row writes nothing at all (not even a newline).
    The channel is closed on every path; if a write fails the exception is
    re-raised after closing.
    *)
    let save rand_csv file_name =
      let oc = open_out file_name in
      let write_cells cells =
        if cells <> [] then begin
          output_string oc (String.concat "," cells);
          output_string oc "\n"
        end
      in
      let write_all () =
        write_cells rand_csv.column_name;
        List.iter (fun row -> write_cells (Array.to_list row)) rand_csv.data
      in
      match write_all () with
      | () -> close_out oc
      | exception e ->
        (* Do not leak the channel when a write fails. *)
        close_out_noerr oc;
        raise e
    
    (*
    Build a csv whose column names are the elements of l and which has no row.
    create : string list -> csv

    Ex :
    create ["col0";"col1";"col2"] =
    col0, col1, col2
    (*no row*)
    *)
    let create l =
      { data = []; column_name = l }
    
    (*
    Return a csv with one more row, provided the row has as many cells as
    the header has columns.
    add_line : csv -> string array -> csv

    The new row is placed at the head of the row list, i.e. before the
    existing rows. The input csv is not modified.

    Raises Failure "Missmatching dimension" when the sizes differ.

    Ex :
    let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1

    add_line moncsv [|"a4";"b4";"c4";"d4"|] =
    col1, col2, col3, col4
    a4  , b4  , c4  , d4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1

    add_line moncsv [|"a4";"d4"|] raises Failure "Missmatching dimension"
    *)
    let add_line csv arr =
      let expected = List.length csv.column_name in
      if Array.length arr <> expected then failwith "Missmatching dimension"
      else { column_name = csv.column_name; data = arr :: csv.data }
    
    (*
    Add a row described by a list of (column name, value) pairs.
    create_line : csv -> (string*string) list -> csv

    Columns that are not mentioned receive "". When a column is mentioned
    several times the last pair wins. The row is added with add_line, so it
    is placed before the existing rows.

    Raises Failure (from trouve_indice) if a name is not in the header.

    Ex :
    let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1

    create_line moncsv [("col4","d4");("col1","a4")] =
    col1, col2, col3, col4
    a4  ,     ,     , d4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1
    *)
    let create_line csv l =
      let row = Array.make (List.length csv.column_name) "" in
      List.iter
        (fun (nom_arg, val_arg) ->
          let idx = trouve_indice csv.column_name nom_arg in
          row.(idx) <- val_arg)
        l;
      add_line csv row
    
    
    (*
    Convert the csv into an undirected graph, returned as two csv values:
    the edges first, then the nodes.
    to_graph : csv -> string -> csv * csv

    Nodes (columns Id, Label, Categorie):
    - one node of category "Asso" per header cell that is neither "" nor
      col_name_label, numbered from 0 in header order;
    - then one node of category "Eleve" per row, labelled with the cell of
      the col_name_label column, numbered after the "Asso" nodes.

    Edges (columns Source, Target, Type, Id, Weight): for every row and
    every "Asso" column whose cell is not "", one "Undirected" edge from
    the row node to the column node, weighted with the cell value. Edge ids
    start at 0 and follow row order then header order.

    Rows are added with add_line, so both results list the most recently
    added row first.

    Raises Failure (from trouve_indice) if col_name_label is not in the header.

    Ex :
    let moncsv =
    col1, col2, col3, col4
    a0  , b0  , c0  , d0
    a1  , b1  , c1  , d1

    to_graph moncsv "col2" =
    (
    Source, Target, Type      , Id, Weight
    4     , 2     , Undirected, 5 , d1
    4     , 1     , Undirected, 4 , c1
    4     , 0     , Undirected, 3 , a1
    3     , 2     , Undirected, 2 , d0
    3     , 1     , Undirected, 1 , c0
    3     , 0     , Undirected, 0 , a0
    )
    ,
    (
    Id, Label, Categorie
    4 , b1   , Eleve
    3 , b0   , Eleve
    2 , col4 , Asso
    1 , col3 , Asso
    0 , col1 , Asso
    )
    *)
    let to_graph csv col_name_label =
      let empty_nodes = create ["Id";"Label";"Categorie"] in
      let empty_edges = create ["Source";"Target";"Type";"Id";"Weight"] in
      (* Maps a node label to its id (as a string). *)
      let node_ids = Hashtbl.create (List.length csv.column_name) in
      let is_assoc_column name =
        name <> "" && not (String.equal name col_name_label)
      in

      (* One "Asso" node per eligible header cell. *)
      let nodes, next_id =
        List.fold_left
          (fun (nodes, next_id) name ->
            if is_assoc_column name then begin
              Hashtbl.add node_ids name (string_of_int next_id);
              let nodes =
                add_line nodes [| string_of_int next_id; name; "Asso" |]
              in
              (nodes, next_id + 1)
            end
            else (nodes, next_id))
          (empty_nodes, 0) csv.column_name
      in

      let label_idx = trouve_indice csv.column_name col_name_label in

      (* One "Eleve" node per row, numbered after the "Asso" nodes. *)
      let nodes, _ =
        List.fold_left
          (fun (nodes, next_id) row ->
            let label = row.(label_idx) in
            Hashtbl.add node_ids label (string_of_int next_id);
            let nodes =
              add_line nodes [| string_of_int next_id; label; "Eleve" |]
            in
            (nodes, next_id + 1))
          (nodes, next_id) csv.data
      in

      (* Edges of one row: one per eligible column with a non-empty cell. *)
      let add_row_edges (edges, first_edge_id) row =
        let label = row.(label_idx) in
        let edges, next_edge_id, _ =
          List.fold_left
            (fun (edges, edge_id, col) name ->
              if row.(col) <> "" && is_assoc_column name then
                let edges =
                  add_line edges
                    [|
                      Hashtbl.find node_ids label;
                      Hashtbl.find node_ids name;
                      "Undirected";
                      string_of_int edge_id;
                      row.(col);
                    |]
                in
                (edges, edge_id + 1, col + 1)
              else (edges, edge_id, col + 1))
            (edges, first_edge_id, 0) csv.column_name
        in
        (edges, next_edge_id)
      in
      let edges, _ = List.fold_left add_row_edges (empty_edges, 0) csv.data in

      (edges, nodes)
    
    (*
    Return a copy of the csv in which every row array is a fresh copy, so
    that modifying a row of the copy in place leaves the original untouched.
    copy : csv -> csv
    *)
    let copy csv =
      let duplicate_rows rows = List.map Array.copy rows in
      { column_name = csv.column_name; data = duplicate_rows csv.data }
    
    (* Return the list of column names of the csv.
       get_column : csv -> string list *)
    let get_column { column_name; _ } = column_name
    
    (* Return the rows of the csv.
       get_data : csv -> string array list
       From this point on, this definition shadows the file-reading get_data
       defined at the top of the file. *)
    let get_data { data; _ } = data