diff --git a/Makefile b/Makefile index e2b0f80150c34fb933d22757091f87346a248965..2dd4ec463506165ed265816eeb9eb2bdc14f9610 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ CC = gcc OPTIONS = -Wall -Wextra -O3 MATH = -lm -EXECUTABLE = TP1 TP2 TP3 TP4 +EXECUTABLE = TP1 TP2 TP3 TP4 TP4_q4 all: $(EXECUTABLE) @@ -27,6 +27,8 @@ TP3: TP3.o plotout.o file.o TP4: TP4.o $(CC) $(OPTIONS) -o $@ $^ $(MATH) -g +TP4_q4: TP4_q4.o + $(CC) $(OPTIONS) -o $@ $^ $(MATH) -g # TP1.o: implems.c diff --git a/TP3.c b/TP3.c index 7cdcb1b2199d3befcf051df1324cfeaa2eb601ab..74cf40af8c7dc3bc926984e91571621f68907bea 100644 --- a/TP3.c +++ b/TP3.c @@ -6,37 +6,40 @@ #include <limits.h> #include <float.h> - // fonction écrivant les degrés sortant de chaque noeuds dans le tableau out_res -void calc_out_deg(edgelist *g, unsigned long *out_res) { +void calc_out_deg(edgelist *g, unsigned long *out_res) +{ unsigned long i; - for (i = 0; i < g->n; i += 1) { + for (i = 0; i < g->n; i += 1) + { out_res[i] = 0; } - for (i = 0; i < g->e; i += 1) { + for (i = 0; i < g->e; i += 1) + { out_res[g->edges[i].s]++; } return; } - // fonction écrivant les degrés entrant de chaque noeuds dans le tableau in_res -void calc_in_deg(edgelist *g, unsigned long *in_res) { +void calc_in_deg(edgelist *g, unsigned long *in_res) +{ unsigned long i; - for (i = 0; i < g->n; i += 1) { + for (i = 0; i < g->n; i += 1) + { in_res[i] = 0; } - for (i = 0; i < g->e; i +=1) { + for (i = 0; i < g->e; i += 1) + { in_res[g->edges[i].t]++; } return; } - - // fonction réalisant nb_iter itération de power pagerank // distr_res doit être malloc et de taille g->n -int pagerank(edgelist *g, long double *distr_res, long double alpha, unsigned long nb_iter) { +int pagerank(edgelist *g, long double *distr_res, long double alpha, unsigned long nb_iter) +{ unsigned long i, j; unsigned long *out_deg; @@ -44,47 +47,48 @@ int pagerank(edgelist *g, long double *distr_res, long double alpha, unsigned lo long double *prob_t1; long double *swap_pointeur; - - out_deg = (unsigned long*) malloc((g->n)*sizeof(unsigned long)); - prob_t1 = (long double*) malloc((g->n)*sizeof(long double)); + out_deg = (unsigned long *)malloc((g->n) * sizeof(unsigned long)); + prob_t1 = (long double *)malloc((g->n) * sizeof(long double)); // calcul les degrés sortants calc_out_deg(g, out_deg); // initialisation des probas - for (i = 0; i < g->n; i += 1) { - prob_t[i] = 1./((long double) (g->n)); + for (i = 0; i < g->n; i += 1) + { + prob_t[i] = 1. / ((long double)(g->n)); } - - // itérations de PageRank - for (i = 0; i < nb_iter; i += 1) { + for (i = 0; i < nb_iter; i += 1) + { // mise à zero de prob_t1 - for (j = 0; j < g->n; j += 1) { + for (j = 0; j < g->n; j += 1) + { prob_t1[j] = 0; } - - // itération sur les arêtes pour chaque quantité - for (j = 0; j < g->e; j += 1) { - prob_t1[g->edges[j].t] += (1 - alpha)*(prob_t[g->edges[j].s]/out_deg[g->edges[j].s]); + for (j = 0; j < g->e; j += 1) + { + prob_t1[g->edges[j].t] += (1 - alpha) * (prob_t[g->edges[j].s] / out_deg[g->edges[j].s]); } - // ajout du poids alpha en plus - for (j = 0; j < g->n; j +=1) { - prob_t1[j] += alpha*(1./((long double) (g->n))); + for (j = 0; j < g->n; j += 1) + { + prob_t1[j] += alpha * (1. / ((long double)(g->n))); } // renormalisation du vecteur long double v = 0; - for (j = 0; j < g->n; j += 1) { + for (j = 0; j < g->n; j += 1) + { v += prob_t1[j]; } - v = (1-v)/((long double) (g->n)); - for (j = 0; j < g->n; j += 1) { + v = (1 - v) / ((long double)(g->n)); + for (j = 0; j < g->n; j += 1) + { prob_t1[j] += v; } @@ -95,71 +99,77 @@ int pagerank(edgelist *g, long double *distr_res, long double alpha, unsigned lo } // si nombre impair d'itération, copie du résultat final dans le bon tableau - if (nb_iter%2 == 1) { - for (i = 0; i < g->n; i += 1) { + if (nb_iter % 2 == 1) + { + for (i = 0; i < g->n; i += 1) + { distr_res[i] = prob_t[i]; } free(prob_t); } - else { + else + { free(prob_t1); } free(out_deg); return 0; - } // fonction réalisant nb_iter itérations de personalized power pagerank // distr_res doit être malloc de taille g-> // p0 doit être un vecteur de probabilités de taille g->n dont la somme est 1 -int pers_pagerank(edgelist *g, long double *distr_res, long double *p0, long double alpha, unsigned long nb_iter) { +int pers_pagerank(edgelist *g, long double *distr_res, long double *p0, long double alpha, unsigned long nb_iter) +{ unsigned long i, j; unsigned long *out_deg; long double *prob_t = distr_res; long double *prob_t1; long double *swap_pointeur; - out_deg = (unsigned long*) malloc((g->n)*sizeof(unsigned long)); - prob_t1 = (long double*) malloc((g->n)*sizeof(long double)); - - + out_deg = (unsigned long *)malloc((g->n) * sizeof(unsigned long)); + prob_t1 = (long double *)malloc((g->n) * sizeof(long double)); // calcul les degrés sortants calc_out_deg(g, out_deg); - // initialisation des probas - for (i = 0; i < g->n; i += 1) { - prob_t[i] = 1./((long double) (g->n)); + for (i = 0; i < g->n; i += 1) + { + prob_t[i] = 1. / ((long double)(g->n)); } - // itérations de PageRank - for (i = 0; i < nb_iter; i += 1) { + for (i = 0; i < nb_iter; i += 1) + { // mise à zero de prob_t1 - for (j = 0; j < g->n; j += 1) { + for (j = 0; j < g->n; j += 1) + { prob_t1[j] = 0; } // itération sur les arêtes pour chaque quantité - for (j = 0; j < g->e; j += 1) { - prob_t1[g->edges[j].t] += (1 - alpha)*(prob_t[g->edges[j].s]/out_deg[g->edges[j].s]); + for (j = 0; j < g->e; j += 1) + { + prob_t1[g->edges[j].t] += (1 - alpha) * (prob_t[g->edges[j].s] / out_deg[g->edges[j].s]); } // ajout du poids alpha en plus (pondéré par P0) /!\ remise à n pour le poid de P0 - for (j = 0; j < g->n; j +=1) { - prob_t1[j] += alpha*p0[j]; // renvoie une proportion alpha sur p0 - // prob_t1[j] += alpha*(1./((long double) (g->n))); // renvoie une proportion alpha dans le reste du tableau + for (j = 0; j < g->n; j += 1) + { + prob_t1[j] += alpha * p0[j]; // renvoie une proportion alpha sur p0 + // prob_t1[j] += alpha*(1./((long double) (g->n))); // renvoie une proportion alpha dans le reste du tableau } // renormalisation du vecteur (avec poid pondéré par P0) /!\ remise à n pour le poid de p0 long double v = 0; - for (j = 0; j < g->n; j += 1) { + for (j = 0; j < g->n; j += 1) + { v += prob_t1[j]; } - long double vprim = (1-v)/((long double) (g->n)); - v = (1-v); - for (j = 0; j < g->n; j += 1) { + long double vprim = (1 - v) / ((long double)(g->n)); + v = (1 - v); + for (j = 0; j < g->n; j += 1) + { // prob_t1[j] += v*p0[j]; // normalise uniquement sur les composantes de P0 prob_t1[j] += vprim; // normalise sur la totalité du vecteur } @@ -169,29 +179,31 @@ int pers_pagerank(edgelist *g, long double *distr_res, long double *p0, long dou prob_t1 = swap_pointeur; } // si nombre impair d'itération, copie du résultat final dans le bon tableau - if (nb_iter%2 == 1) { - for (i = 0; i < g->n; i += 1) { + if (nb_iter % 2 == 1) + { + for (i = 0; i < g->n; i += 1) + { distr_res[i] = prob_t[i]; } free(prob_t); } - else { + else + { free(prob_t1); } free(out_deg); return 0; } - - // fonction déterminant quels noeuds sont dans la catégorie donnée -// requires - // node_res est un tableau de taille g->n - // filepath est le chemin vers le fichier d'association noeud ID -> category ID +// requires +// node_res est un tableau de taille g->n +// filepath est le chemin vers le fichier d'association noeud ID -> category ID // ensures - // node_res contient 1 si le noeud est dans la catégorie demandée, 0 sinon +// node_res contient 1 si le noeud est dans la catégorie demandée, 0 sinon -int mark_in_cat(edgelist *g, unsigned long *node_res, char *filepath, char *filecatdir, unsigned long cat_wanted, int proxim) { +int mark_in_cat(edgelist *g, unsigned long *node_res, char *filepath, char *filecatdir, unsigned long cat_wanted, int proxim) +{ // parcour en largeur pour trouver les sous catégories de échec adjlist *cat_graph; cat_graph = al_readedgelist(filecatdir); @@ -237,30 +249,26 @@ int mark_in_cat(edgelist *g, unsigned long *node_res, char *filepath, char *file } // ################################ - - // trouve les noeuds qui ont été marqués int count_cat = 0; - for (unsigned long i = 0; i < cat_graph->n; i += 1) { - if (marked[i] != -1) { + for (unsigned long i = 0; i < cat_graph->n; i += 1) + { + if (marked[i] != -1) + { count_cat += 1; } } printf("cat : %lu related nb : %i/%lu\n", cat_wanted, count_cat, cat_graph->n); - - free_adjlist(cat_graph); // --------------------------------------------------------------------- // --------------------------------marked != -1 si subcat de wanted_cat--- // --------------------------------------------------------------------- - - - // mise à zero du tableau resultat - for (unsigned long i = 0; i < g->n; i += 1) { + for (unsigned long i = 0; i < g->n; i += 1) + { node_res[i] = 0; } @@ -271,53 +279,65 @@ int mark_in_cat(edgelist *g, unsigned long *node_res, char *filepath, char *file FILE *file = fopen(filepath, "r"); char line[1000]; // ignore les premières lignes de commentaire - while(fgets(line, sizeof line, file) && *line == '#') {} + while (fgets(line, sizeof line, file) && *line == '#') + { + } // lit les entrées - while(fscanf(file, "%lu%c", &node_ID, &test_end) == 2) { + while (fscanf(file, "%lu%c", &node_ID, &test_end) == 2) + { // printf("node start : %lu\n", node_ID); - if (test_end != '\n') { - while(fscanf(file, "%lu%c", &cat_ID, &test_end)==2) { + if (test_end != '\n') + { + while (fscanf(file, "%lu%c", &cat_ID, &test_end) == 2) + { // printf("lié à %lu\n", cat_ID); - if (marked[cat_ID] != -1 && marked[cat_ID] <= proxim) { + if (marked[cat_ID] != -1 && marked[cat_ID] <= proxim) + { // printf("ouais !\n"); node_res[node_ID] = 1; } - if (test_end == '\n') {break;} + if (test_end == '\n') + { + break; + } } } } - - free(marked); return 0; } - - -void debug_print_tabl(long double *tab) { +void debug_print_tabl(long double *tab) +{ printf("\n\n---------\n"); - for (int i = 0; i < 5; i += 1) { + for (int i = 0; i < 5; i += 1) + { +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__) + printf("%lf\n", (double) tab[i]); +#else printf("%Le\n", tab[i]); +#endif } printf("\n"); } - // fonction déterminant, pour un vecteur de distribution donné les 5 meilleurs et les 5 pire // requires : - // resmin est un tableau de 5 long double malloc - // resmax est un tableau de 5 long double malloc +// resmin est un tableau de 5 long double malloc +// resmax est un tableau de 5 long double malloc // ensures : - // resmin contient les 5 pages les moins visitées du moins au plus - // resmax contient les 5 pages les plus visités du plus au moins -int find_5(edgelist *g, long double *distr, unsigned long *resmin, unsigned long *resmax) { +// resmin contient les 5 pages les moins visitées du moins au plus +// resmax contient les 5 pages les plus visités du plus au moins +int find_5(edgelist *g, long double *distr, unsigned long *resmin, unsigned long *resmax) +{ unsigned long ind; // initialise les tableaux max et min - long double *valmin = (long double*) malloc(5*sizeof(long double)); - long double *valmax = (long double*) malloc(5*sizeof(long double)); - for (int i = 0; i < 5; i += 1) { + long double *valmin = (long double *)malloc(5 * sizeof(long double)); + long double *valmax = (long double *)malloc(5 * sizeof(long double)); + for (int i = 0; i < 5; i += 1) + { resmin[i] = 0; resmax[i] = 0; valmin[i] = DBL_MAX; @@ -325,15 +345,18 @@ int find_5(edgelist *g, long double *distr, unsigned long *resmin, unsigned long } // itères sur l'ensemble des probas de distribution - // resmin trié par ordre croissant - // resmax trié par ordre décroissant - for (unsigned long i = 0; i < g->n; i += 1) { + // resmin trié par ordre croissant + // resmax trié par ordre décroissant + for (unsigned long i = 0; i < g->n; i += 1) + { // si la valeur est plus petite que le plus grand des plus petit - if (distr[i] <= valmin[4] && (i != 13834638 && i != 13834637 && i != 13834634 && i!= 13834633 && i != 13834632 && i != 13834631 && i != 13834630 && !(i > 13834615 && i < 13834629) && !(i>13834611 && i <13834615) && !(i>13834606 && i<13834611))) { + if (distr[i] <= valmin[4] && (i != 13834638 && i != 13834637 && i != 13834634 && i != 13834633 && i != 13834632 && i != 13834631 && i != 13834630 && !(i > 13834615 && i < 13834629) && !(i > 13834611 && i < 13834615) && !(i > 13834606 && i < 13834611))) + { ind = 4; - while (ind > 0 && distr[i] <= valmin[ind - 1]) { - valmin[ind] = valmin[ind-1]; - resmin[ind] = resmin[ind-1]; + while (ind > 0 && distr[i] <= valmin[ind - 1]) + { + valmin[ind] = valmin[ind - 1]; + resmin[ind] = resmin[ind - 1]; ind -= 1; } valmin[ind] = distr[i]; @@ -341,24 +364,30 @@ int find_5(edgelist *g, long double *distr, unsigned long *resmin, unsigned long } // si la valeur est plus grande que le plus petit des plus grands - if (distr[i] > valmax[4]) { + if (distr[i] > valmax[4]) + { ind = 4; - while (ind > 0 && distr[i] > valmax[ind - 1]) { - valmax[ind] = valmax[ind-1]; - resmax[ind] = resmax[ind-1]; + while (ind > 0 && distr[i] > valmax[ind - 1]) + { + valmax[ind] = valmax[ind - 1]; + resmax[ind] = resmax[ind - 1]; ind -= 1; } valmax[ind] = distr[i]; resmax[ind] = i; // debug_print_tabl(valmax); } - } // affiche les résultats printf("max |min\n"); - for (ind = 0; ind < 5; ind += 1) { + for (ind = 0; ind < 5; ind += 1) + { +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__) + printf("|%lu :: %lf |%lu :: %lf\n", resmax[ind], (double) valmax[ind], resmin[ind], (double) valmin[ind]); +#else printf("|%lu :: %Le |%lu :: %Le\n", resmax[ind], valmax[ind], resmin[ind], valmin[ind]); +#endif } free(valmin); @@ -366,13 +395,10 @@ int find_5(edgelist *g, long double *distr, unsigned long *resmin, unsigned long return 0; } - - - - - -int main(int argc, char **argv) { - if (argc < 2) { +int main(int argc, char **argv) +{ + if (argc < 2) + { printf("un argument est attendu\n"); return 1; } @@ -381,28 +407,27 @@ int main(int argc, char **argv) { time_t t1, t2, t3; // PARSING - t1=time(NULL); + t1 = time(NULL); - printf("Reading edgelist from file %s\n",argv[1]); - g=el_readedgelist(argv[1]); + printf("Reading edgelist from file %s\n", argv[1]); + g = el_readedgelist(argv[1]); t3 = time(NULL); - printf("- edge list time = %ldh%ldm%lds\n",(t3-t1)/3600,((t3-t1)%3600)/60,((t3-t1)%60)); - // printf("- edge list time = %I64dh%I64dm%I64ds\n",(t3-t1)/3600,((t3-t1)%3600)/60,((t3-t1)%60)); - - printf("Number of nodes: %lu\n",g->n); - printf("Number of edges: %lu\n",g->e); - - - t2=time(NULL); - - printf("- Overall time = %ldh%ldm%lds\n",(t2-t1)/3600,((t2-t1)%3600)/60,((t2-t1)%60)); - // printf("- Overall time = %I64dh%I64dm%I64ds\n",(t2-t1)/3600,((t2-t1)%3600)/60,((t2-t1)%60)); - - - +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__) + printf("- edge list time = %I64dh%I64dm%I64ds\n", (t3 - t1) / 3600, ((t3 - t1) % 3600) / 60, ((t3 - t1) % 60)); +#else + printf("- edge list time = %ldh%ldm%lds\n", (t3 - t1) / 3600, ((t3 - t1) % 3600) / 60, ((t3 - t1) % 60)); +#endif + printf("Number of nodes: %lu\n", g->n); + printf("Number of edges: %lu\n", g->e); + t2 = time(NULL); +#if defined(WIN32) || defined(_WIN32) || defined(__WIN32) && !defined(__CYGWIN__) + printf("- Overall time = %I64dh%I64dm%I64ds\n", (t2 - t1) / 3600, ((t2 - t1) % 3600) / 60, ((t2 - t1) % 60)); +#else + printf("- Overall time = %ldh%ldm%lds\n", (t2 - t1) / 3600, ((t2 - t1) % 3600) / 60, ((t2 - t1) % 60)); +#endif // fonctions du TP3 // long double *p_distrib; @@ -412,16 +437,10 @@ int main(int argc, char **argv) { // highest = (unsigned long*) malloc(5*sizeof(unsigned long)); // lowest = (unsigned long*) malloc(5*sizeof(unsigned long)); - // pagerank(g, p_distrib, 0.15, 70); - // find_5(g, p_distrib, lowest, highest); - - - - // //TABLEAUX pour les CORRELATIONS // long double *p_15 = p_distrib; @@ -452,7 +471,6 @@ int main(int argc, char **argv) { // printf("pagerank 0.9 processing...\n"); // pagerank(g, p_9, 0.9, 70); - // printf("output plots ...\n"); // plot_out_2D_ld_ul("1.csv", g->n, p_15, in_deg); // plot_out_2D_ld_ul("2.csv", g->n, p_15, out_deg); @@ -461,94 +479,75 @@ int main(int argc, char **argv) { // plot_out_2D_ld_ld("5.csv", g->n, p_15, p_5); // plot_out_2D_ld_ld("6.csv", g->n, p_15, p_9); - // free(p_1); // free(p_2); // free(p_5); // free(p_9); // free(p_15); - - - - - - - - // personalized PAGERANK - // ################# DATA ############## - // ID of category chess : 691713 - // ID of page Magnus carlsen : 442682 - // ID of category boxing : 738624 - // ##################################### - + // ################# DATA ############## + // ID of category chess : 691713 + // ID of page Magnus carlsen : 442682 + // ID of category boxing : 738624 + // ##################################### long double *p_distrib; long double *p0_magnus; unsigned long *nodes; unsigned long *nodes_2; - p_distrib = (long double*) malloc((g->n)*sizeof(long double)); - nodes = (unsigned long*) malloc((g->n)*sizeof(unsigned long)); - nodes_2 = (unsigned long*) malloc((g->n)*sizeof(unsigned long)); - + p_distrib = (long double *)malloc((g->n) * sizeof(long double)); + nodes = (unsigned long *)malloc((g->n) * sizeof(unsigned long)); + nodes_2 = (unsigned long *)malloc((g->n) * sizeof(unsigned long)); printf("finding nodes related to chess category...\n"); mark_in_cat(g, nodes, "../alr21--pageCategList--enwiki--20071018.txt", "../alr21--categDAG--dirLinks--enwiki-20071018.txt", 691713, 3); - - - - - // ROOTED PAGERANK of MAGNUS CARLSEN - p0_magnus = (long double*) malloc((g->n)*sizeof(long double)); + // ROOTED PAGERANK of MAGNUS CARLSEN + p0_magnus = (long double *)malloc((g->n) * sizeof(long double)); - for (unsigned long i = 0; i < g->n; i += 1) { + for (unsigned long i = 0; i < g->n; i += 1) + { p0_magnus[i] = 0; } - p0_magnus[442682] = 1; //page magnus carlsen + p0_magnus[442682] = 1; //page magnus carlsen printf("computing rooted pagerank of Magnus Carlsen...\n"); - pers_pagerank(g, p_distrib, p0_magnus, 0.15, 15); plot_out_2D_ld_ul("magnus.csv", g->n, p_distrib, nodes); - - - - - - - - // restart vector for chess && boxing + // restart vector for chess && boxing printf("finding nodes related to boxing category...\n"); mark_in_cat(g, nodes_2, "../alr21--pageCategList--enwiki--20071018.txt", "../alr21--categDAG--dirLinks--enwiki-20071018.txt", 738624, 4); printf("construct of the chess/boxing restart vector ...\n"); long double *p0_chess_box = p0_magnus; unsigned long count = 0; - for (unsigned long i = 0; i < g->n; i += 1) { + for (unsigned long i = 0; i < g->n; i += 1) + { count += nodes[i] + nodes_2[i]; p0_chess_box[i] = nodes[i] + nodes_2[i]; } - for (unsigned long i = 0; i < g->n; i += 1) { - p0_chess_box[i] = p0_chess_box[i]/count; + for (unsigned long i = 0; i < g->n; i += 1) + { + p0_chess_box[i] = p0_chess_box[i] / count; } printf("computing personalized pagerank of chess and box...\n"); pers_pagerank(g, p_distrib, p0_chess_box, 0.15, 50); - for (unsigned long i = 0; i < g->n; i += 1) { - if (nodes[i] == 1 || nodes_2[i] == 1) { + for (unsigned long i = 0; i < g->n; i += 1) + { + if (nodes[i] == 1 || nodes_2[i] == 1) + { nodes[i] = 1; } } plot_out_2D_ld_ul("chessbox.csv", g->n, p_distrib, nodes); - free(nodes); free(p_distrib); return 0; diff --git a/small_test.txt b/small_test.txt index 61050cba65629400cb048b0616b841535c07f362..2fc6308314ec75fd5019cc2266ba380f8a993df8 100644 --- a/small_test.txt +++ b/small_test.txt @@ -2,12 +2,24 @@ # Maison # Nodes: 334863 Edges: 925872 # FromNodeId ToNodeId +0 1 +0 2 +0 3 +0 4 1 2 1 3 -1 4 2 3 -2 4 -3 4 -4 5 -6 7 -6 8 \ No newline at end of file +3 6 +4 5 +4 6 +4 7 +5 6 +5 7 +6 7 +8 9 +8 10 +8 11 +8 12 +8 13 +10 11 +12 13 \ No newline at end of file