From e9e1682dff621c12023f121c14fd4f1f81e37f8c Mon Sep 17 00:00:00 2001
From: Louis Fourcade <louis.fourcade142@gmail.com>
Date: Tue, 23 Mar 2021 10:32:26 +0100
Subject: [PATCH] Add proxim threshold to mark_in_cat, remove find_sub_cat, update plots

---
 TP3.c           | 76 ++++---------------------------------------------
 trace_graphe.py | 21 ++++++++------
 2 files changed, 18 insertions(+), 79 deletions(-)

diff --git a/TP3.c b/TP3.c
index ad59b6e..7cdcb1b 100644
--- a/TP3.c
+++ b/TP3.c
@@ -191,7 +191,7 @@ int pers_pagerank(edgelist *g, long double *distr_res, long double *p0, long dou
 // ensures
 	// node_res contient 1 si le noeud est dans la catégorie demandée, 0 sinon
 
-int mark_in_cat(edgelist *g, unsigned long *node_res, char *filepath, char *filecatdir, unsigned long cat_wanted) {
+int mark_in_cat(edgelist *g, unsigned long *node_res, char *filepath, char *filecatdir, unsigned long cat_wanted, int proxim) {
 	// parcour en largeur pour trouver les sous catégories de échec
 	adjlist *cat_graph;
 	cat_graph = al_readedgelist(filecatdir);
@@ -279,7 +279,7 @@ int mark_in_cat(edgelist *g, unsigned long *node_res, char *filepath, char *file
 		if (test_end != '\n') {
 			while(fscanf(file, "%lu%c", &cat_ID, &test_end)==2) {
 				// printf("lié à %lu\n", cat_ID);
-				if (marked[cat_ID] != -1) {
+				if (marked[cat_ID] != -1 && marked[cat_ID] <= proxim) {
 					// printf("ouais !\n");
 					node_res[node_ID] = 1;
 				}
@@ -295,72 +295,6 @@ int mark_in_cat(edgelist *g, unsigned long *node_res, char *filepath, char *file
 }
 
 
-// trouve les subcatégories de la catégorie passée en paramètre
-int find_sub_cat(char *filecatdir, unsigned long cat_wanted) {
-	adjlist *cat_graph;
-	cat_graph = al_readedgelist(filecatdir);
-	mkadjlist(cat_graph);
-
-	// fait un BFS
-	// ################################
-	file_t F = new_file(cat_graph->n);
-	int *marked = (int *)malloc((cat_graph->n) * sizeof(int));
-	for (unsigned long i = 0; i < cat_graph->n; i++)
-	{
-		marked[i] = -1;
-	}
-	enfile(F, cat_wanted);
-	marked[cat_wanted] = 0;
-	unsigned long tmp = cat_wanted;
-	while (!is_empty(F))
-	{
-		tmp = defile(F);
-		// printf("%li %li %li %li\n", tmp, marked[tmp], cat_graph->cd[tmp], cat_graph->cd[tmp+1]);
-		for (unsigned long i = cat_graph->cd[tmp]; i < cat_graph->cd[tmp + 1]; i++)
-		{
-			if (marked[cat_graph->adj[i]] == -1)
-			{
-				if (enfile(F, cat_graph->adj[i]) == 1)
-				{
-					marked[cat_graph->adj[i]] = marked[tmp] + 1;
-				}
-				else
-				{
-					printf("\n\nPile trop petite !\n\n");
-				}
-			}
-		}
-	}
-	unsigned long no_marque = 0;
-	for (unsigned long i = 0; i < cat_graph->n; i++)
-	{
-		if (marked[i] == -1 && cat_graph->cd[0] != 0)
-		{
-			no_marque++;
-		}
-	}
-	// ################################
-
-
-
-	// trouve les noeuds qui ont été marqués
-	int count_cat = 0;
-	for (unsigned long i = 0; i < cat_graph->n; i += 1) {
-		if (marked[i] != -1) {
-			count_cat += 1;
-		}
-	}
-
-	printf("cat : %lu related nb : %i/%lu\n", cat_wanted, count_cat, cat_graph->n);
-
-
-
-	free(marked);
-	free_adjlist(cat_graph);
-	return 1;
-}
-
-
 
 void debug_print_tabl(long double *tab) {
 	printf("\n\n---------\n");
@@ -561,7 +495,7 @@ int main(int argc, char **argv) {
 
 
 	printf("finding nodes related to chess category...\n");
-	mark_in_cat(g, nodes, "../alr21--pageCategList--enwiki--20071018.txt", "../alr21--categDAG--dirLinks--enwiki-20071018.txt", 691713);
+	mark_in_cat(g, nodes, "../alr21--pageCategList--enwiki--20071018.txt", "../alr21--categDAG--dirLinks--enwiki-20071018.txt", 691713, 3);
 	
 	
 
@@ -577,7 +511,7 @@ int main(int argc, char **argv) {
 	printf("computing rooted pagerank of Magnus Carlsen...\n");
 
 
-	pers_pagerank(g, p_distrib, p0_magnus, 0.55, 50);
+	pers_pagerank(g, p_distrib, p0_magnus, 0.15, 15);
 
 	plot_out_2D_ld_ul("magnus.csv", g->n, p_distrib, nodes);
 
@@ -590,7 +524,7 @@ int main(int argc, char **argv) {
 
 		// restart vector for chess && boxing
 	printf("finding nodes related to boxing category...\n");
-	mark_in_cat(g, nodes_2, "../alr21--pageCategList--enwiki--20071018.txt", "../alr21--categDAG--dirLinks--enwiki-20071018.txt", 738624);
+	mark_in_cat(g, nodes_2, "../alr21--pageCategList--enwiki--20071018.txt", "../alr21--categDAG--dirLinks--enwiki-20071018.txt", 738624, 4);
 
 	printf("construct of the chess/boxing restart vector ...\n");
 	long double *p0_chess_box = p0_magnus;
diff --git a/trace_graphe.py b/trace_graphe.py
index cebdcb3..75f4ce7 100644
--- a/trace_graphe.py
+++ b/trace_graphe.py
@@ -88,7 +88,7 @@ plt.plot(data['x'], data['y'], 'o')
 
 #plot de la régression linéaire
 m, b = np.polyfit(data['x'], data['y'], 1)
-plt.plot(data['x'], m*data['x']+b, 'r.', markersize=10)
+plt.plot(data['x'], m*data['x']+b, 'r.', markersize=1)
 
 plt.xscale("log")
 plt.yscale("log")
@@ -114,7 +114,7 @@ plt.scatter(data['x'], data['y'])
 
 #plot de la régression linéaire
 m, b = np.polyfit(data['x'], data['y'], 1)
-#plt.plot(data['x'], m*data['x']+b, 'r.')
+plt.plot(data['x'], m*data['x']+b, 'r.', markersize=1)
 
 plt.xscale("log")
 plt.yscale("log")
@@ -140,7 +140,7 @@ plt.scatter(data['x'], data['y'])
 
 #plot de la régression linéaire
 m, b = np.polyfit(data['x'], data['y'], 1)
-#plt.plot(data['x'], m*data['x']+b, 'r.')
+plt.plot(data['x'], m*data['x']+b, 'r.', markersize=1)
 
 plt.xscale("log")
 plt.yscale("log")
@@ -167,7 +167,7 @@ plt.scatter(data['x'], data['y'])
 
 #plot de la régression linéaire
 m, b = np.polyfit(data['x'], data['y'], 1)
-#plt.plot(data['x'], m*data['x']+b, 'r.')
+plt.plot(data['x'], m*data['x']+b, 'r.', markersize=1)
 
 plt.xscale("log")
 plt.yscale("log")
@@ -206,7 +206,7 @@ work = data_magnus.sort_values(by = 'x', ascending = False)
 ord_pagerank = list(work['x'])
 ord_incat = list(work['y'])
 
-fig, axs = plt.subplots(2, sharex=True)
+fig, axs = plt.subplots(2, sharex=True, figsize=(10, 9))
 fig.suptitle('Corrélation entre la proximité aux noeuds et les catégories')
 axs[0].plot(range(len(ord_pagerank)), ord_pagerank)
 
@@ -236,6 +236,9 @@ axs[0].set_ylabel("Proximité au noeud \"magnus carlsen\"")
 axs[1].set_ylabel("Nombre de noeuds dans les sous-catégories de \"échec\"")
 
 
+plt.savefig("chess.png", dpi=200, format = 'png')
+
+
 plt.show()
 
 
@@ -252,7 +255,7 @@ work = data_magnus.sort_values(by = 'x', ascending = False)
 ord_pagerank = list(work['x'])
 ord_incat = list(work['y'])
 
-fig, axs = plt.subplots(2, sharex=True)
+fig, axs = plt.subplots(2, sharex=True, figsize=(10, 9))
 fig.suptitle('Personalized pagerank avec un vecteur p0 orienté échec ou box')
 axs[0].plot(range(len(ord_pagerank)), ord_pagerank)
 
@@ -273,14 +276,16 @@ for i in range(len(ord_incat)):
 axs[1].plot(range(len(summed_incat)), summed_incat)
 axs[1].set_xscale("log")
 axs[1].set_yscale("linear")
-axs[0].axvline(x = 2000000, color = "black", linestyle='--')
-axs[1].axvline(x = 2000000, color = "black", linestyle='--')
+axs[0].axvline(x = 400000, color = "black", linestyle='--')
+axs[1].axvline(x = 400000, color = "black", linestyle='--')
 
 
 axs[1].set_xlabel("Rang du noeud")
 axs[0].set_ylabel("Valeur du pagerank")
 axs[1].set_ylabel("Nombre de noeuds dans les sous-catégories de \"échec\" et \"box\"")
 
+plt.savefig("chessbox.png", dpi=200, format = 'png')
+
 
 plt.show()
 
-- 
GitLab