diff --git a/Projet/CODE/apm/$ b/Projet/CODE/apm/$
new file mode 100644
index 0000000000000000000000000000000000000000..f2ff57bca5727ab4bcbc529bc656405b7cd5670e
--- /dev/null
+++ b/Projet/CODE/apm/$
@@ -0,0 +1,318 @@
+/**
+ * APPROXIMATE PATTERN MATCHING
+ *
+ * INF560 X2016
+ */
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#define APM_DEBUG 0
+
+char * read_input_file(char * filename, int * size)
+{
+    char * buf;
+    off_t fsize;
+    int fd = 0;
+    int n_bytes = 1;
+
+    /* Open the text file */
+    fd = open(filename, O_RDONLY);
+    if (fd == -1)
+    {
+        fprintf(stderr, "Unable to open the text file <%s>\n", filename);
+        return NULL;
+    }
+
+    /* Get the number of characters in the textfile */
+    fsize = lseek(fd, 0, SEEK_END);
+    lseek(fd, 0, SEEK_SET);
+    /* TODO check return of lseek */
+
+#if APM_DEBUG
+    printf("File length: %lld\n", fsize);
+#endif
+
+    /* Allocate data to copy the target text */
+    buf = (char *)malloc(fsize * sizeof (char));
+    if (buf == NULL)
+    {
+        fprintf(stderr, "Unable to allocate %ld byte(s) for main array\n",
+                 fsize);
+        return NULL;
+    }
+
+    n_bytes = read(fd, buf, fsize);
+    if (n_bytes != fsize)
+    {
+        fprintf(stderr,
+          "Unable to copy %ld byte(s) from text file (%d byte(s) copied)\n",
+          fsize, n_bytes);
+        return NULL;
+    }
+
+#if APM_DEBUG
+    printf("Number of read bytes: %d\n", n_bytes);
+#endif
+
+    *size = n_bytes;
+    close(fd);
+    return buf;
+}
+
+#define MIN3(a, b, c) ((a)<(b) ? ((a)<(c) ? (a) : (c)) : ((b)<(c) ? (b) : (c)))
+
+int levenshtein(char *s1, char *s2, int len, int * column, int approx_factor)
+{
+    int x, y, lastdiag, olddiag;
+
+    for (y = 1; y <= len; y++)
+    {
+        column[y] = y;
+    }
+    for (x = 1; x <= len; x++)
+    {
+        column[0] = x;
+        lastdiag = x-1;
+        for (y = 1; y <= len; y++)
+        {
+            olddiag = column[y];
+            column[y] = MIN3(column[y] + 1,
+                            column[y-1] + 1,
+                            lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1));
+            lastdiag = olddiag;
+        }
+    }
+    return (column[len] <= approx_factor) ? 1 : 0;
+}
+
+__global__ void levenshtein_cu(char *find, char *buf, int len, int n_bytes
+		,int approx_factor, int* g_column, int* result_vec)
+{		
+		int tId = blockIdx.x * blockDim.x + threadIdx.x;//global thread id
+		if (tId > n_bytes)
+		{return;}//we are past the buffer length - do not process
+
+		//position s2 and column to the right position in the pre-allocated
+		//arrays
+		char* s2 = buf+tId;
+		int* column = g_column+tId*(len+1);	
+
+		//i do not understand this algorithm and god do i not want to. 
+		int x, y, lastdiag, olddiag;
+
+    for (y = 1; y <= len; y++)
+    {
+        column[y] = y;
+    }
+    for (x = 1; x <= len; x++)
+    {
+        column[0] = x;
+        lastdiag = x-1;
+        for (y = 1; y <= len; y++)
+        {
+            olddiag = column[y];
+            column[y] = MIN3(column[y] + 1,
+                            column[y-1] + 1,
+                            lastdiag + (find[y-1] == s2[x-1] ? 0 : 1));
+            lastdiag = olddiag;
+        }
+    }
+	
+		if (column[len] <= approx_factor)
+		{result_vec[tId] = 1;}//its a match
+} 
+
+int main(int argc, char ** argv)
+{
+    char ** pattern;
+    char * filename;
+    int approx_factor = 0;
+    int nb_patterns = 0;
+    
+		char * buf;
+    struct timeval t1, t2;
+    double duration;
+    int n_bytes;
+    int * n_matches;
+		
+		//cuda-related vars
+		char *buf_dev;
+		int NTBB = 1024; //Number of threads by blocks
+		int NB;//Number of blocks
+		cudaError_t cu_err;
+
+    /* Check number of arguments */
+    if (argc < 4)
+    {
+        printf("Usage: %s approximation_factor "
+                "dna_database pattern1 pattern2 ...\n",
+                argv[0]);
+        return 1;
+    }
+
+    approx_factor = atoi(argv[1]);/* Get the distance factor */
+    filename = argv[2];/* Grab the filename containing the target text */
+    nb_patterns = argc - 3;/* Get the number of patterns to search for */
+    
+		pattern = (char **)malloc(nb_patterns * sizeof(char*));    
+		if (pattern == NULL)/*Fill the pattern*/
+    {
+        fprintf(stderr,
+                 "Unable to allocate array of pattern of size %d\n",
+                 nb_patterns);
+        return 1;
+    }
+
+    for (int i=0; i < nb_patterns; i++) /* Grab the patterns */
+    {
+        int l;
+        l = strlen(argv[i+3]);
+    
+				if (l <= 0)
+        {
+            fprintf(stderr, "Error while parsing argument %d\n", i+3);
+            return 1;
+        }
+        
+				pattern[i] = (char *)malloc((l+1) * sizeof(char));
+        if (pattern[i] == NULL)
+        {
+            fprintf(stderr, "Unable to allocate string of size %d\n", l);
+            return 1;
+        }
+
+        strncpy(pattern[i], argv[i+3], (l+1));
+    }
+
+
+    printf("Approximate Pattern Mathing: "
+            "looking for %d pattern(s) in file %s w/ distance of %d\n",
+            nb_patterns, filename, approx_factor);
+
+    buf = read_input_file(filename, &n_bytes);
+    if (buf == NULL)
+    {
+				fprintf(stderr, "Error: NULL pointer from reading input file.");
+        return 1;
+    }
+
+		cudaMalloc((void**)&buf_dev, n_bytes * sizeof(char));
+		if ((cu_err = cudaGetLastError()) != cudaSuccess)
+		{
+			fprintf(stderr, "Unable to allocate buffer on device: %s.\n"
+					, cudaGetErrorString(cu_err));
+			return ;
+		}
+
+		cudaMemcpy(NULL, buf, n_bytes, cudaMemcpyHostToDevice);
+		if ((cu_err = cudaGetLastError()) != cudaSuccess)
+		{
+			fprintf(stderr, "Unable to copy buffer onto device: %s.\n"
+					, cudaGetErrorString(cu_err));
+			return ;
+		}
+
+
+    n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/
+    if (n_matches == NULL)
+    {
+        fprintf(stderr, "Error: unable to allocate memory for %ldB\n",
+                 nb_patterns * sizeof(int));
+        return 1;
+    }
+
+    /*****
+     * BEGIN MAIN LOOP
+     ******/
+
+    /* Timer start */
+    gettimeofday(&t1, NULL);
+
+    for (int i = 0; i < nb_patterns; i++)
+    {
+        n_matches[i] = 0;
+				
+				//TODO err check
+        int size_pattern = strlen(pattern[i]);
+				char* pattern_dev;
+				cudaMalloc((void**)&pattern_dev, size_pattern);
+				
+				cudaMemcpy(pattern_dev, pattern[i]
+						, size_pattern, cudaMemcpyHostToDevice);
+
+				NB = (n_bytes / NTBB) + (((n_bytes % NTBB) > 0) ? 1 : 0);
+				//printf("n_bytes : %i, NB : %i ; nb_threads : %i\n"
+				//		, n_bytes, NB, NB*NTBB);	
+
+				//TODO err check
+				int * column_dev;
+        cudaMalloc((void**)&column_dev, (size_pattern+1)*NTBB*NB*sizeof(int));
+				
+
+
+				//TODO err check	
+				int * result_vec_dev; //result vectors.
+				cudaMalloc((void**)&result_vec_dev, NTBB*NB*sizeof(int));
+				
+				int * result_vec =(int*) malloc(NTBB*NB*sizeof(int));
+				memset(result_vec, 0, NTBB*NB*sizeof(int));
+				
+				cudaMemcpy(result_vec_dev, result_vec
+						, NTBB*NB, cudaMemcpyHostToDevice);
+
+
+
+				levenshtein_cu<<<NB,NTBB>>>(pattern_dev, buf_dev, size_pattern
+						, n_bytes, approx_factor, column_dev, result_vec_dev);
+				
+				//get result
+				cudaMemcpy(result_vec, result_vec_dev
+						, NTBB*NB*sizeof(int), cudaMemcpyDeviceToHost);
+				
+				for (int j = 0 ; j<n_bytes ; j++)
+				{
+					/* Highly advanced debbugging (printfs)
+					int column[size_pattern+1]; 
+					int d = 
+						levenshtein(pattern[i], buf+j, size_pattern, column, approx_factor);
+					//printf("%d",d);
+					if (d != result_vec[j])
+					{
+						printf("MISMATCH FOUND %s should have match at %i :"
+								,pattern[i], j);
+						printf("%.*s\n",size_pattern,&buf[j]);
+					} 
+					*/
+					n_matches[i] += result_vec[j];
+				} 
+
+				//free memory - and then get onto the next pattern.
+				cudaFree(pattern_dev);
+				cudaFree(column_dev);
+				cudaFree(result_vec_dev);
+				free(result_vec);
+    }
+
+    /* Timer stop */
+    gettimeofday(&t2, NULL);
+
+    duration = (t2.tv_sec -t1.tv_sec)+((t2.tv_usec-t1.tv_usec)/1e6);
+
+    printf("APM done in %lf s\n", duration);
+
+    /*****
+     * END MAIN LOOP
+     ******/
+
+    for (int i=0; i < nb_patterns; i++)
+    {
+        printf("Number of matches for pattern <%s>: %d\n",
+                pattern[i], n_matches[i]);
+    }
+
+    return 0;
+}
diff --git a/Projet/CODE/apm/src/apm_gpu.cu b/Projet/CODE/apm/src/apm_gpu.cu
index b6fce6030314c3e75e7509862e6758b4bd66fed8..9526660230da152ca1de0dfcb76e364b8653cd69 100644
--- a/Projet/CODE/apm/src/apm_gpu.cu
+++ b/Projet/CODE/apm/src/apm_gpu.cu
@@ -90,7 +90,7 @@ int levenshtein(char *s1, char *s2, int len, int * column, int approx_factor)
 }
 
 __global__ void levenshtein_cu(char *find, char *buf, int len, int n_bytes
-		,int approx_factor, int* g_column, int* result_vec)
+		,int approx_factor, int* g_column, char* result_vec)
 {		
 		int tId = blockIdx.x * blockDim.x + threadIdx.x;//global thread id
 		if (tId > n_bytes)
@@ -142,7 +142,8 @@ int main(int argc, char ** argv)
 		//cuda-related vars
 		char *buf_dev;
 		int NTBB = 1024; //Number of threads by blocks
-		int NB;//Number of blocks 
+		int NB;//Number of blocks
+		cudaError_t cu_err;
 
     /* Check number of arguments */
     if (argc < 4)
@@ -198,9 +199,22 @@ int main(int argc, char ** argv)
 				fprintf(stderr, "Error: NULL pointer from reading input file.");
         return 1;
     }
+
 		cudaMalloc((void**)&buf_dev, n_bytes * sizeof(char));
-		cudaMemcpy(buf_dev, buf, n_bytes, cudaMemcpyHostToDevice);
+		if ((cu_err = cudaGetLastError()) != cudaSuccess)
+		{
+			fprintf(stderr, "Unable to allocate buffer on device: %s.\n"
+					, cudaGetErrorString(cu_err));
+			return 1;
+		}
 
+		cudaMemcpy(buf_dev, buf, n_bytes, cudaMemcpyHostToDevice);
+		if ((cu_err = cudaGetLastError()) != cudaSuccess)
+		{
+			fprintf(stderr, "Unable to copy buffer onto device: %s.\n"
+					, cudaGetErrorString(cu_err));
+			return 1;
+		}
 
     n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/
     if (n_matches == NULL)
@@ -215,49 +229,95 @@ int main(int argc, char ** argv)
      ******/
 
     /* Timer start */
+		//TODO MAYBE count time with cudaevents (see older tp)
     gettimeofday(&t1, NULL);
 
     for (int i = 0; i < nb_patterns; i++)
     {
         n_matches[i] = 0;
 				
-				//TODO err check
         int size_pattern = strlen(pattern[i]);
 				char* pattern_dev;
+
 				cudaMalloc((void**)&pattern_dev, size_pattern);
+				if ((cu_err = cudaGetLastError()) != cudaSuccess)
+				{
+					fprintf(stderr, "Unable to allocate pattern on device: %s.\n"
+							, cudaGetErrorString(cu_err));
+					return 1;
+				}
 				
 				cudaMemcpy(pattern_dev, pattern[i]
 						, size_pattern, cudaMemcpyHostToDevice);
+				if ((cu_err = cudaGetLastError()) != cudaSuccess)
+				{
+					fprintf(stderr, "Unable to copy pattern onto device: %s.\n"
+							, cudaGetErrorString(cu_err));
+					return 1;
+				}
 
+				
 				NB = (n_bytes / NTBB) + (((n_bytes % NTBB) > 0) ? 1 : 0);
-				//printf("n_bytes : %i, NB : %i ; nb_threads : %i\n"
-				//		, n_bytes, NB, NB*NTBB);	
+
 
 				//TODO err check
 				int * column_dev;
         cudaMalloc((void**)&column_dev, (size_pattern+1)*NTBB*NB*sizeof(int));
+				if ((cu_err = cudaGetLastError()) != cudaSuccess)
+				{
+					fprintf(stderr, "Unable to allocate column vector on device: %s.\n"
+							, cudaGetErrorString(cu_err));
+					return 1;
+				}
 				
 
 
 				//TODO err check	
-				int * result_vec_dev; //result vectors.
-				cudaMalloc((void**)&result_vec_dev, NTBB*NB*sizeof(int));
-				
-				int * result_vec =(int*) malloc(NTBB*NB*sizeof(int));
-				memset(result_vec, 0, NTBB*NB*sizeof(int));
+				char * result_vec_dev; //result vectors.
+				cudaMalloc((void**)&result_vec_dev, NTBB*NB*sizeof(char));
+				if ((cu_err = cudaGetLastError()) != cudaSuccess)
+				{
+					fprintf(stderr, "Unable to allocate result vector on device: %s.\n"
+							, cudaGetErrorString(cu_err));
+					return 1;
+				}
 				
-				cudaMemcpy(result_vec_dev, result_vec
-						, NTBB*NB, cudaMemcpyHostToDevice);
+				cudaMemset(result_vec_dev, 0, NTBB*NB*sizeof(char));
+				if ((cu_err = cudaGetLastError()) != cudaSuccess)
+				{
+					fprintf(stderr, "Unable to init result vector to 0 on device: %s.\n"
+							, cudaGetErrorString(cu_err));
+					return 1;
+				}
 
+				char* result_vec =(char*) malloc(NTBB*NB*sizeof(char));
+				if (result_vec == NULL)
+        {
+            fprintf(stderr, "Unable to allocate result vec on host.");
+            return 1;
+        }
 
 
 				levenshtein_cu<<<NB,NTBB>>>(pattern_dev, buf_dev, size_pattern
 						, n_bytes, approx_factor, column_dev, result_vec_dev);
+				if ((cu_err = cudaGetLastError()) != cudaSuccess)
+				{
+					fprintf(stderr, "Kernel execution of levenshtein_cu failed: %s.\n"
+							, cudaGetErrorString(cu_err));
+					return 1;
+				}
 				
 				//get result
 				cudaMemcpy(result_vec, result_vec_dev
-						, NTBB*NB*sizeof(int), cudaMemcpyDeviceToHost);
+						, NTBB*NB*sizeof(char), cudaMemcpyDeviceToHost);
+				if ((cu_err = cudaGetLastError()) != cudaSuccess)
+				{
+					fprintf(stderr, "Unable to retrieve result vector on host: %s.\n"
+							, cudaGetErrorString(cu_err));
+					return 1;
+				}
 				
+				//TODO MAYBE - reduction on gpu.	
 				for (int j = 0 ; j<n_bytes ; j++)
 				{
 					/* Highly advanced debbugging (printfs)
@@ -277,8 +337,29 @@ int main(int argc, char ** argv)
 
 				//free memory - and then get onto the next pattern.
 				cudaFree(pattern_dev);
+				if ((cu_err = cudaGetLastError()) != cudaSuccess)
+				{
+					fprintf(stderr, "Unable to free memory for pattern on device: %s.\n"
+							, cudaGetErrorString(cu_err));
+					return 1;
+				}
+
 				cudaFree(column_dev);
+				if ((cu_err = cudaGetLastError()) != cudaSuccess)
+				{
+					fprintf(stderr, "Unable to free memory for column on device: %s.\n"
+							, cudaGetErrorString(cu_err));
+					return 1;
+				}
+
 				cudaFree(result_vec_dev);
+				if ((cu_err = cudaGetLastError()) != cudaSuccess)
+				{
+					fprintf(stderr, "Unable to free memory for result on device: %s.\n"
+							, cudaGetErrorString(cu_err));
+					return 1;
+				}
+
 				free(result_vec);
     }