diff --git a/Projet/CODE/apm/Makefile b/Projet/CODE/apm/Makefile
index 5e1a6860bf1eb672380f1774015d3c3e666fdca9..41bb027c1298c037c9fc7130565ee1a0a6c953de 100644
--- a/Projet/CODE/apm/Makefile
+++ b/Projet/CODE/apm/Makefile
@@ -1,14 +1,15 @@
 INCLUDED=include
 
 CC=gcc
-CFLAGS=-O3 -I $(INCLUDED) -pg -g
+CFLAGS=-O3 -I $(INCLUDED) -pg -g -Wall
 CFLAGS_OMP= -fopenmp -DUSE_OMP $(CFLAGS)
+CFLAGS_CU=-O3 -Xcompiler "$(CFLAGS)"
 LDFLAGS=
 
 OBJ=obj/apm.o
 
 .PHONY: all
-all: dir apm apm_omp #apm_gpu
+all: dir apm apm_omp apm_gpu
 
 dir:
 	mkdir -p obj
@@ -25,8 +26,8 @@ apm: obj/apm.o
 apm_omp: obj/apm_omp.o
 	$(CC) $(CFLAGS_OMP) $(LDFLAGS) -o $@ $^
 
-apm_gpu:
-	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
+apm_gpu: src/apm_gpu.cu
+	nvcc $(CFLAGS_CU) $(LDFLAGS) -o $@ $^
 
 .PHONY: clean
 clean:
diff --git a/Projet/CODE/apm/src/apm.c b/Projet/CODE/apm/src/apm.c
index 6dc52b69bcf486c33aac542794ae841c0bcf551e..f1bde78a2ff79483d7738581e9a20a48b4ec7317 100644
--- a/Projet/CODE/apm/src/apm.c
+++ b/Projet/CODE/apm/src/apm.c
@@ -40,7 +40,7 @@ char * read_input_file(char * filename, int * size)
     buf = (char *)malloc(fsize * sizeof (char));
     if (buf == NULL)
     {
-        fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n",
+        fprintf(stderr, "Unable to allocate %ld byte(s) for main array\n",
                  fsize);
         return NULL;
     }
@@ -49,7 +49,7 @@ char * read_input_file(char * filename, int * size)
     if (n_bytes != fsize)
     {
         fprintf(stderr,
-          "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n",
+          "Unable to copy %ld byte(s) from text file (%d byte(s) copied)\n",
           fsize, n_bytes);
         return NULL;
     }
diff --git a/Projet/CODE/apm/src/apm_gpu.cu b/Projet/CODE/apm/src/apm_gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..b6fce6030314c3e75e7509862e6758b4bd66fed8
--- /dev/null
+++ b/Projet/CODE/apm/src/apm_gpu.cu
@@ -0,0 +1,303 @@
+/**
+ * APPROXIMATE PATTERN MATCHING
+ *
+ * INF560 X2016
+ */
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#define APM_DEBUG 0
+
+/* Reads all of `filename` into a malloc'd buffer (caller frees), length in
+ * *size. On any failure prints a message on stderr and returns NULL. */
+char * read_input_file(char * filename, int * size)
+{
+    char * buf = NULL;
+    off_t fsize, done = 0;
+    int fd = open(filename, O_RDONLY);
+    if (fd == -1)
+    {
+        fprintf(stderr, "Unable to open the text file <%s>\n", filename);
+        return NULL;
+    }
+    fsize = lseek(fd, 0, SEEK_END); /* must also fit in the int *size */
+    if (fsize < 0 || (off_t)(int)fsize != fsize || lseek(fd, 0, SEEK_SET) < 0)
+    {
+        fprintf(stderr, "Unable to get a usable size for <%s>\n", filename);
+        goto fail;
+    }
+#if APM_DEBUG
+    printf("File length: %lld\n", (long long)fsize);
+#endif
+    buf = (char *)malloc(fsize > 0 ? (size_t)fsize : 1); /* never malloc(0) */
+    if (buf == NULL)
+    {
+        fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n",
+                 (long long)fsize);
+        goto fail;
+    }
+    while (done < fsize) /* read() may return less than requested */
+    {
+        ssize_t got = read(fd, buf + done, (size_t)(fsize - done));
+        if (got <= 0)
+        {
+            fprintf(stderr, "Unable to copy %lld byte(s) from text file "
+                    "(%d byte(s) copied)\n", (long long)fsize, (int)done);
+            goto fail;
+        }
+        done += got;
+    }
+#if APM_DEBUG
+    printf("Number of read bytes: %d\n", (int)done);
+#endif
+    *size = (int)done;
+    close(fd);
+    return buf;
+fail: /* single error exit: release the buffer and the descriptor */
+    free(buf);
+    close(fd);
+    return NULL;
+}
+
+#define MIN3(a, b, c) ((a)<(b) ? ((a)<(c) ? (a) : (c)) : ((b)<(c) ? (b) : (c)))
+
+/* Returns 1 when the edit distance between the first `len` characters of s1
+ * and of s2 is at most approx_factor, 0 otherwise; `column` is caller-provided
+ * scratch space of at least len+1 ints. */
+int levenshtein(char *s1, char *s2, int len, int * column, int approx_factor)
+{
+    int col, row, diag, saved, cost;
+
+    for (row = 1; row <= len; ++row)
+        column[row] = row;
+    for (col = 0; col < len; ++col)
+    {
+        column[0] = col + 1;
+        diag = col;
+        for (row = 1; row <= len; ++row)
+        {
+            saved = column[row];
+            cost = (s1[row-1] == s2[col]) ? 0 : 1;
+            column[row] = MIN3(saved + 1, column[row-1] + 1, diag + cost);
+            diag = saved;
+        }
+    }
+    return column[len] <= approx_factor;
+}
+
+/* One thread per text offset: thread tId compares `find` with the text at
+ * buf+tId and writes 1 to result_vec[tId] if the edit distance is at most
+ * approx_factor, else 0. Expects a 1-D launch covering n_bytes threads and
+ * g_column holding (len+1) ints of scratch per launched thread. */
+__global__ void levenshtein_cu(char *find, char *buf, int len, int n_bytes
+		,int approx_factor, int* g_column, int* result_vec)
+{
+    int tId = blockIdx.x * blockDim.x + threadIdx.x;//global thread id
+    if (tId >= n_bytes)
+    {return;}//tId == n_bytes is already one past the last text byte
+    char* s2 = buf+tId;
+    //stride stays len+1 (size_t avoids int overflow) so scratch never overlaps
+    int* column = g_column+(size_t)tId*(len+1);
+    //compare only the characters that exist: never read past buf[n_bytes-1]
+    int cmp_len = (n_bytes - tId < len) ? (n_bytes - tId) : len;
+    int x, y, lastdiag, olddiag;
+    column[0] = 0;//defined result when cmp_len is 0
+    for (y = 1; y <= cmp_len; y++)
+    {
+        column[y] = y;
+    }
+    for (x = 1; x <= cmp_len; x++)
+    {
+        column[0] = x;
+        lastdiag = x-1;
+        for (y = 1; y <= cmp_len; y++)
+        {
+            olddiag = column[y];
+            column[y] = MIN3(column[y] + 1,
+                            column[y-1] + 1,
+                            lastdiag + (find[y-1] == s2[x-1] ? 0 : 1));
+            lastdiag = olddiag;
+        }
+    }
+    result_vec[tId] = (column[cmp_len] <= approx_factor) ? 1 : 0;//always set
+}
+
+/* Abort main() with exit status 1 when a CUDA runtime call fails. */
+#define CUDA_TRY(call) do { cudaError_t e_ = (call); if (e_ != cudaSuccess) { \
+    fprintf(stderr, "CUDA error line %d: %s\n", __LINE__, \
+            cudaGetErrorString(e_)); return 1; } } while (0)
+
+/* Command line: approximation_factor dna_database pattern1 pattern2 ...
+ * For each pattern, launches levenshtein_cu with one thread per text offset
+ * and sums the per-offset match flags. Returns 0 on success, 1 on error. */
+int main(int argc, char ** argv)
+{
+    char ** pattern;
+    char * filename;
+    int approx_factor = 0;
+    int nb_patterns = 0;
+    char * buf;
+    struct timeval t1, t2;
+    double duration;
+    int n_bytes;
+    int * n_matches;
+
+    //cuda-related vars
+    char *buf_dev;
+    const int NTBB = 1024; //Number of threads by blocks
+    int NB;//Number of blocks
+    size_t n_threads;//threads actually launched (NB*NTBB)
+    int * result_vec;//host copy of the per-offset match flags
+    int * result_vec_dev;//device-side match flags, one per thread
+
+    /* Check number of arguments */
+    if (argc < 4)
+    {
+        printf("Usage: %s approximation_factor "
+                "dna_database pattern1 pattern2 ...\n",
+                argv[0]);
+        return 1;
+    }
+
+    approx_factor = atoi(argv[1]);/* Get the distance factor */
+    filename = argv[2];/* Grab the filename containing the target text */
+    nb_patterns = argc - 3;/* Get the number of patterns to search for */
+
+    pattern = (char **)malloc(nb_patterns * sizeof(char*));
+    if (pattern == NULL)
+    {
+        fprintf(stderr,
+                 "Unable to allocate array of pattern of size %d\n",
+                 nb_patterns);
+        return 1;
+    }
+
+    for (int i=0; i < nb_patterns; i++) /* Grab the patterns */
+    {
+        int l = strlen(argv[i+3]);
+        if (l <= 0)
+        {
+            fprintf(stderr, "Error while parsing argument %d\n", i+3);
+            return 1;
+        }
+        pattern[i] = (char *)malloc((l+1) * sizeof(char));
+        if (pattern[i] == NULL)
+        {
+            fprintf(stderr, "Unable to allocate string of size %d\n", l);
+            return 1;
+        }
+        strncpy(pattern[i], argv[i+3], (l+1));
+    }
+
+    printf("Approximate Pattern Mathing: "
+            "looking for %d pattern(s) in file %s w/ distance of %d\n",
+            nb_patterns, filename, approx_factor);
+
+    buf = read_input_file(filename, &n_bytes);
+    if (buf == NULL)
+    {
+        fprintf(stderr, "Error: NULL pointer from reading input file.");
+        return 1;
+    }
+    if (n_bytes <= 0)
+    {
+        /* a grid of 0 blocks is an invalid launch configuration */
+        fprintf(stderr, "Error: input file is empty.\n");
+        return 1;
+    }
+    CUDA_TRY(cudaMalloc((void**)&buf_dev, n_bytes * sizeof(char)));
+    CUDA_TRY(cudaMemcpy(buf_dev, buf, n_bytes, cudaMemcpyHostToDevice));
+
+    n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/
+    if (n_matches == NULL)
+    {
+        fprintf(stderr, "Error: unable to allocate memory for %zuB\n",
+                 nb_patterns * sizeof(int));
+        return 1;
+    }
+
+    /*****
+     * BEGIN MAIN LOOP
+     ******/
+
+    /* Timer start */
+    gettimeofday(&t1, NULL);
+
+    /* Launch geometry and flag buffers depend only on the text size, so
+     * they are set up once instead of once per pattern. */
+    NB = (n_bytes / NTBB) + (((n_bytes % NTBB) > 0) ? 1 : 0);
+    n_threads = (size_t)NB * NTBB;
+    result_vec = (int *)malloc(n_threads * sizeof(int));
+    if (result_vec == NULL)
+    {
+        fprintf(stderr, "Error: unable to allocate memory for %zuB\n",
+                 n_threads * sizeof(int));
+        return 1;
+    }
+    CUDA_TRY(cudaMalloc((void**)&result_vec_dev, n_threads * sizeof(int)));
+
+    for (int i = 0; i < nb_patterns; i++)
+    {
+        int size_pattern = strlen(pattern[i]);
+        char* pattern_dev;
+        int * column_dev;
+        /* size_t maths: (len+1)*threads overflows int on large texts */
+        size_t col_bytes = (size_t)(size_pattern+1) * n_threads * sizeof(int);
+
+        n_matches[i] = 0;
+        CUDA_TRY(cudaMalloc((void**)&pattern_dev, size_pattern));
+        CUDA_TRY(cudaMemcpy(pattern_dev, pattern[i]
+                , size_pattern, cudaMemcpyHostToDevice));
+        CUDA_TRY(cudaMalloc((void**)&column_dev, col_bytes));
+
+        /* Clear every flag on the device; the former host-to-device copy
+         * moved NTBB*NB bytes rather than NTBB*NB ints. */
+        CUDA_TRY(cudaMemset(result_vec_dev, 0, n_threads * sizeof(int)));
+
+        levenshtein_cu<<<NB,NTBB>>>(pattern_dev, buf_dev, size_pattern
+                , n_bytes, approx_factor, column_dev, result_vec_dev);
+        CUDA_TRY(cudaGetLastError());//launch-configuration errors
+        /* Blocking copy: waits for the kernel and reports its faults */
+        CUDA_TRY(cudaMemcpy(result_vec, result_vec_dev
+                , n_threads * sizeof(int), cudaMemcpyDeviceToHost));
+
+        /* Only the first n_bytes flags belong to real text offsets */
+        for (int j = 0 ; j<n_bytes ; j++)
+        {
+            n_matches[i] += result_vec[j];
+        }
+
+        //free memory - and then get onto the next pattern.
+        CUDA_TRY(cudaFree(pattern_dev));
+        CUDA_TRY(cudaFree(column_dev));
+    }
+
+    /* Timer stop */
+    gettimeofday(&t2, NULL);
+    duration = (t2.tv_sec -t1.tv_sec)+((t2.tv_usec-t1.tv_usec)/1e6);
+    printf("APM done in %lf s\n", duration);
+
+    /*****
+     * END MAIN LOOP
+     ******/
+
+    for (int i=0; i < nb_patterns; i++)
+    {
+        printf("Number of matches for pattern <%s>: %d\n",
+                pattern[i], n_matches[i]);
+        free(pattern[i]);
+    }
+
+    /* Release everything that was still owned by main */
+    cudaFree(result_vec_dev);
+    cudaFree(buf_dev);
+    free(result_vec);
+    free(n_matches);
+    free(pattern);
+    free(buf);
+    return 0;
+}
diff --git a/Projet/CODE/apm/src/apm_omp.c b/Projet/CODE/apm/src/apm_omp.c
index d40a9cdef1c8db1582b3b5efb5a73b0e8aa58012..3162e396f16e1e7c4af458cc8c88b0770475705f 100644
--- a/Projet/CODE/apm/src/apm_omp.c
+++ b/Projet/CODE/apm/src/apm_omp.c
@@ -42,7 +42,7 @@ char * read_input_file(char * filename, int * size)
     buf = (char *)malloc(fsize * sizeof (char));
     if (buf == NULL)
     {
-        fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n",
+        fprintf(stderr, "Unable to allocate %ld byte(s) for main array\n",
                  fsize);
         return NULL;
     }
@@ -51,7 +51,7 @@ char * read_input_file(char * filename, int * size)
     if (n_bytes != fsize)
     {
         fprintf(stderr,
-          "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n",
+          "Unable to copy %ld byte(s) from text file (%d byte(s) copied)\n",
           fsize, n_bytes);
         return NULL;
     }
@@ -188,7 +188,7 @@ int main(int argc, char ** argv)
         {
             fprintf(stderr, 
 								"Error: unable to allocate memory for column (%ldB)\n",
-                (size_pattern+1) * sizeof(int));
+                (size_pattern+1) * sizeof(int) * n_bytes);
             return 1;
         }
 
@@ -204,7 +204,6 @@ int main(int argc, char ** argv)
             }
 #endif
 
-            size_pattern;
             if (n_bytes - j < size_pattern)
             {
                 //size = n_bytes - j;