diff --git a/Projet/CODE/apm/Makefile b/Projet/CODE/apm/Makefile
index 5e1a6860bf1eb672380f1774015d3c3e666fdca9..41bb027c1298c037c9fc7130565ee1a0a6c953de 100644
--- a/Projet/CODE/apm/Makefile
+++ b/Projet/CODE/apm/Makefile
@@ -1,14 +1,15 @@
 INCLUDED=include
 
 CC=gcc
-CFLAGS=-O3 -I $(INCLUDED) -pg -g
+CFLAGS=-O3 -I $(INCLUDED) -pg -g -Wall
 CFLAGS_OMP= -fopenmp -DUSE_OMP $(CFLAGS)
+CFLAGS_CU=-O3 -Xcompiler "$(CFLAGS)"
 LDFLAGS=
 
 OBJ=obj/apm.o
 
 .PHONY: all
-all: dir apm apm_omp #apm_gpu
+all: dir apm apm_omp apm_gpu
 
 dir:
 	mkdir -p obj
@@ -25,8 +26,8 @@ apm: obj/apm.o
 apm_omp: obj/apm_omp.o
 	$(CC) $(CFLAGS_OMP) $(LDFLAGS) -o $@ $^
 
-apm_gpu:
-	$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^
+apm_gpu: src/apm_gpu.cu
+	nvcc $(CFLAGS_CU) $(LDFLAGS) -o $@ $^
 
 .PHONY: clean
 clean:
diff --git a/Projet/CODE/apm/src/apm.c b/Projet/CODE/apm/src/apm.c
index 6dc52b69bcf486c33aac542794ae841c0bcf551e..f1bde78a2ff79483d7738581e9a20a48b4ec7317 100644
--- a/Projet/CODE/apm/src/apm.c
+++ b/Projet/CODE/apm/src/apm.c
@@ -40,7 +40,7 @@ char * read_input_file(char * filename, int * size)
   buf = (char *)malloc(fsize * sizeof (char));
   if (buf == NULL)
   {
-    fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n",
+    fprintf(stderr, "Unable to allocate %ld byte(s) for main array\n",
         fsize);
     return NULL;
   }
@@ -49,7 +49,7 @@ char * read_input_file(char * filename, int * size)
   if (n_bytes != fsize)
   {
     fprintf(stderr,
-        "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n",
+        "Unable to copy %ld byte(s) from text file (%d byte(s) copied)\n",
         fsize, n_bytes);
     return NULL;
   }
diff --git a/Projet/CODE/apm/src/apm_gpu.cu b/Projet/CODE/apm/src/apm_gpu.cu
new file mode 100644
index 0000000000000000000000000000000000000000..b6fce6030314c3e75e7509862e6758b4bd66fed8
--- /dev/null
+++ b/Projet/CODE/apm/src/apm_gpu.cu
@@ -0,0 +1,376 @@
+/**
+ * APPROXIMATE PATTERN MATCHING
+ *
+ * INF560 X2016
+ */
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+
+#define APM_DEBUG 0
+
+/* Abort with a readable message when a CUDA runtime call fails. */
+#define CUDA_CHECK(call) \
+  do { \
+    cudaError_t err_ = (call); \
+    if (err_ != cudaSuccess) \
+    { \
+      fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
+          cudaGetErrorString(err_)); \
+      exit(1); \
+    } \
+  } while (0)
+
+/**
+ * Read the whole file <filename> into a newly malloc'ed buffer.
+ *
+ * On success, returns the buffer (owned by the caller) and stores the
+ * number of bytes read in *size. On failure, prints a message on stderr,
+ * releases the descriptor and the buffer, and returns NULL.
+ */
+char * read_input_file(char * filename, int * size)
+{
+  char * buf;
+  off_t fsize;
+  int fd = 0;
+  int n_bytes = 1;
+
+  /* Open the text file */
+  fd = open(filename, O_RDONLY);
+  if (fd == -1)
+  {
+    fprintf(stderr, "Unable to open the text file <%s>\n", filename);
+    return NULL;
+  }
+
+  /* Get the number of characters in the textfile */
+  fsize = lseek(fd, 0, SEEK_END);
+  if (fsize == (off_t)-1 || lseek(fd, 0, SEEK_SET) == (off_t)-1)
+  {
+    fprintf(stderr, "Unable to get the size of the text file <%s>\n",
+        filename);
+    close(fd);
+    return NULL;
+  }
+
+#if APM_DEBUG
+  printf("File length: %lld\n", (long long)fsize);
+#endif
+
+  /* Allocate data to copy the target text (at least 1 byte, so that an
+   * empty file is not reported as an allocation failure) */
+  buf = (char *)malloc((fsize > 0 ? fsize : 1) * sizeof (char));
+  if (buf == NULL)
+  {
+    fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n",
+        (long long)fsize);
+    close(fd);
+    return NULL;
+  }
+
+  n_bytes = read(fd, buf, fsize);
+  if (n_bytes != fsize)
+  {
+    fprintf(stderr,
+        "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n",
+        (long long)fsize, n_bytes);
+    free(buf);
+    close(fd);
+    return NULL;
+  }
+
+#if APM_DEBUG
+  printf("Number of read bytes: %d\n", n_bytes);
+#endif
+
+  *size = n_bytes;
+  close(fd);
+  return buf;
+}
+
+#define MIN3(a, b, c) ((a)<(b) ? ((a)<(c) ? (a) : (c)) : ((b)<(c) ? (b) : (c)))
+
+/**
+ * CPU reference, kept to cross-check the kernel when debugging.
+ *
+ * Computes the Levenshtein distance between the first len characters of
+ * s1 and of s2, using column (len+1 ints) as scratch space, and returns 1
+ * when that distance is <= approx_factor, 0 otherwise.
+ */
+int levenshtein(char *s1, char *s2, int len, int * column, int approx_factor)
+{
+  int x, y, lastdiag, olddiag;
+
+  for (y = 1; y <= len; y++)
+  {
+    column[y] = y;
+  }
+  for (x = 1; x <= len; x++)
+  {
+    column[0] = x;
+    lastdiag = x-1;
+    for (y = 1; y <= len; y++)
+    {
+      olddiag = column[y];
+      column[y] = MIN3(column[y] + 1,
+          column[y-1] + 1,
+          lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1));
+      lastdiag = olddiag;
+    }
+  }
+  return (column[len] <= approx_factor) ? 1 : 0;
+}
+
+/**
+ * One thread per start offset in buf: thread tId compares find against
+ * the text starting at buf[tId] and writes 1 to result_vec[tId] when the
+ * Levenshtein distance is <= approx_factor, 0 otherwise.
+ *
+ * Expects a 1D grid of 1D blocks providing at least n_bytes threads.
+ * g_column must hold (len+1) ints per offset (n_bytes*(len+1) in total)
+ * and result_vec n_bytes ints; find, buf, g_column and result_vec are
+ * device pointers. len must be >= 1.
+ */
+__global__ void levenshtein_cu(char *find, char *buf, int len, int n_bytes
+    ,int approx_factor, int* g_column, int* result_vec)
+{
+  int tId = blockIdx.x * blockDim.x + threadIdx.x;//global thread id
+  if (tId >= n_bytes)
+  {return;}//we are past the buffer length - do not process
+
+  //near the end of the text fewer than len characters remain: compare
+  //only what is available instead of reading past the end of buf
+  int avail = n_bytes - tId;
+  int cmp_len = (len < avail) ? len : avail;
+
+  //position s2 and column to the right position in the pre-allocated
+  //arrays (size_t index: tId*(len+1) can exceed INT_MAX on large inputs)
+  char* s2 = buf+tId;
+  int* column = g_column+(size_t)tId*(len+1);
+
+  //same dynamic-programming recurrence as levenshtein() above
+  int x, y, lastdiag, olddiag;
+
+  for (y = 1; y <= cmp_len; y++)
+  {
+    column[y] = y;
+  }
+  for (x = 1; x <= cmp_len; x++)
+  {
+    column[0] = x;
+    lastdiag = x-1;
+    for (y = 1; y <= cmp_len; y++)
+    {
+      olddiag = column[y];
+      column[y] = MIN3(column[y] + 1,
+          column[y-1] + 1,
+          lastdiag + (find[y-1] == s2[x-1] ? 0 : 1));
+      lastdiag = olddiag;
+    }
+  }
+
+  //always store the verdict so the host never reads uninitialized memory
+  result_vec[tId] = (column[cmp_len] <= approx_factor) ? 1 : 0;
+}
+
+/**
+ * Usage: apm_gpu approximation_factor dna_database pattern1 pattern2 ...
+ *
+ * Counts, for each pattern, the offsets of the text file at which the
+ * pattern matches within approx_factor edits, using one GPU thread per
+ * offset. Returns 0 on success and 1 on a usage or allocation error.
+ */
+int main(int argc, char ** argv)
+{
+  char ** pattern;
+  char * filename;
+  int approx_factor = 0;
+  int nb_patterns = 0;
+  int max_len = 0;//length of the longest pattern
+
+  char * buf;
+  struct timeval t1, t2;
+  double duration;
+  int n_bytes;
+  size_t n_pos;//n_bytes as a size_t, for overflow-free size computations
+  int * n_matches;
+
+  //cuda-related vars
+  char *buf_dev = NULL;
+  char *pattern_dev = NULL;
+  int *column_dev = NULL;
+  int *result_vec_dev = NULL;
+  int *result_vec = NULL;//host copy of result_vec_dev
+  int NTBB = 1024; //Number of threads by blocks
+  int NB = 0;//Number of blocks
+
+  /* Check number of arguments */
+  if (argc < 4)
+  {
+    printf("Usage: %s approximation_factor "
+        "dna_database pattern1 pattern2 ...\n",
+        argv[0]);
+    return 1;
+  }
+
+  approx_factor = atoi(argv[1]);/* Get the distance factor */
+  filename = argv[2];/* Grab the filename containing the target text */
+  nb_patterns = argc - 3;/* Get the number of patterns to search for */
+
+  pattern = (char **)malloc(nb_patterns * sizeof(char*));
+  if (pattern == NULL)/*Fill the pattern*/
+  {
+    fprintf(stderr,
+        "Unable to allocate array of pattern of size %d\n",
+        nb_patterns);
+    return 1;
+  }
+
+  for (int i=0; i < nb_patterns; i++) /* Grab the patterns */
+  {
+    int l;
+    l = strlen(argv[i+3]);
+
+    if (l <= 0)
+    {
+      fprintf(stderr, "Error while parsing argument %d\n", i+3);
+      return 1;
+    }
+
+    pattern[i] = (char *)malloc((l+1) * sizeof(char));
+    if (pattern[i] == NULL)
+    {
+      fprintf(stderr, "Unable to allocate string of size %d\n", l);
+      return 1;
+    }
+
+    strncpy(pattern[i], argv[i+3], (l+1));
+    if (l > max_len)
+    {
+      max_len = l;
+    }
+  }
+
+  printf("Approximate Pattern Mathing: "
+      "looking for %d pattern(s) in file %s w/ distance of %d\n",
+      nb_patterns, filename, approx_factor);
+
+  buf = read_input_file(filename, &n_bytes);
+  if (buf == NULL)
+  {
+    fprintf(stderr, "Error: NULL pointer from reading input file.");
+    return 1;
+  }
+  n_pos = (size_t)n_bytes;
+
+  //an empty text cannot match anything and a 0-block launch is invalid,
+  //so the GPU is only used when there is something to scan
+  if (n_bytes > 0)
+  {
+    NB = (n_bytes / NTBB) + (((n_bytes % NTBB) > 0) ? 1 : 0);
+    CUDA_CHECK(cudaMalloc((void**)&buf_dev, n_pos * sizeof(char)));
+    CUDA_CHECK(cudaMemcpy(buf_dev, buf, n_pos * sizeof(char)
+          , cudaMemcpyHostToDevice));
+  }
+
+  n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/
+  if (n_matches == NULL)
+  {
+    fprintf(stderr, "Error: unable to allocate memory for %zuB\n",
+        nb_patterns * sizeof(int));
+    return 1;
+  }
+  for (int i = 0; i < nb_patterns; i++)
+  {
+    n_matches[i] = 0;
+  }
+
+  /*****
+   * BEGIN MAIN LOOP
+   ******/
+
+  /* Timer start */
+  gettimeofday(&t1, NULL);
+
+  if (n_bytes > 0)
+  {
+    //scratch buffers sized for the longest pattern: allocated once and
+    //reused for every pattern instead of once per iteration
+    CUDA_CHECK(cudaMalloc((void**)&pattern_dev, max_len * sizeof(char)));
+    CUDA_CHECK(cudaMalloc((void**)&column_dev
+          , n_pos * (max_len+1) * sizeof(int)));
+    CUDA_CHECK(cudaMalloc((void**)&result_vec_dev, n_pos * sizeof(int)));
+
+    result_vec = (int*) malloc(n_pos * sizeof(int));
+    if (result_vec == NULL)
+    {
+      fprintf(stderr, "Error: unable to allocate memory for %zuB\n",
+          n_pos * sizeof(int));
+      return 1;
+    }
+
+    for (int i = 0; i < nb_patterns; i++)
+    {
+      int size_pattern = strlen(pattern[i]);
+      CUDA_CHECK(cudaMemcpy(pattern_dev, pattern[i]
+            , size_pattern * sizeof(char), cudaMemcpyHostToDevice));
+
+      levenshtein_cu<<<NB,NTBB>>>(pattern_dev, buf_dev, size_pattern
+          , n_bytes, approx_factor, column_dev, result_vec_dev);
+      CUDA_CHECK(cudaGetLastError());//launch-configuration errors
+
+      //get result (blocking copy: it also waits for the kernel and
+      //reports any error raised while it ran)
+      CUDA_CHECK(cudaMemcpy(result_vec, result_vec_dev
+            , n_pos * sizeof(int), cudaMemcpyDeviceToHost));
+
+      //levenshtein() is the CPU reference to compare result_vec[j]
+      //with when debugging the kernel
+      for (int j = 0 ; j<n_bytes ; j++)
+      {
+        n_matches[i] += result_vec[j];
+      }
+    }
+
+    //free the scratch buffers
+    CUDA_CHECK(cudaFree(pattern_dev));
+    CUDA_CHECK(cudaFree(column_dev));
+    CUDA_CHECK(cudaFree(result_vec_dev));
+    free(result_vec);
+  }
+
+  /* Timer stop */
+  gettimeofday(&t2, NULL);
+
+  duration = (t2.tv_sec -t1.tv_sec)+((t2.tv_usec-t1.tv_usec)/1e6);
+
+  printf("APM done in %lf s\n", duration);
+
+  /*****
+   * END MAIN LOOP
+   ******/
+
+  for (int i=0; i < nb_patterns; i++)
+  {
+    printf("Number of matches for pattern <%s>: %d\n",
+        pattern[i], n_matches[i]);
+  }
+
+  //release everything that is still owned
+  if (buf_dev != NULL)
+  {
+    CUDA_CHECK(cudaFree(buf_dev));
+  }
+  free(buf);
+  free(n_matches);
+  for (int i=0; i < nb_patterns; i++)
+  {
+    free(pattern[i]);
+  }
+  free(pattern);
+
+  return 0;
+}
diff --git a/Projet/CODE/apm/src/apm_omp.c b/Projet/CODE/apm/src/apm_omp.c
index d40a9cdef1c8db1582b3b5efb5a73b0e8aa58012..3162e396f16e1e7c4af458cc8c88b0770475705f 100644
--- a/Projet/CODE/apm/src/apm_omp.c
+++ b/Projet/CODE/apm/src/apm_omp.c
@@ -42,7 +42,7 @@ char * read_input_file(char * filename, int * size)
   buf = (char *)malloc(fsize * sizeof (char));
   if (buf == NULL)
   {
-    fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n",
+    fprintf(stderr, "Unable to allocate %ld byte(s) for main array\n",
         fsize);
     return NULL;
   }
@@ -51,7 +51,7 @@ char * read_input_file(char * filename, int * size)
   if (n_bytes != fsize)
   {
     fprintf(stderr,
-        "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n",
+        "Unable to copy %ld byte(s) from text file (%d byte(s) copied)\n",
         fsize, n_bytes);
     return NULL;
   }
@@ -188,7 +188,7 @@ int main(int argc, char ** argv)
     {
       fprintf(stderr,
           "Error: unable to allocate memory for column (%ldB)\n",
-          (size_pattern+1) * sizeof(int));
+          (size_pattern+1) * sizeof(int) * n_bytes);
       return 1;
     }
 
@@ -204,7 +204,6 @@ int main(int argc, char ** argv)
       }
 #endif
 
-      size_pattern;
       if (n_bytes - j < size_pattern)
       {
         //size = n_bytes - j;