diff --git a/Projet/CODE/apm/$ b/Projet/CODE/apm/$ new file mode 100644 index 0000000000000000000000000000000000000000..f2ff57bca5727ab4bcbc529bc656405b7cd5670e --- /dev/null +++ b/Projet/CODE/apm/$ @@ -0,0 +1,318 @@ +/** + * APPROXIMATE PATTERN MATCHING + * + * INF560 X2016 + */ +#include <string.h> +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> +#include <sys/time.h> + +#define APM_DEBUG 0 + +char * read_input_file(char * filename, int * size) +{ + char * buf; + off_t fsize; + int fd = 0; + int n_bytes = 1; + + /* Open the text file */ + fd = open(filename, O_RDONLY); + if (fd == -1) + { + fprintf(stderr, "Unable to open the text file <%s>\n", filename); + return NULL; + } + + /* Get the number of characters in the textfile */ + fsize = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_SET); + /* TODO check return of lseek */ + +#if APM_DEBUG + printf("File length: %lld\n", fsize); +#endif + + /* Allocate data to copy the target text */ + buf = (char *)malloc(fsize * sizeof (char)); + if (buf == NULL) + { + fprintf(stderr, "Unable to allocate %ld byte(s) for main array\n", + fsize); + return NULL; + } + + n_bytes = read(fd, buf, fsize); + if (n_bytes != fsize) + { + fprintf(stderr, + "Unable to copy %ld byte(s) from text file (%d byte(s) copied)\n", + fsize, n_bytes); + return NULL; + } + +#if APM_DEBUG + printf("Number of read bytes: %d\n", n_bytes); +#endif + + *size = n_bytes; + close(fd); + return buf; +} + +#define MIN3(a, b, c) ((a)<(b) ? ((a)<(c) ? (a) : (c)) : ((b)<(c) ? (b) : (c))) + +int levenshtein(char *s1, char *s2, int len, int * column, int approx_factor) +{ + int x, y, lastdiag, olddiag; + + for (y = 1; y <= len; y++) + { + column[y] = y; + } + for (x = 1; x <= len; x++) + { + column[0] = x; + lastdiag = x-1; + for (y = 1; y <= len; y++) + { + olddiag = column[y]; + column[y] = MIN3(column[y] + 1, + column[y-1] + 1, + lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1)); + lastdiag = olddiag; + } + } + return (column[len] <= approx_factor) ? 1 : 0; +} + +__global__ void levenshtein_cu(char *find, char *buf, int len, int n_bytes + ,int approx_factor, int* g_column, int* result_vec) +{ + int tId = blockIdx.x * blockDim.x + threadIdx.x;//global thread id + if (tId > n_bytes) + {return;}//we are past the buffer length - do not process + + //position s2 and column to the right position in the pre-allocated + //arrays + char* s2 = buf+tId; + int* column = g_column+tId*(len+1); + + //i do not understand this algorithm and god do i not want to. + int x, y, lastdiag, olddiag; + + for (y = 1; y <= len; y++) + { + column[y] = y; + } + for (x = 1; x <= len; x++) + { + column[0] = x; + lastdiag = x-1; + for (y = 1; y <= len; y++) + { + olddiag = column[y]; + column[y] = MIN3(column[y] + 1, + column[y-1] + 1, + lastdiag + (find[y-1] == s2[x-1] ? 0 : 1)); + lastdiag = olddiag; + } + } + + if (column[len] <= approx_factor) + {result_vec[tId] = 1;}//its a match +} + +int main(int argc, char ** argv) +{ + char ** pattern; + char * filename; + int approx_factor = 0; + int nb_patterns = 0; + + char * buf; + struct timeval t1, t2; + double duration; + int n_bytes; + int * n_matches; + + //cuda-related vars + char *buf_dev; + int NTBB = 1024; //Number of threads by blocks + int NB;//Number of blocks + cudaError_t cu_err; + + /* Check number of arguments */ + if (argc < 4) + { + printf("Usage: %s approximation_factor " + "dna_database pattern1 pattern2 ...\n", + argv[0]); + return 1; + } + + approx_factor = atoi(argv[1]);/* Get the distance factor */ + filename = argv[2];/* Grab the filename containing the target text */ + nb_patterns = argc - 3;/* Get the number of patterns to search for */ + + pattern = (char **)malloc(nb_patterns * sizeof(char*)); + if (pattern == NULL)/*Fill the pattern*/ + { + fprintf(stderr, + "Unable to allocate array of pattern of size %d\n", + nb_patterns); + return 1; + } + + for (int i=0; i < nb_patterns; i++) /* Grab the patterns */ + { + int l; + l = strlen(argv[i+3]); + + if (l <= 0) + { + fprintf(stderr, "Error while parsing argument %d\n", i+3); + return 1; + } + + pattern[i] = (char *)malloc((l+1) * sizeof(char)); + if (pattern[i] == NULL) + { + fprintf(stderr, "Unable to allocate string of size %d\n", l); + return 1; + } + + strncpy(pattern[i], argv[i+3], (l+1)); + } + + + printf("Approximate Pattern Mathing: " + "looking for %d pattern(s) in file %s w/ distance of %d\n", + nb_patterns, filename, approx_factor); + + buf = read_input_file(filename, &n_bytes); + if (buf == NULL) + { + fprintf(stderr, "Error: NULL pointer from reading input file."); + return 1; + } + + cudaMalloc((void**)&buf_dev, n_bytes * sizeof(char)); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to allocate buffer on device: %s.\n" + , cudaGetErrorString(cu_err)); + return ; + } + + cudaMemcpy(NULL, buf, n_bytes, cudaMemcpyHostToDevice); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to copy buffer onto device: %s.\n" + , cudaGetErrorString(cu_err)); + return ; + } + + + n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/ + if (n_matches == NULL) + { + fprintf(stderr, "Error: unable to allocate memory for %ldB\n", + nb_patterns * sizeof(int)); + return 1; + } + + /***** + * BEGIN MAIN LOOP + ******/ + + /* Timer start */ + gettimeofday(&t1, NULL); + + for (int i = 0; i < nb_patterns; i++) + { + n_matches[i] = 0; + + //TODO err check + int size_pattern = strlen(pattern[i]); + char* pattern_dev; + cudaMalloc((void**)&pattern_dev, size_pattern); + + cudaMemcpy(pattern_dev, pattern[i] + , size_pattern, cudaMemcpyHostToDevice); + + NB = (n_bytes / NTBB) + (((n_bytes % NTBB) > 0) ? 1 : 0); + //printf("n_bytes : %i, NB : %i ; nb_threads : %i\n" + // , n_bytes, NB, NB*NTBB); + + //TODO err check + int * column_dev; + cudaMalloc((void**)&column_dev, (size_pattern+1)*NTBB*NB*sizeof(int)); + + + + //TODO err check + int * result_vec_dev; //result vectors. + cudaMalloc((void**)&result_vec_dev, NTBB*NB*sizeof(int)); + + int * result_vec =(int*) malloc(NTBB*NB*sizeof(int)); + memset(result_vec, 0, NTBB*NB*sizeof(int)); + + cudaMemcpy(result_vec_dev, result_vec + , NTBB*NB, cudaMemcpyHostToDevice); + + + + levenshtein_cu<<<NB,NTBB>>>(pattern_dev, buf_dev, size_pattern + , n_bytes, approx_factor, column_dev, result_vec_dev); + + //get result + cudaMemcpy(result_vec, result_vec_dev + , NTBB*NB*sizeof(int), cudaMemcpyDeviceToHost); + + for (int j = 0 ; j<n_bytes ; j++) + { + /* Highly advanced debbugging (printfs) + int column[size_pattern+1]; + int d = + levenshtein(pattern[i], buf+j, size_pattern, column, approx_factor); + //printf("%d",d); + if (d != result_vec[j]) + { + printf("MISMATCH FOUND %s should have match at %i :" + ,pattern[i], j); + printf("%.*s\n",size_pattern,&buf[j]); + } + */ + n_matches[i] += result_vec[j]; + } + + //free memory - and then get onto the next pattern. + cudaFree(pattern_dev); + cudaFree(column_dev); + cudaFree(result_vec_dev); + free(result_vec); + } + + /* Timer stop */ + gettimeofday(&t2, NULL); + + duration = (t2.tv_sec -t1.tv_sec)+((t2.tv_usec-t1.tv_usec)/1e6); + + printf("APM done in %lf s\n", duration); + + /***** + * END MAIN LOOP + ******/ + + for (int i=0; i < nb_patterns; i++) + { + printf("Number of matches for pattern <%s>: %d\n", + pattern[i], n_matches[i]); + } + + return 0; +} diff --git a/Projet/CODE/apm/src/apm_gpu.cu b/Projet/CODE/apm/src/apm_gpu.cu index b6fce6030314c3e75e7509862e6758b4bd66fed8..9526660230da152ca1de0dfcb76e364b8653cd69 100644 --- a/Projet/CODE/apm/src/apm_gpu.cu +++ b/Projet/CODE/apm/src/apm_gpu.cu @@ -90,7 +90,7 @@ int levenshtein(char *s1, char *s2, int len, int * column, int approx_factor) } __global__ void levenshtein_cu(char *find, char *buf, int len, int n_bytes - ,int approx_factor, int* g_column, int* result_vec) + ,int approx_factor, int* g_column, char* result_vec) { int tId = blockIdx.x * blockDim.x + threadIdx.x;//global thread id if (tId > n_bytes) @@ -142,7 +142,8 @@ int main(int argc, char ** argv) //cuda-related vars char *buf_dev; int NTBB = 1024; //Number of threads by blocks - int NB;//Number of blocks + int NB;//Number of blocks + cudaError_t cu_err; /* Check number of arguments */ if (argc < 4) @@ -198,9 +199,22 @@ int main(int argc, char ** argv) fprintf(stderr, "Error: NULL pointer from reading input file."); return 1; } + cudaMalloc((void**)&buf_dev, n_bytes * sizeof(char)); - cudaMemcpy(buf_dev, buf, n_bytes, cudaMemcpyHostToDevice); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to allocate buffer on device: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } + cudaMemcpy(buf_dev, buf, n_bytes, cudaMemcpyHostToDevice); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to copy buffer onto device: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/ if (n_matches == NULL) @@ -215,49 +229,95 @@ int main(int argc, char ** argv) ******/ /* Timer start */ + //TODO MAYBE count time with cudaevents (see older tp) gettimeofday(&t1, NULL); for (int i = 0; i < nb_patterns; i++) { n_matches[i] = 0; - //TODO err check int size_pattern = strlen(pattern[i]); char* pattern_dev; + cudaMalloc((void**)&pattern_dev, size_pattern); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to allocate pattern on device: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } cudaMemcpy(pattern_dev, pattern[i] , size_pattern, cudaMemcpyHostToDevice); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to copy pattern onto device: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } + NB = (n_bytes / NTBB) + (((n_bytes % NTBB) > 0) ? 1 : 0); - //printf("n_bytes : %i, NB : %i ; nb_threads : %i\n" - // , n_bytes, NB, NB*NTBB); + //TODO err check int * column_dev; cudaMalloc((void**)&column_dev, (size_pattern+1)*NTBB*NB*sizeof(int)); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to allocate column vector on device: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } //TODO err check - int * result_vec_dev; //result vectors. - cudaMalloc((void**)&result_vec_dev, NTBB*NB*sizeof(int)); - - int * result_vec =(int*) malloc(NTBB*NB*sizeof(int)); - memset(result_vec, 0, NTBB*NB*sizeof(int)); + char * result_vec_dev; //result vectors. + cudaMalloc((void**)&result_vec_dev, NTBB*NB*sizeof(char)); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to allocate result vector on device: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } - cudaMemcpy(result_vec_dev, result_vec - , NTBB*NB, cudaMemcpyHostToDevice); + cudaMemset(result_vec_dev, 0, NTBB*NB*sizeof(char)); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to init result vector to 0 on device: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } + char* result_vec =(char*) malloc(NTBB*NB*sizeof(char)); + if (result_vec == NULL) + { + fprintf(stderr, "Unable to allocate result vec on host."); + return 1; + } levenshtein_cu<<<NB,NTBB>>>(pattern_dev, buf_dev, size_pattern , n_bytes, approx_factor, column_dev, result_vec_dev); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Kernel execution of levenshtein_cu failed: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } //get result cudaMemcpy(result_vec, result_vec_dev - , NTBB*NB*sizeof(int), cudaMemcpyDeviceToHost); + , NTBB*NB*sizeof(char), cudaMemcpyDeviceToHost); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to retrieve result vector on host: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } + //TODO MAYBE - reduction on gpu. for (int j = 0 ; j<n_bytes ; j++) { /* Highly advanced debbugging (printfs) @@ -277,8 +337,29 @@ int main(int argc, char ** argv) //free memory - and then get onto the next pattern. cudaFree(pattern_dev); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to free memory for pattern on device: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } + cudaFree(column_dev); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to free memory for column on device: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } + cudaFree(result_vec_dev); + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "Unable to free memory for result on device: %s.\n" + , cudaGetErrorString(cu_err)); + return 1; + } + free(result_vec); }