From 4deee54a4c8d23a18c2ecebde1fb137097e0baa8 Mon Sep 17 00:00:00 2001 From: "nicolas.marie" <nicolas.marie@ensiie.eu> Date: Thu, 21 Dec 2023 11:55:48 +0100 Subject: [PATCH] clean gpu code --- Projet/CODE/apm/gmon.out | Bin 3479 -> 0 bytes Projet/CODE/apm/src/apm_gpu.cu | 185 +++++++-------------------------- 2 files changed, 39 insertions(+), 146 deletions(-) delete mode 100644 Projet/CODE/apm/gmon.out diff --git a/Projet/CODE/apm/gmon.out b/Projet/CODE/apm/gmon.out deleted file mode 100644 index 7d8ec1418417db2239e4ff48af92b024ec7f5f40..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3479 zcmYe#&Cg?GzyU&}AQCg!7#LE3%;MDK{JfN6m})o!CNN5mhQMeDjE2By2#kinXb6mk zz-S1JhQMeD49yT=V_;%n1VaWOi2%rKgeV7?$qFQ888!oTvN0Gka01yL4Ezi`h3i># z86+6Q8LSwD85$Ur8AO1z3RJTikj@9&!wh5rI~q{H$N(~pfq@6k2e|{g?NB8&qd;z9 p0zybgFfo8!3UUdlu3<bN1aaeWF$fJd<bfoFb5I;Yf9YP&0svhc5r+T( diff --git a/Projet/CODE/apm/src/apm_gpu.cu b/Projet/CODE/apm/src/apm_gpu.cu index efbdb48..7405782 100644 --- a/Projet/CODE/apm/src/apm_gpu.cu +++ b/Projet/CODE/apm/src/apm_gpu.cu @@ -18,6 +18,21 @@ //the amount of RAM used by the programm is approx~Â 30*MAX_BUFFER_SIZE. //If you get out of memory errors, you should reduce this value + +inline +void +__cudaCheckErrors(char const *msg) +{ + cudaError_t cu_err; + + if ((cu_err = cudaGetLastError()) != cudaSuccess) + { + fprintf(stderr, "%s: %s.\n", msg, cudaGetErrorString(cu_err)); + exit(EXIT_FAILURE); + } +} + + char * read_input_file(char *filename, int *size) { @@ -79,8 +94,7 @@ get_file_size(int fd) return size; } char * -read_input_file_max(int fd, int *size - , int pattern_len, off_t offset) +read_input_file_max(int fd, int *size, off_t offset) { char *buf; off_t fsize; @@ -169,7 +183,7 @@ levenshtein_cu(char *find, char *buf, int len, int n_bytes //arrays char *s2 = buf + tId; int *column = g_column + tId * (len + 1); - //i do not understand this algorithm and god do i not want to. + int x, y, lastdiag, olddiag; for (y = 1; y <= len; y++) @@ -221,7 +235,6 @@ main(int argc, char **argv) char *buf_dev; int NTBB = NUMBER_THREADS_BY_BLOCK; //Number of threads by blocks int NB = 0;//Number of blocks - cudaError_t cu_err; /* Check number of arguments */ if (argc < 4) @@ -252,7 +265,7 @@ main(int argc, char **argv) } nb_patterns = argc - 3;/* Get the number of patterns to search for */ - pattern = (char **)malloc(nb_patterns * sizeof(char *)); + pattern = (char **) malloc(nb_patterns * sizeof(char *)); if (pattern == NULL)/*Fill the pattern*/ { @@ -287,7 +300,8 @@ main(int argc, char **argv) printf("Approximate Pattern Mathing: " "looking for %d pattern(s) in file %s w/ distance of %d\n", nb_patterns, filename, approx_factor); - n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/ + + n_matches = (int *) malloc(nb_patterns * sizeof(int));/*Alloc the matches*/ if (n_matches == NULL) { @@ -300,43 +314,25 @@ main(int argc, char **argv) * BEGIN MAIN LOOP ******/ /* Timer start */ - //TODO MAYBE count time with cudaevents (see older tp) gettimeofday(&t1, NULL); for (int i = 0; i < nb_patterns; i++) { n_matches[i] = 0; int size_pattern = strlen(pattern[i]); - char *pattern_dev; - cudaMalloc((void **)&pattern_dev, size_pattern); - - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, "Unable to allocate pattern on device: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } - - cudaMemcpy(pattern_dev, pattern[i], - size_pattern, cudaMemcpyHostToDevice); - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, "Unable to copy pattern onto device: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } + char *pattern_dev; + cudaMalloc((void **) &pattern_dev, size_pattern); + __cudaCheckErrors("Unable to allocate pattern on device"); + cudaMemcpy(pattern_dev, pattern[i], size_pattern, + cudaMemcpyHostToDevice); + __cudaCheckErrors("Unable to copy pattern onto device"); int result = 0; int *result_dev; cudaMalloc((void **)&result_dev, sizeof(int)); + __cudaCheckErrors("Unable to allocate result on device"); - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, "Unable to allocate result on device: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } off_t offset = 0; @@ -345,7 +341,7 @@ main(int argc, char **argv) #if APM_DEBUG printf("offset: %i, filesize: %i\n", offset, filesize); #endif - buf = read_input_file_max(fdesc, &n_bytes, size_pattern, offset); + buf = read_input_file_max(fdesc, &n_bytes, offset); if (buf == NULL) { @@ -353,47 +349,22 @@ main(int argc, char **argv) return 1; } - cudaMalloc((void **)&buf_dev, n_bytes * sizeof(char)); - - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, "Unable to allocate buffer on device: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } - + cudaMalloc((void **) &buf_dev, n_bytes * sizeof(char)); + __cudaCheckErrors("Unable to allocate buffer on device"); cudaMemcpy(buf_dev, buf, n_bytes, cudaMemcpyHostToDevice); + __cudaCheckErrors("Unable to copy buffer onto device"); - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, "Unable to copy buffer onto device: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } NB = (n_bytes / NTBB) + (((n_bytes % NTBB) > 0) ? 1 : 0); int *column_dev; cudaMalloc((void **)&column_dev, (size_pattern + 1)*NTBB * NB * sizeof(int)); + __cudaCheckErrors("Unable to allocate column vector on device"); - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, - "Unable to allocate column vector on device: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } char *result_vec_dev; //result vectors. cudaMalloc((void **)&result_vec_dev, NTBB * NB * sizeof(char)); - - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, - "Unable to allocate result vector on device: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } + __cudaCheckErrors("Unable to allocate result vector on device"); char *result_vec = (char *) malloc(NTBB * NB * sizeof(char)); @@ -403,110 +374,32 @@ main(int argc, char **argv) return 1; } - /* - cudaMemset(result_vec_dev, 0, NTBB*NB*sizeof(char)); - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, "Unable to init result vector to 0 on device: %s.\n" - , cudaGetErrorString(cu_err)); - return 1; - } - - cudaMemset(result_dev, 0, sizeof(int)); - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, "Unable to init result to 0 on device: %s.\n" - , cudaGetErrorString(cu_err)); - return 1; - } - */ levenshtein_cu <<< NB, NTBB>>>(pattern_dev, buf_dev, size_pattern, n_bytes, approx_factor, column_dev, result_dev); + __cudaCheckErrors("Kernel execution of levenshtein_cu failed"); - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, - "Kernel execution of levenshtein_cu failed: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } //get result cudaMemcpy(&result, result_dev, sizeof(int), cudaMemcpyDeviceToHost); - - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, "Unable to retrieve result on host: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } + __cudaCheckErrors("Unable to retrieve result on host"); n_matches[i] += result; - /* - for (int j = 0 ; j<n_bytes ; j++) - { - // Highly advanced debbugging (printfs) - int column[size_pattern+1]; - int d = - levenshtein(pattern[i], buf+j, size_pattern, column - , approx_factor); - //printf("%d",d); - if (d != result_vec[j]) - { - printf("MISMATCH FOUND %s should have match at %i :" - ,pattern[i], j); - printf("%.*s\n",size_pattern,&buf[j]); - } - - n_matches[i] += result_vec[j]; - } - */ + offset += (MAX_BUFFER_SIZE - size_pattern + 1); cudaFree(buf_dev); + __cudaCheckErrors("Unable to free memory for dev on device"); - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, - "Unable to free memory for dev on device: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } free(buf); cudaFree(column_dev); + __cudaCheckErrors("Unable to free memory for column on device"); - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, - "Unable to free memory for column on device: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } - - /* - cudaFree(result_vec_dev); - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, "Unable to free memory for result on device: %s.\n" - , cudaGetErrorString(cu_err)); - return 1; - } - - free(result_vec); - */ } //free memory - and then get onto the next pattern. cudaFree(pattern_dev); - - if ((cu_err = cudaGetLastError()) != cudaSuccess) - { - fprintf(stderr, - "Unable to free memory for pattern on device: %s.\n", - cudaGetErrorString(cu_err)); - return 1; - } + __cudaCheckErrors("Unable to free memory for pattern on device"); } /* Timer stop */ -- GitLab