diff --git a/Projet/CODE/apm/Makefile b/Projet/CODE/apm/Makefile index ecb8e1884e9c44118e4c59f97f516ff61047d29a..5e1a6860bf1eb672380f1774015d3c3e666fdca9 100644 --- a/Projet/CODE/apm/Makefile +++ b/Projet/CODE/apm/Makefile @@ -1,7 +1,8 @@ -ICLUDED=include +INCLUDED=include CC=gcc CFLAGS=-O3 -I $(INCLUDED) -pg -g +CFLAGS_OMP= -fopenmp -DUSE_OMP $(CFLAGS) LDFLAGS= OBJ=obj/apm.o @@ -12,6 +13,9 @@ all: dir apm apm_omp #apm_gpu dir: mkdir -p obj +obj/apm_omp.o : src/apm_omp.c + $(CC) $(CFLAGS_OMP) -c -o $@ $^ + obj/%.o: src/%.c $(CC) $(CFLAGS) -c -o $@ $^ @@ -19,7 +23,7 @@ apm: obj/apm.o $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ apm_omp: obj/apm_omp.o - $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ + $(CC) $(CFLAGS_OMP) $(LDFLAGS) -o $@ $^ apm_gpu: $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $^ diff --git a/Projet/CODE/apm/src/.apm.c.swp b/Projet/CODE/apm/src/.apm.c.swp deleted file mode 100644 index 28e989bebdb020ae307b90cfb0a208c9ff1045df..0000000000000000000000000000000000000000 Binary files a/Projet/CODE/apm/src/.apm.c.swp and /dev/null differ diff --git a/Projet/CODE/apm/src/apm_omp.c b/Projet/CODE/apm/src/apm_omp.c index f5db3e57414d6a84a2d1b472364efaca25ed21e4..d40a9cdef1c8db1582b3b5efb5a73b0e8aa58012 100644 --- a/Projet/CODE/apm/src/apm_omp.c +++ b/Projet/CODE/apm/src/apm_omp.c @@ -1,6 +1,7 @@ /** * APPROXIMATE PATTERN MATCHING * + * INF560 X2016 */ #include <string.h> #include <stdio.h> @@ -9,12 +10,13 @@ #include <unistd.h> #include <sys/time.h> +#include "omp.h" + #define APM_DEBUG 0 -char * -read_input_file(char *filename, int *size) +char * read_input_file(char * filename, int * size) { - char *buf; + char * buf; off_t fsize; int fd = 0; int n_bytes = 1; @@ -30,7 +32,6 @@ read_input_file(char *filename, int *size) /* Get the number of characters in the textfile */ fsize = lseek(fd, 0, SEEK_END); lseek(fd, 0, SEEK_SET); - /* TODO check return of lseek */ #if APM_DEBUG @@ -38,11 +39,11 @@ read_input_file(char *filename, int *size) #endif /* Allocate data to copy the target text */ - buf = (char *) malloc(fsize * sizeof (char)); + buf = (char *)malloc(fsize * sizeof (char)); if (buf == NULL) { fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n", - fsize); + fsize); return NULL; } @@ -50,8 +51,8 @@ read_input_file(char *filename, int *size) if (n_bytes != fsize) { fprintf(stderr, - "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n", - fsize, n_bytes); + "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n", + fsize, n_bytes); return NULL; } @@ -60,18 +61,13 @@ read_input_file(char *filename, int *size) #endif *size = n_bytes; - close(fd); - return buf; } +#define MIN3(a, b, c) ((a)<(b) ? ((a)<(c) ? (a) : (c)) : ((b)<(c) ? (b) : (c))) -#define MIN3(a, b, c)\ - ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c))) - -int -levenshtein(char *s1, char *s2, int len, int * column) +int levenshtein(char *s1, char *s2, int len, int * column) { unsigned int x, y, lastdiag, olddiag; @@ -82,15 +78,13 @@ levenshtein(char *s1, char *s2, int len, int * column) for (x = 1; x <= len; x++) { column[0] = x; - lastdiag = x-1 ; + lastdiag = x-1; for (y = 1; y <= len; y++) { olddiag = column[y]; - column[y] = MIN3( - column[y] + 1, + column[y] = MIN3(column[y] + 1, column[y-1] + 1, - lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1) - ); + lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1)); lastdiag = olddiag; } } @@ -98,87 +92,79 @@ levenshtein(char *s1, char *s2, int len, int * column) } -int -main(int argc, char **argv) +int main(int argc, char ** argv) { - char **pattern; - char *filename; + char ** pattern; + char * filename; int approx_factor = 0; int nb_patterns = 0; int i, j; - char *buf; + char * buf; struct timeval t1, t2; double duration; int n_bytes; - int *n_matches; + int * n_matches; /* Check number of arguments */ if (argc < 4) { printf("Usage: %s approximation_factor " - "dna_database pattern1 pattern2 ...\n", - argv[0]); + "dna_database pattern1 pattern2 ...\n", + argv[0]); return 1; } - /* Get the distance factor */ - approx_factor = atoi(argv[1]); - - /* Grab the filename containing the target text */ - filename = argv[2]; - - /* Get the number of patterns that the user wants to search for */ - nb_patterns = argc - 3; - - /* Fill the pattern array */ - pattern = (char **)malloc(nb_patterns * sizeof(char *)); - if (pattern == NULL) + approx_factor = atoi(argv[1]);/* Get the distance factor */ + filename = argv[2];/* Grab the filename containing the target text */ + nb_patterns = argc - 3;/* Get the number of patterns to search for */ + + pattern = (char **)malloc(nb_patterns * sizeof(char*)); + if (pattern == NULL)/*Fill the pattern*/ { fprintf(stderr, - "Unable to allocate array of pattern of size %d\n", - nb_patterns ); + "Unable to allocate array of pattern of size %d\n", + nb_patterns); return 1; } - /* Grab the patterns */ - for (i = 0; i < nb_patterns; i++) + for (i=0; i < nb_patterns; i++) /* Grab the patterns */ { int l; - l = strlen(argv[i+3]); - if (l <= 0) + + if (l <= 0) { - fprintf(stderr, "Error while parsing argument %d\n", i + 3); + fprintf(stderr, "Error while parsing argument %d\n", i+3); return 1; } - - pattern[i] = (char *) malloc((l+1) * sizeof(char)); + + pattern[i] = (char *)malloc((l+1) * sizeof(char)); if (pattern[i] == NULL) { fprintf(stderr, "Unable to allocate string of size %d\n", l); return 1; } - strncpy(pattern[i], argv[i + 3], (l + 1)); + strncpy(pattern[i], argv[i+3], (l+1)); } printf("Approximate Pattern Mathing: " - "looking for %d pattern(s) in file %s w/ distance of %d\n", - nb_patterns, filename, approx_factor); + "looking for %d pattern(s) in file %s w/ distance of %d\n", + nb_patterns, filename, approx_factor); buf = read_input_file(filename, &n_bytes); if (buf == NULL) { + fprintf(stderr, "Error: NULL pointer from reading input file."); return 1; } - /* Allocate the array of matches */ - n_matches = (int *) malloc(nb_patterns * sizeof(int)); + n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/ if (n_matches == NULL) { fprintf(stderr, "Error: unable to allocate memory for %ldB\n", - nb_patterns * sizeof(int)); + nb_patterns * sizeof(int)); return 1; } @@ -191,26 +177,25 @@ main(int argc, char **argv) for (i = 0; i < nb_patterns; i++) { - int size_pattern = strlen(pattern[i]); - int * column; + int distance = 0; n_matches[i] = 0; - column = (int *) malloc((size_pattern + 1) * sizeof(int)); + column = (int *)malloc((size_pattern+1) * n_bytes * sizeof(int)); if (column == NULL) { - fprintf(stderr, "Error: unable to allocate memory for column " - "(%ldB)\n", - (size_pattern + 1) * sizeof(int)); + fprintf(stderr, + "Error: unable to allocate memory for column (%ldB)\n", + (size_pattern+1) * sizeof(int)); return 1; } + int matches = 0; + #pragma omp parallel for reduction(+:matches) private(distance) for (j = 0; j < n_bytes; j++) { - int distance = 0; - int size; #if APM_DEBUG if (j % 100 == 0) @@ -219,27 +204,31 @@ main(int argc, char **argv) } #endif - size = size_pattern; + size_pattern; if (n_bytes - j < size_pattern) { - size = n_bytes - j; + //size = n_bytes - j; + //NO ! we do not want to match substring of our input, wth + continue; } - distance = levenshtein(pattern[i], &buf[j], size, column); + distance = levenshtein(pattern[i], &buf[j], size_pattern, + column+(j*(size_pattern+1))); if (distance <= approx_factor) { - n_matches[i]++; + matches++; } } + n_matches[i] = matches; free(column); } /* Timer stop */ gettimeofday(&t2, NULL); - duration = (t2.tv_sec - t1.tv_sec) + ((t2.tv_usec - t1.tv_usec) / 1e6); + duration = (t2.tv_sec -t1.tv_sec)+((t2.tv_usec-t1.tv_usec)/1e6); printf("APM done in %lf s\n", duration); @@ -250,7 +239,7 @@ main(int argc, char **argv) for (i = 0; i < nb_patterns; i++) { printf("Number of matches for pattern <%s>: %d\n", - pattern[i], n_matches[i]); + pattern[i], n_matches[i]); } return 0;