diff --git a/Projet/CODE/apm/src/.apm.c.swp b/Projet/CODE/apm/src/.apm.c.swp new file mode 100644 index 0000000000000000000000000000000000000000..28e989bebdb020ae307b90cfb0a208c9ff1045df Binary files /dev/null and b/Projet/CODE/apm/src/.apm.c.swp differ diff --git a/Projet/CODE/apm/src/apm.c b/Projet/CODE/apm/src/apm.c index 1c3e60c29aaf32be05c7fef5087b4701e1e03a23..6dc52b69bcf486c33aac542794ae841c0bcf551e 100644 --- a/Projet/CODE/apm/src/apm.c +++ b/Projet/CODE/apm/src/apm.c @@ -12,248 +12,230 @@ #define APM_DEBUG 0 -char * -read_input_file( char * filename, int * size ) +char * read_input_file(char * filename, int * size) { - char * buf ; + char * buf; off_t fsize; - int fd = 0 ; - int n_bytes = 1 ; + int fd = 0; + int n_bytes = 1; /* Open the text file */ - fd = open( filename, O_RDONLY ) ; - if ( fd == -1 ) + fd = open(filename, O_RDONLY); + if (fd == -1) { - fprintf( stderr, "Unable to open the text file <%s>\n", filename ) ; - return NULL ; + fprintf(stderr, "Unable to open the text file <%s>\n", filename); + return NULL; } /* Get the number of characters in the textfile */ fsize = lseek(fd, 0, SEEK_END); lseek(fd, 0, SEEK_SET); - /* TODO check return of lseek */ #if APM_DEBUG - printf( "File length: %lld\n", fsize ) ; + printf("File length: %lld\n", fsize); #endif /* Allocate data to copy the target text */ - buf = (char *)malloc( fsize * sizeof ( char ) ) ; - if ( buf == NULL ) + buf = (char *)malloc(fsize * sizeof (char)); + if (buf == NULL) { - fprintf( stderr, "Unable to allocate %lld byte(s) for main array\n", - fsize ) ; - return NULL ; + fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n", + fsize); + return NULL; } - n_bytes = read( fd, buf, fsize ) ; - if ( n_bytes != fsize ) + n_bytes = read(fd, buf, fsize); + if (n_bytes != fsize) { - fprintf( stderr, - "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n", - fsize, n_bytes) ; - return NULL ; + fprintf(stderr, + "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n", + fsize, n_bytes); + return NULL; } #if APM_DEBUG - printf( "Number of read bytes: %d\n", n_bytes ) ; + printf("Number of read bytes: %d\n", n_bytes); #endif - *size = n_bytes ; - - - close( fd ) ; - - - return buf ; + *size = n_bytes; + close(fd); + return buf; } +#define MIN3(a, b, c) ((a)<(b) ? ((a)<(c) ? (a) : (c)) : ((b)<(c) ? (b) : (c))) -#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c))) - -int levenshtein(char *s1, char *s2, int len, int * column) { +int levenshtein(char *s1, char *s2, int len, int * column) +{ unsigned int x, y, lastdiag, olddiag; for (y = 1; y <= len; y++) { column[y] = y; } - for (x = 1; x <= len; x++) { + for (x = 1; x <= len; x++) + { column[0] = x; - lastdiag = x-1 ; - for (y = 1; y <= len; y++) { + lastdiag = x-1; + for (y = 1; y <= len; y++) + { olddiag = column[y]; - column[y] = MIN3( - column[y] + 1, - column[y-1] + 1, - lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1) - ); + column[y] = MIN3(column[y] + 1, + column[y-1] + 1, + lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1)); lastdiag = olddiag; - } } return(column[len]); } +int main(int argc, char ** argv) +{ + char ** pattern; + char * filename; + int approx_factor = 0; + int nb_patterns = 0; + int i, j; + char * buf; + struct timeval t1, t2; + double duration; + int n_bytes; + int * n_matches; + + /* Check number of arguments */ + if (argc < 4) + { + printf("Usage: %s approximation_factor " + "dna_database pattern1 pattern2 ...\n", + argv[0]); + return 1; + } + approx_factor = atoi(argv[1]);/* Get the distance factor */ + filename = argv[2];/* Grab the filename containing the target text */ + nb_patterns = argc - 3;/* Get the number of patterns to search for */ + + pattern = (char **)malloc(nb_patterns * sizeof(char*)); + if (pattern == NULL)/*Fill the pattern*/ + { + fprintf(stderr, + "Unable to allocate array of pattern of size %d\n", + nb_patterns); + return 1; + } -int -main( int argc, char ** argv ) -{ - char ** pattern ; - char * filename ; - int approx_factor = 0 ; - int nb_patterns = 0 ; - int i, j ; - char * buf ; - struct timeval t1, t2; - double duration ; - int n_bytes ; - int * n_matches ; - - /* Check number of arguments */ - if ( argc < 4 ) - { - printf( "Usage: %s approximation_factor " - "dna_database pattern1 pattern2 ...\n", - argv[0] ) ; - return 1 ; - } - - /* Get the distance factor */ - approx_factor = atoi( argv[1] ) ; - - /* Grab the filename containing the target text */ - filename = argv[2] ; - - /* Get the number of patterns that the user wants to search for */ - nb_patterns = argc - 3 ; - - /* Fill the pattern array */ - pattern = (char **)malloc( nb_patterns * sizeof( char * ) ) ; - if ( pattern == NULL ) - { - fprintf( stderr, - "Unable to allocate array of pattern of size %d\n", - nb_patterns ) ; - return 1 ; - } - - /* Grab the patterns */ - for ( i = 0 ; i < nb_patterns ; i++ ) - { - int l ; - - l = strlen(argv[i+3]) ; - if ( l <= 0 ) - { - fprintf( stderr, "Error while parsing argument %d\n", i+3 ) ; - return 1 ; - } - - pattern[i] = (char *)malloc( (l+1) * sizeof( char ) ) ; - if ( pattern[i] == NULL ) - { - fprintf( stderr, "Unable to allocate string of size %d\n", l ) ; - return 1 ; - } - - strncpy( pattern[i], argv[i+3], (l+1) ) ; - - } - - - printf( "Approximate Pattern Mathing: " - "looking for %d pattern(s) in file %s w/ distance of %d\n", - nb_patterns, filename, approx_factor ) ; - - buf = read_input_file( filename, &n_bytes ) ; - if ( buf == NULL ) - { - return 1 ; - } - - /* Allocate the array of matches */ - n_matches = (int *)malloc( nb_patterns * sizeof( int ) ) ; - if ( n_matches == NULL ) - { - fprintf( stderr, "Error: unable to allocate memory for %ldB\n", - nb_patterns * sizeof( int ) ) ; - return 1 ; - } - - /***** - * BEGIN MAIN LOOP - ******/ - - /* Timer start */ - gettimeofday(&t1, NULL); - - for ( i = 0 ; i < nb_patterns ; i++ ) - { - - - - int size_pattern = strlen(pattern[i]) ; - - int * column ; - - n_matches[i] = 0 ; - - column = (int *)malloc( (size_pattern+1) * sizeof( int ) ) ; - if ( column == NULL ) - { - fprintf( stderr, "Error: unable to allocate memory for column (%ldB)\n", - (size_pattern+1) * sizeof( int ) ) ; - return 1 ; - } - - for ( j = 0 ; j < n_bytes ; j++ ) - { - int distance = 0 ; - int size ; + for (i=0; i < nb_patterns; i++) /* Grab the patterns */ + { + int l; + l = strlen(argv[i+3]); + + if (l <= 0) + { + fprintf(stderr, "Error while parsing argument %d\n", i+3); + return 1; + } + + pattern[i] = (char *)malloc((l+1) * sizeof(char)); + if (pattern[i] == NULL) + { + fprintf(stderr, "Unable to allocate string of size %d\n", l); + return 1; + } + + strncpy(pattern[i], argv[i+3], (l+1)); + } + + + printf("Approximate Pattern Mathing: " + "looking for %d pattern(s) in file %s w/ distance of %d\n", + nb_patterns, filename, approx_factor); + + buf = read_input_file(filename, &n_bytes); + if (buf == NULL) + { + fprintf(stderr, "Error: NULL pointer from reading input file."); + return 1; + } + + n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/ + if (n_matches == NULL) + { + fprintf(stderr, "Error: unable to allocate memory for %ldB\n", + nb_patterns * sizeof(int)); + return 1; + } + + /***** + * BEGIN MAIN LOOP + ******/ + + /* Timer start */ + gettimeofday(&t1, NULL); + + for (i = 0; i < nb_patterns; i++) + { + int size_pattern = strlen(pattern[i]); + int * column; + + n_matches[i] = 0; + + column = (int *)malloc((size_pattern+1) * sizeof(int)); + if (column == NULL) + { + fprintf(stderr, + "Error: unable to allocate memory for column (%ldB)\n", + (size_pattern+1) * sizeof(int)); + return 1; + } + + for (j = 0; j < n_bytes; j++) + { + int distance = 0; + int size; #if APM_DEBUG - if ( j % 100 == 0 ) - { - printf( "Procesing byte %d (out of %d)\n", j, n_bytes ) ; - } + if (j % 100 == 0) + { + printf("Procesing byte %d (out of %d)\n", j, n_bytes); + } #endif - size = size_pattern ; - if ( n_bytes - j < size_pattern ) - { - size = n_bytes - j ; - } + size = size_pattern; + if (n_bytes - j < size_pattern) + { + //size = n_bytes - j; + //NO ! we do not want to match substring of our input, wth + break; + } - distance = levenshtein( pattern[i], &buf[j], size, column ) ; + distance = levenshtein(pattern[i], &buf[j], size, column); - if ( distance <= approx_factor ) { - n_matches[i]++ ; - } - } + if (distance <= approx_factor) + { + n_matches[i]++; + } + } - free( column ); - } + free(column); + } - /* Timer stop */ - gettimeofday(&t2, NULL); + /* Timer stop */ + gettimeofday(&t2, NULL); - duration = (t2.tv_sec -t1.tv_sec)+((t2.tv_usec-t1.tv_usec)/1e6); + duration = (t2.tv_sec -t1.tv_sec)+((t2.tv_usec-t1.tv_usec)/1e6); - printf( "APM done in %lf s\n", duration ) ; + printf("APM done in %lf s\n", duration); - /***** - * END MAIN LOOP - ******/ + /***** + * END MAIN LOOP + ******/ - for ( i = 0 ; i < nb_patterns ; i++ ) - { - printf( "Number of matches for pattern <%s>: %d\n", - pattern[i], n_matches[i] ) ; - } + for (i = 0; i < nb_patterns; i++) + { + printf("Number of matches for pattern <%s>: %d\n", + pattern[i], n_matches[i]); + } - return 0 ; + return 0; } diff --git a/Projet/CODE/apm/src/apm_omp.c b/Projet/CODE/apm/src/apm_omp.c index f0d611a4ae3ee1f3646c622ba9f5241b3ec61a5f..f5db3e57414d6a84a2d1b472364efaca25ed21e4 100644 --- a/Projet/CODE/apm/src/apm_omp.c +++ b/Projet/CODE/apm/src/apm_omp.c @@ -14,56 +14,56 @@ char * read_input_file(char *filename, int *size) { - char *buf; - off_t fsize; - int fd = 0; - int n_bytes = 1; - - /* Open the text file */ - fd = open(filename, O_RDONLY); - if (fd == -1) - { - fprintf(stderr, "Unable to open the text file <%s>\n", filename); - return NULL; - } - - /* Get the number of characters in the textfile */ - fsize = lseek(fd, 0, SEEK_END); - lseek(fd, 0, SEEK_SET); - - /* TODO check return of lseek */ - - #if APM_DEBUG - printf("File length: %lld\n", fsize); - #endif - - /* Allocate data to copy the target text */ - buf = (char *) malloc(fsize * sizeof (char)); - if (buf == NULL) - { - fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n", - fsize); - return NULL; - } - - n_bytes = read(fd, buf, fsize); - if (n_bytes != fsize) - { - fprintf(stderr, - "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n", - fsize, n_bytes); - return NULL; - } - - #if APM_DEBUG - printf("Number of read bytes: %d\n", n_bytes); - #endif - - *size = n_bytes; - - close(fd); - - return buf; + char *buf; + off_t fsize; + int fd = 0; + int n_bytes = 1; + + /* Open the text file */ + fd = open(filename, O_RDONLY); + if (fd == -1) + { + fprintf(stderr, "Unable to open the text file <%s>\n", filename); + return NULL; + } + + /* Get the number of characters in the textfile */ + fsize = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_SET); + + /* TODO check return of lseek */ + +#if APM_DEBUG + printf("File length: %lld\n", fsize); +#endif + + /* Allocate data to copy the target text */ + buf = (char *) malloc(fsize * sizeof (char)); + if (buf == NULL) + { + fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n", + fsize); + return NULL; + } + + n_bytes = read(fd, buf, fsize); + if (n_bytes != fsize) + { + fprintf(stderr, + "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n", + fsize, n_bytes); + return NULL; + } + +#if APM_DEBUG + printf("Number of read bytes: %d\n", n_bytes); +#endif + + *size = n_bytes; + + close(fd); + + return buf; } @@ -73,183 +73,185 @@ read_input_file(char *filename, int *size) int levenshtein(char *s1, char *s2, int len, int * column) { - unsigned int x, y, lastdiag, olddiag; - - for (y = 1; y <= len; y++) - { - column[y] = y; - } - for (x = 1; x <= len; x++) { - column[0] = x; - lastdiag = x-1 ; - for (y = 1; y <= len; y++) { - olddiag = column[y]; - column[y] = MIN3( - column[y] + 1, - column[y-1] + 1, - lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1) - ); - lastdiag = olddiag; - } - } - return(column[len]); + unsigned int x, y, lastdiag, olddiag; + + for (y = 1; y <= len; y++) + { + column[y] = y; + } + for (x = 1; x <= len; x++) + { + column[0] = x; + lastdiag = x-1 ; + for (y = 1; y <= len; y++) + { + olddiag = column[y]; + column[y] = MIN3( + column[y] + 1, + column[y-1] + 1, + lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1) + ); + lastdiag = olddiag; + } + } + return(column[len]); } -int +int main(int argc, char **argv) { - char **pattern; - char *filename; - int approx_factor = 0; - int nb_patterns = 0; - int i, j; - char *buf; - struct timeval t1, t2; - double duration; - int n_bytes; - int *n_matches; - - /* Check number of arguments */ - if (argc < 4) - { - printf("Usage: %s approximation_factor " - "dna_database pattern1 pattern2 ...\n", - argv[0]); - return 1; - } - - /* Get the distance factor */ - approx_factor = atoi(argv[1]); - - /* Grab the filename containing the target text */ - filename = argv[2]; - - /* Get the number of patterns that the user wants to search for */ - nb_patterns = argc - 3; - - /* Fill the pattern array */ - pattern = (char **)malloc(nb_patterns * sizeof(char *)); - if (pattern == NULL) - { - fprintf(stderr, - "Unable to allocate array of pattern of size %d\n", - nb_patterns ); - return 1; - } - - /* Grab the patterns */ - for (i = 0; i < nb_patterns; i++) - { - int l; - - l = strlen(argv[i+3]); - if (l <= 0) - { - fprintf(stderr, "Error while parsing argument %d\n", i + 3); - return 1; - } - - pattern[i] = (char *) malloc((l+1) * sizeof(char)); - if (pattern[i] == NULL) - { - fprintf(stderr, "Unable to allocate string of size %d\n", l); - return 1; - } - - strncpy(pattern[i], argv[i + 3], (l + 1)); - } - - - printf("Approximate Pattern Mathing: " - "looking for %d pattern(s) in file %s w/ distance of %d\n", - nb_patterns, filename, approx_factor); - - buf = read_input_file(filename, &n_bytes); - if (buf == NULL) - { - return 1; - } - - /* Allocate the array of matches */ - n_matches = (int *) malloc(nb_patterns * sizeof(int)); - if (n_matches == NULL) - { - fprintf(stderr, "Error: unable to allocate memory for %ldB\n", - nb_patterns * sizeof(int)); - return 1; - } - - /***** - * BEGIN MAIN LOOP - ******/ - - /* Timer start */ - gettimeofday(&t1, NULL); - - for (i = 0; i < nb_patterns; i++) - { - - int size_pattern = strlen(pattern[i]); - - int * column; - - n_matches[i] = 0; - - column = (int *) malloc((size_pattern + 1) * sizeof(int)); - if (column == NULL) - { - fprintf(stderr, "Error: unable to allocate memory for column " - "(%ldB)\n", - (size_pattern + 1) * sizeof(int)); - return 1; - } - - for (j = 0; j < n_bytes; j++) - { - int distance = 0; - int size; - - #if APM_DEBUG - if (j % 100 == 0) - { - printf("Procesing byte %d (out of %d)\n", j, n_bytes); - } - #endif - - size = size_pattern; - if (n_bytes - j < size_pattern) - { - size = n_bytes - j; - } - - distance = levenshtein(pattern[i], &buf[j], size, column); - - if (distance <= approx_factor) - { - n_matches[i]++; - } - } - - free(column); - } - - /* Timer stop */ - gettimeofday(&t2, NULL); - - duration = (t2.tv_sec - t1.tv_sec) + ((t2.tv_usec - t1.tv_usec) / 1e6); - - printf("APM done in %lf s\n", duration); - - /***** - * END MAIN LOOP - ******/ - - for (i = 0; i < nb_patterns; i++) - { - printf("Number of matches for pattern <%s>: %d\n", - pattern[i], n_matches[i]); - } - - return 0; + char **pattern; + char *filename; + int approx_factor = 0; + int nb_patterns = 0; + int i, j; + char *buf; + struct timeval t1, t2; + double duration; + int n_bytes; + int *n_matches; + + /* Check number of arguments */ + if (argc < 4) + { + printf("Usage: %s approximation_factor " + "dna_database pattern1 pattern2 ...\n", + argv[0]); + return 1; + } + + /* Get the distance factor */ + approx_factor = atoi(argv[1]); + + /* Grab the filename containing the target text */ + filename = argv[2]; + + /* Get the number of patterns that the user wants to search for */ + nb_patterns = argc - 3; + + /* Fill the pattern array */ + pattern = (char **)malloc(nb_patterns * sizeof(char *)); + if (pattern == NULL) + { + fprintf(stderr, + "Unable to allocate array of pattern of size %d\n", + nb_patterns ); + return 1; + } + + /* Grab the patterns */ + for (i = 0; i < nb_patterns; i++) + { + int l; + + l = strlen(argv[i+3]); + if (l <= 0) + { + fprintf(stderr, "Error while parsing argument %d\n", i + 3); + return 1; + } + + pattern[i] = (char *) malloc((l+1) * sizeof(char)); + if (pattern[i] == NULL) + { + fprintf(stderr, "Unable to allocate string of size %d\n", l); + return 1; + } + + strncpy(pattern[i], argv[i + 3], (l + 1)); + } + + + printf("Approximate Pattern Mathing: " + "looking for %d pattern(s) in file %s w/ distance of %d\n", + nb_patterns, filename, approx_factor); + + buf = read_input_file(filename, &n_bytes); + if (buf == NULL) + { + return 1; + } + + /* Allocate the array of matches */ + n_matches = (int *) malloc(nb_patterns * sizeof(int)); + if (n_matches == NULL) + { + fprintf(stderr, "Error: unable to allocate memory for %ldB\n", + nb_patterns * sizeof(int)); + return 1; + } + + /***** + * BEGIN MAIN LOOP + ******/ + + /* Timer start */ + gettimeofday(&t1, NULL); + + for (i = 0; i < nb_patterns; i++) + { + + int size_pattern = strlen(pattern[i]); + + int * column; + + n_matches[i] = 0; + + column = (int *) malloc((size_pattern + 1) * sizeof(int)); + if (column == NULL) + { + fprintf(stderr, "Error: unable to allocate memory for column " + "(%ldB)\n", + (size_pattern + 1) * sizeof(int)); + return 1; + } + + for (j = 0; j < n_bytes; j++) + { + int distance = 0; + int size; + +#if APM_DEBUG + if (j % 100 == 0) + { + printf("Procesing byte %d (out of %d)\n", j, n_bytes); + } +#endif + + size = size_pattern; + if (n_bytes - j < size_pattern) + { + size = n_bytes - j; + } + + distance = levenshtein(pattern[i], &buf[j], size, column); + + if (distance <= approx_factor) + { + n_matches[i]++; + } + } + + free(column); + } + + /* Timer stop */ + gettimeofday(&t2, NULL); + + duration = (t2.tv_sec - t1.tv_sec) + ((t2.tv_usec - t1.tv_usec) / 1e6); + + printf("APM done in %lf s\n", duration); + + /***** + * END MAIN LOOP + ******/ + + for (i = 0; i < nb_patterns; i++) + { + printf("Number of matches for pattern <%s>: %d\n", + pattern[i], n_matches[i]); + } + + return 0; }