From 08448aef30a48c5f0f4f1d04a33885deeacc4313 Mon Sep 17 00:00:00 2001 From: "enzo.decarvalhobittencourt" <ezdecarvalho@gmail.com> Date: Mon, 11 Dec 2023 11:24:10 +0100 Subject: [PATCH] reformatted apm.c file, fixed 'bug'. TODO : change apm_omp.c file --- Projet/CODE/apm/src/.apm.c.swp | Bin 0 -> 16384 bytes Projet/CODE/apm/src/apm.c | 354 ++++++++++++-------------- Projet/CODE/apm/src/apm_omp.c | 450 +++++++++++++++++---------------- 3 files changed, 394 insertions(+), 410 deletions(-) create mode 100644 Projet/CODE/apm/src/.apm.c.swp diff --git a/Projet/CODE/apm/src/.apm.c.swp b/Projet/CODE/apm/src/.apm.c.swp new file mode 100644 index 0000000000000000000000000000000000000000..28e989bebdb020ae307b90cfb0a208c9ff1045df GIT binary patch literal 16384 zcmeI3Yit}>6~}KIC~;_cqvi|Yc7wz2dhOk{n~?eul*F6VO8iJ-hg8N|W_M@3<BVsv zGqbU`Zc`x?1eHKSLPZF*B~%Sm;{Bm%3#tfhDpl|S6%axoR8+oH<)MlKMG6uBb00Ia z_BsvUDm0`1yfb(1oVoX$b06nSS}B~InB;d16c~>8Ft+{WGv=k4J?w{HV{FNA`VGf- z>DILl%Xa%6!)Y4*E5?ekdCjw0V7@eUVq&6{pE*1;S8jUlk{O8e!NTBBenws&oE|Ub zjplOR_p0<eP~BW`awpM1qJe8`pcT~n_uj&W28&{-?@Zms-~Yb3Yd@3RO*D{bAkjdg zfkXp|1`-V<8b~ydXyE@!13~jfc0YP_qw3Y?)$a}Kez&OWhWdSE-F<bL{3IGkG>~W@ z(LkbsL<5Ni5)C98NHmaWAkjdgfkXpuK@Au+#^w?1e}w{keE-++|CM($_IvPK@B+9D zegGZ@4}cX=1(V<?*b6=ewtySJYwu$0CGZRIBk(kM2;2i2paQmoE#N=5Fm?r82Is*! z@F`FR$G}Gc2OkE1yP2_Pz~kUi@CmRFWWY|40$1P3*q^{3!1G`gEPy+~0k9to0S5lP zjj=bt%ivewIq*|(3ET~uU^f^7Jzy(%r3dxF&%yKHo8UZn0C->;l)y*88(SIs88`<_ zFc0RyC>REJfW6>W@Ik=9tM7o`z(w#l_$oLHEN}`O1P8zla5K0G{Q4&77kmcX4Nik4 zkOeQjov{bOm%s^-1}U%&Tz(s47r>Xn7r__6F>nNof)9c1U@N!^AGrw51M-*8ffX<g zPJliz0#1TCFbl3Cv2+>y4m<{)P}fi4_epROFs_dKaMp35%Da?xDWb}r8MHiy7x_q7 zfr{%*&vJr#n)^Y`^t>ETl{^nM@b2q5xU58y=%8jshiB^Z&C2!t>-p`JvEfy+cilS| z@0;uKm4;=TylHs8<t%c;TWl?xPQZI>pL9|=ZuJdiA~9>M2ZvSXX`7FZ@j`}CTg4wb zvH2J@=h>!{hMY6=cz7Z9G*ruNDQrzVQ^)Jm)>vVLTYQu|m2%Sv0@HJR6!i6Fcs|Py zdq#x^4Qimg#j|<o2$2S~F{5@;W5f}Y8Ec5X%TsfXQ9<tmmm9Y2R*k@f20Ww9-MW4w zo%+^2rqT%04{Kjh>J;UqAJUF$EFT-=Q*%d;X7brXmTkxTfnN0Vgw=Su+Autx&1RNG zJJPXs^Q?HiTNib*nal{aK)g=;iB}yX`&D^A9~vRunSpqv(^{^WUaU(#b=WrzuiD^b z12hIAc^!`67|SNCD9kswAf#vugtx1%6Bw35?n0IcP?_Fjt_E6?(Zs*jS}oU&Y5?&N z7r2&mi6R^#*0h@C2ZmEMxvKiV7+18h5O*-@1RbfGW0Y$~U{nl0^qitL=pY{$7$E0n zqB8!dQJ<RgjYV^q_xfCRz*;r}%XM@=QsM#Wx@2Xc>rsBY90lmr!G>8q%_DWAf*4GG z=(IEhvfL>{aJ6Cj(qpO6DOcKoE~vRJ^2%Dv6YrO`FtA!xsM%UJ&lvEfVvYxc@|JWS zUTM`MfmV($MOU3pQiyJ>x0Jf2Bx;k#Uz62(9ARsWn!XHQNYe@FBgd5|3m3{@LX_iF zj?r}0wOh;1Jp4@CEgHtr8QH})Ft99R5#Hn4H9C`xpnKhpuznv;`^EWoe{q3^pFcRi zN~a&^1wPD+7(J}p)4EKxPdweni;;^^1$`5O`Q*gZknT-;fp4g*+KigvQ_-8(n0}v_ z0o$lm9N}%6QlS}~Ve$*BgdH2>=t(3*$t_T8R%Nl2D`c)pnX949RV_18bJ#M}ZEuj! zy;Y)FEyuSO9ka&C>Z>{4&V}a6Mb?tuVVh@6$8Q8CoJ6{Y56@L+gIHi70-cs8S*0!b z&CZ%xw;WR{+Q{(=9?I}^+Q^KiD;d~Ro}vt=Mh3=BS2N-Vw?}bK<)|pjAu#TT(NL+{ zu5YI6HRTssG1HGl!;$SYotjk_-PY5@R5+h{rcvW!;9}O03#J;#khc?a#QvGd@_6aM z+~G~uELt5RlVS>{_viHMJDfGPR#n|*oA=tbe0SQ<@Ve(N%Ndx4EKm1Fw@}$KYnhG- zulw3XPD%5c>H@!QOtwe*qAnf7=E<l*Ww1%Bo>NsuuCs&m4jiJNvF@#FP}haIv%^?d zXyU;%gXKExoE!nv3A8O-ei^+|6zGLkud|{Z;R-A7>Nx@v`hLABpv8n1O`-WNi(WT3 zIYgF#YcB>3cn7Q{HdMOjUAzM?oShz@#^9i3MD1#@`>=h}Jgt{jaUrx{$ng`U(vk9s z(yY>?><uOndZ43DX=+^6i7md-LPf(7Yqjb57>{yM$7(KIY10+C({-<5K(}c|GQ}o? zkM{ceq&C!+Lp3fkU%f`w>n}>*qbp8w(t1V8@#*sM@#(3fcSZf0-uNQjdYWY-2UfEv z6$;5}(R1tdGD>I&$}uTtDLGlJF?B`WU$&fPD=1TA)T9<}p7caUQex9sI1zCCv)Y}O zQ?*+)laKmsKTj+AKx1F5wB=x1RO>9OSDnD_EJ7XIs%$8;-1S8WY8I(X_)wNlOdZ<2 zr@&7Q7K(dNhW{{6%^aUTH8Ht=w!~-l&(4;PPjOrvymMmeFw1ANjN<<%5ZgJ36Dj_0 z$WVv<3o-mpz}LZpfMWbTAP4%u6~yiT1pff9gBQT3!4#MTJ3tSh`2HEh?cW9$z+>Pf z7y-M%UlFfg0^b7^(|-#*1U?JeU=>tA35<gsU^{pPvHLH<i{MFc5v+iDa17iIUPr9{ zEcgz16nq2R3p_9j_JJ{w2k!-2z>j+v`!1lE{sK4$?gB>u#q)neeEuSM4m=C)1#4gi z8~`cs0k949fWIR){|opt_zid(JOu*agF~PMhCwgb4sHdn!5*)I?}KyTesC5v0om#i z@GwY@L<5Ni5)Hf+G|;gV$J#5lXHi&!z1o}CpdFnKMCYvq+A^z{8dqvdAgaiOUMkwN zt7;<gp%XB)AJIE_Mvuvjh|Eu6>#<peN;VyN8#-hX1&b**TUb0Nrufn6=@}soD<}?G zTSpPt5%8Le2nRc^UK=?ku_4kiKe`~E)YoW4Iv5-X&Xj!<37(*+PBVRJRo=o4RZgdw ze9_#ar64~;y9~EpGur800m3ekmXf<1+IHfmht!?h3^NsxJ?fr`U5*IcwLDT$RAJ*4 zmBbCw*C$^^<#b5(%5xcWgoz-;z#Tb>jMA~H9M+>yIf^WmA%!RG!cKpF2`(XHsf{fs zQTK^VCS@}#*wCFG2`}^cscC*2UqSlC<&GQh6$9HuZ2g7baKBYSB8F05*qFO557xs@ zFvnMd22)K8fAV>10W7C?p8ZJ4@Q!L56%nh8oG1y<S<xsZKyOf@;}zgCv1nB{qti}Q zsyz_@2W8b1Sziia5y0}a+X|>-y)s_pOWi}!ZU$W9y}Vc`(5@Y6;V#jQZV&Ccsnwkx zL8UR<svS8!6RAiH$0a!&mndPZa%OyqavMy)ptHl9Cx_E<-SrhSVHilsED8e2JLsT~ z0IoO8TN{(b%jUA{wZ&W_{S2YET|3YjZBxV6$46mnqwZ1Es)Xpgaac>2BV7(fg=yk9 z?LsIZJ{pAvBYg>pD)AB-g-YWJ!b2*xAB{EWh~to5xX#IP-Bg5JQ1_|G@I*FQc0f#@ z(FA*-bQrVi=2L9s0h?Pt{<&d;<*8S6thd_rc>;MrmyAScg3i1|tazPu<4N1_BY^bH zd{(3;M5wM7G^mGBo4a+Mym+&|kotMkg<LEFB9BB7F3oUaY7>F&K&r#k*T}D|kB*~w zeU}=p@im0n8|x*KNh%+ZTek>Aeo2p{Y}Y+aEhL}Aw52?f5@A?ONe1QcSMt$}BIdNv zCVB$gFFsRHmfz$v^7|5>G=c_15AzhZO7yZwpYVUD+HC8uJ7TdBA0g;FR}SCAFp0!_ z7d=UGy1U{At91Dy6)iY4Aa$bARwg0Yas<*DnI;;bk*ARd#LL$w!bIbvaX`5aod+Ya F?7!h%*zN!T literal 0 HcmV?d00001 diff --git a/Projet/CODE/apm/src/apm.c b/Projet/CODE/apm/src/apm.c index 1c3e60c..6dc52b6 100644 --- a/Projet/CODE/apm/src/apm.c +++ b/Projet/CODE/apm/src/apm.c @@ -12,248 +12,230 @@ #define APM_DEBUG 0 -char * -read_input_file( char * filename, int * size ) +char * read_input_file(char * filename, int * size) { - char * buf ; + char * buf; off_t fsize; - int fd = 0 ; - int n_bytes = 1 ; + int fd = 0; + int n_bytes = 1; /* Open the text file */ - fd = open( filename, O_RDONLY ) ; - if ( fd == -1 ) + fd = open(filename, O_RDONLY); + if (fd == -1) { - fprintf( stderr, "Unable to open the text file <%s>\n", filename ) ; - return NULL ; + fprintf(stderr, "Unable to open the text file <%s>\n", filename); + return NULL; } /* Get the number of characters in the textfile */ fsize = lseek(fd, 0, SEEK_END); lseek(fd, 0, SEEK_SET); - /* TODO check return of lseek */ #if APM_DEBUG - printf( "File length: %lld\n", fsize ) ; + printf("File length: %lld\n", fsize); #endif /* Allocate data to copy the target text */ - buf = (char *)malloc( fsize * sizeof ( char ) ) ; - if ( buf == NULL ) + buf = (char *)malloc(fsize * sizeof (char)); + if (buf == NULL) { - fprintf( stderr, "Unable to allocate %lld byte(s) for main array\n", - fsize ) ; - return NULL ; + fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n", + fsize); + return NULL; } - n_bytes = read( fd, buf, fsize ) ; - if ( n_bytes != fsize ) + n_bytes = read(fd, buf, fsize); + if (n_bytes != fsize) { - fprintf( stderr, - "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n", - fsize, n_bytes) ; - return NULL ; + fprintf(stderr, + "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n", + fsize, n_bytes); + return NULL; } #if APM_DEBUG - printf( "Number of read bytes: %d\n", n_bytes ) ; + printf("Number of read bytes: %d\n", n_bytes); #endif - *size = n_bytes ; - - - close( fd ) ; - - - return buf ; + *size = n_bytes; + close(fd); + return buf; } +#define MIN3(a, b, c) ((a)<(b) ? ((a)<(c) ? (a) : (c)) : ((b)<(c) ? (b) : (c))) -#define MIN3(a, b, c) ((a) < (b) ? ((a) < (c) ? (a) : (c)) : ((b) < (c) ? (b) : (c))) - -int levenshtein(char *s1, char *s2, int len, int * column) { +int levenshtein(char *s1, char *s2, int len, int * column) +{ unsigned int x, y, lastdiag, olddiag; for (y = 1; y <= len; y++) { column[y] = y; } - for (x = 1; x <= len; x++) { + for (x = 1; x <= len; x++) + { column[0] = x; - lastdiag = x-1 ; - for (y = 1; y <= len; y++) { + lastdiag = x-1; + for (y = 1; y <= len; y++) + { olddiag = column[y]; - column[y] = MIN3( - column[y] + 1, - column[y-1] + 1, - lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1) - ); + column[y] = MIN3(column[y] + 1, + column[y-1] + 1, + lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1)); lastdiag = olddiag; - } } return(column[len]); } +int main(int argc, char ** argv) +{ + char ** pattern; + char * filename; + int approx_factor = 0; + int nb_patterns = 0; + int i, j; + char * buf; + struct timeval t1, t2; + double duration; + int n_bytes; + int * n_matches; + + /* Check number of arguments */ + if (argc < 4) + { + printf("Usage: %s approximation_factor " + "dna_database pattern1 pattern2 ...\n", + argv[0]); + return 1; + } + approx_factor = atoi(argv[1]);/* Get the distance factor */ + filename = argv[2];/* Grab the filename containing the target text */ + nb_patterns = argc - 3;/* Get the number of patterns to search for */ + + pattern = (char **)malloc(nb_patterns * sizeof(char*)); + if (pattern == NULL)/*Fill the pattern*/ + { + fprintf(stderr, + "Unable to allocate array of pattern of size %d\n", + nb_patterns); + return 1; + } -int -main( int argc, char ** argv ) -{ - char ** pattern ; - char * filename ; - int approx_factor = 0 ; - int nb_patterns = 0 ; - int i, j ; - char * buf ; - struct timeval t1, t2; - double duration ; - int n_bytes ; - int * n_matches ; - - /* Check number of arguments */ - if ( argc < 4 ) - { - printf( "Usage: %s approximation_factor " - "dna_database pattern1 pattern2 ...\n", - argv[0] ) ; - return 1 ; - } - - /* Get the distance factor */ - approx_factor = atoi( argv[1] ) ; - - /* Grab the filename containing the target text */ - filename = argv[2] ; - - /* Get the number of patterns that the user wants to search for */ - nb_patterns = argc - 3 ; - - /* Fill the pattern array */ - pattern = (char **)malloc( nb_patterns * sizeof( char * ) ) ; - if ( pattern == NULL ) - { - fprintf( stderr, - "Unable to allocate array of pattern of size %d\n", - nb_patterns ) ; - return 1 ; - } - - /* Grab the patterns */ - for ( i = 0 ; i < nb_patterns ; i++ ) - { - int l ; - - l = strlen(argv[i+3]) ; - if ( l <= 0 ) - { - fprintf( stderr, "Error while parsing argument %d\n", i+3 ) ; - return 1 ; - } - - pattern[i] = (char *)malloc( (l+1) * sizeof( char ) ) ; - if ( pattern[i] == NULL ) - { - fprintf( stderr, "Unable to allocate string of size %d\n", l ) ; - return 1 ; - } - - strncpy( pattern[i], argv[i+3], (l+1) ) ; - - } - - - printf( "Approximate Pattern Mathing: " - "looking for %d pattern(s) in file %s w/ distance of %d\n", - nb_patterns, filename, approx_factor ) ; - - buf = read_input_file( filename, &n_bytes ) ; - if ( buf == NULL ) - { - return 1 ; - } - - /* Allocate the array of matches */ - n_matches = (int *)malloc( nb_patterns * sizeof( int ) ) ; - if ( n_matches == NULL ) - { - fprintf( stderr, "Error: unable to allocate memory for %ldB\n", - nb_patterns * sizeof( int ) ) ; - return 1 ; - } - - /***** - * BEGIN MAIN LOOP - ******/ - - /* Timer start */ - gettimeofday(&t1, NULL); - - for ( i = 0 ; i < nb_patterns ; i++ ) - { - - - - int size_pattern = strlen(pattern[i]) ; - - int * column ; - - n_matches[i] = 0 ; - - column = (int *)malloc( (size_pattern+1) * sizeof( int ) ) ; - if ( column == NULL ) - { - fprintf( stderr, "Error: unable to allocate memory for column (%ldB)\n", - (size_pattern+1) * sizeof( int ) ) ; - return 1 ; - } - - for ( j = 0 ; j < n_bytes ; j++ ) - { - int distance = 0 ; - int size ; + for (i=0; i < nb_patterns; i++) /* Grab the patterns */ + { + int l; + l = strlen(argv[i+3]); + + if (l <= 0) + { + fprintf(stderr, "Error while parsing argument %d\n", i+3); + return 1; + } + + pattern[i] = (char *)malloc((l+1) * sizeof(char)); + if (pattern[i] == NULL) + { + fprintf(stderr, "Unable to allocate string of size %d\n", l); + return 1; + } + + strncpy(pattern[i], argv[i+3], (l+1)); + } + + + printf("Approximate Pattern Mathing: " + "looking for %d pattern(s) in file %s w/ distance of %d\n", + nb_patterns, filename, approx_factor); + + buf = read_input_file(filename, &n_bytes); + if (buf == NULL) + { + fprintf(stderr, "Error: NULL pointer from reading input file."); + return 1; + } + + n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/ + if (n_matches == NULL) + { + fprintf(stderr, "Error: unable to allocate memory for %ldB\n", + nb_patterns * sizeof(int)); + return 1; + } + + /***** + * BEGIN MAIN LOOP + ******/ + + /* Timer start */ + gettimeofday(&t1, NULL); + + for (i = 0; i < nb_patterns; i++) + { + int size_pattern = strlen(pattern[i]); + int * column; + + n_matches[i] = 0; + + column = (int *)malloc((size_pattern+1) * sizeof(int)); + if (column == NULL) + { + fprintf(stderr, + "Error: unable to allocate memory for column (%ldB)\n", + (size_pattern+1) * sizeof(int)); + return 1; + } + + for (j = 0; j < n_bytes; j++) + { + int distance = 0; + int size; #if APM_DEBUG - if ( j % 100 == 0 ) - { - printf( "Procesing byte %d (out of %d)\n", j, n_bytes ) ; - } + if (j % 100 == 0) + { + printf("Procesing byte %d (out of %d)\n", j, n_bytes); + } #endif - size = size_pattern ; - if ( n_bytes - j < size_pattern ) - { - size = n_bytes - j ; - } + size = size_pattern; + if (n_bytes - j < size_pattern) + { + //size = n_bytes - j; + //NO ! we do not want to match substring of our input, wth + break; + } - distance = levenshtein( pattern[i], &buf[j], size, column ) ; + distance = levenshtein(pattern[i], &buf[j], size, column); - if ( distance <= approx_factor ) { - n_matches[i]++ ; - } - } + if (distance <= approx_factor) + { + n_matches[i]++; + } + } - free( column ); - } + free(column); + } - /* Timer stop */ - gettimeofday(&t2, NULL); + /* Timer stop */ + gettimeofday(&t2, NULL); - duration = (t2.tv_sec -t1.tv_sec)+((t2.tv_usec-t1.tv_usec)/1e6); + duration = (t2.tv_sec -t1.tv_sec)+((t2.tv_usec-t1.tv_usec)/1e6); - printf( "APM done in %lf s\n", duration ) ; + printf("APM done in %lf s\n", duration); - /***** - * END MAIN LOOP - ******/ + /***** + * END MAIN LOOP + ******/ - for ( i = 0 ; i < nb_patterns ; i++ ) - { - printf( "Number of matches for pattern <%s>: %d\n", - pattern[i], n_matches[i] ) ; - } + for (i = 0; i < nb_patterns; i++) + { + printf("Number of matches for pattern <%s>: %d\n", + pattern[i], n_matches[i]); + } - return 0 ; + return 0; } diff --git a/Projet/CODE/apm/src/apm_omp.c b/Projet/CODE/apm/src/apm_omp.c index f0d611a..f5db3e5 100644 --- a/Projet/CODE/apm/src/apm_omp.c +++ b/Projet/CODE/apm/src/apm_omp.c @@ -14,56 +14,56 @@ char * read_input_file(char *filename, int *size) { - char *buf; - off_t fsize; - int fd = 0; - int n_bytes = 1; - - /* Open the text file */ - fd = open(filename, O_RDONLY); - if (fd == -1) - { - fprintf(stderr, "Unable to open the text file <%s>\n", filename); - return NULL; - } - - /* Get the number of characters in the textfile */ - fsize = lseek(fd, 0, SEEK_END); - lseek(fd, 0, SEEK_SET); - - /* TODO check return of lseek */ - - #if APM_DEBUG - printf("File length: %lld\n", fsize); - #endif - - /* Allocate data to copy the target text */ - buf = (char *) malloc(fsize * sizeof (char)); - if (buf == NULL) - { - fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n", - fsize); - return NULL; - } - - n_bytes = read(fd, buf, fsize); - if (n_bytes != fsize) - { - fprintf(stderr, - "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n", - fsize, n_bytes); - return NULL; - } - - #if APM_DEBUG - printf("Number of read bytes: %d\n", n_bytes); - #endif - - *size = n_bytes; - - close(fd); - - return buf; + char *buf; + off_t fsize; + int fd = 0; + int n_bytes = 1; + + /* Open the text file */ + fd = open(filename, O_RDONLY); + if (fd == -1) + { + fprintf(stderr, "Unable to open the text file <%s>\n", filename); + return NULL; + } + + /* Get the number of characters in the textfile */ + fsize = lseek(fd, 0, SEEK_END); + lseek(fd, 0, SEEK_SET); + + /* TODO check return of lseek */ + +#if APM_DEBUG + printf("File length: %lld\n", fsize); +#endif + + /* Allocate data to copy the target text */ + buf = (char *) malloc(fsize * sizeof (char)); + if (buf == NULL) + { + fprintf(stderr, "Unable to allocate %lld byte(s) for main array\n", + fsize); + return NULL; + } + + n_bytes = read(fd, buf, fsize); + if (n_bytes != fsize) + { + fprintf(stderr, + "Unable to copy %lld byte(s) from text file (%d byte(s) copied)\n", + fsize, n_bytes); + return NULL; + } + +#if APM_DEBUG + printf("Number of read bytes: %d\n", n_bytes); +#endif + + *size = n_bytes; + + close(fd); + + return buf; } @@ -73,183 +73,185 @@ read_input_file(char *filename, int *size) int levenshtein(char *s1, char *s2, int len, int * column) { - unsigned int x, y, lastdiag, olddiag; - - for (y = 1; y <= len; y++) - { - column[y] = y; - } - for (x = 1; x <= len; x++) { - column[0] = x; - lastdiag = x-1 ; - for (y = 1; y <= len; y++) { - olddiag = column[y]; - column[y] = MIN3( - column[y] + 1, - column[y-1] + 1, - lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1) - ); - lastdiag = olddiag; - } - } - return(column[len]); + unsigned int x, y, lastdiag, olddiag; + + for (y = 1; y <= len; y++) + { + column[y] = y; + } + for (x = 1; x <= len; x++) + { + column[0] = x; + lastdiag = x-1 ; + for (y = 1; y <= len; y++) + { + olddiag = column[y]; + column[y] = MIN3( + column[y] + 1, + column[y-1] + 1, + lastdiag + (s1[y-1] == s2[x-1] ? 0 : 1) + ); + lastdiag = olddiag; + } + } + return(column[len]); } -int +int main(int argc, char **argv) { - char **pattern; - char *filename; - int approx_factor = 0; - int nb_patterns = 0; - int i, j; - char *buf; - struct timeval t1, t2; - double duration; - int n_bytes; - int *n_matches; - - /* Check number of arguments */ - if (argc < 4) - { - printf("Usage: %s approximation_factor " - "dna_database pattern1 pattern2 ...\n", - argv[0]); - return 1; - } - - /* Get the distance factor */ - approx_factor = atoi(argv[1]); - - /* Grab the filename containing the target text */ - filename = argv[2]; - - /* Get the number of patterns that the user wants to search for */ - nb_patterns = argc - 3; - - /* Fill the pattern array */ - pattern = (char **)malloc(nb_patterns * sizeof(char *)); - if (pattern == NULL) - { - fprintf(stderr, - "Unable to allocate array of pattern of size %d\n", - nb_patterns ); - return 1; - } - - /* Grab the patterns */ - for (i = 0; i < nb_patterns; i++) - { - int l; - - l = strlen(argv[i+3]); - if (l <= 0) - { - fprintf(stderr, "Error while parsing argument %d\n", i + 3); - return 1; - } - - pattern[i] = (char *) malloc((l+1) * sizeof(char)); - if (pattern[i] == NULL) - { - fprintf(stderr, "Unable to allocate string of size %d\n", l); - return 1; - } - - strncpy(pattern[i], argv[i + 3], (l + 1)); - } - - - printf("Approximate Pattern Mathing: " - "looking for %d pattern(s) in file %s w/ distance of %d\n", - nb_patterns, filename, approx_factor); - - buf = read_input_file(filename, &n_bytes); - if (buf == NULL) - { - return 1; - } - - /* Allocate the array of matches */ - n_matches = (int *) malloc(nb_patterns * sizeof(int)); - if (n_matches == NULL) - { - fprintf(stderr, "Error: unable to allocate memory for %ldB\n", - nb_patterns * sizeof(int)); - return 1; - } - - /***** - * BEGIN MAIN LOOP - ******/ - - /* Timer start */ - gettimeofday(&t1, NULL); - - for (i = 0; i < nb_patterns; i++) - { - - int size_pattern = strlen(pattern[i]); - - int * column; - - n_matches[i] = 0; - - column = (int *) malloc((size_pattern + 1) * sizeof(int)); - if (column == NULL) - { - fprintf(stderr, "Error: unable to allocate memory for column " - "(%ldB)\n", - (size_pattern + 1) * sizeof(int)); - return 1; - } - - for (j = 0; j < n_bytes; j++) - { - int distance = 0; - int size; - - #if APM_DEBUG - if (j % 100 == 0) - { - printf("Procesing byte %d (out of %d)\n", j, n_bytes); - } - #endif - - size = size_pattern; - if (n_bytes - j < size_pattern) - { - size = n_bytes - j; - } - - distance = levenshtein(pattern[i], &buf[j], size, column); - - if (distance <= approx_factor) - { - n_matches[i]++; - } - } - - free(column); - } - - /* Timer stop */ - gettimeofday(&t2, NULL); - - duration = (t2.tv_sec - t1.tv_sec) + ((t2.tv_usec - t1.tv_usec) / 1e6); - - printf("APM done in %lf s\n", duration); - - /***** - * END MAIN LOOP - ******/ - - for (i = 0; i < nb_patterns; i++) - { - printf("Number of matches for pattern <%s>: %d\n", - pattern[i], n_matches[i]); - } - - return 0; + char **pattern; + char *filename; + int approx_factor = 0; + int nb_patterns = 0; + int i, j; + char *buf; + struct timeval t1, t2; + double duration; + int n_bytes; + int *n_matches; + + /* Check number of arguments */ + if (argc < 4) + { + printf("Usage: %s approximation_factor " + "dna_database pattern1 pattern2 ...\n", + argv[0]); + return 1; + } + + /* Get the distance factor */ + approx_factor = atoi(argv[1]); + + /* Grab the filename containing the target text */ + filename = argv[2]; + + /* Get the number of patterns that the user wants to search for */ + nb_patterns = argc - 3; + + /* Fill the pattern array */ + pattern = (char **)malloc(nb_patterns * sizeof(char *)); + if (pattern == NULL) + { + fprintf(stderr, + "Unable to allocate array of pattern of size %d\n", + nb_patterns ); + return 1; + } + + /* Grab the patterns */ + for (i = 0; i < nb_patterns; i++) + { + int l; + + l = strlen(argv[i+3]); + if (l <= 0) + { + fprintf(stderr, "Error while parsing argument %d\n", i + 3); + return 1; + } + + pattern[i] = (char *) malloc((l+1) * sizeof(char)); + if (pattern[i] == NULL) + { + fprintf(stderr, "Unable to allocate string of size %d\n", l); + return 1; + } + + strncpy(pattern[i], argv[i + 3], (l + 1)); + } + + + printf("Approximate Pattern Mathing: " + "looking for %d pattern(s) in file %s w/ distance of %d\n", + nb_patterns, filename, approx_factor); + + buf = read_input_file(filename, &n_bytes); + if (buf == NULL) + { + return 1; + } + + /* Allocate the array of matches */ + n_matches = (int *) malloc(nb_patterns * sizeof(int)); + if (n_matches == NULL) + { + fprintf(stderr, "Error: unable to allocate memory for %ldB\n", + nb_patterns * sizeof(int)); + return 1; + } + + /***** + * BEGIN MAIN LOOP + ******/ + + /* Timer start */ + gettimeofday(&t1, NULL); + + for (i = 0; i < nb_patterns; i++) + { + + int size_pattern = strlen(pattern[i]); + + int * column; + + n_matches[i] = 0; + + column = (int *) malloc((size_pattern + 1) * sizeof(int)); + if (column == NULL) + { + fprintf(stderr, "Error: unable to allocate memory for column " + "(%ldB)\n", + (size_pattern + 1) * sizeof(int)); + return 1; + } + + for (j = 0; j < n_bytes; j++) + { + int distance = 0; + int size; + +#if APM_DEBUG + if (j % 100 == 0) + { + printf("Procesing byte %d (out of %d)\n", j, n_bytes); + } +#endif + + size = size_pattern; + if (n_bytes - j < size_pattern) + { + size = n_bytes - j; + } + + distance = levenshtein(pattern[i], &buf[j], size, column); + + if (distance <= approx_factor) + { + n_matches[i]++; + } + } + + free(column); + } + + /* Timer stop */ + gettimeofday(&t2, NULL); + + duration = (t2.tv_sec - t1.tv_sec) + ((t2.tv_usec - t1.tv_usec) / 1e6); + + printf("APM done in %lf s\n", duration); + + /***** + * END MAIN LOOP + ******/ + + for (i = 0; i < nb_patterns; i++) + { + printf("Number of matches for pattern <%s>: %d\n", + pattern[i], n_matches[i]); + } + + return 0; } -- GitLab