Skip to content
Extraits de code Groupes Projets
Valider a8ffa431 rédigé par Nicolas MARIE's avatar Nicolas MARIE
Parcourir les fichiers

Fix to support files larger than 2 GB

parent 68e1f3e5
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -2,7 +2,7 @@ INCLUDED=include
CC=gcc
NVCC=nvcc
CFLAGS=-O3 -I $(INCLUDED) -pg -g -Wall
CFLAGS=-O3 -I $(INCLUDED) -pg -g -Wall -Wextra
CFLAGS_OMP= -fopenmp -DUSE_OMP $(CFLAGS)
CFLAGS_CU=-O3 -Xcompiler "$(CFLAGS)"
LDFLAGS=
......@@ -46,7 +46,6 @@ style:
--pad-header \
--unpad-paren \
--unpad-brackets \
--delete-empty-lines \
--squeeze-lines=4 \
--squeeze-ws \
--align-pointer=name \
......
......@@ -10,15 +10,18 @@
#include <unistd.h>
#include <sys/time.h>
#define APM_DEBUG 0
char *
read_input_file(char *filename, int *size)
read_input_file(char *filename, size_t *size)
{
char *buf;
off_t fsize;
size_t fsize;
int fd = 0;
int n_bytes = 1;
size_t read_bytes = 0;
size_t total_bytes = 0;
/* Open the text file */
fd = open(filename, O_RDONLY);
......@@ -33,10 +36,11 @@ read_input_file(char *filename, int *size)
lseek(fd, 0, SEEK_SET);
/* TODO check return of lseek */
#if APM_DEBUG
printf("File length: %lld\n", fsize);
printf("File length: %ld\n", fsize);
#endif
/* Allocate data to copy the target text */
buf = (char *)malloc(fsize * sizeof(char));
buf = malloc(fsize * sizeof(char));
if (buf == NULL)
{
......@@ -45,21 +49,27 @@ read_input_file(char *filename, int *size)
return NULL;
}
n_bytes = read(fd, buf, fsize);
do
{
read_bytes = read(fd, buf, fsize - total_bytes);
total_bytes += read_bytes;
}
while (read_bytes != 0);
if (n_bytes != fsize)
if (total_bytes != fsize)
{
fprintf(stderr,
"Unable to copy %ld byte(s) from text file "
"(%d byte(s) copied)\n",
fsize, n_bytes);
"(%ld byte(s) copied)\n",
fsize, total_bytes);
return NULL;
}
#if APM_DEBUG
printf("Number of read bytes: %d\n", n_bytes);
printf("Number of read bytes: %ld\n", total_bytes);
#endif
*size = n_bytes;
*size = total_bytes;
close(fd);
return buf;
}
......@@ -67,7 +77,7 @@ read_input_file(char *filename, int *size)
#define MIN3(a, b, c) ((a)<(b) ? ((a)<(c) ? (a) : (c)) : ((b)<(c) ? (b) : (c)))
int
levenshtein(char *s1, char *s2, int len, int *column)
levenshtein(char *s1, char *s2, unsigned int len, unsigned int *column)
{
unsigned int x, y, lastdiag, olddiag;
......@@ -101,13 +111,13 @@ main(int argc, char **argv)
char **pattern;
char *filename;
int approx_factor = 0;
int nb_patterns = 0;
int i, j;
size_t nb_patterns = 0;
size_t i, j;
char *buf;
struct timeval t1, t2;
double duration;
int n_bytes;
int *n_matches;
size_t n_bytes;
size_t *n_matches;
/* Check number of arguments */
if (argc < 4)
......@@ -121,12 +131,12 @@ main(int argc, char **argv)
approx_factor = atoi(argv[1]);/* Get the distance factor */
filename = argv[2];/* Grab the filename containing the target text */
nb_patterns = argc - 3;/* Get the number of patterns to search for */
pattern = (char **)malloc(nb_patterns * sizeof(char *));
pattern = malloc(nb_patterns * sizeof(char *));
if (pattern == NULL)/*Fill the pattern*/
{
fprintf(stderr,
"Unable to allocate array of pattern of size %d\n",
"Unable to allocate array of pattern of size %ld\n",
nb_patterns);
return 1;
}
......@@ -138,7 +148,7 @@ main(int argc, char **argv)
if (l <= 0)
{
fprintf(stderr, "Error while parsing argument %d\n", i + 3);
fprintf(stderr, "Error while parsing argument %ld\n", i + 3);
return 1;
}
......@@ -154,7 +164,7 @@ main(int argc, char **argv)
}
printf("Approximate Pattern Mathing: "
"looking for %d pattern(s) in file %s w/ distance of %d\n",
"looking for %ld pattern(s) in file %s w/ distance of %d\n",
nb_patterns, filename, approx_factor);
buf = read_input_file(filename, &n_bytes);
......@@ -164,7 +174,7 @@ main(int argc, char **argv)
return 1;
}
n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/
n_matches = malloc(nb_patterns * sizeof(size_t));/*Alloc the matches*/
if (n_matches == NULL)
{
......@@ -181,41 +191,40 @@ main(int argc, char **argv)
for (i = 0; i < nb_patterns; i++)
{
int size_pattern = strlen(pattern[i]);
int *column;
size_t size_pattern = strlen(pattern[i]);
unsigned int *column;
n_matches[i] = 0;
column = (int *)malloc((size_pattern + 1) * sizeof(int));
column = malloc((size_pattern + 1) * sizeof(unsigned int));
if (column == NULL)
{
fprintf(stderr,
"Error: unable to allocate memory for column (%ldB)\n",
(size_pattern + 1) * sizeof(int));
(size_pattern + 1) * sizeof(unsigned int));
return 1;
}
for (j = 0; j < n_bytes; j++)
{
int distance = 0;
int size;
#if APM_DEBUG
if (j % 100 == 0)
if (j % (n_bytes / 100) == 0)
{
printf("Procesing byte %d (out of %d)\n", j, n_bytes);
printf("Procesing byte %ld (out of %ld)(%ld%%)\n",
j, n_bytes, j / (n_bytes / 100));
}
#endif
size = size_pattern;
if (n_bytes - j < size_pattern)
{
//size = n_bytes - j;
//NO ! we do not want to match substring of our input, wth
break;
continue;
}
distance = levenshtein(pattern[i], &buf[j], size, column);
distance = levenshtein(pattern[i], &buf[j], size_pattern, column);
if (distance <= approx_factor)
{
......@@ -237,7 +246,7 @@ main(int argc, char **argv)
for (i = 0; i < nb_patterns; i++)
{
printf("Number of matches for pattern <%s>: %d\n",
printf("Number of matches for pattern <%s>: %ld\n",
pattern[i], n_matches[i]);
}
......
......@@ -15,12 +15,14 @@
#define APM_DEBUG 0
char *
read_input_file(char *filename, int *size)
read_input_file(char *filename, size_t *size)
{
char *buf;
off_t fsize;
size_t fsize;
int fd = 0;
int n_bytes = 1;
size_t read_bytes = 0;
size_t total_bytes = 0;
/* Open the text file */
fd = open(filename, O_RDONLY);
......@@ -35,10 +37,11 @@ read_input_file(char *filename, int *size)
lseek(fd, 0, SEEK_SET);
/* TODO check return of lseek */
#if APM_DEBUG
printf("File length: %lld\n", fsize);
printf("File length: %ld\n", fsize);
#endif
/* Allocate data to copy the target text */
buf = (char *)malloc(fsize * sizeof(char));
buf = malloc(fsize * sizeof(char));
if (buf == NULL)
{
......@@ -47,21 +50,27 @@ read_input_file(char *filename, int *size)
return NULL;
}
n_bytes = read(fd, buf, fsize);
do
{
read_bytes = read(fd, buf, fsize - total_bytes);
total_bytes += read_bytes;
}
while (read_bytes != 0);
if (n_bytes != fsize)
if (total_bytes != fsize)
{
fprintf(stderr,
"Unable to copy %ld byte(s) from text file "
"(%d byte(s) copied)\n",
fsize, n_bytes);
"(%ld byte(s) copied)\n",
fsize, total_bytes);
return NULL;
}
#if APM_DEBUG
printf("Number of read bytes: %d\n", n_bytes);
printf("Number of read bytes: %ld\n", total_bytes);
#endif
*size = n_bytes;
*size = total_bytes;
close(fd);
return buf;
}
......@@ -69,7 +78,7 @@ read_input_file(char *filename, int *size)
#define MIN3(a, b, c) ((a)<(b) ? ((a)<(c) ? (a) : (c)) : ((b)<(c) ? (b) : (c)))
int
levenshtein(char *s1, char *s2, int len, int *column)
levenshtein(char *s1, char *s2, unsigned int len, unsigned int *column)
{
unsigned int x, y, lastdiag, olddiag;
......@@ -103,13 +112,13 @@ main(int argc, char **argv)
char **pattern;
char *filename;
int approx_factor = 0;
int nb_patterns = 0;
int i, j;
size_t nb_patterns = 0;
size_t i, j;
char *buf;
struct timeval t1, t2;
double duration;
int n_bytes;
int *n_matches;
size_t n_bytes;
size_t *n_matches;
int num_threads;
/* Check number of arguments */
......@@ -124,12 +133,12 @@ main(int argc, char **argv)
approx_factor = atoi(argv[1]);/* Get the distance factor */
filename = argv[2];/* Grab the filename containing the target text */
nb_patterns = argc - 3;/* Get the number of patterns to search for */
pattern = (char **)malloc(nb_patterns * sizeof(char *));
pattern = malloc(nb_patterns * sizeof(char *));
if (pattern == NULL)/*Fill the pattern*/
{
fprintf(stderr,
"Unable to allocate array of pattern of size %d\n",
"Unable to allocate array of pattern of size %ld\n",
nb_patterns);
return 1;
}
......@@ -141,7 +150,7 @@ main(int argc, char **argv)
if (l <= 0)
{
fprintf(stderr, "Error while parsing argument %d\n", i + 3);
fprintf(stderr, "Error while parsing argument %ld\n", i + 3);
return 1;
}
......@@ -157,7 +166,7 @@ main(int argc, char **argv)
}
printf("Approximate Pattern Mathing: "
"looking for %d pattern(s) in file %s w/ distance of %d\n",
"looking for %ld pattern(s) in file %s w/ distance of %d\n",
nb_patterns, filename, approx_factor);
buf = read_input_file(filename, &n_bytes);
......@@ -167,7 +176,7 @@ main(int argc, char **argv)
return 1;
}
n_matches = (int *)malloc(nb_patterns * sizeof(int));/*Alloc the matches*/
n_matches = malloc(nb_patterns * sizeof(size_t));/*Alloc the matches*/
if (n_matches == NULL)
{
......@@ -180,6 +189,11 @@ main(int argc, char **argv)
{
num_threads = omp_get_num_threads();
}
#if APM_DEBUG
printf("Number of threads: %d\n", num_threads);
#endif
/*****
* BEGIN MAIN LOOP
******/
......@@ -188,32 +202,32 @@ main(int argc, char **argv)
for (i = 0; i < nb_patterns; i++)
{
int size_pattern = strlen(pattern[i]);
int *column;
int distance = 0;
size_t size_pattern = strlen(pattern[i]);
unsigned int *column;
n_matches[i] = 0;
column = (int *)malloc((size_pattern + 1) * num_threads * sizeof(int));
column = malloc((size_pattern + 1) * num_threads * sizeof(unsigned int));
if (column == NULL)
{
fprintf(stderr,
"Error: unable to allocate memory for column (%ldB)\n",
(size_pattern + 1) * sizeof(int) * n_bytes);
(size_pattern + 1) * sizeof(unsigned int) * num_threads);
return 1;
}
int distance = 0;
int matches = 0;
#pragma omp parallel for reduction(+:matches) private(distance)
for (j = 0; j < n_bytes; j++)
{
#if APM_DEBUG
if (j % 100 == 0)
#if APM_DEBUG
if (j % (n_bytes / 100) == 0)
{
printf("Procesing byte %d (out of %d)\n", j, n_bytes);
printf("Procesing byte %ld (out of %ld)(%ld%%)\n",
j, n_bytes, j / (n_bytes / 100));
}
#endif
if (n_bytes - j < size_pattern)
......@@ -248,7 +262,7 @@ main(int argc, char **argv)
for (i = 0; i < nb_patterns; i++)
{
printf("Number of matches for pattern <%s>: %d\n",
printf("Number of matches for pattern <%s>: %ld\n",
pattern[i], n_matches[i]);
}
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Veuillez vous inscrire ou vous connecter pour commenter