diff --git a/TPs/TP3/CODE/AXPY/axpy.c b/TPs/TP3/CODE/AXPY/axpy.c new file mode 100755 index 0000000000000000000000000000000000000000..2356954914ad85c3f86039c34f7c534982b903d4 --- /dev/null +++ b/TPs/TP3/CODE/AXPY/axpy.c @@ -0,0 +1,31 @@ +#include <stdio.h> +#include <stdlib.h> +#include "omp.h" + +/* AXPY demo: N comes from argv[1] (default 1000); fills X and Y, adds alpha*X[i] to every Y[i], then prints the first min(N, 5) entries of Y. NOTE(review): neither malloc result is checked and X/Y are not freed before returning. */ int main(int argc, char** argv) +{ + double alpha = 2; + double *X = NULL, *Y = NULL; + int N = 1000; + if (argc > 1) N = atoi(argv[1]); + + X = (double*) malloc(sizeof(double) * N); + Y = (double*) malloc(sizeof(double) * N); + + for(int i = 0; i < N; ++i) + { + X[i] = i; + Y[i] = X[i] + i; + } + + for(int i = 0; i < N; ++i) + { + Y[i] += alpha * X[i]; + } + + int stop = (N > 5)?5:N; + for(int i = 0; i < stop; ++i) + fprintf(stdout, "Y[%d] = %f\n", i, Y[i]); + + return 0; +} diff --git a/TPs/TP3/CODE/DGEMM/Makefile b/TPs/TP3/CODE/DGEMM/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..645485d0a4969d2113063ffe05db79d7182c7c3c --- /dev/null +++ b/TPs/TP3/CODE/DGEMM/Makefile @@ -0,0 +1,21 @@ +CC=clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 +CFLAGS=-O3 -Wall +LDFLAGS=-lm -lrt + +EXEC=dgemm.exe +SRC=$(wildcard *.c) +OBJ=$(SRC:.c=.o) + +all : $(EXEC) + +$(EXEC) : $(OBJ) + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) + +%.o : %.c + $(CC) $(CFLAGS) -o $@ -c $< + +proper : + rm -f *.o + +clean : proper + rm -f $(EXEC) diff --git a/TPs/TP3/CODE/DGEMM/dgemm.c b/TPs/TP3/CODE/DGEMM/dgemm.c new file mode 100755 index 0000000000000000000000000000000000000000..817910f167789628a8d9885794508f5b6b8d2176 --- /dev/null +++ b/TPs/TP3/CODE/DGEMM/dgemm.c @@ -0,0 +1,81 @@ +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <omp.h> + +typedef struct timespec struct_time; +#define gettime(t) clock_gettime(CLOCK_MONOTONIC_RAW, t) +#define get_sub_seconde(t) (1e-9*(double)t.tv_nsec) +/** Return the CLOCK_MONOTONIC_RAW clock reading in seconds, or 0 when clock_gettime reports an error. +*/ +double get_elapsedtime(void) +{ + struct_time st; + int err = gettime(&st); + if (err !=0) return 0; + return 
(double)st.tv_sec + get_sub_seconde(st); +} + +/* Dense product C = A*B on N x N float matrices stored row by row (element (i,j) lives at index i*N+j). NOTE(review): total_size is assigned but never read. */ void mult(int N, float *A, float* B, float* C) +{ + int i = 0, j = 0, k = 0; + int total_size = N*N; + for(i = 0; i < N; ++i) + { + for(j = 0; j < N; ++j) + { + float val = 0.; + for(k = 0; k < N; ++k) + { + val += A[i * N + k] * B[k * N + j]; + } + C[i * N + j] = val; + } + } +} + +/* Times a single call to mult() on N x N matrices (N from argv[1], default 1024) and prints the elapsed seconds. NOTE(review): the calloc results are not checked and A, B, C are not freed before returning. */ int main(int argc, char** argv) +{ + double t0 = 0., t1 = 0., duration = 0.; + + float *A = NULL; + float *B = NULL; + float *C = NULL; + + int i = 0; + + int N = 1024; + if(argc > 1) + { + N = atoi(argv[1]); + } + + fprintf(stdout, "> Matrix Multiplication Kernel...\n"); + fprintf(stdout, " Size: %dx%d\n", N, N); +#pragma omp parallel default(none) shared(stdout) + { +#pragma omp single + fprintf(stdout, " Running on %d threads\n", omp_get_num_threads()); + } + + A = (float*) calloc(N * N, sizeof(float)); + B = (float*) calloc(N * N, sizeof(float)); + C = (float*) calloc(N * N, sizeof(float)); + + for(i = 0; i < N * N; ++i) + { + A[i] = 1. * i; + B[i] = N*N - (1. 
* i); + C[i] = 0.; + } + + t0 = get_elapsedtime(); + mult(N, A, B, C); + t1 = get_elapsedtime(); + + duration = (t1 - t0); + + fprintf(stdout, " Elapsed Time : %f\n", duration); + + return 0; +} diff --git a/TPs/TP3/CODE/ONGPU/Makefile b/TPs/TP3/CODE/ONGPU/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..645485d0a4969d2113063ffe05db79d7182c7c3c --- /dev/null +++ b/TPs/TP3/CODE/ONGPU/Makefile @@ -0,0 +1,21 @@ +CC=clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_60 +CFLAGS=-O3 -Wall +LDFLAGS=-lm -lrt + +EXEC=dgemm.exe +SRC=$(wildcard *.c) +OBJ=$(SRC:.c=.o) + +all : $(EXEC) + +$(EXEC) : $(OBJ) + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) + +%.o : %.c + $(CC) $(CFLAGS) -o $@ -c $< + +proper : + rm -f *.o + +clean : proper + rm -f $(EXEC) diff --git a/TPs/TP3/CODE/ONGPU/ongpu.c b/TPs/TP3/CODE/ONGPU/ongpu.c new file mode 100755 index 0000000000000000000000000000000000000000..8a9260c7fac585f62364f7848207ff274679c730 --- /dev/null +++ b/TPs/TP3/CODE/ONGPU/ongpu.c @@ -0,0 +1,21 @@ +#include <stdio.h> +#include <omp.h> + +/* Prints the _OPENMP version macro, then reports whether an omp target region ran somewhere other than the initial device (omp_is_initial_device() returned 0 inside the region). */ int main() +{ + int runningOnGPU = 0; + fprintf(stderr, "OpenMP Version: %d\n", _OPENMP); + /* Test if GPU is available using OpenMP4.5 */ +#pragma omp target map(from:runningOnGPU) + { + if (omp_is_initial_device() == 0) + runningOnGPU = 1; + } + /* If still running on CPU, GPU must not be available */ + if (runningOnGPU) + printf("### Able to use the GPU! ### \n"); + else + printf("### Unable to use the GPU, using CPU! 
###\n"); + + return 0; +} diff --git a/TPs/TP3/CODE/SpMV/CSRMatrix.c b/TPs/TP3/CODE/SpMV/CSRMatrix.c new file mode 100755 index 0000000000000000000000000000000000000000..b7b48e34f08642ed894df1f9d4608f1e47d1699b --- /dev/null +++ b/TPs/TP3/CODE/SpMV/CSRMatrix.c @@ -0,0 +1,365 @@ +#include "CSRMatrix.h" +#include "omp.h" + +/** Return the clock reading obtained through gettime() in seconds, or 0 when gettime() reports an error. +*/ +double get_elapsedtime(void) +{ + struct_time st; + int err = gettime(&st); + if (err !=0) return 0; + return (double)st.tv_sec + get_sub_seconde(st); +} + +/* Records nrows and nnz in A and mallocs m_values (nnz doubles), m_cols (nnz ints) and m_kcol (nnz+1 ints); no allocation result is checked. NOTE(review): mult_CSR and print_CSR index m_kcol from 0 to nrows, so nrows+1 looks like the intended length - confirm. */ void init_CSR(CSRMatrix_t* A, int nrows, int nnz) +{ + A->m_nrows = nrows ; + A->m_nnz = nnz ; + A->m_values = (double*) malloc(nnz * sizeof(double) ); + A->m_cols = (int*) malloc(nnz * sizeof(int) ); + A->m_kcol = (int*) malloc((nnz + 1) * sizeof(int) ); +} + +/* Frees the three arrays held by A; the struct itself is not freed here. */ void destruct_CSR(CSRMatrix_t* A) +{ + free(A->m_values); + free(A->m_cols); + free(A->m_kcol); +} + +/* Computes y = A*x: row i accumulates values[k]*x[cols[k]] for k from kcol[i] up to kcol[i+1]-1. */ void mult_CSR(CSRMatrix_t* A, double const* x, double* y) +{ + int i = 0, k = 0, nrows = A->m_nrows; + double val = 0.; + double* values = A->m_values; + int* kcol = A->m_kcol; + int* cols = A->m_cols; + + for(i=0;i<nrows;++i) + { + val = 0. ; + for(k=kcol[i];k<kcol[i+1];++k) + val += values[k]*x[cols[k]] ; + y[i] = val ; + } +} + +/* Same product as mult_CSR restricted to rows start .. end-1. Prints "FATAL ERROR !" on stderr and returns without writing y when start or end lies outside 0 .. nrows. */ void mult_CSR_task(CSRMatrix_t* A, double const* x, double* y, int start, int end) +{ + int i = 0, k = 0, nrows = A->m_nrows; + double val = 0.; + double* values = A->m_values; + int* kcol = A->m_kcol; + int* cols = A->m_cols; + + if( start < 0 || start > nrows || end <0 || end > nrows) + { + fprintf(stderr, "FATAL ERROR !\n"); + return; + } + + for(i = start; i<end; ++i) + { + val = 0. 
; + for(k=kcol[i];k<kcol[i+1];++k) + val += values[k]*x[cols[k]] ; + y[i] = val ; + } +} + +/* Dumps the matrix to stdout: the row and non-zero counts, then every row as (column: value) pairs. */ void print_CSR(CSRMatrix_t* A) +{ + int i = 0, k = 0, nrows = A->m_nrows; + double* values = A->m_values; + int* kcol = A->m_kcol; + int* cols = A->m_cols; + fprintf(stdout, "NROWS: %d | NNZ: %d\n", nrows, A->m_nnz); + for(i=0;i<nrows;++i) + { + fprintf(stdout, "ROW [%d]\n\t", i); + for(k=kcol[i];k<kcol[i+1];++k) + fprintf(stdout, "(%d: %f) ", cols[k], values[k]); + fprintf(stdout, "\n"); + } +} + +/* Clamps i into the range 0 .. n-1. */ int hat(int i,int n) +{ + return max(0,min(i,n-1)) ; +} + +/* Linear index of grid cell (i,j) after clamping i to 0..nx-1 and j to 0..ny-1: j*nx+i. */ int uid(int i,int j,int nx,int ny) +{ + return hat(j,ny)*nx+hat(i,nx) ; +} + +/* p1*p2/(p1+p2) where p1 is perm at cell (i-1,j) and p2 is perm at cell (i,j), both indexed through uid(). */ double _trans_m_i(double* perm,int i,int j,int nx,int ny) +{ + double p1 = perm[uid(i-1,j,nx,ny)] ; + double p2 = perm[uid(i,j,nx,ny)] ; + return p1*p2/(p1+p2) ; +} + +/* Same formula with the neighbour cell (i+1,j). */ double _trans_p_i(double* perm,int i,int j,int nx,int ny) +{ + double p1 = perm[uid(i+1,j,nx,ny)] ; + double p2 = perm[uid(i,j,nx,ny)] ; + return p1*p2/(p1+p2) ; +} + +/* Same formula with the neighbour cell (i,j-1). */ double _trans_m_j(double* perm,int i,int j,int nx, int ny) +{ + double p1 = perm[uid(i,j-1,nx,ny)] ; + double p2 = perm[uid(i,j,nx,ny)] ; + return p1*p2/(p1+p2) ; +} + +/* Same formula with the neighbour cell (i,j+1). */ double _trans_p_j(double* perm,int i,int j,int nx, int ny) +{ + double p1 = perm[uid(i,j+1,nx,ny)] ; + double p2 = perm[uid(i,j,nx,ny)] ; + return p1*p2/(p1+p2) ; +} + +/* Initialises matrix through init_CSR() and fills it row by row for an nx x ny grid: every row gets a diagonal entry plus one entry per existing left/right/lower/upper neighbour (3 entries at corners, 4 on edges, 5 inside). The diagonal always sums all four T_* terms; off-diagonal entries are the negated terms. NOTE(review): m_permitivity is malloc'ed here and no free() of it appears in this function. NOTE(review): the separate first/last row and column cases look like they assume nx and ny are at least 2 - confirm. */ void buildLaplacian(CSRMatrix_t* matrix, + int nx, int ny) +{ + + int i=0,j=0, nrows = nx*ny; + int nnz = 5*(nx-2)*(ny-2) + (nx+ny-4)*8+4*3 ; + fprintf(stdout, "NROWS : %d\n", nrows) ; + fprintf(stdout, "NNZ : %d\n\n", nnz) ; + init_CSR(matrix,nrows,nnz) ; + double* m_permitivity = NULL ; + m_permitivity = (double*) malloc(nrows * sizeof(double) ); + for(i = 0; i < nrows; ++i) m_permitivity[i] = 1.; + + int* cols = matrix->m_cols ; + int* kcol = matrix->m_kcol ; + double* values = matrix->m_values ; + int irow =0 ; + int offset = 0 ; + { + /* first grid row */ j=0 ; + { + i=0 ; + double T_m_i = _trans_m_i(m_permitivity,i,j,nx,ny) ; + double T_p_i = _trans_p_i(m_permitivity,i,j,nx,ny) ; + double T_m_j = 
_trans_m_j(m_permitivity,i,j,nx,ny) ; + double T_p_j = _trans_p_j(m_permitivity,i,j,nx,ny) ; + + int row_size = 3 ; + kcol[irow] = offset ; + cols[offset] = irow ; + cols[offset+1] = irow+1 ; + cols[offset+2] = irow+nx ; + values[offset] = T_p_i + T_p_j ; + { + values[offset] += T_m_i; + } + { + values[offset] += T_m_j; + } + values[offset+1] = -T_p_i ; + values[offset+2] = -T_p_j ; + offset += row_size ; + ++irow ; + } + for(i=1;i<nx-1;++i) + { + double T_m_i = _trans_m_i(m_permitivity,i,j,nx,ny) ; + double T_p_i = _trans_p_i(m_permitivity,i,j,nx,ny) ; + double T_m_j = _trans_m_j(m_permitivity,i,j,nx,ny) ; + double T_p_j = _trans_p_j(m_permitivity,i,j,nx,ny) ; + + int row_size = 4 ; + kcol[irow] = offset ; + cols[offset] = irow-1 ; + cols[offset+1] = irow ; + cols[offset+2] = irow+1 ; + cols[offset+3] = irow+nx ; + values[offset] = -T_m_i ; + values[offset+1] = T_m_i+T_p_i+T_p_j ; + { + values[offset+1] += T_m_j; + } + values[offset+2] = -T_p_i ; + values[offset+3] = -T_p_j ; + offset += row_size ; + ++irow ; + } + { + i=nx-1 ; + double T_m_i = _trans_m_i(m_permitivity,i,j,nx,ny) ; + double T_p_i = _trans_p_i(m_permitivity,i,j,nx,ny) ; + double T_m_j = _trans_m_j(m_permitivity,i,j,nx,ny) ; + double T_p_j = _trans_p_j(m_permitivity,i,j,nx,ny) ; + + int row_size = 3 ; + kcol[irow] = offset ; + cols[offset] = irow-1 ; + cols[offset+1] = irow ; + cols[offset+2] = irow+nx ; + values[offset] = -T_m_i ; + values[offset+1] = T_m_i+T_p_j ; + values[offset+2] = -T_p_j ; + { + values[offset+1] += T_p_i; + } + { + values[offset+1] += T_m_j; + } + offset += row_size ; + ++irow ; + } + } + /* grid rows 1 .. ny-2 */ for(j=1;j<ny-1;++j) + { + { + i=0 ; + double T_m_i = _trans_m_i(m_permitivity,i,j,nx,ny) ; + double T_p_i = _trans_p_i(m_permitivity,i,j,nx,ny) ; + double T_m_j = _trans_m_j(m_permitivity,i,j,nx,ny) ; + double T_p_j = _trans_p_j(m_permitivity,i,j,nx,ny) ; + + int row_size = 4 ; + kcol[irow] = offset ; + cols[offset] = irow-nx ; + cols[offset+1] = irow ; + cols[offset+2] = irow+1 ; + 
cols[offset+3] = irow+nx ; + values[offset] = -T_m_j ; + values[offset+1] = T_m_j+T_p_i+T_p_j ; + values[offset+2] = -T_p_i ; + values[offset+3] = -T_p_j ; + { + values[offset+1] += T_m_i; + } + offset += row_size ; + ++irow ; + } + for(i=1;i<nx-1;++i) + { + int row_size = 5 ; + double T_m_i = _trans_m_i(m_permitivity,i,j,nx,ny) ; + double T_p_i = _trans_p_i(m_permitivity,i,j,nx,ny) ; + double T_m_j = _trans_m_j(m_permitivity,i,j,nx,ny) ; + double T_p_j = _trans_p_j(m_permitivity,i,j,nx,ny) ; + + kcol[irow] = offset ; + cols[offset] = irow-nx ; + cols[offset+1] = irow -1; + cols[offset+2] = irow ; + cols[offset+3] = irow+1 ; + cols[offset+4] = irow+nx ; + values[offset] = -T_m_j ; + values[offset+1] = -T_m_i ; + values[offset+2] = T_m_j+T_m_i+T_p_i+T_p_j ; + values[offset+3] = -T_p_i ; + values[offset+4] = -T_p_j ; + offset += row_size ; + ++irow ; + } + { + i=nx-1 ; + double T_m_i = _trans_m_i(m_permitivity,i,j,nx,ny) ; + double T_p_i = _trans_p_i(m_permitivity,i,j,nx,ny) ; + double T_m_j = _trans_m_j(m_permitivity,i,j,nx,ny) ; + double T_p_j = _trans_p_j(m_permitivity,i,j,nx,ny) ; + + int row_size = 4 ; + kcol[irow] = offset ; + cols[offset] = irow-nx ; + cols[offset+1] = irow-1 ; + cols[offset+2] = irow ; + cols[offset+3] = irow+nx ; + values[offset] = -T_m_j ; + values[offset+1] = -T_m_i ; + values[offset+2] = T_m_j+T_m_i+T_p_j ; + values[offset+3] = -T_p_j ; + + { + values[offset+2] += T_p_i; + } + offset += row_size ; + ++irow ; + } + } + { + /* last grid row */ j=ny-1 ; + { + i=0 ; + double T_m_i = _trans_m_i(m_permitivity,i,j,nx,ny) ; + double T_p_i = _trans_p_i(m_permitivity,i,j,nx,ny) ; + double T_m_j = _trans_m_j(m_permitivity,i,j,nx,ny) ; + double T_p_j = _trans_p_j(m_permitivity,i,j,nx,ny) ; + + int row_size = 3 ; + kcol[irow] = offset ; + cols[offset] = irow-nx ; + cols[offset+1] = irow ; + cols[offset+2] = irow+1 ; + values[offset] = -T_m_j ; + values[offset+1] = T_m_j+T_p_i ; + values[offset+2] = -T_p_i ; + { + values[offset+1] += T_m_i; + } + { + values[offset+1] += 
T_p_j; + } + offset += row_size ; + ++irow ; + } + for(i=1;i<nx-1;++i) + { + double T_m_i = _trans_m_i(m_permitivity,i,j,nx,ny) ; + double T_p_i = _trans_p_i(m_permitivity,i,j,nx,ny) ; + double T_m_j = _trans_m_j(m_permitivity,i,j,nx,ny) ; + double T_p_j = _trans_p_j(m_permitivity,i,j,nx,ny) ; + + int row_size = 4 ; + kcol[irow] = offset ; + cols[offset] = irow-nx ; + cols[offset+1] = irow-1 ; + cols[offset+2] = irow ; + cols[offset+3] = irow+1 ; + values[offset] = -T_m_j ; + values[offset+1] = -T_m_i ; + values[offset+2] = T_m_j+T_m_i+T_p_i ; + values[offset+3] = -T_p_i ; + + { + values[offset+2] += T_p_j; + } + offset += row_size ; + ++irow ; + } + { + i=nx-1 ; + double T_m_i = _trans_m_i(m_permitivity,i,j,nx,ny) ; + double T_p_i = _trans_p_i(m_permitivity,i,j,nx,ny) ; + double T_m_j = _trans_m_j(m_permitivity,i,j,nx,ny) ; + double T_p_j = _trans_p_j(m_permitivity,i,j,nx,ny) ; + + int row_size = 3 ; + kcol[irow] = offset ; + cols[offset] = irow-nx ; + cols[offset+1] = irow -1 ; + cols[offset+2] = irow ; + values[offset] = -T_m_j ; + values[offset+1] = -T_m_i ; + values[offset+2] = T_m_j+T_m_i ; + { + values[offset+2] += T_p_i; + } + { + values[offset+2] += T_p_j; + } + offset += row_size ; + ++irow ; + } + } + /* closing entry: one past the last stored non-zero */ kcol[irow] = offset ; + //fprintf(stdout, "NROW : %d NNZ : %d\n",irow, offset) ; +} diff --git a/TPs/TP3/CODE/SpMV/CSRMatrix.h b/TPs/TP3/CODE/SpMV/CSRMatrix.h new file mode 100755 index 0000000000000000000000000000000000000000..43eccebb79ba5c5665c42519564d185b08a56716 --- /dev/null +++ b/TPs/TP3/CODE/SpMV/CSRMatrix.h @@ -0,0 +1,44 @@ +#ifndef CSRMATRIX_H +#define CSRMATRIX_H +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +typedef struct timespec struct_time; +#define gettime(t) clock_gettime(CLOCK_MONOTONIC_RAW, t) +#define get_sub_seconde(t) (1e-9*(double)t.tv_nsec) +double get_elapsedtime(void); + +#define max(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a > _b ? 
_a : _b; }) + +/* min, like max, copies each argument into a local once before comparing (relies on the ({ ... }) and __typeof__ compiler extensions). */ #define min(a,b) \ + ({ __typeof__ (a) _a = (a); \ + __typeof__ (b) _b = (b); \ + _a < _b ? _a : _b; }) + +/* CSR matrix: init_CSR() sizes m_values and m_cols from m_nnz; m_kcol[i] .. m_kcol[i+1]-1 is the entry range of row i as used by mult_CSR(). */ typedef struct +{ + int m_nrows, m_nnz ; + double* m_values ; + int* m_cols ; + int* m_kcol ; +} CSRMatrix_t; + +void init_CSR(CSRMatrix_t* A, int nrows, int nnz); +void destruct_CSR(CSRMatrix_t* A); +void mult_CSR(CSRMatrix_t* A, double const* x, double* y); +void mult_CSR_task(CSRMatrix_t* A, double const* x, double* y, int start, int end); +void print_CSR(CSRMatrix_t* A); +int hat(int i,int n); +int uid(int i,int j,int nx,int ny); +double _trans_m_i(double* perm,int i,int j,int nx,int ny); +double _trans_p_i(double* perm,int i,int j,int nx,int ny); +double _trans_m_j(double* perm,int i,int j,int nx, int ny); +double _trans_p_j(double* perm,int i,int j,int nx, int ny); +void buildLaplacian(CSRMatrix_t* matrix, int nx, int ny); + +#endif diff --git a/TPs/TP3/CODE/SpMV/EllMatrix.c b/TPs/TP3/CODE/SpMV/EllMatrix.c new file mode 100755 index 0000000000000000000000000000000000000000..f1b970966be18cffb2ba77a0fcdebd058299bc9f --- /dev/null +++ b/TPs/TP3/CODE/SpMV/EllMatrix.c @@ -0,0 +1,51 @@ +#include "EllMatrix.h" + +/* Zero-allocates nrows*row_width values and column indices with calloc and records nrows, row_width and their product (as m_nnz) in A; the calloc results are not checked. */ void init_Ell(EllMatrix_t* A, int nrows, int row_width) +{ + A->m_values = (double*) calloc(nrows * row_width, sizeof(double)); + A->m_cols = (int*) calloc(nrows * row_width, sizeof(int)); + A->m_nrows = nrows; + A->m_nnz = nrows * row_width; + A->m_row_width = row_width; +} + +/* Fills B from CSR matrix A: every row gets max_row_width slots (the length of the longest CSR row), row i starting at index i*max_row_width; slots past a row's own entries keep the zero written by calloc. B's previous field values are overwritten without being freed. */ void convert_from_CSR(CSRMatrix_t* A, EllMatrix_t* B) +{ + B->m_nrows = A->m_nrows; + B->m_nnz = A->m_nnz; + int max_row_width = -1; + for(int i = 0; i < B->m_nrows; ++i) + { + int tmp = A->m_kcol[i+1] - A->m_kcol[i]; + if(tmp > max_row_width) + { + max_row_width = tmp; + } + } + B->m_row_width = max_row_width; + +#ifdef DEBUG + fprintf(stdout, "Max row width: %d\n", max_row_width); + fprintf(stdout, "Matrix Size: %d\n", max_row_width * B->m_nrows); +#endif + + B->m_values = (double*) calloc(B->m_nrows * max_row_width, sizeof(double)); + B->m_cols = (int*) 
calloc(B->m_nrows * max_row_width, sizeof(int)); + + for(int i = 0; i < A->m_nrows; ++i) + { + for(int k = A->m_kcol[i], j = 0; k < A->m_kcol[i+1]; ++k, ++j) + { + B->m_cols[ (i * max_row_width) + j ] = A->m_cols[k]; + B->m_values[(i * max_row_width) + j ] = A->m_values[k]; + } + } +} + +/* Intended ELL counterpart of mult_CSR. NOTE(review): the body is still an exercise stub - N and row_width are read but nothing is computed, so y is left unmodified. */ void mult_Ell(EllMatrix_t* A, double const* x, double* y) +{ + const int N = A->m_nrows; + const int row_width = A->m_row_width; + + /* TO BE COMPLETED */ +} diff --git a/TPs/TP3/CODE/SpMV/EllMatrix.h b/TPs/TP3/CODE/SpMV/EllMatrix.h new file mode 100755 index 0000000000000000000000000000000000000000..f1e51f342667453707b13871f99b9368650c180e --- /dev/null +++ b/TPs/TP3/CODE/SpMV/EllMatrix.h @@ -0,0 +1,20 @@ +#ifndef ELLMATRIX_H +#define ELLMATRIX_H +#include <inttypes.h> +#include <stdio.h> +#include <stdlib.h> +#include "CSRMatrix.h" + +/* ELL matrix: m_values and m_cols each hold m_nrows*m_row_width slots, row i starting at index i*m_row_width (see convert_from_CSR). */ typedef struct +{ + int m_nrows, m_nnz ; + int m_row_width ; + double* m_values ; + int* m_cols ; +} EllMatrix_t; + +void init_Ell(EllMatrix_t* A, int nrows, int row_width); +void convert_from_CSR(CSRMatrix_t* A, EllMatrix_t* B); +void mult_Ell(EllMatrix_t* A, double const* x, double* y); + +#endif diff --git a/TPs/TP3/CODE/SpMV/Makefile b/TPs/TP3/CODE/SpMV/Makefile new file mode 100755 index 0000000000000000000000000000000000000000..5874b898fcc24ddb84f30560dd2d175e7ec78a26 --- /dev/null +++ b/TPs/TP3/CODE/SpMV/Makefile @@ -0,0 +1,24 @@ +CC=clang -fopenmp +CFLAGS=-g -Wall +LDFLAGS=-lm -lrt + +EXEC=spmv.exe +SRC=$(wildcard *.c) +OBJ=$(SRC:.c=.o) + +all : $(EXEC) + +$(EXEC) : main.o CSRMatrix.o EllMatrix.o + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) + +main_seq.o : main.c + $(CC) -D SEQ $(CFLAGS) -o $@ -c $< + +%.o : %.c + $(CC) $(CFLAGS) -o $@ -c $< + +proper : + rm -f *.o + +clean : proper + rm -f $(EXEC) diff --git a/TPs/TP3/CODE/SpMV/main.c b/TPs/TP3/CODE/SpMV/main.c new file mode 100755 index 0000000000000000000000000000000000000000..ada11c4e6a9f7fc8445ae3aa005d3085e1c940a3 --- /dev/null +++ b/TPs/TP3/CODE/SpMV/main.c @@ -0,0 +1,127 @@ +#include <stdio.h> 
+#include <stdlib.h> +#include <stdint.h> +#include <math.h> + +#include "omp.h" + +#include "CSRMatrix.h" +#include "EllMatrix.h" + +int main(int argc, char** argv) +{ + double *x = NULL, *y = NULL; + int nx = 100, ny = 100, nrows = 0; + int nb_test = 1, i = 0, j = 0; + int check = 1; + double t0 = 0., t1 = 0., duration = 0.; + double norme= 0. ; + CSRMatrix_t* cpu_matrix = NULL; + EllMatrix_t* ell_matrix = NULL; + + if(argc > 1) nx = atoi(argv[1]); + if(argc > 2) ny = atoi(argv[2]); + if(argc > 3) nb_test = atoi(argv[3]); + if(argc > 4) check = atoi(argv[4]); + nrows = nx * ny; + + fprintf(stdout, "NX: %d\tNY: %d\tNTest: %d\n", nx, ny, nb_test); + cpu_matrix = (CSRMatrix_t*) malloc( sizeof(CSRMatrix_t) ); + ell_matrix = (EllMatrix_t*) malloc( sizeof(EllMatrix_t) ); + x = (double*) malloc( nrows * sizeof(double) ); + y = (double*) malloc( nrows * sizeof(double) ); + for(i = 0; i < nrows; ++i) + { + x[i] = 1. * i ; + y[i] = 0. ; + } + + buildLaplacian(cpu_matrix,nx,ny) ; + convert_from_CSR(cpu_matrix, ell_matrix); + + #ifdef DEBUG + print_CSR(cpu_matrix); + #endif + + for(i = 0; i < nrows; ++i) + x[i] = 1. 
* i ; + + mult_Ell(ell_matrix, x, y); + +#ifndef SEQ + int nb_part = 4; + if(argc > 5) nb_part = atoi(argv[5]); + int* offset = NULL; + offset = (int*) malloc(sizeof(int) * (nb_part + 1)); + offset[0] = 0; + offset[nb_part] = nrows; + int chunk = nrows / nb_part; + + for(i = 1; i < nb_part; ++i) + { + offset[i] = offset[i-1] + chunk; +#ifdef DEBUG + fprintf(stdout, "%d> from %d to %d\n", i-1, offset[i-1], offset[i]); +#endif //DEBUG + } +#ifdef DEBUG + fprintf(stdout, "%d> from %d to %d\n", nb_part-1, offset[nb_part-1], offset[nb_part]); +#endif //DEBUG + +#else //SEQ + fprintf(stdout, "Sequential version...\n"); +#endif //SEQ + + for(i = 0; i < nb_test; ++i) + { + t0 = get_elapsedtime(); + +#ifdef SEQ + mult_CSR(cpu_matrix,x,y) ; +#else +#pragma omp parallel num_threads(nb_part) + { +#pragma omp single nowait + { + for(int k = 0; k < nb_part; ++k) + { +#pragma omp task shared(cpu_matrix, x, y, offset) + { + mult_CSR_task(cpu_matrix, x, y, offset[k], offset[k+1]); + } + } + } + } +#endif + + t1 = get_elapsedtime(); + duration += (t1 - t0); + + norme=0. ; + for(j=0;j<nrows;++j) + norme += y[j]*y[j] ; + norme = sqrt(norme) ; + for(j=0;j<nrows;++j) + x[j] = y[j]/norme ; + } + + if(check) + { + double norme=0. ; + for(i=0;i<nrows;++i) + norme += y[i]*y[i] ; + fprintf(stdout, "NORME Y= %.2f\n",sqrt(norme)) ; + } + + fprintf(stdout, " Time : %f\n", duration); + uint64_t flop_csr = (unsigned long long)(cpu_matrix->m_nnz) * 2; + fprintf(stdout, " MFlops : %.2f\n", flop_csr / (duration/nb_test)*1E-6); + fprintf(stdout, "AvgTime : %f\n", duration/nb_test); + + free(x); + free(y); + destruct_CSR(cpu_matrix); + free(cpu_matrix); + + return 0; +} diff --git a/TPs/TP3/SUJET/tp3.pdf b/TPs/TP3/SUJET/tp3.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6587e0ac588f8db208d0b323ff53e2437f10c6e0 Binary files /dev/null and b/TPs/TP3/SUJET/tp3.pdf differ