diff --git a/TPs/TP4/CODE/Partie1/Makefile b/TPs/TP4/CODE/Partie1/Makefile
new file mode 100755
index 0000000000000000000000000000000000000000..825a917059b3230f445b055802383eeed569602c
--- /dev/null
+++ b/TPs/TP4/CODE/Partie1/Makefile
@@ -0,0 +1,28 @@
+# Builds the MPI+CUDA and OpenMP+CUDA vector-addition examples with nvcc.
+.PHONY: all mpi openmp clean
+
+all: mpi openmp
+
+CC=nvcc
+
+# Library/include locations are machine specific; variables given on the make
+# command line override them, e.g. `make MPIINC="-I /opt/openmpi/include"`.
+MPILIB=-L /lib/x86_64-linux-gnu/openmpi/lib -lmpi
+MPIINC=-I /lib/x86_64-linux-gnu/openmpi/include
+
+OMPLIB=-L /usr/lib/gcc/x86_64-linux-gnu/8/ -lgomp
+OMPINC=
+
+openmp: openmp_cuda_addvector.pgr
+
+# -fopenmp is forwarded to the host compiler so the OpenMP pragmas are honoured.
+openmp_%.pgr: openmp_%.cu
+	$(CC) $< -o $@ $(OMPLIB) $(OMPINC) --compiler-options "-fopenmp"
+
+mpi: mpi_cuda_addvector.pgr
+
+mpi_%.pgr: mpi_%.cu
+	$(CC) $< -o $@ $(MPILIB) $(MPIINC)
+
+clean:
+	rm -rf *.pgr
diff --git a/TPs/TP4/CODE/Partie1/mpi_cuda_addvector.cu b/TPs/TP4/CODE/Partie1/mpi_cuda_addvector.cu
new file mode 100755
index 0000000000000000000000000000000000000000..1f1dbfccd7b898dfa85909bb5c425168e96d099c
--- /dev/null
+++ b/TPs/TP4/CODE/Partie1/mpi_cuda_addvector.cu
@@ -0,0 +1,198 @@
+#include <mpi.h>
+#include <cuda.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+#define TSIZE 1024
+
+/* Abort the whole MPI job with a readable message if a CUDA runtime call fails. */
+#define CUDA_CHECK(call) \
+    do { \
+        cudaError_t err_ = (call); \
+        if (err_ != cudaSuccess) { \
+            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
+                    cudaGetErrorString(err_)); \
+            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE); \
+        } \
+    } while (0)
+
+/*
+ * Element-wise sum: tab_c[i] = tab_a[i] + tab_b[i] for 0 <= i < n.
+ * Expects a 1D grid of 1D blocks with at least n threads in total; threads
+ * whose global index is n or more do nothing.
+ */
+__global__ void addVector(const int *tab_a, const int *tab_b, int *tab_c, int n)
+{
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (index < n)
+        tab_c[index] = tab_a[index] + tab_b[index];
+}
+
+/*
+ * Check that every element of tab equals size: with the initialisation done
+ * in main, a[i] + b[i] is always the total number of elements.
+ * Prints each mismatch; returns 0 when all elements match, 1 otherwise.
+ */
+int verif_tab(int *tab, int size)
+{
+    int i, ret = 0;
+
+    for (i = 0; i < size; i++)
+    {
+        if (tab[i] != size)
+        {
+            ret = 1;
+            printf("i=%d ; tab[i]=%d\n", i, tab[i]);
+        }
+    }
+
+    return ret;
+}
+
+/* Wall-clock seconds elapsed between two gettimeofday() samples. */
+static double elapsed_s(const struct timeval *start, const struct timeval *stop)
+{
+    return (double)(stop->tv_sec - start->tv_sec)
+         + (double)(stop->tv_usec - start->tv_usec) / 1000000.0;
+}
+
+/* Allocate count ints on the host; abort the MPI job if malloc fails. */
+static int *alloc_ints(size_t count)
+{
+    int *p = (int *)malloc(sizeof(int) * count);
+
+    if (p == NULL)
+    {
+        fprintf(stderr, "Host allocation of %lu ints failed\n", (unsigned long)count);
+        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
+    }
+    return p;
+}
+
+/*
+ * Adds two vectors of TSIZE * <number of ranks> ints twice: first on the CPUs
+ * (each rank sums a TSIZE slice received with MPI_Scatter and sent back with
+ * MPI_Gather), then on the GPU from rank 0 only. Rank 0 verifies and times
+ * both runs. Returns EXIT_FAILURE on rank 0 if either result is wrong.
+ */
+int main(int argc, char *argv[])
+{
+    int rank, size, i;
+    int status = EXIT_SUCCESS;
+
+    /* Full-size vectors, allocated on rank 0 only. They stay NULL on the other
+     * ranks, where MPI ignores the root-side buffer of Scatter/Gather. */
+    int *tab_a = NULL;
+    int *tab_b = NULL;
+    int *tab_c = NULL;
+    int *tab_d = NULL;
+
+    /* Per-rank slices of TSIZE elements. */
+    int *h_a;
+    int *h_b;
+    int *h_c;
+
+    struct timeval mpi_start;
+    struct timeval mpi_stop;
+
+    MPI_Init(&argc, &argv);
+
+    MPI_Comm_size(MPI_COMM_WORLD, &size);
+    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+
+    const int total = TSIZE * size;
+
+    h_a = alloc_ints(TSIZE);
+    h_b = alloc_ints(TSIZE);
+    h_c = alloc_ints(TSIZE);
+
+    if (rank == 0)
+    {
+        tab_a = alloc_ints(total);
+        tab_b = alloc_ints(total);
+        tab_c = alloc_ints(total);
+        tab_d = alloc_ints(total);
+
+        for (i = 0; i < total; i++)
+        {
+            tab_a[i] = i;
+            tab_b[i] = total - i;
+            tab_c[i] = 0;
+        }
+        gettimeofday(&mpi_start, NULL);
+    }
+
+    /* Collectives: every rank, root included, makes the same sequence of calls. */
+    MPI_Scatter(tab_a, TSIZE, MPI_INT, h_a, TSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+    MPI_Scatter(tab_b, TSIZE, MPI_INT, h_b, TSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+
+    for (i = 0; i < TSIZE; i++) h_c[i] = h_a[i] + h_b[i];
+
+    MPI_Gather(h_c, TSIZE, MPI_INT, tab_c, TSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+
+    if (rank == 0)
+    {
+        const size_t bytes = sizeof(int) * total;
+        struct timeval cuda_start;
+        struct timeval cuda_stop;
+        int *d_a;
+        int *d_b;
+        int *d_c;
+
+        gettimeofday(&mpi_stop, NULL);
+
+        if (verif_tab(tab_c, total) == 0)
+        {
+            printf("CPU Compute is OK ; time is %f s\n", elapsed_s(&mpi_start, &mpi_stop));
+        }
+        else
+        {
+            status = EXIT_FAILURE;
+        }
+
+        CUDA_CHECK(cudaMalloc((void **)&d_a, bytes));
+        CUDA_CHECK(cudaMalloc((void **)&d_b, bytes));
+        CUDA_CHECK(cudaMalloc((void **)&d_c, bytes));
+
+        /* The timed section covers both transfers and the kernel. */
+        gettimeofday(&cuda_start, NULL);
+        CUDA_CHECK(cudaMemcpy(d_a, tab_a, bytes, cudaMemcpyHostToDevice));
+        CUDA_CHECK(cudaMemcpy(d_b, tab_b, bytes, cudaMemcpyHostToDevice));
+
+        /* size blocks of TSIZE threads: exactly one thread per element. */
+        addVector<<<size, TSIZE>>>(d_a, d_b, d_c, total);
+        CUDA_CHECK(cudaGetLastError());
+
+        /* Blocking copy: returns once the kernel has finished and the data is back. */
+        CUDA_CHECK(cudaMemcpy(tab_d, d_c, bytes, cudaMemcpyDeviceToHost));
+        gettimeofday(&cuda_stop, NULL);
+
+        if (verif_tab(tab_d, total) == 0)
+        {
+            printf("GPU Compute is OK ; time is %f s\n", elapsed_s(&cuda_start, &cuda_stop));
+        }
+        else
+        {
+            status = EXIT_FAILURE;
+        }
+
+        CUDA_CHECK(cudaFree(d_a));
+        CUDA_CHECK(cudaFree(d_b));
+        CUDA_CHECK(cudaFree(d_c));
+
+        free(tab_a);
+        free(tab_b);
+        free(tab_c);
+        free(tab_d);
+    }
+
+    free(h_a);
+    free(h_b);
+    free(h_c);
+
+    MPI_Finalize();
+
+    return status;
+}
diff --git a/TPs/TP4/CODE/Partie1/openmp_cuda_addvector.cu b/TPs/TP4/CODE/Partie1/openmp_cuda_addvector.cu
new file mode 100755
index 0000000000000000000000000000000000000000..332777d23f06bfa634461ee3a667a27b6b8b777e
--- /dev/null
+++ b/TPs/TP4/CODE/Partie1/openmp_cuda_addvector.cu
@@ -0,0 +1,173 @@
+#include <omp.h>
+#include <cuda.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+#define TSIZE 1024
+
+/* Exit with a readable message if a CUDA runtime call fails. */
+#define CUDA_CHECK(call) \
+    do { \
+        cudaError_t err_ = (call); \
+        if (err_ != cudaSuccess) { \
+            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__, \
+                    cudaGetErrorString(err_)); \
+            exit(EXIT_FAILURE); \
+        } \
+    } while (0)
+
+/*
+ * Element-wise sum: tab_c[i] = tab_a[i] + tab_b[i] for 0 <= i < n.
+ * Expects a 1D grid of 1D blocks with at least n threads in total; threads
+ * whose global index is n or more do nothing.
+ */
+__global__ void addVector(const int *tab_a, const int *tab_b, int *tab_c, int n)
+{
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+
+    if (index < n)
+        tab_c[index] = tab_a[index] + tab_b[index];
+}
+
+/*
+ * Check that every element of tab equals size: with the initialisation done
+ * in main, a[i] + b[i] is always the total number of elements.
+ * Prints each mismatch; returns 0 when all elements match, 1 otherwise.
+ */
+int verif_tab(int *tab, int size)
+{
+    int i, ret = 0;
+
+    for (i = 0; i < size; i++)
+    {
+        if (tab[i] != size)
+        {
+            ret = 1;
+            printf("i=%d ; tab[i]=%d\n", i, tab[i]);
+        }
+    }
+
+    return ret;
+}
+
+/* Wall-clock seconds elapsed between two gettimeofday() samples. */
+static double elapsed_s(const struct timeval *start, const struct timeval *stop)
+{
+    return (double)(stop->tv_sec - start->tv_sec)
+         + (double)(stop->tv_usec - start->tv_usec) / 1000000.0;
+}
+
+/* Allocate count ints on the host; exit if malloc fails. */
+static int *alloc_ints(size_t count)
+{
+    int *p = (int *)malloc(sizeof(int) * count);
+
+    if (p == NULL)
+    {
+        fprintf(stderr, "Host allocation of %lu ints failed\n", (unsigned long)count);
+        exit(EXIT_FAILURE);
+    }
+    return p;
+}
+
+/*
+ * Adds two vectors of TSIZE * <number of OpenMP threads> ints twice: first on
+ * the CPU with an OpenMP parallel loop, then on the GPU. Verifies and times
+ * both runs. Returns EXIT_FAILURE if either result is wrong.
+ */
+int main(int argc, char *argv[])
+{
+    int size = 1;
+    int status = EXIT_SUCCESS;
+
+    int *tab_a;
+    int *tab_b;
+    int *tab_c;
+    int *tab_d;
+
+    int *d_a;
+    int *d_b;
+    int *d_c;
+
+    struct timeval mpi_start;
+    struct timeval mpi_stop;
+    struct timeval cuda_start;
+    struct timeval cuda_stop;
+
+    /* The problem scales with the team size, so ask for the number of threads.
+     * omp_get_thread_num() returns each thread's own id instead: every thread
+     * would race on size, which could end up 0 (empty vectors, empty grid). */
+#pragma omp parallel
+    {
+#pragma omp single
+        size = omp_get_num_threads();
+    }
+
+    const int total = TSIZE * size;
+    const size_t bytes = sizeof(int) * total;
+
+    tab_a = alloc_ints(total);
+    tab_b = alloc_ints(total);
+    tab_c = alloc_ints(total);
+    tab_d = alloc_ints(total);
+
+    for (int i = 0; i < total; i++)
+    {
+        tab_a[i] = i;
+        tab_b[i] = total - i;
+        tab_c[i] = 0;
+    }
+    gettimeofday(&mpi_start, NULL);
+
+#pragma omp parallel for
+    for (int i = 0; i < total; i++) tab_c[i] = tab_a[i] + tab_b[i];
+
+    gettimeofday(&mpi_stop, NULL);
+
+    if (verif_tab(tab_c, total) == 0)
+    {
+        printf("CPU Compute is OK ; time is %f s\n", elapsed_s(&mpi_start, &mpi_stop));
+    }
+    else
+    {
+        status = EXIT_FAILURE;
+    }
+
+    CUDA_CHECK(cudaMalloc((void **)&d_a, bytes));
+    CUDA_CHECK(cudaMalloc((void **)&d_b, bytes));
+    CUDA_CHECK(cudaMalloc((void **)&d_c, bytes));
+
+    /* The timed section covers both transfers and the kernel. */
+    gettimeofday(&cuda_start, NULL);
+    CUDA_CHECK(cudaMemcpy(d_a, tab_a, bytes, cudaMemcpyHostToDevice));
+    CUDA_CHECK(cudaMemcpy(d_b, tab_b, bytes, cudaMemcpyHostToDevice));
+
+    /* size blocks of TSIZE threads: exactly one thread per element. */
+    addVector<<<size, TSIZE>>>(d_a, d_b, d_c, total);
+    CUDA_CHECK(cudaGetLastError());
+
+    /* Blocking copy: returns once the kernel has finished and the data is back. */
+    CUDA_CHECK(cudaMemcpy(tab_d, d_c, bytes, cudaMemcpyDeviceToHost));
+    gettimeofday(&cuda_stop, NULL);
+
+    if (verif_tab(tab_d, total) == 0)
+    {
+        printf("GPU Compute is OK ; time is %f s\n", elapsed_s(&cuda_start, &cuda_stop));
+    }
+    else
+    {
+        status = EXIT_FAILURE;
+    }
+
+    CUDA_CHECK(cudaFree(d_a));
+    CUDA_CHECK(cudaFree(d_b));
+    CUDA_CHECK(cudaFree(d_c));
+
+    free(tab_a);
+    free(tab_b);
+    free(tab_c);
+    free(tab_d);
+
+    return status;
+}
diff --git a/TPs/TP4/SUJET/tp4.pdf b/TPs/TP4/SUJET/tp4.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..f6e8bdb5bb3e0cb9d7656385b03f77378bf4a2aa
Binary files /dev/null and b/TPs/TP4/SUJET/tp4.pdf differ