diff --git a/TPs/TP4/CODE/Partie1/Makefile b/TPs/TP4/CODE/Partie1/Makefile
new file mode 100755
index 0000000000000000000000000000000000000000..825a917059b3230f445b055802383eeed569602c
--- /dev/null
+++ b/TPs/TP4/CODE/Partie1/Makefile
@@ -0,0 +1,26 @@
+# Builds the two TP4 part-1 programs with nvcc: MPI+CUDA and OpenMP+CUDA.
+# These targets name actions, not files, so a stray file called e.g. "clean"
+# must never make them look up to date.
+.PHONY: all mpi openmp clean
+
+all: mpi openmp
+
+CC=nvcc
+
+# Library/include locations are system specific; "?=" keeps these defaults
+# while letting the environment or the command line override them.
+MPILIB?=-L /lib/x86_64-linux-gnu/openmpi/lib -lmpi
+MPIINC?=-I /lib/x86_64-linux-gnu/openmpi/include
+OMPLIB?=-L /usr/lib/gcc/x86_64-linux-gnu/8/ -lgomp
+OMPINC?=
+
+openmp: openmp_cuda_addvector.pgr
+openmp_%.pgr: openmp_%.cu
+	$(CC) $< -o $@ $(OMPLIB) $(OMPINC) --compiler-options "-fopenmp"
+
+mpi: mpi_cuda_addvector.pgr
+mpi_%.pgr: mpi_%.cu
+	$(CC) $< -o $@ $(MPILIB) $(MPIINC)
+
+clean:
+	rm -rf *.pgr
diff --git a/TPs/TP4/CODE/Partie1/mpi_cuda_addvector.cu b/TPs/TP4/CODE/Partie1/mpi_cuda_addvector.cu
new file mode 100755
index 0000000000000000000000000000000000000000..1f1dbfccd7b898dfa85909bb5c425168e96d099c
--- /dev/null
+++ b/TPs/TP4/CODE/Partie1/mpi_cuda_addvector.cu
@@ -0,0 +1,153 @@
+#include <mpi.h>
+#include <cuda.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+#define TSIZE 1024
+
+
+
+
+/* One thread per element: tab_c[g] = tab_a[g] + tab_b[g], g being the global 1D thread index; no bounds check. */
+__global__ void addVector(int * tab_a, int * tab_b, int * tab_c)
+{
+	const int g = threadIdx.x + blockIdx.x * blockDim.x;
+	tab_c[g] = tab_a[g] + tab_b[g];
+}
+
+
+/* Checks that each of the first `size` entries of `tab` equals `size`.
+ * Every mismatching entry is printed as "i=<index> ; tab[i]=<value>".
+ * Returns 0 when all entries match, 1 otherwise. */
+int verif_tab(int * tab, int size)
+{
+	int failed = 0;
+	int pos = 0;
+
+	while(pos < size)
+	{
+		if(tab[pos] != size) { failed = 1; printf("i=%d ; tab[i]=%d\n", pos, tab[pos]); }
+		pos++;
+	}
+	return failed;
+}
+
+
+
+/* Seconds elapsed between two gettimeofday() samples. */
+static double elapsed_s(const struct timeval *t0, const struct timeval *t1)
+{
+	return (double)(t1->tv_sec - t0->tv_sec) + (double)(t1->tv_usec - t0->tv_usec)/1000000.0;
+}
+
+/* Reports a failed CUDA runtime call and aborts every rank of the MPI job. */
+static void cuda_or_abort(cudaError_t err, const char *what)
+{
+	if(err != cudaSuccess)
+	{
+		fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
+		MPI_Abort(MPI_COMM_WORLD, 1);
+	}
+}
+
+/* Allocates `count` ints on the host; aborts the MPI job when malloc fails. */
+static int *alloc_ints(size_t count)
+{
+	int *p = (int *)malloc(sizeof(int)*count);
+	if(p == NULL)
+	{
+		fprintf(stderr, "malloc of %lu ints failed\n", (unsigned long)count);
+		MPI_Abort(MPI_COMM_WORLD, 1);
+	}
+	return p;
+}
+
+/*
+ * Adds two vectors of TSIZE*size ints twice and checks both results on rank 0:
+ *   1. on the CPU, split across the `size` MPI ranks (scatter / add / gather);
+ *   2. on the GPU from rank 0 only, with `size` blocks of TSIZE threads.
+ * Rank 0 prints one "... Compute is OK" line with the wall-clock time for each
+ * result that verif_tab accepts. Returns 0 on normal completion.
+ */
+int main(int argc, char*argv[])
+{
+	int rank, size, i, total;
+	size_t bytes;
+
+	/* Full vectors live on rank 0 only; they stay NULL elsewhere, where the
+	 * root-side buffers of MPI_Scatter / MPI_Gather are not significant. */
+	int *tab_a = NULL, *tab_b = NULL, *tab_c = NULL, *tab_d = NULL;
+	int *h_a, *h_b, *h_c;                       /* per-rank TSIZE slices */
+	int *d_a = NULL, *d_b = NULL, *d_c = NULL;  /* device copies (rank 0) */
+
+	struct timeval mpi_start, mpi_stop, cuda_start, cuda_stop;
+
+	MPI_Init(&argc, &argv);
+	MPI_Comm_size(MPI_COMM_WORLD, &size);
+	MPI_Comm_rank(MPI_COMM_WORLD, &rank);
+	total = TSIZE*size;
+	bytes = sizeof(int)*TSIZE*size;
+
+	h_a = alloc_ints(TSIZE);
+	h_b = alloc_ints(TSIZE);
+	h_c = alloc_ints(TSIZE);
+
+	if(rank==0)
+	{
+		tab_a = alloc_ints(total);
+		tab_b = alloc_ints(total);
+		tab_c = alloc_ints(total);
+		tab_d = alloc_ints(total);
+
+		/* a[i] + b[i] == total for every i, which is what verif_tab checks. */
+		for(i=0; i<total; i++)
+		{
+			tab_a[i] = i;
+			tab_b[i] = total-i;
+			tab_c[i] = 0;
+		}
+		gettimeofday(&mpi_start, NULL);
+	}
+
+	/* Collectives: every rank, root included, makes the same three calls. */
+	MPI_Scatter(tab_a, TSIZE, MPI_INT, h_a, TSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+	MPI_Scatter(tab_b, TSIZE, MPI_INT, h_b, TSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+	for(i=0; i<TSIZE; i++) h_c[i] = h_a[i] + h_b[i];
+	MPI_Gather(h_c, TSIZE, MPI_INT, tab_c, TSIZE, MPI_INT, 0, MPI_COMM_WORLD);
+
+	if(rank==0)
+	{
+		gettimeofday(&mpi_stop, NULL);
+		if(verif_tab(tab_c, total) == 0)
+		{
+			printf("CPU Compute is OK ; time is %f s\n", elapsed_s(&mpi_start, &mpi_stop));
+		}
+
+		cuda_or_abort(cudaMalloc((void**) &d_a, bytes), "cudaMalloc(d_a)");
+		cuda_or_abort(cudaMalloc((void**) &d_b, bytes), "cudaMalloc(d_b)");
+		cuda_or_abort(cudaMalloc((void**) &d_c, bytes), "cudaMalloc(d_c)");
+
+		gettimeofday(&cuda_start, NULL);
+		cuda_or_abort(cudaMemcpy(d_a, tab_a, bytes, cudaMemcpyHostToDevice), "copy of tab_a");
+		cuda_or_abort(cudaMemcpy(d_b, tab_b, bytes, cudaMemcpyHostToDevice), "copy of tab_b");
+		addVector<<<size, TSIZE>>>(d_a, d_b, d_c);
+		cuda_or_abort(cudaGetLastError(), "addVector launch");  /* launches return no status */
+		cuda_or_abort(cudaMemcpy(tab_d, d_c, bytes, cudaMemcpyDeviceToHost), "copy of d_c");
+		gettimeofday(&cuda_stop, NULL);
+
+		if(verif_tab(tab_d, total) == 0)
+		{
+			printf("GPU Compute is OK ; time is %f s\n", elapsed_s(&cuda_start, &cuda_stop));
+		}
+
+		cudaFree(d_a);
+		cudaFree(d_b);
+		cudaFree(d_c);
+		free(tab_a); free(tab_b); free(tab_c); free(tab_d);
+	}
+
+	free(h_a); free(h_b); free(h_c);
+	MPI_Finalize();
+	return 0;
+}
diff --git a/TPs/TP4/CODE/Partie1/openmp_cuda_addvector.cu b/TPs/TP4/CODE/Partie1/openmp_cuda_addvector.cu
new file mode 100755
index 0000000000000000000000000000000000000000..332777d23f06bfa634461ee3a667a27b6b8b777e
--- /dev/null
+++ b/TPs/TP4/CODE/Partie1/openmp_cuda_addvector.cu
@@ -0,0 +1,125 @@
+#include <omp.h>
+#include <cuda.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/time.h>
+
+#define TSIZE 1024
+
+
+
+
+/* GPU vector addition: the thread with global index t writes
+ * tab_c[t] = tab_a[t] + tab_b[t]. No bounds check is performed, so the launch
+ * must not create more threads than the arrays have elements. */
+__global__ void addVector(int * tab_a, int * tab_b, int * tab_c)
+{
+	const int t = blockDim.x * blockIdx.x + threadIdx.x;
+	tab_c[t] = tab_b[t] + tab_a[t];
+}
+
+
+/* Returns 0 if tab[0..size-1] all hold the value `size`, 1 otherwise.
+ * Each offending element is reported on stdout as "i=<index> ; tab[i]=<value>". */
+int verif_tab(int * tab, int size)
+{
+	int status = 0;
+	int k;
+
+	for(k = 0; k < size; ++k)
+	{
+		if(tab[k] == size) continue;   /* element is as expected */
+		printf("i=%d ; tab[i]=%d\n", k, tab[k]);
+		status = 1;
+	}
+	return status;
+}
+
+
+
+/* Seconds elapsed between two gettimeofday() samples. */
+static double elapsed_s(const struct timeval *t0, const struct timeval *t1)
+{
+	return (double)(t1->tv_sec - t0->tv_sec) + (double)(t1->tv_usec - t0->tv_usec)/1000000.0;
+}
+
+/* Reports a failed CUDA runtime call and terminates the program. */
+static void cuda_or_exit(cudaError_t err, const char *what)
+{
+	if(err == cudaSuccess) return;
+	fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(err));
+	exit(EXIT_FAILURE);
+}
+
+/*
+ * Adds two vectors of TSIZE*size ints twice, size being the OpenMP team size:
+ *   1. on the CPU with an OpenMP parallel for;
+ *   2. on the GPU with `size` blocks of TSIZE threads.
+ * Prints one "... Compute is OK" line with the wall-clock time for each
+ * result that verif_tab accepts. Returns 0 on normal completion.
+ */
+int main(int argc, char*argv[])
+{
+	int size = 1, i, total;
+	size_t bytes;
+	int *tab_a, *tab_b, *tab_c, *tab_d;         /* host vectors */
+	int *d_a = NULL, *d_b = NULL, *d_c = NULL;  /* device vectors */
+	struct timeval mpi_start, mpi_stop, cuda_start, cuda_stop;
+
+	/* The team size is omp_get_num_threads(), not omp_get_thread_num() (the
+	 * caller's own index, 0 on the master thread); `single` lets exactly one
+	 * thread store it, so the shared `size` is not written concurrently. */
+#pragma omp parallel
+{
+#pragma omp single
+	size = omp_get_num_threads();
+}
+	total = TSIZE*size;
+	bytes = sizeof(int)*TSIZE*size;
+
+	tab_a = (int *)malloc(bytes);
+	tab_b = (int *)malloc(bytes);
+	tab_c = (int *)malloc(bytes);
+	tab_d = (int *)malloc(bytes);
+	if(tab_a == NULL || tab_b == NULL || tab_c == NULL || tab_d == NULL)
+	{
+		fprintf(stderr, "host allocation of %d ints failed\n", total);
+		return EXIT_FAILURE;
+	}
+
+	/* a[i] + b[i] == total for every i, which is what verif_tab checks. */
+	for(i=0; i<total; i++)
+	{
+		tab_a[i] = i;
+		tab_b[i] = total-i;
+		tab_c[i] = 0;
+	}
+
+	gettimeofday(&mpi_start, NULL);
+#pragma omp parallel for
+	for(i=0; i<total; i++) tab_c[i] = tab_a[i] + tab_b[i];
+	gettimeofday(&mpi_stop, NULL);
+
+	if(verif_tab(tab_c, total) == 0)
+		printf("CPU Compute is OK ; time is %f s\n", elapsed_s(&mpi_start, &mpi_stop));
+
+	cuda_or_exit(cudaMalloc((void**) &d_a, bytes), "cudaMalloc(d_a)");
+	cuda_or_exit(cudaMalloc((void**) &d_b, bytes), "cudaMalloc(d_b)");
+	cuda_or_exit(cudaMalloc((void**) &d_c, bytes), "cudaMalloc(d_c)");
+
+	gettimeofday(&cuda_start, NULL);
+	cuda_or_exit(cudaMemcpy(d_a, tab_a, bytes, cudaMemcpyHostToDevice), "copy of tab_a");
+	cuda_or_exit(cudaMemcpy(d_b, tab_b, bytes, cudaMemcpyHostToDevice), "copy of tab_b");
+	addVector<<<size, TSIZE>>>(d_a, d_b, d_c);
+	cuda_or_exit(cudaGetLastError(), "addVector launch");  /* launches return no status */
+	cuda_or_exit(cudaMemcpy(tab_d, d_c, bytes, cudaMemcpyDeviceToHost), "copy of d_c");
+	gettimeofday(&cuda_stop, NULL);
+
+	if(verif_tab(tab_d, total) == 0)
+		printf("GPU Compute is OK ; time is %f s\n", elapsed_s(&cuda_start, &cuda_stop));
+
+	cudaFree(d_a); cudaFree(d_b); cudaFree(d_c);
+	free(tab_a); free(tab_b); free(tab_c); free(tab_d);
+
+	return 0;
+}
diff --git a/TPs/TP4/SUJET/tp4.pdf b/TPs/TP4/SUJET/tp4.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..f6e8bdb5bb3e0cb9d7656385b03f77378bf4a2aa
Binary files /dev/null and b/TPs/TP4/SUJET/tp4.pdf differ