diff --git a/TDs/TD2/CODE/Makefile b/TDs/TD2/CODE/Makefile
index e564ae5268bea89dddfd42f283a6abc8993459ba..71214f90b3caa5cded767965b5692600ceee9821 100755
--- a/TDs/TD2/CODE/Makefile
+++ b/TDs/TD2/CODE/Makefile
@@ -1,10 +1,13 @@
 CC=gcc
+NVCC=nvcc
 CFLAGS_OMP=-O3 -Wall -fopenmp -DUSE_OMP
 CFLAGS=-O3 -Wall
+NVFLAGS=
 EXE1=pi.exe
 EXE2=pi_omp.exe
+EXE3=pi_cuda.exe
 
-all : $(EXE1) $(EXE2)
+all : $(EXE1) $(EXE2) $(EXE3)
 
 $(EXE1) : pi.o
 	$(CC) $(CFLAGS) -o $@ $<
@@ -12,14 +15,20 @@ $(EXE1) : pi.o
 $(EXE2) : pi_omp.o
 	$(CC) $(CFLAGS_OMP) -o $@ $<
 
+$(EXE3) : pi_cuda.o
+	$(NVCC) $(NVFLAGS) -o $@ $<
+
 %_omp.o : %_omp.c
 	$(CC) $(CFLAGS_OMP) -c -o $@ $<
 
+%_cuda.o : %_cuda.cu
+	$(NVCC) $(NVFLAGS) -c -o $@ $<
+
 %.o : %.c
 	$(CC) $(CFLAGS) -c -o $@ $<
 
 clean :
-	rm -f *.o $(EXE1) $(EXE2)
+	rm -f *.o $(EXE1) $(EXE2) $(EXE3)
 
 proper :
 	rm -f *.o
diff --git a/TDs/TD2/CODE/helper_cuda.h b/TDs/TD2/CODE/helper_cuda.h
new file mode 100644
index 0000000000000000000000000000000000000000..3dd446dced85de9d10eb9f1597c56099fa358bdb
--- /dev/null
+++ b/TDs/TD2/CODE/helper_cuda.h
@@ -0,0 +1,183 @@
+/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of NVIDIA CORPORATION nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+////////////////////////////////////////////////////////////////////////////////
+// These are CUDA Helper functions for initialization and error checking
+
+#ifndef COMMON_HELPER_CUDA_H_
+#define COMMON_HELPER_CUDA_H_
+
+#pragma once
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
+#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
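+// Usage: wrap CUDA runtime calls, e.g. checkCudaErrors(cudaMalloc(&ptr, size));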
+
+inline void check(cudaError_t result, char const *const func, const char *const file,
+           int const line)
+{
+  if (result)
+  {
+    fprintf(stderr, "CUDA error at %s:%d code=%d (%s) \"%s\" \n", file, line, (int)result, cudaGetErrorName(result), func);
+    exit(EXIT_FAILURE);
+  }
+}
+
+inline void __getLastCudaError(const char *errorMessage, const char *file,
+                               const int line)
+{
+  cudaError_t err = cudaGetLastError();
+
+  if (cudaSuccess != err)
+  {
+    fprintf(stderr,
+            "%s(%i) : getLastCudaError() CUDA error :"
+            " %s : (%d) %s.\n",
+            file, line, errorMessage, (int)(err),
+            cudaGetErrorString(err));
+    exit(EXIT_FAILURE);
+  }
+}
+
+// Beginning of GPU Architecture definitions
+inline int _ConvertSMVer2Cores(int major, int minor)
+{
+  // Defines for GPU Architecture types (using the SM version to determine
+  // the # of cores per SM
+  typedef struct
+  {
+    int SM; // 0xMm (hexadecimal notation), M = SM Major version,
+    // and m = SM minor version
+    int Cores;
+  } sSMtoCores;
+
+  sSMtoCores nGpuArchCoresPerSM[] = {
+      {0x30, 192},
+      {0x32, 192},
+      {0x35, 192},
+      {0x37, 192},
+      {0x50, 128},
+      {0x52, 128},
+      {0x53, 128},
+      {0x60, 64},
+      {0x61, 128},
+      {0x62, 128},
+      {0x70, 64},
+      {0x72, 64},
+      {0x75, 64},
+      {0x80, 64},
+      {0x86, 128},
+      {0x87, 128},
+      {-1, -1}};
+
+  int index = 0;
+
+  while (nGpuArchCoresPerSM[index].SM != -1)
+  {
+    if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
+    {
+      return nGpuArchCoresPerSM[index].Cores;
+    }
+
+    index++;
+  }
+
+  // If the SM version is not in the table, fall back to the last
+  // known entry so the program can still run
+  printf(
+      "MapSMtoCores for SM %d.%d is undefined."
+      "  Default to use %d Cores/SM\n",
+      major, minor, nGpuArchCoresPerSM[index - 1].Cores);
+  return nGpuArchCoresPerSM[index - 1].Cores;
+}
+
+inline const char *_ConvertSMVer2ArchName(int major, int minor)
+{
+  // Defines for GPU Architecture types (using the SM version to determine
+  // the GPU Arch name)
+  typedef struct
+  {
+    int SM; // 0xMm (hexadecimal notation), M = SM Major version,
+    // and m = SM minor version
+    const char *name;
+  } sSMtoArchName;
+
+  sSMtoArchName nGpuArchNameSM[] = {
+      {0x30, "Kepler"},
+      {0x32, "Kepler"},
+      {0x35, "Kepler"},
+      {0x37, "Kepler"},
+      {0x50, "Maxwell"},
+      {0x52, "Maxwell"},
+      {0x53, "Maxwell"},
+      {0x60, "Pascal"},
+      {0x61, "Pascal"},
+      {0x62, "Pascal"},
+      {0x70, "Volta"},
+      {0x72, "Xavier"},
+      {0x75, "Turing"},
+      {0x80, "Ampere"},
+      {0x86, "Ampere"},
+      {-1, "Graphics Device"}};
+
+  int index = 0;
+
+  while (nGpuArchNameSM[index].SM != -1)
+  {
+    if (nGpuArchNameSM[index].SM == ((major << 4) + minor))
+    {
+      return nGpuArchNameSM[index].name;
+    }
+
+    index++;
+  }
+
+  // If the SM version is not in the table, fall back to the last
+  // known entry so the program can still run
+  printf(
+      "MapSMtoArchName for SM %d.%d is undefined."
+      "  Default to use %s\n",
+      major, minor, nGpuArchNameSM[index - 1].name);
+  return nGpuArchNameSM[index - 1].name;
+}
+// end of GPU Architecture definitions
+
+// end of CUDA Helper Functions
+
+#endif // COMMON_HELPER_CUDA_H_
\ No newline at end of file
diff --git a/TDs/TD2/CODE/pi.c b/TDs/TD2/CODE/pi.c
index 4d0d7e23180eae12cca0fe6a2ba5cdcd94e1632f..5d4db5ca7464a5036518bf005de907ba57ad2ea6 100755
--- a/TDs/TD2/CODE/pi.c
+++ b/TDs/TD2/CODE/pi.c
@@ -2,20 +2,29 @@
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <time.h>
 
 int main(int argc, char** argv) {
-    uint64_t n_test = 10E7;
-    uint64_t i;
-    uint64_t count = 0;
-    double x = 0., y = 0.;
-    double pi = 0.;
+	uint64_t n_test = 10E7;
+	uint64_t i;
+	uint64_t count = 0;
+	double x = 0., y = 0.;
+	double pi = 0.;
 
-    // TODO: inialisation du generateur de nombres pseudo aleatoires.
-    // TODO: tirageS de flechettes
+	// initialize the pseudo-random number generator
+	srand(time(NULL));
 
-    fprintf(stdout, "%ld of %ld throws are in the circle !\n", count, n_test);
-    // TODO: estimation de Pi
-    fprintf(stdout, "Pi ~= %lf\n", pi);
+	// throw the darts
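+	// a point drawn uniformly in [0,1]x[0,1] falls inside the quarter disc
+	// with probability pi/4, so pi ~= 4 * count / n_test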
+	for(i = 0; i < n_test; ++i){
+		x = (double) rand() / RAND_MAX;
+		y = (double) rand() / RAND_MAX;
+		count += (x * x + y * y < 1.);
+	}
 
-    return 0;
+	fprintf(stdout, "%ld of %ld throws are in the circle !\n", count, n_test);
+	// estimate Pi
+	pi = (count / ((double) n_test)) * 4.;
+	fprintf(stdout, "Pi ~= %lf\n", pi);
+
+	return 0;
 }
diff --git a/TDs/TD2/CODE/pi_cuda.cu b/TDs/TD2/CODE/pi_cuda.cu
new file mode 100644
index 0000000000000000000000000000000000000000..b3047865d569b9f024663430c945596453b2c234
--- /dev/null
+++ b/TDs/TD2/CODE/pi_cuda.cu
@@ -0,0 +1,78 @@
+#include <inttypes.h>
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include <curand_kernel.h>
+
+#include "helper_cuda.h"
+
+#define N_TEST 10E7
+#define GRID_SIZE_X 512
+#define BLOCK_SIZE_X 256
+#define TAB_SIZE (GRID_SIZE_X * BLOCK_SIZE_X)
+#define TRIALS_PER_THREAD (N_TEST / TAB_SIZE)
+
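+// Each thread draws N_TEST / TAB_SIZE points in the unit square and stores its
+// own estimate of pi in d_pi[tid]; the host then averages the TAB_SIZE estimates.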
+__global__ void pi_kernel(double *d_pi){
+	const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+
+	// number of samples handled by this thread, derived from the macros above
+	const int trials_per_thread = (int) N_TEST / TAB_SIZE;
+
+	curandState localState;
+	curand_init(0, tid, 0, &localState);
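+	// curand_init(seed, sequence, offset, &state): same seed for every thread,
+	// but using tid as the sequence number gives each thread an independent stream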
+	int count = 0;
+	float x, y;
+	for(size_t i = 0; i < trials_per_thread; ++i){
+		x = curand_uniform(&localState);
+		y = curand_uniform(&localState);
+		count += x * x + y * y < 1;
+	}
+	d_pi[tid] = 4. * (double) count / (double) trials_per_thread;
+}
+
+int main(int argc, char** argv) {
+	const int grid_size_x = GRID_SIZE_X;
+	const int block_size_x = BLOCK_SIZE_X;
+	const int tab_size = TAB_SIZE;
+
+	uint64_t i;
+	double pi = 0.;
+
+	int sz_in_bytes = sizeof(double) * tab_size;
+
+	double *h_pi;
+	double *d_pi;
+
+	h_pi = (double *) malloc(sz_in_bytes);
+
+	checkCudaErrors(cudaMalloc((void **) &d_pi, sz_in_bytes));
+	checkCudaErrors(cudaMemset(d_pi, 0, sz_in_bytes));
+
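+	// launch one thread per partial estimate: GRID_SIZE_X blocks of BLOCK_SIZE_X threads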
+	dim3 dimBlock(block_size_x, 1, 1);
+	dim3 dimGrid(grid_size_x, 1, 1);
+	pi_kernel<<<dimGrid, dimBlock>>>(d_pi);
+	cudaDeviceSynchronize();
+	getLastCudaError("PI kernel failed");
+
+	checkCudaErrors(cudaMemcpy(h_pi, d_pi, sz_in_bytes, cudaMemcpyDeviceToHost));
+
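+	// first print: estimate of thread 0 alone; second print: average over all threads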
+	fprintf(stdout, "Pi ~= %lf\n", h_pi[0]);
+
+	for(i = 0; i < tab_size; ++i)
+		pi += h_pi[i];
+	pi /= (double) tab_size;
+
+	fprintf(stdout, "Pi ~= %lf\n", pi);
+
+	checkCudaErrors(cudaFree(d_pi));
+	free(h_pi);
+
+	return 0;
+}
diff --git a/TDs/TD2/CODE/pi_cuda.out b/TDs/TD2/CODE/pi_cuda.out
new file mode 100644
index 0000000000000000000000000000000000000000..2a6648b3926c0dd99c95eb455c4258366ff6e5cc
--- /dev/null
+++ b/TDs/TD2/CODE/pi_cuda.out
@@ -0,0 +1,14 @@
+❯ make
+nvcc  -c -o pi_cuda.o pi_cuda.cu
+nvcc  -o pi_cuda.exe pi_cuda.o
+❯ ./pi_cuda.exe
+test
+Pi ~= 3.128609
+Pi ~= 3.141749
+❯ make
+nvcc  -c -o pi_cuda.o pi_cuda.cu
+nvcc  -o pi_cuda.exe pi_cuda.o
+❯ ./pi_cuda.exe
+Pi ~= 6109520.860892
+Pi ~= -7959034.788585
+
diff --git a/TDs/TD2/CODE/pi_omp.c b/TDs/TD2/CODE/pi_omp.c
index 39fe0cc8cbf8f430db982d6f5ab4c2d208bd8616..7a8ef808f984ccc41a798d29387fcafa6f1c0b7c 100755
--- a/TDs/TD2/CODE/pi_omp.c
+++ b/TDs/TD2/CODE/pi_omp.c
@@ -7,48 +7,62 @@
 
 #include "omp.h"
 
-#define TRIALS_PER_THREAD 10E10
-
 #define gettime(t) clock_gettime(CLOCK_MONOTONIC_RAW, t)
 #define get_sub_seconde(t) (1e-9 * (double)t.tv_nsec)
 /** return time in second
  */
 double get_elapsedtime(void) {
-    struct timespec st;
-    int err = gettime(&st);
-    if (err != 0) return 0;
-    return (double)st.tv_sec + get_sub_seconde(st);
+	struct timespec st;
+	int err = gettime(&st);
+	if (err != 0) return 0;
+	return (double) st.tv_sec + get_sub_seconde(st);
 }
 
 int main(int argc, char** argv) {
-    uint64_t const n_test = TRIALS_PER_THREAD;
-    uint64_t i;
-    double x = 0., y = 0.;
-    double pi = 0.;
-    double t0 = 0., t1 = 0., duration = 0.;
+	uint64_t const n_test = 10E7;
+	uint64_t i = 0;
+	double x = 0., y = 0.;
+	double pi = 0.;
+	double t0 = 0., t1 = 0., duration = 0.;
+
+	int nb_threads = 0;
+	uint64_t count = 0;
+
+	t0 = get_elapsedtime();
+
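+	// each thread accumulates hits in a private copy of count;
+	// reduction(+:count) sums the copies when the parallel region ends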
+	#pragma omp parallel firstprivate(i, x, y) reduction(+:count)
+	{
+		#pragma omp master
+		{
+			nb_threads = omp_get_num_threads();
+			fprintf(stdout, "Nb threads: %d\n", nb_threads);
+		}
+
+		size_t thread_num = omp_get_thread_num();
 
-    int nb_threads = 0;
-#pragma omp parallel shared(nb_threads)
-#pragma omp master
-    nb_threads = omp_get_num_threads();
-    fprintf(stdout, "Nb threads: %d\n", nb_threads);
+		// initialize this thread's pseudo-random number generator
+		unsigned int seed = time(NULL) ^ thread_num;
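+		// rand_r() keeps its state in the caller-supplied seed, so threads do not
+		// contend on the hidden global state used by rand()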
 
-    // TODO: initialisation du tableau stockant le résultat de chaque thread
-    uint64_t* result = (uint64_t*)malloc(sizeof(uint64_t) * nb_threads);
+		// throw the darts
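+		// the omp for work-sharing construct splits the n_test iterations among the threads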
+		#pragma omp for
+		for(i = 0; i < n_test; ++i){
+			x = (double) rand_r(&seed) / RAND_MAX;
+			y = (double) rand_r(&seed) / RAND_MAX;
+			count += (x * x + y * y < 1.);
+		}
+	}
 
-    // TODO: initialisation du generateur de nombre pseudo aléatoires
+	t1 = get_elapsedtime();
 
-    t0 = get_elapsedtime();
+	duration = (t1 - t0);
 
-    // TODO: tirageS de flechettes
+	fprintf(stdout,
+		"%ld of %ld throws are in the circle ! (Time: %lf s)\n",
+		count, n_test, duration);
 
-    // TODO: reduction des resultats
-    t1 = get_elapsedtime();
-    duration = (t1 - t0);
-    fprintf(stdout, "%ld of %ld throws are in the circle ! (Time: %lf s)\n",
-            (uint64_t)pi, n_test, duration);
-    // TODO: estimation de Pi
-    fprintf(stdout, "Pi ~= %lf\n", pi);
+	// estimate Pi
+	pi = (count / ((double) n_test)) * 4.;
+	fprintf(stdout, "Pi ~= %lf\n", pi);
 
-    return 0;
+	return 0;
 }