diff --git a/TPs/TP1/CODE/Partie1/tp1_check.cu b/TPs/TP1/CODE/Partie1/tp1_check.cu
new file mode 100755
index 0000000000000000000000000000000000000000..2b55dc78199539148db061e497ede0086d50bc96
--- /dev/null
+++ b/TPs/TP1/CODE/Partie1/tp1_check.cu
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+__global__ void kernel(int *v)  // device entry point used by the host-side check
+{
+  v[0] = 1;  // every thread that runs this kernel stores 1 into the int at v
+}
+
+// Prints the properties of every CUDA device, then round-trips one int through
+// kernel(). Exits with a failure status when a CUDA call or the final check fails.
+int main(int argc, char **argv)
+{
+  // Stop with a readable message as soon as a CUDA runtime call fails.
+#define CUDA_CHECK(call)                                                  \
+  do {                                                                    \
+    cudaError_t err_ = (call);                                            \
+    if (err_ != cudaSuccess) {                                            \
+      fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,       \
+              cudaGetErrorString(err_));                                  \
+      exit(EXIT_FAILURE);                                                 \
+    }                                                                     \
+  } while (0)
+
+  const size_t sz_in_bytes = sizeof(int);
+  int *h_a = NULL;
+  int *d_a = NULL;
+  int nDevices = 0;
+
+  // Querying the CUDA device properties
+  CUDA_CHECK(cudaGetDeviceCount(&nDevices));
+  for (int i = 0; i < nDevices; i++) {
+    cudaDeviceProp prop;
+    CUDA_CHECK(cudaGetDeviceProperties(&prop, i));
+    printf("Device Number: %d\n", i);
+    printf("  Device name: %s\n", prop.name);
+    printf("  Memory Clock Rate (KHz): %d\n", prop.memoryClockRate);
+    printf("  Memory Bus Width (bits): %d\n", prop.memoryBusWidth);
+    printf("  Peak Memory Bandwidth (GB/s): %f\n",
+           2.0*prop.memoryClockRate*(prop.memoryBusWidth/8)/1.0e6);
+    printf("  Device Compute Capability: %d.%d\n", prop.major, prop.minor);
+    printf("  > Kernel Configuration information\n");
+    printf("    - Warp Size: %d\n", prop.warpSize);
+    printf("    - Max Threads Per Block: %d\n", prop.maxThreadsPerBlock);
+    printf("    - Max size of each dimension of a Block: (%d, %d, %d)\n",
+           prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
+    printf("    - Max size of each dimension of a Grid: (%d, %d, %d)\n",
+           prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
+    printf("  > Memory information\n");
+    // The three sizes below are size_t, hence %zu rather than %llu.
+    printf("    - Total Global Memory size (bytes): %zu\n", prop.totalGlobalMem);
+    printf("    - Max Shared Memory size per block (bytes): %zu\n",
+           prop.sharedMemPerBlock);
+    printf("    - Max Constant Memory size (bytes): %zu\n\n", prop.totalConstMem);
+  }
+
+  // Allocation on host (malloc)
+  h_a = (int*)malloc(sz_in_bytes);
+  if (h_a == NULL) {
+    fprintf(stderr, "Host allocation failed\n");
+    return EXIT_FAILURE;
+  }
+  *h_a = 0;
+
+  // Allocation on device (cudaMalloc)
+  CUDA_CHECK(cudaMalloc((void**)&d_a, sz_in_bytes));
+
+  // Copying data to device (cudaMemcpy)
+  CUDA_CHECK(cudaMemcpy(d_a, h_a, sz_in_bytes, cudaMemcpyHostToDevice));
+
+  // Kernel configuration and launch: a single block holding a single thread
+  dim3  dimBlock(1, 1, 1);
+  dim3  dimGrid(1, 1, 1);
+  kernel<<<dimGrid , dimBlock>>>(d_a);
+  CUDA_CHECK(cudaGetLastError());  // reports a rejected launch configuration
+
+  // Retrieving data from device (cudaMemcpy)
+  CUDA_CHECK(cudaMemcpy(h_a, d_a, sz_in_bytes, cudaMemcpyDeviceToHost));
+
+  // Freeing on device (cudaFree)
+  CUDA_CHECK(cudaFree(d_a));
+
+  // Verifying that the value copied back is the 1 written by the kernel
+  const int passed = (*h_a == 1);
+  fprintf(stderr, "%s\n", passed ? "TEST PASSED !" : "TEST FAILED !");
+  free(h_a);
+  return passed ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/TPs/TP1/CODE/Partie2/Makefile b/TPs/TP1/CODE/Partie2/Makefile
new file mode 100755
index 0000000000000000000000000000000000000000..4ae22712a5d3de26583073d9da7dd646cc80b99c
--- /dev/null
+++ b/TPs/TP1/CODE/Partie2/Makefile
@@ -0,0 +1,20 @@
+CC=gcc
+CFLAGS=-O3 -Wall
+LDFLAGS=-lm
+EXE=mult.exe
+
+# all/run/proper/clean name actions rather than files.
+.PHONY : all run proper clean
+all : $(EXE)
+
+# dgemm.c calls fabs(), so the math library is linked explicitly.
+$(EXE) : dgemm.o
+	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+%.o : %.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+run : $(EXE)
+	./$< 1000
+proper :
+	rm -f *.o
+clean :
+	rm -f *.o $(EXE)
diff --git a/TPs/TP1/CODE/Partie2/dgemm.c b/TPs/TP1/CODE/Partie2/dgemm.c
new file mode 100755
index 0000000000000000000000000000000000000000..90f051ebb7862ce96116ed4488b3836a2efaf480
--- /dev/null
+++ b/TPs/TP1/CODE/Partie2/dgemm.c
@@ -0,0 +1,104 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <inttypes.h>
+#include <math.h>
+
+#define gettime(t) clock_gettime(CLOCK_MONOTONIC_RAW, t)
+#define get_sub_seconde(t) (1e-9*(double)t.tv_nsec)
+/** Returns the CLOCK_MONOTONIC_RAW reading in seconds as a double,
+ *  or 0 when clock_gettime reports an error. */
+double get_elapsedtime(void)
+{
+  struct timespec now;
+  const int failed = (gettime(&now) != 0);
+  /* Whole seconds plus the nanosecond field scaled by 1e-9. */
+  return failed ? 0 : (double)now.tv_sec + get_sub_seconde(now);
+}
+
+/* Compares the N*N doubles of matOut against matRef; prints the first pair that
+ * differs by more than 0.01 and returns 1, or returns 0 when none does. */
+int verify_matrix(double *matRef, double *matOut, int N) {
+  const uint64_t size = (uint64_t)N * (uint64_t)N; /* widen before multiplying */
+  for (uint64_t i = 0; i < size; i++) {
+    const double diff = fabs(matRef[i] - matOut[i]);
+    if (diff > 0.01) {
+      printf("Divergence! Should %5.2f, Is %5.2f (Diff %5.2f) at %" PRIu64 "\n",
+             matRef[i], matOut[i], diff, i); /* i is uint64_t, hence PRIu64 */
+      return 1;
+    }
+  }
+  return 0;
+}
+
+
+/* Seeds rand() with 2019, fills the size*size arrays A and B with values of the
+ * form d + 0.01*h (d in 0..9, h in 0..4) and sets every entry of C to zero. */
+void init(double* A, double* B, double* C, int size)
+{
+  srand(2019);
+
+  for (int row = 0; row < size; ++row) {
+    const int base = row * size;  /* offset of the first element of this row */
+    for (int col = 0; col < size; ++col) {
+      const int at = base + col;
+      A[at] = (double) (rand() % 10) + 0.01 * (rand() % 5);
+      B[at] = (double) (rand() % 10) + 0.01 * (rand() % 5);
+      C[at] = 0.0;
+    }
+  }
+}
+
+/* Stores the product of A and B into C, all three being size x size matrices
+ * indexed as [row * size + col]. Each entry is summed locally, then written. */
+void mult(double* A, double* B, double* C, int size)
+{
+  for (int row = 0; row < size; ++row) {
+    const double* a_row = A + row * size;
+    double* c_row = C + row * size;
+    for (int col = 0; col < size; ++col) {
+      double acc = 0.;
+      /* inner walks along row `row` of A and down column `col` of B */
+      for (int inner = 0; inner < size; ++inner) {
+        acc += a_row[inner] * B[inner * size + col];
+      }
+      c_row[col] = acc;
+    }
+  }
+}
+
+/* Times one N x N product; N is argv[1] when given, 1000 otherwise. */
+int main(int argc, char** argv){
+  double t0 = 0., t1 = 0., duration = 0.;
+  const int N = (argc < 2)?1000:atoi(argv[1]);
+  if (N <= 0) {  /* atoi gives 0 for non-numeric text; a negative size is meaningless */
+    fprintf(stderr, "Matrix size must be a positive integer\n");
+    return EXIT_FAILURE;
+  }
+  fprintf(stdout, "Matrix Multiplication\n  Size: %dx%d\n", N, N);
+  // Memory allocation
+  double *A = (double*) malloc(sizeof(double) * N * N);
+  double *B = (double*) malloc(sizeof(double) * N * N);
+  double *C = (double*) malloc(sizeof(double) * N * N);
+  if (A == NULL || B == NULL || C == NULL) {
+    fprintf(stderr, "Bad allocation\n");
+    free(A); free(B); free(C);  /* free(NULL) is a no-op */
+    return EXIT_FAILURE;
+  }
+
+  // Value initialization, then the timed multiplication
+  init(A, B, C, N);
+  t0 = get_elapsedtime();
+  mult(A, B, C, N);
+  t1 = get_elapsedtime();
+
+  // Pretty print
+  duration = (t1 - t0);
+  uint64_t N_u64 = (uint64_t) N;
+  uint64_t nb_op = N_u64 * N_u64 * N_u64;
+  fprintf(stdout, "Performance results: \n");
+  fprintf(stdout, "  Time: %lf s\n", duration);
+  fprintf(stdout, "  MFlops: %.2f\n", (nb_op / duration)*1E-6);
+  free(A); free(B); free(C);
+  return 0;
+}
diff --git a/TPs/TP1/DOCS/CUDA_C_Best_Practices_Guide.pdf b/TPs/TP1/DOCS/CUDA_C_Best_Practices_Guide.pdf
new file mode 100755
index 0000000000000000000000000000000000000000..d32a346a4a6814537a7cc1601940479b7b766b1c
Binary files /dev/null and b/TPs/TP1/DOCS/CUDA_C_Best_Practices_Guide.pdf differ
diff --git a/TPs/TP1/DOCS/CUDA_C_Programming_Guide.pdf b/TPs/TP1/DOCS/CUDA_C_Programming_Guide.pdf
new file mode 100755
index 0000000000000000000000000000000000000000..4b66369eefbc60fcabb586843cf67f3d44d98c6a
Binary files /dev/null and b/TPs/TP1/DOCS/CUDA_C_Programming_Guide.pdf differ
diff --git a/TPs/TP1/SUJET/tp1.pdf b/TPs/TP1/SUJET/tp1.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..68e14831f27b7a20e1351e4346c717d606af12ee
Binary files /dev/null and b/TPs/TP1/SUJET/tp1.pdf differ
diff --git a/TPs/TP2/CODE/Makefile b/TPs/TP2/CODE/Makefile
new file mode 100755
index 0000000000000000000000000000000000000000..087df5f56444cbbc05026aa03a653f3c60a18cac
--- /dev/null
+++ b/TPs/TP2/CODE/Makefile
@@ -0,0 +1,21 @@
+CC=gcc
+CFLAGS=-O3 -Wall
+LDFLAGS=-lm
+EXE=tp2.exe
+
+# all/run/proper/clean name actions rather than files.
+.PHONY : all run proper clean
+all : $(EXE)
+
+$(EXE) : tp2.o
+	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+%.o : %.c
+	$(CC) $(CFLAGS) -c -o $@ $<
+
+run : $(EXE)
+	./$< 10240
+proper :
+	rm -f *.o
+clean :
+	rm -f *.o $(EXE)
diff --git a/TPs/TP2/CODE/tp2.c b/TPs/TP2/CODE/tp2.c
new file mode 100755
index 0000000000000000000000000000000000000000..52054e51c1b1364f90d2335a6d2e58276c663ab5
--- /dev/null
+++ b/TPs/TP2/CODE/tp2.c
@@ -0,0 +1,64 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+#define NSTREAMS 4
+
+/* Checks out[i] against exp(-|sin(i)|) for i in [0, sz) and reports whether the
+ * mean absolute error is below 1.e-4 (stdout on success, stderr on failure). */
+void verif(float *out, int sz)
+{
+  double err = 0.;
+
+  for(int i = 0 ; i < sz ; i++)
+  {
+    /* fabs, not abs: C's abs() takes an int and would truncate both operands. */
+    err += fabs(out[i] - exp( - fabs(sin(i * 1.0)) ));
+  }
+
+  const double mean = err / sz;
+  if (mean < 1.e-4)
+    fprintf(stdout, "TEST PASSED (error %.3e < 1.e-4)\n", mean);
+  else
+    fprintf(stderr, "TEST FAILED (error %.3e > 1.e-4)\n", mean);
+}
+
+/* Replaces each of the first `size` entries of out with exp(-|out[i]|).
+ * Uses fabsf because C's abs() takes an int and would truncate out[i]. */
+void func(float *out, int size)
+{
+  for(int i = 0; i < size; ++i)
+    out[i] = expf( - fabsf(out[i]) );
+}
+
+/* Fills a table with sin(i), applies func() to it and checks it with verif(). The
+ * table holds NSTREAMS * n floats, n being argv[1] if it is the only argument, else 1024. */
+int main(int argc, char** argv)
+{
+  int size = 1024;
+  if (argc == 2)
+  {
+    size = atoi(argv[1]);
+  }
+  if (size <= 0)  /* atoi gives 0 for non-numeric text */
+  {
+    fprintf(stderr, "Size must be a positive integer\n");
+    return -1;
+  }
+  size *= NSTREAMS;
+
+  float *tab = (float*) malloc(sizeof(float) * size);
+  if(tab == NULL)
+  {
+    fprintf(stderr, "Bad allocation\n");
+    return -1;
+  }
+
+  for(int i = 0; i < size; ++i)
+    tab[i] = sin(i * 1.);
+
+  func(tab, size);
+  verif(tab, size);
+  free(tab);
+  return 0;
+}
diff --git a/TPs/TP2/CODE/tp2.cu b/TPs/TP2/CODE/tp2.cu
new file mode 100755
index 0000000000000000000000000000000000000000..137cfc16847247bc16b5c8ec7f30d6ebb6adaa07
--- /dev/null
+++ b/TPs/TP2/CODE/tp2.cu
@@ -0,0 +1,67 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+#define NSTREAMS 4
+
+/* Checks out[i] against exp(-|sin(i)|) for i in [0, sz) and reports whether the
+ * mean absolute error is below 1.e-4 (stdout on success, stderr on failure). */
+void verif(float *out, int sz)
+{
+  double err = 0.;
+
+  for(int i = 0 ; i < sz ; i++)
+  {
+    // fabs keeps this in floating point whatever abs overloads the headers expose.
+    err += fabs(out[i] - exp( - fabs(sin(i * 1.0)) ));
+  }
+
+  const double mean = err / sz;
+  if (mean < 1.e-4)
+    fprintf(stdout, "TEST PASSED (error %.3e < 1.e-4)\n", mean);
+  else
+    fprintf(stderr, "TEST FAILED (error %.3e > 1.e-4)\n", mean);
+}
+
+/* Replaces each of the first `size` entries of out with exp(-|out[i]|), using the
+ * float functions so nothing depends on which abs overloads the headers expose. */
+void func(float *out, int size)
+{
+  for(int i = 0; i < size; ++i)
+    out[i] = expf( - fabsf(out[i]) );
+}
+
+/* Host-side run: fills a table with sin(i), applies func() and checks it with
+ * verif(). Also reserves a same-sized device buffer, which nothing uses yet. */
+int main(int argc, char** argv)
+{
+  int size = 1024;
+  if (argc == 2)
+    size = atoi(argv[1]);
+  size *= NSTREAMS;
+
+  float *tab = (float*) malloc(sizeof(float) * size);
+  if(tab == NULL)
+  {
+    fprintf(stderr, "Bad allocation\n");
+    return -1;
+  }
+
+  // d_tab must be a pointer: cudaMalloc stores the device address through &d_tab.
+  float *d_tab = NULL;
+  cudaError_t status = cudaMalloc((void**)&d_tab, sizeof(float) * size);
+  if(status != cudaSuccess)
+  {
+    fprintf(stderr, "Bad device allocation: %s\n", cudaGetErrorString(status));
+    free(tab);
+    return -1;
+  }
+
+  for(int i = 0; i < size; ++i)
+    tab[i] = sin(i * 1.);
+  func(tab, size);
+  verif(tab, size);
+  cudaFree(d_tab);
+  free(tab);
+  return 0;
+}
diff --git a/TPs/TP2/DOCS/StreamsAndConcurrencyWebinar.pdf b/TPs/TP2/DOCS/StreamsAndConcurrencyWebinar.pdf
new file mode 100755
index 0000000000000000000000000000000000000000..fa2d3b5f6e36b0f6a8ca43db3bc754d7f8de9c0b
Binary files /dev/null and b/TPs/TP2/DOCS/StreamsAndConcurrencyWebinar.pdf differ
diff --git a/TPs/TP2/SUJET/tp2.pdf b/TPs/TP2/SUJET/tp2.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..5a3f0a389860d388f9b37b3cedb1793103510415
Binary files /dev/null and b/TPs/TP2/SUJET/tp2.pdf differ