diff --git a/TDs/TD1/CODE/Partie2/td1_cpu.c b/TDs/TD1/CODE/Partie2/td1_cpu.c
new file mode 100644
index 0000000000000000000000000000000000000000..cf6bca2270250cd42075006d4139f9e06764d104
--- /dev/null
+++ b/TDs/TD1/CODE/Partie2/td1_cpu.c
@@ -0,0 +1,116 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <string.h>
+
+/*
+ * CPU emulation of a CUDA vector-addition kernel: computes c[i] = a[i] + b[i]
+ * for the single emulated thread identified by (blockIdx_x, threadIdx_x).
+ *
+ * a, b         input arrays, read at index i
+ * c            output array, written at index i
+ * N            number of valid elements in a, b and c
+ * blockDim_x   emulated number of threads per block
+ * blockIdx_x   emulated block index
+ * threadIdx_x  emulated thread index inside the block
+ */
+void kernel(double *a, double *b, double *c, int N,
+            int blockDim_x, int blockIdx_x, int threadIdx_x)
+{
+    int i = blockIdx_x * blockDim_x + threadIdx_x;
+
+    // The emulated grid may contain more threads than elements: skip the tail.
+    if (i < N)
+    {
+        c[i] = a[i] + b[i];
+    }
+}
+
+/*
+ * Fills two host arrays, copies them to emulated "device" buffers (plain heap
+ * memory), runs kernel() once per emulated thread, copies the result back and
+ * prints it. Returns EXIT_FAILURE if an allocation fails, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+    // Command-line arguments are not used.
+    (void)argc;
+    (void)argv;
+
+    int N = 1000;
+    // size_t avoids narrowing the result of N * sizeof(double) to int.
+    size_t sz_in_bytes = (size_t)N * sizeof(double);
+
+    double *h_a, *h_b, *h_c;
+    double *d_a, *d_b, *d_c;
+
+    h_a = (double *)malloc(sz_in_bytes);
+    h_b = (double *)malloc(sz_in_bytes);
+    h_c = (double *)malloc(sz_in_bytes);
+
+    // 3-arrays allocation on the emulated device
+    d_a = (double *)malloc(sz_in_bytes);
+    d_b = (double *)malloc(sz_in_bytes);
+    d_c = (double *)malloc(sz_in_bytes);
+
+    // Bail out before touching any buffer if an allocation failed;
+    // free(NULL) is a no-op, so every pointer can be released unconditionally.
+    if (!h_a || !h_b || !h_c || !d_a || !d_b || !d_c)
+    {
+        fprintf(stderr, "allocation of %zu bytes failed\n", sz_in_bytes);
+        free(h_a);
+        free(h_b);
+        free(h_c);
+        free(d_a);
+        free(d_b);
+        free(d_c);
+        return EXIT_FAILURE;
+    }
+
+    // Initiate values on h_a and h_b
+    for (int i = 0; i < N; i++)
+    {
+        h_a[i] = 1. / (1. + i);
+        h_b[i] = (i - 1.) / (i + 1.);
+    }
+
+    // Copy to the device the values pointed to on the host by h_a and h_b
+    // (the copies are pointed to by d_a and d_b on the device)
+    memcpy(d_a, h_a, sz_in_bytes);
+    memcpy(d_b, h_b, sz_in_bytes);
+
+    // CUDA launch emulated by the two loops below:
+    //   dim3 dimBlock(64, 1, 1);
+    //   dim3 dimGrid((N + dimBlock.x - 1)/dimBlock.x, 1, 1);
+    //   kernel<<<dimGrid , dimBlock>>>(d_a, d_b, d_c, N);
+    int blockDim_x = 64;
+    int gridDim_x = (N + blockDim_x - 1) / blockDim_x;  // ceil(N / blockDim_x)
+    for (int bi = 0; bi < gridDim_x; bi++)
+    {
+        for (int ti = 0; ti < blockDim_x; ti++)
+        {
+            kernel(d_a, d_b, d_c, N, blockDim_x, bi, ti);
+        }
+    }
+
+    // Result is pointed by d_c on device
+    // Copy this result on host (result pointed by h_c on host)
+    memcpy(h_c, d_c, sz_in_bytes);
+
+    // freeing on device
+    free(d_a);
+    free(d_b);
+    free(d_c);
+
+    for (int i = 0; i < N; ++i)
+    {
+        printf("%g/", h_c[i]);
+    }
+    printf("\n");
+
+    free(h_a);
+    free(h_b);
+    free(h_c);
+
+    return 0;
+}
diff --git 
a/TDs/TD1/CODE/Partie2/tp1.cu b/TDs/TD1/CODE/Partie2/td1_gpu.c similarity index 93% rename from TDs/TD1/CODE/Partie2/tp1.cu rename to TDs/TD1/CODE/Partie2/td1_gpu.c index 004cef598a86b1ec0bf98a022e500960d1a3f309..71e3914dc95fabd206a0b96a8ab649ffcb33a498 100644 --- a/TDs/TD1/CODE/Partie2/tp1.cu +++ b/TDs/TD1/CODE/Partie2/td1_gpu.c @@ -8,7 +8,7 @@ __global__ void kernel(double *a, double *b, double *c, int N) if (i < N) { - c[i] = a[i] + b[i]; + c[i] = a[i] + b[i]; } } @@ -27,8 +27,8 @@ int main(int argc, char **argv) // Initiate values on h_a and h_b for(int i = 0 ; i < N ; i++) { - h_a[i] = 1./(1.+i); - h_b[i] = (i-1.)/(i+1.); + h_a[i] = 1./(1.+i); + h_b[i] = (i-1.)/(i+1.); } // 3-arrays allocation on device @@ -54,6 +54,7 @@ int main(int argc, char **argv) cudaFree(d_b); cudaFree(d_c); + free(h_a); free(h_b); free(h_c); diff --git a/TDs/TD1/td1.md b/TDs/TD1/td1.md new file mode 100644 index 0000000000000000000000000000000000000000..ab98dc5a9fa68bf8635833a216257de861a8e019 --- /dev/null +++ b/TDs/TD1/td1.md @@ -0,0 +1,43 @@ + +## I. Calcul d'indice global + +Q.1 Index block grid 2D. +```c +size_t index_block_2d = blockIdx.y * gridDim.x + blockIdx.x; +``` + +Q.2 Index block grid 3D. +``` +size_t index_block = blockIdx.z * gridDim.x * gridDim.y + + blockIdx.y * gridDim.x + blockIdx.x; +``` + +Q.3 Size block 3D +``` +size_t block_size = blockDim.x * blockDim.y * blockDim.z; +``` + +Q.4 Index thread block 3D. +``` +size_t index_thread = threadIdx.z * blockDim.x * blockDim.y + + threadIdx.y * blockDim.x + threadIdx.x; +``` + +Q.5 Index thread block 3D & grid 3D. +``` +size_t index = index_block * block_size + index_thread; +``` + +## II. Modèle d'exécution et SDK CUDA + +Q.6 La fonction `kernel` s'exécute sur le device. +La fonction `main` s'exécute sur l'hôte. + +Q.7 +compute: `(1./(1.+i)) + ((i-1.)/(i+1.))` N (N = 1000) fois. + +Q.8 +16 blocks de taille 64. +1024 threads au total, dont N (N = 1000) utiles. + +