Skip to content
Extraits de code Groupes Projets
Valider 8627a7d5 rédigé par Nicolas MARIE's avatar Nicolas MARIE
Parcourir les fichiers

add TD1

parent abd2dd1f
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
/*
 * Host-side stand-in for one thread of a 1D vector-addition kernel.
 *
 * The launch coordinates that a real kernel would read from built-in
 * variables are passed explicitly:
 *   blockDim_x  - number of threads per block
 *   blockIdx_x  - index of the block this call represents
 *   threadIdx_x - index of the thread inside that block
 *
 * The call handles the single element whose global index is
 * blockIdx_x * blockDim_x + threadIdx_x and stores a[idx] + b[idx] into
 * c[idx]. Calls whose global index is >= N do nothing, so the last block
 * may be only partially used.
 */
void kernel(double *a, double *b, double *c, int N,
int blockDim_x, int blockIdx_x, int threadIdx_x)
{
    const int gid = threadIdx_x + blockDim_x * blockIdx_x;

    /* Guard: threads past the end of the arrays have no work. */
    if (gid >= N)
        return;

    c[gid] = b[gid] + a[gid];
}
/*
 * CPU emulation of the CUDA vector-addition exercise.
 *
 * The "device" buffers (d_a, d_b, d_c) are ordinary heap allocations and the
 * host<->device transfers are plain memcpy calls. The kernel launch is
 * replaced by two nested loops that call kernel() once per (block, thread)
 * pair of a 1D grid.
 *
 * Prints the N sums separated by '/' followed by a newline.
 * Returns 0 on success, EXIT_FAILURE if any buffer cannot be allocated.
 */
int main(int argc, char **argv)
{
    /* Command-line arguments are not used. */
    (void)argc;
    (void)argv;

    int N = 1000;
    /* Byte counts are size_t, the type malloc/memcpy expect. */
    size_t sz_in_bytes = (size_t)N * sizeof(double);

    /* Host buffers. */
    double *h_a = (double *)malloc(sz_in_bytes);
    double *h_b = (double *)malloc(sz_in_bytes);
    double *h_c = (double *)malloc(sz_in_bytes);

    /* 3-arrays allocation on the (emulated) device. */
    double *d_a = (double *)malloc(sz_in_bytes);
    double *d_b = (double *)malloc(sz_in_bytes);
    double *d_c = (double *)malloc(sz_in_bytes);

    /* Bail out cleanly instead of dereferencing NULL; free(NULL) is a no-op. */
    if (h_a == NULL || h_b == NULL || h_c == NULL ||
        d_a == NULL || d_b == NULL || d_c == NULL)
    {
        fprintf(stderr, "Failed to allocate %zu bytes per buffer\n", sz_in_bytes);
        free(d_a);
        free(d_b);
        free(d_c);
        free(h_a);
        free(h_b);
        free(h_c);
        return EXIT_FAILURE;
    }

    /* Initialize the input values in h_a and h_b. */
    for (int i = 0; i < N; i++)
    {
        h_a[i] = 1./(1.+i);
        h_b[i] = (i-1.)/(i+1.);
    }

    /* Copy the inputs from the host buffers to the device buffers. */
    memcpy(d_a, h_a, sz_in_bytes);
    memcpy(d_b, h_b, sz_in_bytes);

    /*
     * CUDA launch being emulated:
     *   dim3 dimBlock(64, 1, 1);
     *   dim3 dimGrid((N + dimBlock.x - 1)/dimBlock.x, 1, 1);
     *   kernel<<<dimGrid , dimBlock>>>(d_a, d_b, d_c, N);
     */
    int blockDim_x = 64;
    /* Ceiling division so that the grid covers all N elements. */
    int gridDim_x = (N + blockDim_x - 1) / blockDim_x;
    for (int bi = 0; bi < gridDim_x; bi++)
    {
        for (int ti = 0; ti < blockDim_x; ti++)
        {
            kernel(d_a, d_b, d_c, N, blockDim_x, bi, ti);
        }
    }

    /* The result is in d_c; copy it back to the host buffer h_c. */
    memcpy(h_c, d_c, sz_in_bytes);

    /* Release the device buffers. */
    free(d_a);
    free(d_b);
    free(d_c);

    for (int i = 0; i < N; ++i)
    {
        printf("%g/", h_c[i]);
    }
    printf("\n");

    /* Release the host buffers. */
    free(h_a);
    free(h_b);
    free(h_c);
    return 0;
}
......@@ -54,6 +54,7 @@ int main(int argc, char **argv)
cudaFree(d_b);
cudaFree(d_c);
free(h_a);
free(h_b);
free(h_c);
......
## I. Calcul d'indice global
Q.1 Index block grid 2D.
```c
size_t index_block_2d = blockIdx.y * gridDim.x + blockIdx.x;
```
Q.2 Index block grid 3D.
```
size_t index_block = blockIdx.z * gridDim.x * gridDim.y
+ blockIdx.y * gridDim.x + blockIdx.x;
```
Q.3 Size block 3D
```
size_t block_size = blockDim.x * blockDim.y * blockDim.z;
```
Q.4 Index thread block 3D.
```
size_t index_thread = threadIdx.z * blockDim.x * blockDim.y
+ threadIdx.y * blockDim.x + threadIdx.x;
```
Q.5 Index thread block 3D & grid 3D.
```
size_t index = index_block * block_size + index_thread;
```
## II. Modèle d'exécution et SDK CUDA
Q.6 La fonction `kernel` s'exécute sur le device.
La fonction `main` s'exécute sur l'hôte.
Q.7
compute: `(1./(1.+i)) + ((i-1.)/(i+1.))` N (N = 1000) fois.
Q.8
16 blocks de taille 64.
1024 threads au total, dont N (N = 1000) utiles.
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Veuillez vous inscrire ou vous pour commenter