Skip to content
Extraits de code Groupes Projets
Valider 08504bd8 rédigé par Nicolas MARIE's avatar Nicolas MARIE
Parcourir les fichiers

pgpu debut TP3

parent e22de7af
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -4,28 +4,30 @@
/*
 * AXPY demo with OpenMP target offload.
 *
 * Builds X[i] = i and Y[i] = 2*i on the host, then updates
 * Y[i] += alpha * X[i] inside an OpenMP target region and prints the first
 * few entries of Y.
 *
 * Usage: prog [N]
 *   N  number of elements (positive integer, default 100000000).
 *
 * Returns 0 on success, EXIT_FAILURE on an invalid size or when the two
 * arrays cannot be allocated.
 */
int main(int argc, char** argv)
{
    /* Largest size accepted on the command line; N is stored in an int. */
    enum { MAX_ELEMENTS = 0x7fffffff };

    const double alpha = 2;
    int N = 100000000;

    if (argc > 1)
    {
        char *end = NULL;
        const long requested = strtol(argv[1], &end, 10);

        /* Reject empty input, trailing junk, and sizes that are not in 1..MAX_ELEMENTS. */
        if (end == argv[1] || *end != '\0' || requested <= 0 || requested > MAX_ELEMENTS)
        {
            fprintf(stderr, "Invalid size '%s': expected a positive integer\n", argv[1]);
            return EXIT_FAILURE;
        }
        N = (int)requested;
    }

    double *X = (double*) malloc(sizeof *X * (size_t)N);
    double *Y = (double*) malloc(sizeof *Y * (size_t)N);
    if (X == NULL || Y == NULL)
    {
        fprintf(stderr, "Unable to allocate two arrays of %d doubles\n", N);
        free(X); /* free(NULL) is a no-op, so no need to test each pointer */
        free(Y);
        return EXIT_FAILURE;
    }

    for (int i = 0; i < N; ++i)
    {
        X[i] = i;
        Y[i] = X[i] + i;
    }

    /*
     * X is only read in the region (to); Y is read and written, so it is
     * mapped in both directions (tofrom). Only variables that the region
     * actually writes are mapped back to the host.
     */
#pragma omp target teams distribute parallel for map(to: X[:N]) map(tofrom: Y[:N])
    for (int i = 0; i < N; ++i)
    {
        Y[i] += alpha * X[i];
    }

    /* Print at most the first five results. */
    const int stop = (N > 5) ? 5 : N;
    for (int i = 0; i < stop; ++i)
    {
        fprintf(stdout, "Y[%d] = %f\n", i, Y[i]);
    }

    free(X);
    free(Y);
    return 0;
}
......@@ -10,72 +10,78 @@ typedef struct timespec struct_time;
*/
double get_elapsedtime(void)
{
    struct_time now;

    /*
     * gettime() and get_sub_seconde() are defined elsewhere in this file;
     * a non-zero status from gettime() is treated as failure and reported
     * to the caller as a timestamp of 0.
     */
    if (gettime(&now) != 0)
    {
        return 0;
    }

    /* Whole seconds plus the sub-second part supplied by get_sub_seconde(). */
    return (double)now.tv_sec + get_sub_seconde(now);
}
/*
 * Dense square matrix product C = A * B, offloaded with OpenMP target.
 *
 * N : matrix dimension; nothing is done when N <= 0.
 * A : N*N input matrix, row-major (element (i,k) is A[i * N + k]).
 * B : N*N input matrix, row-major.
 * C : N*N output matrix, row-major; every element is overwritten, so its
 *     previous contents do not matter.
 */
void mult(int N, float *A, float* B, float* C)
{
    if (N <= 0)
    {
        return;
    }

    /* Element count in size_t so that N*N cannot overflow an int. */
    const size_t total_size = (size_t)N * (size_t)N;
    (void)total_size; /* only referenced by the pragma; unused when OpenMP is off */

    /*
     * Pointers must be mapped as array sections: a bare "map(to: A)" maps
     * the pointer variable, not the N*N floats it points to. C is mapped
     * "from" only, so the kernel must not read it -- each result is
     * accumulated in a thread-local variable and stored once.
     */
#pragma omp target teams distribute parallel for map(to: A[0:total_size], B[0:total_size]) map(from: C[0:total_size])
    for (int i = 0; i < N; ++i)
    {
        /* j, k and val are declared inside the loop so each thread has its own copy. */
        for (int j = 0; j < N; ++j)
        {
            float val = 0.;
            for (int k = 0; k < N; ++k)
            {
                val += A[(size_t)i * N + k] * B[(size_t)k * N + j];
            }
            C[(size_t)i * N + j] = val;
        }
    }
}
/*
 * Intentionally empty function marked with Clang's optnone attribute.
 * It neither reads nor writes through C.
 * NOTE(review): presumably called on the result buffer so the compiler
 * cannot discard the computation that filled it -- confirm intent.
 */
[[clang::optnone]]
void donotopti(float *C)
{
    (void)C;
}
/*
 * Matrix-multiplication benchmark driver.
 *
 * Allocates three N x N float matrices, fills A with 0, 1, 2, ... and B with
 * N*N, N*N - 1, ..., times one call to mult(N, A, B, C) with
 * get_elapsedtime(), and prints the elapsed time.
 *
 * Usage: prog [N]
 *   N  matrix dimension (positive integer, default 1024).
 *
 * Returns 0 on success, EXIT_FAILURE on an invalid size or when the
 * matrices cannot be allocated.
 */
int main(int argc, char** argv)
{
    /* Largest dimension accepted on the command line; N is stored in an int. */
    enum { MAX_DIMENSION = 0x7fffffff };

    int N = 1024;
    if (argc > 1)
    {
        char *end = NULL;
        const long requested = strtol(argv[1], &end, 10);

        /* Reject empty input, trailing junk, and sizes that are not in 1..MAX_DIMENSION. */
        if (end == argv[1] || *end != '\0' || requested <= 0 || requested > MAX_DIMENSION)
        {
            fprintf(stderr, "Invalid size '%s': expected a positive integer\n", argv[1]);
            return EXIT_FAILURE;
        }
        N = (int)requested;
    }

    fprintf(stdout, "> Matrix Multiplication Kernel...\n");
    fprintf(stdout, " Size: %dx%d\n", N, N);

    /* Report the host thread-team size once, from a single thread. */
#pragma omp parallel default(none) shared(stdout)
    {
#pragma omp single
        fprintf(stdout, " Running on %d threads\n", omp_get_num_threads());
    }

    /* Element count in size_t: N * N evaluated in int overflows for N > 46340. */
    const size_t count = (size_t)N * (size_t)N;

    float *A = (float*) calloc(count, sizeof(float));
    float *B = (float*) calloc(count, sizeof(float));
    float *C = (float*) calloc(count, sizeof(float));
    if (A == NULL || B == NULL || C == NULL)
    {
        fprintf(stderr, "Unable to allocate three %dx%d float matrices\n", N, N);
        free(A); /* free(NULL) is a no-op, so no need to test each pointer */
        free(B);
        free(C);
        return EXIT_FAILURE;
    }

    for (size_t i = 0; i < count; ++i)
    {
        A[i] = (double)i;
        B[i] = (double)count - (double)i;
        C[i] = 0.;
    }

    const double t0 = get_elapsedtime();
    mult(N, A, B, C);
    const double t1 = get_elapsedtime();

    const double duration = t1 - t0;
    fprintf(stdout, " Elapsed Time : %f\n", duration);

    /* Hand the result to the optnone function before the buffers are released. */
    donotopti(C);

    free(A);
    free(B);
    free(C);
    return 0;
}
CC=clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -Xopenmp-target -march=sm_61
CC=clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda
CFLAGS=-O3 -Wall
LDFLAGS=-lm -lrt
......
......@@ -17,5 +17,17 @@ int main()
else
printf("### Unable to use the GPU, using CPU! ###\n");
printf("Nb. devices: %d\n", omp_get_num_devices());
int team = 0;
#pragma omp target map(from: team)
{
#pragma omp teams
{
team = omp_get_num_teams();
}
}
printf("Nb. teams: %d\n", team);
return 0;
}
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter