diff --git a/TPs/TP0/CODE/common/helper_cuda.h b/TPs/TP0/CODE/common/helper_cuda.h
new file mode 100644
index 0000000000000000000000000000000000000000..3dd446dced85de9d10eb9f1597c56099fa358bdb
--- /dev/null
+++ b/TPs/TP0/CODE/common/helper_cuda.h
@@ -0,0 +1,194 @@
+/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *  * Neither the name of NVIDIA CORPORATION nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+////////////////////////////////////////////////////////////////////////////////
+// These are CUDA Helper functions for initialization and error checking
+
+#ifndef COMMON_HELPER_CUDA_H_
+#define COMMON_HELPER_CUDA_H_
+
+#pragma once
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifndef MAX
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#endif
+
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
+#define getLastCudaError(msg) __getLastCudaError(msg, __FILE__, __LINE__)
+
+// Aborts the program with a diagnostic on stderr when `result` is not
+// cudaSuccess. `func` is the stringified call, `file`/`line` its location.
+// Declared inline so that including this header from several translation
+// units does not produce duplicate-symbol link errors.
+inline void check(cudaError_t result, char const *const func, const char *const file,
+                  int const line)
+{
+    if (result != cudaSuccess)
+    {
+        fprintf(stderr, "CUDA error at %s:%d code=%d (%s) \"%s\" \n", file, line, (int)result, cudaGetErrorName(result), func);
+        exit(EXIT_FAILURE);
+    }
+}
+
+// Reads (and clears) the last CUDA runtime error via cudaGetLastError() and
+// aborts with `errorMessage` plus the error string if one is pending.
+inline void __getLastCudaError(const char *errorMessage, const char *file,
+                               const int line)
+{
+    cudaError_t err = cudaGetLastError();
+
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr,
+                "%s(%i) : getLastCudaError() CUDA error :"
+                " %s : (%d) %s.\n",
+                file, line, errorMessage, (int)(err),
+                cudaGetErrorString(err));
+        exit(EXIT_FAILURE);
+    }
+}
+
+// Beginning of GPU Architecture definitions
+// Returns the number of CUDA cores per SM for compute capability major.minor.
+// Unknown versions print a notice and fall back to the last table entry.
+inline int _ConvertSMVer2Cores(int major, int minor)
+{
+    // Defines for GPU Architecture types (using the SM version to determine
+    // the # of cores per SM
+    typedef struct
+    {
+        int SM; // 0xMm (hexadecimal notation), M = SM Major version,
+        // and m = SM minor version
+        int Cores;
+    } sSMtoCores;
+
+    sSMtoCores nGpuArchCoresPerSM[] = {
+        {0x30, 192},
+        {0x32, 192},
+        {0x35, 192},
+        {0x37, 192},
+        {0x50, 128},
+        {0x52, 128},
+        {0x53, 128},
+        {0x60, 64},
+        {0x61, 128},
+        {0x62, 128},
+        {0x70, 64},
+        {0x72, 64},
+        {0x75, 64},
+        {0x80, 64},
+        {0x86, 128},
+        {0x87, 128},
+        {-1, -1}};
+
+    int index = 0;
+
+    while (nGpuArchCoresPerSM[index].SM != -1)
+    {
+        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
+        {
+            return nGpuArchCoresPerSM[index].Cores;
+        }
+
+        index++;
+    }
+
+    // If we don't find the values, we default use the previous one
+    // to run properly
+    printf(
+        "MapSMtoCores for SM %d.%d is undefined."
+        " Default to use %d Cores/SM\n",
+        major, minor, nGpuArchCoresPerSM[index - 1].Cores);
+    return nGpuArchCoresPerSM[index - 1].Cores;
+}
+
+// Returns the architecture name for compute capability major.minor.
+// Unknown versions print a notice and fall back to the last table entry.
+inline const char *_ConvertSMVer2ArchName(int major, int minor)
+{
+    // Defines for GPU Architecture types (using the SM version to determine
+    // the GPU Arch name)
+    typedef struct
+    {
+        int SM; // 0xMm (hexadecimal notation), M = SM Major version,
+        // and m = SM minor version
+        const char *name;
+    } sSMtoArchName;
+
+    sSMtoArchName nGpuArchNameSM[] = {
+        {0x30, "Kepler"},
+        {0x32, "Kepler"},
+        {0x35, "Kepler"},
+        {0x37, "Kepler"},
+        {0x50, "Maxwell"},
+        {0x52, "Maxwell"},
+        {0x53, "Maxwell"},
+        {0x60, "Pascal"},
+        {0x61, "Pascal"},
+        {0x62, "Pascal"},
+        {0x70, "Volta"},
+        {0x72, "Xavier"},
+        {0x75, "Turing"},
+        {0x80, "Ampere"},
+        {0x86, "Ampere"},
+        {0x87, "Ampere"},
+        {-1, "Graphics Device"}};
+
+    int index = 0;
+
+    while (nGpuArchNameSM[index].SM != -1)
+    {
+        if (nGpuArchNameSM[index].SM == ((major << 4) + minor))
+        {
+            return nGpuArchNameSM[index].name;
+        }
+
+        index++;
+    }
+
+    // If we don't find the values, we default use the previous one
+    // to run properly
+    printf(
+        "MapSMtoArchName for SM %d.%d is undefined."
+        " Default to use %s\n",
+        major, minor, nGpuArchNameSM[index - 1].name);
+    return nGpuArchNameSM[index - 1].name;
+}
+// end of GPU Architecture definitions
+
+// end of CUDA Helper Functions
+
+#endif // COMMON_HELPER_CUDA_H_
\ No newline at end of file
diff --git a/TPs/TP0/CODE/compute-sanitizer/Makefile b/TPs/TP0/CODE/compute-sanitizer/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..2283121ddd828e6e4f8750965ef592866b4acec1
--- /dev/null
+++ b/TPs/TP0/CODE/compute-sanitizer/Makefile
@@ -0,0 +1,17 @@
+CC = nvcc -lineinfo
+INCLUDES = 
+
+# the build target executable:
+EXE = memcheck.exe initcheck.exe 
+
+all: $(EXE)
+
+%.exe: %.o
+	$(CC) $^ -o $@
+
+%.o: %.cu
+	$(CC) $(INCLUDES) $^ -c -o $@
+
+clean:
+	$(RM) $(EXE) *.o
+
diff --git a/TPs/TP0/CODE/compute-sanitizer/initcheck.cu b/TPs/TP0/CODE/compute-sanitizer/initcheck.cu
new file mode 100644
index 0000000000000000000000000000000000000000..53357265e3d91a43bd9d538f20978be77e6c9c52
--- /dev/null
+++ b/TPs/TP0/CODE/compute-sanitizer/initcheck.cu
@@ -0,0 +1,11 @@
+const int bs = 1;
+
+__global__ void kernel(char *in, char *out) { out[threadIdx.x] = in[threadIdx.x]; }
+
+int main(void) {
+  char *d1, *d2;
+  cudaMalloc(&d1, bs);
+  cudaMalloc(&d2, bs);
+  kernel<<<1, bs>>>(d1, d2);  // NOTE(review): d1 is never written before this read; presumably the deliberate defect for compute-sanitizer's initcheck tool -- confirm
+  cudaDeviceSynchronize();
+}
diff --git a/TPs/TP0/CODE/compute-sanitizer/memcheck.cu b/TPs/TP0/CODE/compute-sanitizer/memcheck.cu
new file mode 100644
index 0000000000000000000000000000000000000000..ad3fc59b16be296946be73084b4f19bb676c52fe
--- /dev/null
+++ b/TPs/TP0/CODE/compute-sanitizer/memcheck.cu
@@ -0,0 +1,44 @@
+#include <stdio.h>
+#include <stdlib.h>
+#define max(a, b) ((a) < (b) ? (b) : (a))
+
+__global__ void saxpy(int n, float a, float *x, float *y) {
+  int i = blockIdx.x * blockDim.x + threadIdx.x;
+  y[i] = a * x[i] + y[i];  // NOTE(review): no `i < n` guard, and the launch below adds a surplus block: out-of-bounds access, presumably the deliberate defect for compute-sanitizer's memcheck tool -- confirm
+}
+
+int main(void) {
+  int N = 1 << 20;
+  float *x, *y, *d_x, *d_y;
+
+  x = (float *)malloc(N * sizeof(float));
+  y = (float *)malloc(N * sizeof(float));
+
+  cudaMalloc(&d_x, N * sizeof(float));
+  cudaMalloc(&d_y, N * sizeof(float));
+
+  for (int i = 0; i < N; i++) {
+    x[i] = 1.0f;
+    y[i] = 2.0f;
+  }
+
+  cudaMemcpy(d_x, x, N * sizeof(float), cudaMemcpyHostToDevice);
+  cudaMemcpy(d_y, y, N * sizeof(float), cudaMemcpyHostToDevice);
+
+  // Perform SAXPY on 1M elements
+  saxpy<<<(N + 255) / 256 + 1, 256>>>(N, 2.0f, d_x, d_y);
+  cudaMemcpy(y, d_y, N * sizeof(float), cudaMemcpyDeviceToHost);
+
+  float maxError = 0.0f;
+
+  for (int i = 0; i < N; i++) {
+    maxError = max(maxError, abs(y[i] - 4.0f));
+  }
+
+  printf("Max error: %f\n", maxError);
+
+  cudaFree(d_x);
+  cudaFree(d_y);
+  free(x);
+  free(y);
+}
diff --git a/TPs/TP0/CODE/compute-sanitizer/tags b/TPs/TP0/CODE/compute-sanitizer/tags
new file mode 100644
index 0000000000000000000000000000000000000000..f41805ff57b31cfd930077730d066b2145261de1
--- /dev/null
+++ b/TPs/TP0/CODE/compute-sanitizer/tags
@@ -0,0 +1,9 @@
+!_TAG_FILE_FORMAT	2	/extended format; --format=1 will not append ;" to lines/
+!_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
+!_TAG_PROGRAM_AUTHOR	Darren Hiebert	/dhiebert@users.sourceforge.net/
+!_TAG_PROGRAM_NAME	Exuberant Ctags	//
+!_TAG_PROGRAM_URL	http://ctags.sourceforge.net	/official site/
+!_TAG_PROGRAM_VERSION	5.9~svn20110310	//
+CC	Makefile	/^CC = nvcc -lineinfo$/;"	m
+EXE	Makefile	/^EXE = memcheck.exe initcheck.exe $/;"	m
+INCLUDES	Makefile	/^INCLUDES = $/;"	m
diff --git a/TPs/TP0/CODE/device_query/Makefile b/TPs/TP0/CODE/device_query/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..32e68d57b8d6fb95157ba9072e94601681308929
--- /dev/null
+++ b/TPs/TP0/CODE/device_query/Makefile
@@ -0,0 +1,20 @@
+#***********************************************************
+# Created by Patricio Bulic, Davor Sluga UL FRI on 15/6/2022
+#***********************************************************
+CC = nvcc
+INCLUDES = -I. -I../common
+
+# the build target executable:
+TARGET = prog
+
+all: $(TARGET)
+
+$(TARGET): $(TARGET).o
+	$(CC) $(TARGET).o -o $@
+
+$(TARGET).o: $(TARGET).cu
+	$(CC) $(INCLUDES) $(TARGET).cu -c -o $@
+
+clean:
+	$(RM) $(TARGET) $(TARGET).o
+
diff --git a/TPs/TP0/CODE/device_query/prog.cu b/TPs/TP0/CODE/device_query/prog.cu
new file mode 100644
index 0000000000000000000000000000000000000000..d228b097efb2ef6d83172b5461a1d355672f7a1d
--- /dev/null
+++ b/TPs/TP0/CODE/device_query/prog.cu
@@ -0,0 +1,99 @@
+#include <stdio.h>
+#include <cuda_runtime.h>
+#include <cuda.h>
+
+#include "helper_cuda.h"
+
+// Queries the integer attribute `attr` of device `dev` and prints it after
+// `label`. A failing query aborts the program through checkCudaErrors.
+static void printDeviceAttribute(const char *label, cudaDeviceAttr attr, int dev) {
+  int value = 0;
+  checkCudaErrors(cudaDeviceGetAttribute(&value, attr, dev));
+  printf("%s%d\n", label, value);
+}
+
+// For every CUDA device, prints a selection of cudaDeviceProp fields and then
+// a list of attributes read through cudaDeviceGetAttribute.
+int main(int argc, char **argv) {
+
+  // Get number of GPUs
+  int deviceCount = 0;
+  cudaError_t error = cudaGetDeviceCount(&deviceCount);
+
+  if (error != cudaSuccess) {
+    printf("cudaGetDeviceCount error %d\n-> %s\n", error, cudaGetErrorString(error));
+    exit(EXIT_FAILURE);
+  }
+
+  // Get device properties and print
+  for (int dev = 0; dev < deviceCount; dev++) {
+    struct cudaDeviceProp prop;
+    printf("\n========== cudaDeviceGetProperties ============ \n");
+    checkCudaErrors(cudaGetDeviceProperties(&prop, dev));
+    printf("\nDevice %d: \"%s\"\n", dev, prop.name);
+    printf(" GPU Clock Rate (MHz): %d\n", prop.clockRate/1000);
+    printf(" Memory Clock Rate (MHz): %d\n", prop.memoryClockRate/1000);
+    printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth);
+    printf(" CUDA Cores/MP: %d\n", _ConvertSMVer2Cores(prop.major, prop.minor));
+    printf(" CUDA Cores: %d\n", _ConvertSMVer2Cores(prop.major, prop.minor) *
+           prop.multiProcessorCount);
+    printf(" Total amount of global memory: %.0f GB\n", prop.totalGlobalMem / 1073741824.0f);
+    printf(" Total amount of shared memory per block: %zu kB\n",
+           prop.sharedMemPerBlock/1024);
+    printf(" Total number of registers available per block: %d\n",
+           prop.regsPerBlock);
+    printf(" Warp size: %d\n",
+           prop.warpSize);
+    printf(" Maximum number of threads per block: %d\n",
+           prop.maxThreadsPerBlock);
+    printf(" Max dimension size of a thread block (x,y,z): (%d, %d, %d)\n",
+           prop.maxThreadsDim[0], prop.maxThreadsDim[1],
+           prop.maxThreadsDim[2]);
+    printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n",
+           prop.maxGridSize[0], prop.maxGridSize[1],
+           prop.maxGridSize[2]);
+
+    printf("\n\n========== cudaDeviceGetAttribute ============ \n");
+    printf("\nDevice %d: \"%s\"\n", dev, prop.name);
+    printDeviceAttribute(" Max number of threads per block: ",
+                         cudaDevAttrMaxThreadsPerBlock, dev);
+    printDeviceAttribute(" Max block dimension X: ",
+                         cudaDevAttrMaxBlockDimX, dev);
+    printDeviceAttribute(" Max block dimension Y: ",
+                         cudaDevAttrMaxBlockDimY, dev);
+    printDeviceAttribute(" Max block dimension Z: ",
+                         cudaDevAttrMaxBlockDimZ, dev);
+    printDeviceAttribute(" Max grid dimension X: ",
+                         cudaDevAttrMaxGridDimX, dev);
+    printDeviceAttribute(" Max grid dimension Y: ",
+                         cudaDevAttrMaxGridDimY, dev);
+    printDeviceAttribute(" Max grid dimension Z: ",
+                         cudaDevAttrMaxGridDimZ, dev);
+    printDeviceAttribute(" Max shared memory per block: ",
+                         cudaDevAttrMaxSharedMemoryPerBlock, dev);
+    printDeviceAttribute(" Warp size: ",
+                         cudaDevAttrWarpSize, dev);
+    printDeviceAttribute(" Peak clock frequency in kilohertz: ",
+                         cudaDevAttrClockRate, dev);
+    printDeviceAttribute(" Peak memory clock frequency in kilohertz: ",
+                         cudaDevAttrMemoryClockRate, dev);
+    printDeviceAttribute(" Global memory bus width in bits: ",
+                         cudaDevAttrGlobalMemoryBusWidth, dev);
+    printDeviceAttribute(" Size of L2 cache in bytes: ",
+                         cudaDevAttrL2CacheSize, dev);
+    printDeviceAttribute(" Maximum resident threads per SM: ",
+                         cudaDevAttrMaxThreadsPerMultiProcessor, dev);
+    printDeviceAttribute(" Major compute capability version number: ",
+                         cudaDevAttrComputeCapabilityMajor, dev);
+    printDeviceAttribute(" Minor compute capability version number: ",
+                         cudaDevAttrComputeCapabilityMinor, dev);
+    printDeviceAttribute(" Max shared memory per SM in bytes: ",
+                         cudaDevAttrMaxSharedMemoryPerMultiprocessor, dev);
+    printDeviceAttribute(" Max number of 32-bit registers per SM: ",
+                         cudaDevAttrMaxRegistersPerMultiprocessor, dev);
+    printDeviceAttribute(" Max per block shared mem size on the device: ",
+                         cudaDevAttrMaxSharedMemoryPerBlockOptin, dev);
+    printDeviceAttribute(" Max thread blocks that can reside on a SM: ",
+                         cudaDevAttrMaxBlocksPerMultiprocessor, dev);
+  }
+}
diff --git a/TPs/TP0/CODE/device_query/tags b/TPs/TP0/CODE/device_query/tags
new file mode 100644
index 0000000000000000000000000000000000000000..a1a72e91049548c5bef1153a5347b998e0dbcda8
--- /dev/null
+++ b/TPs/TP0/CODE/device_query/tags
@@ -0,0 +1,9 @@
+!_TAG_FILE_FORMAT	2	/extended format; --format=1 will not append ;" to lines/
+!_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
+!_TAG_PROGRAM_AUTHOR	Darren Hiebert	/dhiebert@users.sourceforge.net/
+!_TAG_PROGRAM_NAME	Exuberant Ctags	//
+!_TAG_PROGRAM_URL	http://ctags.sourceforge.net	/official site/
+!_TAG_PROGRAM_VERSION	5.9~svn20110310	//
+CC	Makefile	/^CC = nvcc$/;"	m
+INCLUDES	Makefile	/^INCLUDES = -I. -I..\/common$/;"	m
+TARGET	Makefile	/^TARGET = prog$/;"	m
diff --git a/TPs/TP0/CODE/error_checking/Makefile b/TPs/TP0/CODE/error_checking/Makefile
new file 
mode 100644
index 0000000000000000000000000000000000000000..5024daa666a478895dc709a6e43a6e92cd721f28
--- /dev/null
+++ b/TPs/TP0/CODE/error_checking/Makefile
@@ -0,0 +1,17 @@
+CC = nvcc
+INCLUDES = -I../common
+
+# the build target executable:
+EXE = exemple1.exe exemple2.exe exemple3.exe
+
+all: $(EXE)
+
+%.exe: %.o
+	$(CC) $^ -o $@
+
+%.o: %.cu
+	$(CC) $(INCLUDES) $^ -c -o $@
+
+clean:
+	$(RM) $(EXE) *.o
+
diff --git a/TPs/TP0/CODE/error_checking/exemple1.cu b/TPs/TP0/CODE/error_checking/exemple1.cu
new file mode 100755
index 0000000000000000000000000000000000000000..2c085a1a20bac6e02fcb0de6368336a9747e6aa9
--- /dev/null
+++ b/TPs/TP0/CODE/error_checking/exemple1.cu
@@ -0,0 +1,73 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "helper_cuda.h"
+
+// NOTE(review): 4096 is above the 1024 threads-per-block limit of current
+// GPUs, so the launch below is rejected. Presumably the deliberate fault of
+// this error-checking exercise -- confirm before changing it.
+#define THREADS 4096
+#define TAB_SIZE 8192
+
+// Element-wise sum c = a + b over TAB_SIZE ints, one thread per element.
+// Threads whose global index is past the end of the arrays do nothing.
+__global__ void kernel(int *a, int *b, int *c) {
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid < TAB_SIZE) c[tid] = a[tid] + b[tid];
+}
+
+// Fills a and b on the device, adds them with `kernel`, copies c back and
+// reports on stderr whether every element has the expected value.
+int main(int argc, char **argv)
+{
+  int sz_in_bytes = sizeof(int) * TAB_SIZE;
+
+  int *h_c;
+  int res = 0;
+  int *d_a, *d_b, *d_c;
+
+  // cudaMemset fills bytes, not ints: each int of a is 0x01010101 and each
+  // int of b is 0x02020202, so a correct c holds their sum everywhere.
+  const int expected = 0x01010101 + 0x02020202;
+
+  // Allocation on host (malloc)
+  h_c = (int *)malloc(sz_in_bytes);
+
+  // Allocation on device (cudaMalloc)
+  checkCudaErrors(cudaMalloc((void **)&d_a, sz_in_bytes));
+  checkCudaErrors(cudaMalloc((void **)&d_b, sz_in_bytes));
+  checkCudaErrors(cudaMalloc((void **)&d_c, sz_in_bytes));
+
+  checkCudaErrors(cudaMemset(d_a, 1, sz_in_bytes));
+  checkCudaErrors(cudaMemset(d_b, 2, sz_in_bytes));
+
+  // Kernel configuration
+  dim3 dimBlock(THREADS, 1, 1);
+  dim3 dimGrid(TAB_SIZE / THREADS + 1, 1, 1);
+
+  // Kernel launch
+  kernel<<<dimGrid, dimBlock>>>(d_a, d_b, d_c);
+
+  // Retrieving data from device (cudaMemcpy)
+  checkCudaErrors(cudaMemcpy(h_c, d_c, sz_in_bytes, cudaMemcpyDeviceToHost));
+
+  // Freeing on device (cudaFree)
+  checkCudaErrors(cudaFree(d_a));
+  checkCudaErrors(cudaFree(d_b));
+  checkCudaErrors(cudaFree(d_c));
+
+  // counting the elements that differ from the expected value
+  for (int i = 0; i < TAB_SIZE; i++) res += (h_c[i] != expected);
+
+  // Verifying that no element is wrong
+  if (res == 0) {
+    fprintf(stderr, "TEST PASSED !\n");
+  }
+  else
+  {
+    fprintf(stderr, "TEST FAILED !\n");
+  }
+
+  free(h_c);
+
+  return 0;
+}
diff --git a/TPs/TP0/CODE/error_checking/exemple2.cu b/TPs/TP0/CODE/error_checking/exemple2.cu
new file mode 100755
index 0000000000000000000000000000000000000000..bbef844d63fc6c0c0ae42fc93f524c70804a9b83
--- /dev/null
+++ b/TPs/TP0/CODE/error_checking/exemple2.cu
@@ -0,0 +1,69 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "helper_cuda.h"
+
+#define THREADS 256
+#define TAB_SIZE 8192
+
+// Copies a into b, one thread per element.
+// NOTE(review): `<=` lets thread TAB_SIZE access one element past the end of
+// both arrays; presumably the deliberate fault of this exercise -- confirm.
+__global__ void copy(int *a, int *b) {
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid <= TAB_SIZE) b[tid] = a[tid];
+}
+
+// Fills a on the device, copies it to b with `copy`, copies b back and
+// reports on stderr whether every element has the expected value.
+int main(int argc, char **argv)
+{
+  int sz_in_bytes = sizeof(int) * TAB_SIZE;
+
+  int *h_b;
+  int res = 0;
+  int *d_a, *d_b;
+
+  // cudaMemset fills bytes, not ints: each int of a is 0x01010101, so a
+  // correct copy holds that value everywhere.
+  const int expected = 0x01010101;
+
+  // Allocation on host (malloc)
+  h_b = (int *)malloc(sz_in_bytes);
+
+  // Allocation on device (cudaMalloc)
+  checkCudaErrors(cudaMalloc((void **)&d_a, sz_in_bytes));
+  checkCudaErrors(cudaMalloc((void **)&d_b, sz_in_bytes));
+
+  checkCudaErrors(cudaMemset(d_a, 1, sz_in_bytes));
+
+  // Kernel configuration
+  dim3 dimBlock(THREADS, 1, 1);
+  dim3 dimGrid(TAB_SIZE / THREADS + 1, 1, 1);
+
+  // Kernel launch
+  copy<<<dimGrid, dimBlock>>>(d_a, d_b);
+  checkCudaErrors(cudaDeviceSynchronize());
+
+  // Retrieving data from device (cudaMemcpy)
+  checkCudaErrors(cudaMemcpy(h_b, d_b, sz_in_bytes, cudaMemcpyDeviceToHost));
+
+  // Freeing on device (cudaFree)
+  checkCudaErrors(cudaFree(d_a));
+  checkCudaErrors(cudaFree(d_b));
+
+  // counting the elements that differ from the expected value
+  for (int i = 0; i < TAB_SIZE; i++) res += (h_b[i] != expected);
+
+  // Verifying that no element is wrong
+  if (res == 0) {
+    fprintf(stderr, "TEST PASSED !\n");
+  }
+  else
+  {
+    fprintf(stderr, "TEST FAILED !\n");
+  }
+
+  free(h_b);
+
+  return 0;
+}
diff --git a/TPs/TP0/CODE/error_checking/exemple3.cu b/TPs/TP0/CODE/error_checking/exemple3.cu
new file mode 100755 index 
0000000000000000000000000000000000000000..d2a0c2ce1dd4eb0e8f55c29b89402f59146c1ef2
--- /dev/null
+++ b/TPs/TP0/CODE/error_checking/exemple3.cu
@@ -0,0 +1,70 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include "helper_cuda.h"
+
+#define THREADS 256
+#define TAB_SIZE 8192
+
+// Copies a into b over TAB_SIZE ints, one thread per element.
+// Threads whose global index is past the end of the arrays do nothing.
+__global__ void copy(int *a, int *b) {
+  const int tid = blockIdx.x * blockDim.x + threadIdx.x;
+  if (tid < TAB_SIZE) b[tid] = a[tid];
+}
+
+// Fills a on the device, copies it to b with `copy`, copies b back and
+// reports on stderr whether every element has the expected value.
+int main(int argc, char **argv)
+{
+  int sz_in_bytes = sizeof(int) * TAB_SIZE;
+
+  int *h_b;
+  int res = 0;
+  int *d_a, *d_b;
+
+  // cudaMemset fills bytes, not ints: each int of a is 0x01010101, so a
+  // correct copy holds that value everywhere.
+  const int expected = 0x01010101;
+
+  // Allocation on host (malloc)
+  h_b = (int *)malloc(sz_in_bytes);
+
+  // Too big allocation on device
+  checkCudaErrors(cudaMalloc((void **)&d_a, 100000000000));
+
+  // Allocation on device (cudaMalloc)
+  checkCudaErrors(cudaMalloc((void **)&d_a, sz_in_bytes));
+  checkCudaErrors(cudaMalloc((void **)&d_b, sz_in_bytes));
+
+  checkCudaErrors(cudaMemset(d_a, 1, sz_in_bytes));
+
+  // Kernel configuration
+  dim3 dimBlock(THREADS, 1, 1);
+  dim3 dimGrid(TAB_SIZE / THREADS + 1, 1, 1);
+
+  // Kernel launch
+  copy<<<dimGrid, dimBlock>>>(d_a, d_b);
+
+  // Retrieving data from device (cudaMemcpy)
+  checkCudaErrors(cudaMemcpy(h_b, d_b, sz_in_bytes, cudaMemcpyDeviceToHost));
+
+  // Freeing on device (cudaFree)
+  checkCudaErrors(cudaFree(d_a));
+  checkCudaErrors(cudaFree(d_b));
+
+  // counting the elements that differ from the expected value
+  for (int i = 0; i < TAB_SIZE; i++) res += (h_b[i] != expected);
+
+  // Verifying that no element is wrong
+  if (res == 0) {
+    fprintf(stderr, "TEST PASSED !\n");
+  }
+  else
+  {
+    fprintf(stderr, "TEST FAILED !\n");
+  }
+
+  free(h_b);
+
+  return 0;
+}
diff --git a/TPs/TP0/CODE/error_checking/tags b/TPs/TP0/CODE/error_checking/tags
new file mode 100644
index 0000000000000000000000000000000000000000..62d8d1959ccd1e9cd92a22796556a2bfb4d2571d
--- /dev/null
+++ b/TPs/TP0/CODE/error_checking/tags
@@ -0,0 +1,9 @@
+!_TAG_FILE_FORMAT	2	/extended format; --format=1 will not append ;" to lines/
+!_TAG_FILE_SORTED	1	/0=unsorted, 1=sorted, 2=foldcase/
+!_TAG_PROGRAM_AUTHOR	Darren Hiebert	/dhiebert@users.sourceforge.net/
+!_TAG_PROGRAM_NAME	Exuberant Ctags	//
+!_TAG_PROGRAM_URL	http://ctags.sourceforge.net	/official site/
+!_TAG_PROGRAM_VERSION	5.9~svn20110310	//
+CC	Makefile	/^CC = nvcc$/;"	m
+EXE	Makefile	/^EXE = exemple1.exe exemple2.exe exemple3.exe$/;"	m
+INCLUDES	Makefile	/^INCLUDES = -I..\/common$/;"	m
diff --git a/TPs/TP0/DOCS/cuda_training_series_cuda_debugging.pdf b/TPs/TP0/DOCS/cuda_training_series_cuda_debugging.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..00dd349dea34fd47860cbcbac2a0ae04dcfd9d12
Binary files /dev/null and b/TPs/TP0/DOCS/cuda_training_series_cuda_debugging.pdf differ
diff --git a/TPs/TP0/SUJET/tp0.pdf b/TPs/TP0/SUJET/tp0.pdf
new file mode 100644
index 0000000000000000000000000000000000000000..c466200cf7143d4dac7fb60d7233b75aad070bdd
Binary files /dev/null and b/TPs/TP0/SUJET/tp0.pdf differ