diff --git a/TPs/TP0/CODE/device_query/prog b/TPs/TP0/CODE/device_query/prog new file mode 100755 index 0000000000000000000000000000000000000000..5425967f320d4c8fbaa3d71089cd4b66cef386f7 Binary files /dev/null and b/TPs/TP0/CODE/device_query/prog differ diff --git a/TPs/TP0/CODE/device_query/prog.cu b/TPs/TP0/CODE/device_query/prog.cu index d228b097efb2ef6d83172b5461a1d355672f7a1d..07d8b357d74230ad627ba43f6524a30b12e5f751 100644 --- a/TPs/TP0/CODE/device_query/prog.cu +++ b/TPs/TP0/CODE/device_query/prog.cu @@ -23,8 +23,10 @@ int main(int argc, char **argv) { cudaGetDeviceProperties(&prop, dev); printf("\nDevice %d: \"%s\"\n", dev, prop.name); printf(" GPU Clock Rate (MHz): %d\n", prop.clockRate/1000); - printf(" Memory Clock Rate (MHz): %d\n", prop.memoryClockRate/1000); + printf(" Memory Clock Rate (MT/s): %d\n", prop.memoryClockRate/1000); printf(" Memory Bus Width (bits): %d\n", prop.memoryBusWidth); + printf(" Memory Band width (Gio/s): %d\n", + (((prop.memoryClockRate / 1024) * (prop.memoryBusWidth / 8)) / 1024) * 2); printf(" CUDA Cores/MP: %d\n", _ConvertSMVer2Cores(prop.major, prop.minor)); printf(" CUDA Cores: %d\n", _ConvertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount); @@ -43,6 +45,11 @@ int main(int argc, char **argv) { printf(" Max dimension size of a grid size (x,y,z): (%d, %d, %d)\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]); + printf(" Max FP32 compute (Gflop/s): %d\n", + ((prop.clockRate / 1000) * + _ConvertSMVer2Cores(prop.major, prop.minor) + * prop.multiProcessorCount * 2) + / 1000); printf("\n\n========== cudaDeviceGetAttribute ============ \n"); printf("\nDevice %d: \"%s\"\n", dev, prop.name); @@ -79,9 +86,18 @@ int main(int argc, char **argv) { cudaDeviceGetAttribute (&value, cudaDevAttrMemoryClockRate, dev); printf(" Peak memory clock frequency in kilohertz: %d\n", value); +// int memoryClockRate = value; cudaDeviceGetAttribute (&value, cudaDevAttrGlobalMemoryBusWidth, dev); printf(" Global memory bus width in bits: %d\n", value); +// int memoryBusWidth = value; +// int memoryBandWidth = memoryClockRate * memoryBusWidth; +// printf(" Global memory bandwidth in kilo byts: %d\n", +// memoryBandWidth / 8); + + + + cudaDeviceGetAttribute (&value, cudaDevAttrL2CacheSize, dev); printf(" Size of L2 cache in bytes: %d\n", value); diff --git a/TPs/TP0/CODE/device_query/prog.o b/TPs/TP0/CODE/device_query/prog.o new file mode 100644 index 0000000000000000000000000000000000000000..fe708f858b73ffda6d64c5e28ef0a9a10671bba7 Binary files /dev/null and b/TPs/TP0/CODE/device_query/prog.o differ diff --git a/TPs/TP0/CODE/error_checking/exemple1.cu b/TPs/TP0/CODE/error_checking/exemple1.cu index 2c085a1a20bac6e02fcb0de6368336a9747e6aa9..58d7a39197d61cebf8e174c86dde4d5c902214e4 100755 --- a/TPs/TP0/CODE/error_checking/exemple1.cu +++ b/TPs/TP0/CODE/error_checking/exemple1.cu @@ -2,7 +2,8 @@ #include <stdlib.h> #include "helper_cuda.h" -#define THREADS 4096 +#define THREADS 1024 +//#define THREADS 4096 #define TAB_SIZE 8192 __global__ void kernel(int *a, int *b, int *c) { @@ -28,6 +29,7 @@ int main(int argc, char **argv) checkCudaErrors(cudaMemset(d_a, 1, sz_in_bytes)); checkCudaErrors(cudaMemset(d_b, 2, sz_in_bytes)); + cudaDeviceSynchronize(); // Kernel configuration dim3 dimBlock(THREADS, 1, 1); @@ -35,6 +37,7 @@ int main(int argc, char **argv) // Kernel launch kernel<<<dimGrid, dimBlock>>>(d_a, d_b, d_c); + getLastCudaError("test"); // Retrieving data from device (cudaMemcpy) checkCudaErrors(cudaMemcpy(h_c, d_c, sz_in_bytes, cudaMemcpyDeviceToHost)); @@ -47,6 +50,11 @@ int main(int argc, char **argv) // computing sum of tab element for (int i = 0; i < TAB_SIZE; i++) res += h_c[i]; +// for(int i = 0; i < TAB_SIZE; ++i){ +// printf("%d/", h_c[i]); +// } +// printf("\n"); + // Verifying if if (res == 3 * TAB_SIZE) { fprintf(stderr, "TEST PASSED !\n"); diff --git a/TPs/TP0/CODE/error_checking/exemple1.exe b/TPs/TP0/CODE/error_checking/exemple1.exe new file mode 100755 index 0000000000000000000000000000000000000000..92ff871b850ef8a9c61848b3b09fc28a55715118 Binary files /dev/null and b/TPs/TP0/CODE/error_checking/exemple1.exe differ diff --git a/TPs/TP0/CODE/error_checking/exemple2.cu b/TPs/TP0/CODE/error_checking/exemple2.cu index bbef844d63fc6c0c0ae42fc93f524c70804a9b83..f863dde23bdab204fd7aead195abe78a7876fe9e 100755 --- a/TPs/TP0/CODE/error_checking/exemple2.cu +++ b/TPs/TP0/CODE/error_checking/exemple2.cu @@ -16,7 +16,7 @@ int main(int argc, char **argv) int *h_b; int res = 0; - int *d_a, *d_b; + int *d_a, *d_b, *d_c; // Allocation on host (malloc) h_b = (int *)malloc(sz_in_bytes); @@ -33,11 +33,17 @@ int main(int argc, char **argv) // Kernel launch copy<<<dimGrid, dimBlock>>>(d_a, d_b); - checkCudaErrors(cudaDeviceSynchronize()); + getLastCudaError("copy kernel error"); + checkCudaErrors(cudaDeviceSynchronize()); + getLastCudaError("copy kernel error after sync"); // Retrieving data from device (cudaMemcpy) checkCudaErrors(cudaMemcpy(h_b, d_b, sz_in_bytes, cudaMemcpyDeviceToHost)); + // checking if cudamalloc is still available + checkCudaErrors(cudaMalloc((void **)&d_c, sz_in_bytes)); + checkCudaErrors(cudaFree(d_c)); + // Freeing on device (cudaFree) checkCudaErrors(cudaFree(d_a)); checkCudaErrors(cudaFree(d_b)); diff --git a/TPs/TP0/CODE/error_checking/exemple2.exe b/TPs/TP0/CODE/error_checking/exemple2.exe new file mode 100755 index 0000000000000000000000000000000000000000..ea89892735b6df2a2e60d443515ec9de4c026f5a Binary files /dev/null and b/TPs/TP0/CODE/error_checking/exemple2.exe differ diff --git a/TPs/TP0/CODE/error_checking/exemple3.exe b/TPs/TP0/CODE/error_checking/exemple3.exe new file mode 100755 index 0000000000000000000000000000000000000000..247981307c9bfe2e5aa932c7eeea35b63056f71b Binary files /dev/null and b/TPs/TP0/CODE/error_checking/exemple3.exe differ