test.cu 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384
  #include <assert.h>
  #include <stdio.h>
  #include <stdlib.h>

  #include <cuda.h>

  #include "test.h"
  // Error-checking helpers: on any CUDA failure they print the file, line and
  // CUDA error string to stderr, then terminate the process.
  /*
   * CUDA_SAFE_CALL(call)
   *
   * Evaluates `call` (an expression yielding a cudaError_t) exactly once.
   * If the result is not cudaSuccess, prints the source file, line and the
   * CUDA error string to stderr and terminates with EXIT_FAILURE.
   *
   * The body is wrapped in do { ... } while (0) so the macro expands to a
   * single statement and is safe inside unbraced if/else; invoke it with a
   * trailing semicolon. The argument is parenthesised and the temporary has
   * a macro-private name so it cannot shadow a variable used in `call`.
   */
  #define CUDA_SAFE_CALL(call)                                               \
      do {                                                                   \
          cudaError_t safeCallErr_ = (call);                                 \
          if (safeCallErr_ != cudaSuccess) {                                 \
              fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",  \
                      __FILE__, __LINE__,                                    \
                      cudaGetErrorString(safeCallErr_));                     \
              exit(EXIT_FAILURE);                                            \
          }                                                                  \
      } while (0)
  /*
   * CHECKLASTERROR
   *
   * Fetches the most recent CUDA runtime error via cudaGetLastError(). If it
   * is anything other than cudaSuccess, reports file, line and the CUDA error
   * string on stderr and terminates with EXIT_FAILURE.
   *
   * Expands to a plain brace block, so it may be written with or without a
   * trailing semicolon.
   */
  #define CHECKLASTERROR                                                     \
      {                                                                      \
          const cudaError_t lastErr_ = cudaGetLastError();                   \
          if (cudaSuccess != lastErr_) {                                     \
              fprintf(stderr, "Cuda error in file '%s' in line %i : %s.\n",  \
                      __FILE__, __LINE__, cudaGetErrorString(lastErr_));     \
              exit(EXIT_FAILURE);                                            \
          }                                                                  \
      }
  /* Number of unsigned ints processed by truc and allocated by testcuda. */
  #define SIZ 128

  /*
   * truc: shifts each of the first SIZ elements of buf left by 5 bits, in place.
   *
   * buf   pointer to at least SIZ unsigned ints that the kernel can read and
   *       write.
   *
   * Launch layout: 1-D, one thread per element. The element index is the
   * global thread index, so any grid/block split that supplies at least SIZ
   * threads touches every element exactly once; surplus threads do nothing.
   * No shared memory is used, so no block barrier is needed.
   */
  __global__ void truc(unsigned int *buf) {
      /* Global index: with a single block this is just threadIdx.x. */
      const unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
      if (idx < SIZ) {
          buf[idx] <<= 5;
      }
  }
  /*
   * testcuda: round-trips a SIZ-element buffer through the GPU as a smoke test.
   *
   * Fills a host buffer with 1s, copies it to the device, launches truc on one
   * block of SIZ threads, copies the result back and prints element 5.
   *
   * Returns 0 on success. A failed host allocation, or any CUDA failure caught
   * by CUDA_SAFE_CALL / CHECKLASTERROR, terminates the process with
   * EXIT_FAILURE.
   */
  int testcuda()
  {
      const size_t bytes = SIZ * sizeof(unsigned int);

      /* Host staging buffer; checked before use and released before returning. */
      unsigned int *foo = (unsigned int *) malloc(bytes);
      if (foo == NULL) {
          fprintf(stderr, "testcuda: failed to allocate %lu bytes of host memory.\n",
                  (unsigned long) bytes);
          exit(EXIT_FAILURE);
      }
      for (int x = 0; x < SIZ; ++x) {
          foo[x] = 1;
      }

      unsigned int *recf = NULL;
      CUDA_SAFE_CALL(cudaMalloc((void **) &recf, bytes));
      CUDA_SAFE_CALL(cudaMemcpy(recf, foo, bytes, cudaMemcpyHostToDevice));

      truc<<<1, SIZ>>>(recf);
      /* A kernel launch returns no status; this picks up launch errors. */
      CHECKLASTERROR;

      /* Blocking copy: errors raised while the kernel ran are reported here. */
      CUDA_SAFE_CALL(cudaMemcpy(foo, recf, bytes, cudaMemcpyDeviceToHost));
      printf("2^5 -> %u\n", foo[5]);

      CUDA_SAFE_CALL(cudaFree(recf));
      free(foo);

      /*
      Disabled device-property dump, kept for reference.
      NOTE(review): before re-enabling, pass an int device ordinal (not NULL) to
      cudaGetDeviceProperties and print the size_t fields with a matching
      format specifier instead of %d.

      int deviceCount;
      cudaGetDeviceCount(&deviceCount);
      printf("device count %d\n", deviceCount);
      cudaDeviceProp dP;
      cudaGetDeviceProperties(&dP, NULL);
      //printf("Max threads per block: %d\n", dP.maxThreadsPerBlock);
      //printf("Max Threads DIM: %d x %d x %d\n", dP.maxThreadsDim[0], dP.maxThreadsDim[1], dP.maxThreadsDim[2]);
      //printf("Max Grid Size: %d x %d x %d\n", dP.maxGridSize[0], dP.maxGridSize[1], dP.maxGridSize[2]);
      cudaDeviceProp* pDeviceProp = &dP;
      printf( "\nDevice Name \t - %s ", pDeviceProp->name );
      printf( "\n**************************************");
      printf( "\nTotal Global Memory\t\t -%d KB", pDeviceProp->totalGlobalMem/1024 );
      printf( "\nShared memory available per block \t - %d KB", pDeviceProp->sharedMemPerBlock/1024 );
      printf( "\nNumber of registers per thread block \t - %d", pDeviceProp->regsPerBlock );
      printf( "\nWarp size in threads \t - %d", pDeviceProp->warpSize );
      printf( "\nMemory Pitch \t - %d bytes", pDeviceProp->memPitch );
      printf( "\nMaximum threads per block \t - %d", pDeviceProp->maxThreadsPerBlock );
      printf( "\nMaximum Thread Dimension (block) \t - %d %d %d", pDeviceProp->maxThreadsDim[0], pDeviceProp->maxThreadsDim[1], pDeviceProp->maxThreadsDim[2] );
      printf( "\nMaximum Thread Dimension (grid) \t - %d %d %d", pDeviceProp->maxGridSize[0], pDeviceProp->maxGridSize[1], pDeviceProp->maxGridSize[2] );
      printf( "\nTotal constant memory \t - %d bytes", pDeviceProp->totalConstMem );
      printf( "\nCUDA ver \t - %d.%d", pDeviceProp->major, pDeviceProp->minor );
      printf( "\nClock rate \t - %d KHz", pDeviceProp->clockRate );
      printf( "\nTexture Alignment \t - %d bytes", pDeviceProp->textureAlignment );
      printf( "\nDevice Overlap \t - %s", pDeviceProp-> deviceOverlap?"Allowed":"Not Allowed" );
      printf( "\nNumber of Multi processors \t - %d\n", pDeviceProp->multiProcessorCount );
      */
      return 0;
  }