CUBLASを使ってみた。
「はじめてのCUDAプログラミング」*1を読んで、CUDA用の線形計算ライブラリCUBLASを使ってみた。サンプルコードそのままではちょっと分かりにくいのでメモ。
CUBLASの元になってるBLASライブラリはもともとFortran用のライブラリで、C/C++で使うときには注意が必要であることが知られている。
それは、行列演算を行う場合の数値のメモリ上の配置がFortranとC/C++で違うこと。行列Aのi行j列目を普通Aijと書くんだけど、これをFortranでA(i,j)と書いた場合はメモリ上でそのお隣はA(i+1,j)になる。一方で、C/C++でA[i][j]と書いた場合お隣はA[i][j+1]
((一次元配列でAijを書く場合でも、A[i*N+j]と書きたくなるのがC/C++プログラマなのだ))。前者を行指向の配置(column-major storage)、後者を列指向(row-major storage)と呼ぶらしい。
さて、BLASが元になってるCUBLASは当然行指向が想定されている。けど、「はじめてのCUDAプログラミング」はいまいちこの件に関する言及が微妙で、慣れていないと容易に間違える。間違えないようにするには、行指向で普段から考えておくか、
/* Linear offset of element (i, j): consecutive i are adjacent, consecutive j are ld elements apart. */
#define IDX2C(i, j, ld) ((i) + ((ld) * (j)))
みたいなマクロを定義しておくこと*2。ここでldはleading dimensionで、早い話が行の数だ。
このマクロを使って書き直したコードを以下に貼っておく。
#include<stdio.h> #include<stdlib.h> #include<cublas.h> #define N 1000 #define M 1500 #define K 500 #define IDX2C(i,j,ld) (((j)*(ld)+(i))) int main(int argc,char **argv){ double alpha = 3.0, beta = 1.0; double *A,*B,*C; double *dA,*dB,*dC; int LDA = M, LDB = K, LDC = M; int i,j; cudaSetDevice(0); cublasInit(); cudaMallocHost((void **)&A,sizeof(double) * M * K); cudaMallocHost((void **)&B,sizeof(double) * K * N); cudaMallocHost((void **)&C,sizeof(double) * M * N); for(i=0;i<M;++i) for(j=0;j<K;++j) A[IDX2C(i,j,M)] = i*K+j + 1; for(i=0;i<K;++i) for(j=0;j<N;++j) B[IDX2C(i,j,K)] = i*N+j + 1; for(i=0;i<M;++i) for(j=0;j<N;++j) C[IDX2C(i,j,M)] = 0.0; cublasAlloc(M*K,sizeof(double),(void **)&dA); cublasAlloc(K*N,sizeof(double),(void **)&dB); cublasAlloc(M*N,sizeof(double),(void **)&dC); cublasSetMatrix(M,K,sizeof(double),A,LDA,dA,M); cublasSetMatrix(K,N,sizeof(double),B,LDB,dB,K); cublasSetMatrix(M,N,sizeof(double),C,LDC,dC,M); cublasDgemm('N','N',M,N,K,alpha,dA,LDA,dB,LDB,beta,dC,LDC); cublasGetMatrix(M,N,sizeof(double),dC,M,C,LDC); cublasFree(dA); cublasFree(dB); cublasFree(dC); cublasShutdown(); return 0; }
ちなみに、Intel Math Kernel LibraryについてるCBLASは、引数で行指向と列指向を切り替えられて便利なので、CUBLASも将来のバージョンでは見習って欲しいものである。
*1:
*2:CUDA Toolkit Archive | NVIDIA DeveloperにあるCUBLAS User Guideを参考にしました。