Add Tensor Core support (build for sm_70, enable CUBLAS_TENSOR_OP_MATH)

This commit is contained in:
Hieu Hoang 2018-01-16 16:03:06 +00:00
parent 4a725ed4ec
commit 83b6de15c5
3 changed files with 5 additions and 4 deletions

View File

@ -38,7 +38,7 @@ endif(FPGA)
if(CUDA)
find_package(CUDA)
if(CUDA_FOUND)
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -O3; -arch=sm_61; -lineinfo; --use_fast_math; )
LIST(APPEND CUDA_NVCC_FLAGS --default-stream per-thread; -std=c++11; -O3; -arch=sm_70; -lineinfo; --use_fast_math; )
# add_definitions(-DCUDA_API_PER_THREAD_DEFAULT_STREAM)
add_definitions(-DCUDA)
SET(CUDA_PROPAGATE_HOST_FLAGS OFF)

View File

@ -420,7 +420,7 @@ unsigned EncoderDecoder::SentencesToGet(const Histories& histories)
unsigned ret1 = histories.NumInactive();
return ret1;
*/
///*
BEGIN_TIMER("SentencesToGet");
unsigned minActive = (histories.size() > 8) ? histories.size() - 8 : 1;
@ -457,6 +457,7 @@ unsigned EncoderDecoder::SentencesToGet(const Histories& histories)
PAUSE_TIMER("SentencesToGet");
return ret;
//*/
}
}

View File

@ -30,13 +30,13 @@ CublasHandler::CublasHandler()
}
#if CUDA_VERSION >= 9000
/*
///*
stat = cublasSetMathMode(handle_, CUBLAS_TENSOR_OP_MATH);
if (stat != CUBLAS_STATUS_SUCCESS) {
printf ("cublasSetMathMode failed\n");
abort();
}
*/
//*/
#endif
stat = cublasSetStream(handle_, CudaStreamHandler::GetStream());