Update some fixed

This commit is contained in:
Young Jin Kim 2019-06-18 17:09:14 -07:00
parent d2d8ec041d
commit e8ca9a3756
2 changed files with 24 additions and 3 deletions

View File

@ -56,6 +56,14 @@ IF(CMAKE_SYSTEM_NAME MATCHES "Linux")
ELSE (AVX2_TRUE)
set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
ENDIF (AVX2_TRUE)
# Probe the CPUINFO text for an "avx512" token and publish the result as the
# AVX512_FOUND cache entry.
# CPUINFO is expanded inside quotes so that an empty value is still passed as
# the input argument (unquoted, it would vanish and REGEX REPLACE would fail
# for lack of arguments) and so that embedded semicolons are not split into
# separate inputs.
# When the pattern does not match, REGEX REPLACE returns its input unchanged,
# which makes the equality test below false.
STRING(REGEX REPLACE "^.*(avx512).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(COMPARE EQUAL "avx512" "${SSE_THERE}" AVX512_TRUE)
IF (AVX512_TRUE)
  set(AVX512_FOUND true CACHE BOOL "AVX512 available on host")
ELSE (AVX512_TRUE)
  set(AVX512_FOUND false CACHE BOOL "AVX512 available on host")
ENDIF (AVX512_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
EXEC_PROGRAM("/usr/sbin/sysctl -n machdep.cpu.features" OUTPUT_VARIABLE
@ -108,6 +116,14 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Darwin")
ELSE (AVX2_TRUE)
set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
ENDIF (AVX2_TRUE)
# Probe the CPUINFO text for an AVX-512 token and publish the result as the
# AVX512_FOUND cache entry.
# The pattern accepts either letter case: macOS sysctl prints feature names in
# upper case (e.g. AVX512F), so a lower-case-only pattern can never match here.
# The captured token is lower-cased before the comparison.
# NOTE(review): AVX-512 flags are normally listed under
# machdep.cpu.leaf7_features; confirm CPUINFO includes that output.
# CPUINFO is expanded inside quotes so that an empty value is still passed as
# the input argument and embedded semicolons are not split into several inputs.
STRING(REGEX REPLACE "^.*([Aa][Vv][Xx]512).*$" "\\1" SSE_THERE "${CPUINFO}")
STRING(TOLOWER "${SSE_THERE}" SSE_THERE)
STRING(COMPARE EQUAL "avx512" "${SSE_THERE}" AVX512_TRUE)
IF (AVX512_TRUE)
  set(AVX512_FOUND true CACHE BOOL "AVX512 available on host")
ELSE (AVX512_TRUE)
  set(AVX512_FOUND false CACHE BOOL "AVX512 available on host")
ENDIF (AVX512_TRUE)
ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows")
# TODO
@ -117,6 +133,7 @@ ELSEIF(CMAKE_SYSTEM_NAME MATCHES "Windows")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
set(AVX_FOUND false CACHE BOOL "AVX available on host")
set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
set(AVX512_FOUND false CACHE BOOL "AVX512 available on host")
ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux")
set(SSE2_FOUND true CACHE BOOL "SSE2 available on host")
set(SSE3_FOUND false CACHE BOOL "SSE3 available on host")
@ -124,6 +141,7 @@ ELSE(CMAKE_SYSTEM_NAME MATCHES "Linux")
set(SSE4_1_FOUND false CACHE BOOL "SSE4.1 available on host")
set(AVX_FOUND false CACHE BOOL "AVX available on host")
set(AVX2_FOUND false CACHE BOOL "AVX2 available on host")
set(AVX512_FOUND false CACHE BOOL "AVX512 available on host")
ENDIF(CMAKE_SYSTEM_NAME MATCHES "Linux")
if(NOT SSE2_FOUND)
@ -144,5 +162,8 @@ endif(NOT AVX_FOUND)
# Emit a status line for each AVX extension whose *_FOUND flag is false.
if(NOT AVX2_FOUND)
  message(STATUS "Could not find hardware support for AVX2 on this machine.")
endif()
if(NOT AVX512_FOUND)
  message(STATUS "Could not find hardware support for AVX512 on this machine.")
endif()
mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND, AVX_FOUND, AVX2_FOUND)
# Mark the detection results as advanced cache entries.  CMake separates
# arguments by whitespace only; a trailing comma would become part of the
# variable name, so the names are listed without commas.
mark_as_advanced(SSE2_FOUND SSE3_FOUND SSSE3_FOUND SSE4_1_FOUND AVX_FOUND AVX2_FOUND AVX512_FOUND)

View File

@ -191,7 +191,7 @@ void Transpose0213(Tensor out, Tensor in) {
}
template <bool add>
void Transposexxx3(Tensor out, Tensor in, const std::vector<int>& vAxis) {
void TransposeFirst3In4(Tensor out, Tensor in, const std::vector<int>& vAxis) {
#if MKL_FOUND
int innermost = in->shape()[-1];
@ -323,7 +323,7 @@ void TransposeND(Tensor out, Tensor in, const std::vector<int>& vAxis) {
Transpose0213<false>(out, in);
#if MKL_FOUND
else if(vAxis.size() == 4 && vAxis[3] == 3)
Transposexxx3<false>(out, in, vAxis);
TransposeFirst3In4<false>(out, in, vAxis);
#endif // MKL_FOUND
else if(vAxis == std::vector<int>({1, 0}) && in->shape()[-1] % 16 == 0
&& in->shape()[-2] % 16 == 0)