diff --git a/regression-tests b/regression-tests index 71b0506a..658cf86e 160000 --- a/regression-tests +++ b/regression-tests @@ -1 +1 @@ -Subproject commit 71b0506a50e1f181e269fe90073a8ae470676dd1 +Subproject commit 658cf86e2ab4aa76b4ab3edabd750b93d2416f1a diff --git a/src/3rd_party/avx_mathfun.h b/src/3rd_party/avx_mathfun.h old mode 100644 new mode 100755 index 26b3132a..9d1403b4 --- a/src/3rd_party/avx_mathfun.h +++ b/src/3rd_party/avx_mathfun.h @@ -32,8 +32,13 @@ #include /* yes I know, the top of this file is quite ugly */ +#ifdef _MSC_VER +# define ALIGN32_BEG __declspec(align(32)) +# define ALIGN32_END +#else /* gcc or icc */ # define ALIGN32_BEG # define ALIGN32_END __attribute__((aligned(32))) +#endif /* __m128 is ugly to write */ typedef __m256 v8sf; // vector of 8 float (avx) @@ -94,17 +99,17 @@ typedef union imm_xmm_union { v4si xmm[2]; } imm_xmm_union; -#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \ - imm_xmm_union u __attribute__((aligned(32))); \ - u.imm = imm_; \ - xmm0_ = u.xmm[0]; \ - xmm1_ = u.xmm[1]; \ +#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \ + ALIGN32_BEG imm_xmm_union u ALIGN32_END; \ + u.imm = imm_; \ + xmm0_ = u.xmm[0]; \ + xmm1_ = u.xmm[1]; \ } -#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \ - imm_xmm_union u __attribute__((aligned(32))); \ +#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \ + ALIGN32_BEG imm_xmm_union u ALIGN32_END; \ u.xmm[0]=xmm0_; u.xmm[1]=xmm1_; imm_ = u.imm; \ - } +} #define AVX2_BITOP_USING_SSE2(fn) \ diff --git a/src/3rd_party/half_float/umHalf.h b/src/3rd_party/half_float/umHalf.h old mode 100644 new mode 100755 index 4c065c02..361dbdaa --- a/src/3rd_party/half_float/umHalf.h +++ b/src/3rd_party/half_float/umHalf.h @@ -44,11 +44,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include -#ifdef _MSC_VER -#include "stdint.h" -#else +//#ifdef _MSC_VER +//#include "stdint.h" +//#else #include -#endif +//#endif #undef min #undef max diff --git a/src/3rd_party/nccl b/src/3rd_party/nccl index 8e3a3f7c..d6297d25 160000 --- a/src/3rd_party/nccl +++ b/src/3rd_party/nccl @@ -1 +1 @@ -Subproject commit 8e3a3f7c5b520babff49cec54a866fa3eda3a3b6 +Subproject commit d6297d250433715c283d17f1969cfcb50d2b6531 diff --git a/vs/Marian.vcxproj b/vs/Marian.vcxproj index 0de76f65..91590160 100755 --- a/vs/Marian.vcxproj +++ b/vs/Marian.vcxproj @@ -42,7 +42,7 @@ true $(SolutionDir)$(Platform)\$(Configuration)\Marian\ - %CUDA_PATH%\include;..\src;..\src\3rd_party;%BOOST_INCLUDE_PATH%;%ZLIB_PATH%\include;%MKL_PATH%\include;$(VC_IncludePath);$(WindowsSDK_IncludePath); + ..\src\3rd_party\fbgemm\third_party\cpuinfo\deps\clog\include;..\src\3rd_party\fbgemm\third_party\cpuinfo\src;..\src\3rd_party\fbgemm\third_party\cpuinfo\include;..\src\3rd_party\fbgemm\third_party\asmjit\src;%MKL_PATH%\include;..\src\3rd_party\fbgemm\include;%CUDA_PATH%\include;..\src;..\src\3rd_party;%BOOST_INCLUDE_PATH%;%ZLIB_PATH%\include;$(VC_IncludePath);$(WindowsSDK_IncludePath); %CUDA_PATH%\lib\x64;%BOOST_LIB_PATH%;%ZLIB_PATH%\lib;%MKL_PATH%\lib\intel64;$(VC_LibraryPath_x64);$(WindowsSDK_LibraryPath_x64);$(NETFXKitsDir)Lib\um\x64 @@ -69,7 +69,7 @@ Level4 Disabled - FBGEMM_EXPORTS;USE_FBGEMM=1;ASMJIT_VARAPI;CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + FBGEMM_EXPORTS; USE_FBGEMM=1; ASMJIT_VARAPI; USE_SSE2=1; CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) false true /bigobj %(AdditionalOptions) /arch:AVX2 @@ -105,7 +105,7 @@ MaxSpeed true true - FBGEMM_EXPORTS;USE_FBGEMM=1;CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) + FBGEMM_EXPORTS; USE_FBGEMM=1; USE_SSE2=1; CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions) false Speed /d2Zi+ /bigobj %(AdditionalOptions) /arch:AVX2 @@ -426,6 +426,7 @@ false false + @@ -635,6 +636,8 @@ + + @@ -709,6 +712,8 @@ + + true @@ -906,6 +911,7 @@ true true + true true @@ -1100,6 +1106,8 @@ + + @@ -1369,6 +1377,8 @@ + + true diff --git a/vs/Marian.vcxproj.filters b/vs/Marian.vcxproj.filters index f905636a..5ef68d74 100755 --- a/vs/Marian.vcxproj.filters +++ b/vs/Marian.vcxproj.filters @@ -712,6 +712,9 @@ common + + 3rd_party\half_float + @@ -2002,6 +2005,27 @@ 3rd_party\fbgemm\third_party\cpuinfo\deps\clog\include + + 3rd_party\half_float + + + 3rd_party\half_float + + + 3rd_party + + + 3rd_party + + + 3rd_party + + + functional + + + functional + @@ -2232,6 +2256,9 @@ {8fd74b1e-d3c1-4158-ad46-4a447222934e} + + {defd3aec-3c56-4d70-a4bb-90ba9003d98d} + @@ -2354,6 +2381,12 @@ examples + + 3rd_party\half_float + + + 3rd_party\half_float +