sorted out AVX2 build problems on Windows

This commit is contained in:
Frank Seide 2019-09-11 14:38:22 -07:00
parent 446cff8dcf
commit e40491f44a
6 changed files with 65 additions and 17 deletions

@ -1 +1 @@
Subproject commit 71b0506a50e1f181e269fe90073a8ae470676dd1
Subproject commit 658cf86e2ab4aa76b4ab3edabd750b93d2416f1a

21
src/3rd_party/avx_mathfun.h vendored Normal file → Executable file
View File

@ -32,8 +32,13 @@
#include <immintrin.h>
/* yes I know, the top of this file is quite ugly */
/* ALIGN32_BEG / ALIGN32_END bracket a declaration to request 32-byte
 * alignment portably: write `ALIGN32_BEG type name ALIGN32_END;`.
 * On each compiler exactly one of the two carries the alignment specifier
 * and the other expands to nothing. */
#ifdef _MSC_VER
/* MSVC: the specifier is a prefix placed before the type. */
# define ALIGN32_BEG __declspec(align(32))
# define ALIGN32_END
#else /* gcc or icc */
/* GCC-style: the specifier is an attribute placed after the declarator. */
# define ALIGN32_BEG
# define ALIGN32_END __attribute__((aligned(32)))
#endif
/* __m256 is ugly to write */
typedef __m256 v8sf; // vector of 8 float (avx)
@ -94,17 +99,17 @@ typedef union imm_xmm_union {
v4si xmm[2];
} imm_xmm_union;
#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \
imm_xmm_union u __attribute__((aligned(32))); \
u.imm = imm_; \
xmm0_ = u.xmm[0]; \
xmm1_ = u.xmm[1]; \
#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \
ALIGN32_BEG imm_xmm_union u ALIGN32_END; \
u.imm = imm_; \
xmm0_ = u.xmm[0]; \
xmm1_ = u.xmm[1]; \
}
#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \
imm_xmm_union u __attribute__((aligned(32))); \
#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \
ALIGN32_BEG imm_xmm_union u ALIGN32_END; \
u.xmm[0]=xmm0_; u.xmm[1]=xmm1_; imm_ = u.imm; \
}
}
#define AVX2_BITOP_USING_SSE2(fn) \

8
src/3rd_party/half_float/umHalf.h vendored Normal file → Executable file
View File

@ -44,11 +44,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <limits>
#include <algorithm>
#ifdef _MSC_VER
#include "stdint.h"
#else
//#ifdef _MSC_VER
//#include "stdint.h"
//#else
#include <stdint.h>
#endif
//#endif
#undef min
#undef max

2
src/3rd_party/nccl vendored

@ -1 +1 @@
Subproject commit 8e3a3f7c5b520babff49cec54a866fa3eda3a3b6
Subproject commit d6297d250433715c283d17f1969cfcb50d2b6531

View File

@ -42,7 +42,7 @@
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental>
<IntDir>$(SolutionDir)$(Platform)\$(Configuration)\Marian\</IntDir>
<IncludePath>%CUDA_PATH%\include;..\src;..\src\3rd_party;%BOOST_INCLUDE_PATH%;%ZLIB_PATH%\include;%MKL_PATH%\include;$(VC_IncludePath);$(WindowsSDK_IncludePath);</IncludePath>
<IncludePath>..\src\3rd_party\fbgemm\third_party\cpuinfo\deps\clog\include;..\src\3rd_party\fbgemm\third_party\cpuinfo\src;..\src\3rd_party\fbgemm\third_party\cpuinfo\include;..\src\3rd_party\fbgemm\third_party\asmjit\src;%MKL_PATH%\include;..\src\3rd_party\fbgemm\include;%CUDA_PATH%\include;..\src;..\src\3rd_party;%BOOST_INCLUDE_PATH%;%ZLIB_PATH%\include;$(VC_IncludePath);$(WindowsSDK_IncludePath);</IncludePath>
<LibraryPath>%CUDA_PATH%\lib\x64;%BOOST_LIB_PATH%;%ZLIB_PATH%\lib;%MKL_PATH%\lib\intel64;$(VC_LibraryPath_x64);$(WindowsSDK_LibraryPath_x64);$(NETFXKitsDir)Lib\um\x64</LibraryPath>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
@ -69,7 +69,7 @@
</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>FBGEMM_EXPORTS;USE_FBGEMM=1;ASMJIT_VARAPI;CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>FBGEMM_EXPORTS; USE_FBGEMM=1; ASMJIT_VARAPI; USE_SSE2=1; CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>false</SDLCheck>
<TreatWarningAsError>true</TreatWarningAsError>
<AdditionalOptions>/bigobj %(AdditionalOptions) /arch:AVX2</AdditionalOptions>
@ -105,7 +105,7 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>FBGEMM_EXPORTS;USE_FBGEMM=1;CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>FBGEMM_EXPORTS; USE_FBGEMM=1; USE_SSE2=1; CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<SDLCheck>false</SDLCheck>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalOptions>/d2Zi+ /bigobj %(AdditionalOptions) /arch:AVX2</AdditionalOptions>
@ -426,6 +426,7 @@
<TreatWarningAsError Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</TreatWarningAsError>
<TreatWarningAsError Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</TreatWarningAsError>
</ClCompile>
<ClCompile Include="..\src\3rd_party\half_float\HalfPrecisionFloatTest.cpp" />
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\entry_iterator.cpp" />
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\errors.cpp" />
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\path.cpp" />
@ -635,6 +636,8 @@
</ClCompile>
<ClCompile Include="..\src\3rd_party\yaml-cpp\binary_renamed.cpp" />
<ClCompile Include="..\src\3rd_party\yaml-cpp\yaml-node.cpp" />
<ClInclude Include="..\src\3rd_party\any_type.h" />
<ClInclude Include="..\src\3rd_party\avx_mathfun.h" />
<ClInclude Include="..\src\3rd_party\ExceptionWithCallStack.h" />
<ClInclude Include="..\src\3rd_party\fbgemm\include\fbgemm\ConvUtils.h" />
<ClInclude Include="..\src\3rd_party\fbgemm\include\fbgemm\Fbgemm.h" />
@ -709,6 +712,8 @@
<ClInclude Include="..\src\3rd_party\fbgemm\third_party\cpuinfo\src\x86\api.h" />
<ClInclude Include="..\src\3rd_party\fbgemm\third_party\cpuinfo\src\x86\cpuid.h" />
<ClInclude Include="..\src\3rd_party\fbgemm\third_party\cpuinfo\src\x86\windows\api.h" />
<ClInclude Include="..\src\3rd_party\half_float\stdint.h" />
<ClInclude Include="..\src\3rd_party\half_float\umHalf.h" />
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\collectives.h">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
@ -906,6 +911,7 @@
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</ClInclude>
<ClInclude Include="..\src\3rd_party\sse_mathfun.h" />
<ClInclude Include="..\src\command\marian_decoder.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
@ -1100,6 +1106,8 @@
<ClInclude Include="..\src\examples\mnist\model_lenet.h" />
<ClInclude Include="..\src\examples\mnist\training.h" />
<ClInclude Include="..\src\examples\mnist\validator.h" />
<ClInclude Include="..\src\functional\approx.h" />
<ClInclude Include="..\src\functional\operators.h" />
<ClInclude Include="..\src\layers\loss.h" />
<ClInclude Include="..\src\layers\weight.h" />
<ClInclude Include="..\src\marian.h" />
@ -1369,6 +1377,8 @@
<ClInclude Include="..\src\training\communicator_nccl.h" />
</ItemGroup>
<ItemGroup>
<None Include="..\src\3rd_party\half_float\Readme.md" />
<None Include="..\src\3rd_party\half_float\umHalf.inl" />
<None Include="..\src\3rd_party\nccl\src\bootstrap.cu">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
</None>

View File

@ -712,6 +712,9 @@
<ClCompile Include="..\src\common\aliases.cpp">
<Filter>common</Filter>
</ClCompile>
<ClCompile Include="..\src\3rd_party\half_float\HalfPrecisionFloatTest.cpp">
<Filter>3rd_party\half_float</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="..\src\marian.h" />
@ -2002,6 +2005,27 @@
<ClInclude Include="..\src\3rd_party\fbgemm\third_party\cpuinfo\deps\clog\include\clog.h">
<Filter>3rd_party\fbgemm\third_party\cpuinfo\deps\clog\include</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\half_float\stdint.h">
<Filter>3rd_party\half_float</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\half_float\umHalf.h">
<Filter>3rd_party\half_float</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\any_type.h">
<Filter>3rd_party</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\avx_mathfun.h">
<Filter>3rd_party</Filter>
</ClInclude>
<ClInclude Include="..\src\3rd_party\sse_mathfun.h">
<Filter>3rd_party</Filter>
</ClInclude>
<ClInclude Include="..\src\functional\approx.h">
<Filter>functional</Filter>
</ClInclude>
<ClInclude Include="..\src\functional\operators.h">
<Filter>functional</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<Filter Include="3rd_party">
@ -2232,6 +2256,9 @@
<Filter Include="3rd_party\fbgemm\third_party\cpuinfo\deps\clog\src">
<UniqueIdentifier>{8fd74b1e-d3c1-4158-ad46-4a447222934e}</UniqueIdentifier>
</Filter>
<Filter Include="3rd_party\half_float">
<UniqueIdentifier>{defd3aec-3c56-4d70-a4bb-90ba9003d98d}</UniqueIdentifier>
</Filter>
</ItemGroup>
<ItemGroup>
<None Include="..\src\3rd_party\nccl\src\bootstrap.cu">
@ -2354,6 +2381,12 @@
<None Include="..\src\examples\README.md">
<Filter>examples</Filter>
</None>
<None Include="..\src\3rd_party\half_float\Readme.md">
<Filter>3rd_party\half_float</Filter>
</None>
<None Include="..\src\3rd_party\half_float\umHalf.inl">
<Filter>3rd_party\half_float</Filter>
</None>
</ItemGroup>
<ItemGroup>
<Text Include="..\src\3rd_party\sentencepiece\src\CMakeLists.txt">