mirror of
https://github.com/marian-nmt/marian.git
synced 2024-10-26 09:09:10 +03:00
sorted out AVX2 build problems on Windows
This commit is contained in:
parent
446cff8dcf
commit
e40491f44a
@ -1 +1 @@
|
||||
Subproject commit 71b0506a50e1f181e269fe90073a8ae470676dd1
|
||||
Subproject commit 658cf86e2ab4aa76b4ab3edabd750b93d2416f1a
|
21
src/3rd_party/avx_mathfun.h
vendored
Normal file → Executable file
21
src/3rd_party/avx_mathfun.h
vendored
Normal file → Executable file
@ -32,8 +32,13 @@
|
||||
#include <immintrin.h>
|
||||
|
||||
/* yes I know, the top of this file is quite ugly */
|
||||
#ifdef _MSC_VER
|
||||
# define ALIGN32_BEG __declspec(align(32))
|
||||
# define ALIGN32_END
|
||||
#else /* gcc or icc */
|
||||
# define ALIGN32_BEG
|
||||
# define ALIGN32_END __attribute__((aligned(32)))
|
||||
#endif
|
||||
|
||||
/* __m128 is ugly to write */
|
||||
typedef __m256 v8sf; // vector of 8 float (avx)
|
||||
@ -94,17 +99,17 @@ typedef union imm_xmm_union {
|
||||
v4si xmm[2];
|
||||
} imm_xmm_union;
|
||||
|
||||
#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \
|
||||
imm_xmm_union u __attribute__((aligned(32))); \
|
||||
u.imm = imm_; \
|
||||
xmm0_ = u.xmm[0]; \
|
||||
xmm1_ = u.xmm[1]; \
|
||||
#define COPY_IMM_TO_XMM(imm_, xmm0_, xmm1_) { \
|
||||
ALIGN32_BEG imm_xmm_union u ALIGN32_END; \
|
||||
u.imm = imm_; \
|
||||
xmm0_ = u.xmm[0]; \
|
||||
xmm1_ = u.xmm[1]; \
|
||||
}
|
||||
|
||||
#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \
|
||||
imm_xmm_union u __attribute__((aligned(32))); \
|
||||
#define COPY_XMM_TO_IMM(xmm0_, xmm1_, imm_) { \
|
||||
ALIGN32_BEG imm_xmm_union u ALIGN32_END; \
|
||||
u.xmm[0]=xmm0_; u.xmm[1]=xmm1_; imm_ = u.imm; \
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#define AVX2_BITOP_USING_SSE2(fn) \
|
||||
|
8
src/3rd_party/half_float/umHalf.h
vendored
Normal file → Executable file
8
src/3rd_party/half_float/umHalf.h
vendored
Normal file → Executable file
@ -44,11 +44,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include "stdint.h"
|
||||
#else
|
||||
//#ifdef _MSC_VER
|
||||
//#include "stdint.h"
|
||||
//#else
|
||||
#include <stdint.h>
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
#undef min
|
||||
#undef max
|
||||
|
2
src/3rd_party/nccl
vendored
2
src/3rd_party/nccl
vendored
@ -1 +1 @@
|
||||
Subproject commit 8e3a3f7c5b520babff49cec54a866fa3eda3a3b6
|
||||
Subproject commit d6297d250433715c283d17f1969cfcb50d2b6531
|
@ -42,7 +42,7 @@
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
<IntDir>$(SolutionDir)$(Platform)\$(Configuration)\Marian\</IntDir>
|
||||
<IncludePath>%CUDA_PATH%\include;..\src;..\src\3rd_party;%BOOST_INCLUDE_PATH%;%ZLIB_PATH%\include;%MKL_PATH%\include;$(VC_IncludePath);$(WindowsSDK_IncludePath);</IncludePath>
|
||||
<IncludePath>..\src\3rd_party\fbgemm\third_party\cpuinfo\deps\clog\include;..\src\3rd_party\fbgemm\third_party\cpuinfo\src;..\src\3rd_party\fbgemm\third_party\cpuinfo\include;..\src\3rd_party\fbgemm\third_party\asmjit\src;%MKL_PATH%\include;..\src\3rd_party\fbgemm\include;%CUDA_PATH%\include;..\src;..\src\3rd_party;%BOOST_INCLUDE_PATH%;%ZLIB_PATH%\include;$(VC_IncludePath);$(WindowsSDK_IncludePath);</IncludePath>
|
||||
<LibraryPath>%CUDA_PATH%\lib\x64;%BOOST_LIB_PATH%;%ZLIB_PATH%\lib;%MKL_PATH%\lib\intel64;$(VC_LibraryPath_x64);$(WindowsSDK_LibraryPath_x64);$(NETFXKitsDir)Lib\um\x64</LibraryPath>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
@ -69,7 +69,7 @@
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level4</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>FBGEMM_EXPORTS;USE_FBGEMM=1;ASMJIT_VARAPI;CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>FBGEMM_EXPORTS; USE_FBGEMM=1; ASMJIT_VARAPI; USE_SSE2=1; CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;_DEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<SDLCheck>false</SDLCheck>
|
||||
<TreatWarningAsError>true</TreatWarningAsError>
|
||||
<AdditionalOptions>/bigobj %(AdditionalOptions) /arch:AVX2</AdditionalOptions>
|
||||
@ -105,7 +105,7 @@
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>FBGEMM_EXPORTS;USE_FBGEMM=1;CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>FBGEMM_EXPORTS; USE_FBGEMM=1; USE_SSE2=1; CUDA_FOUND=1; MKL_FOUND=1; MPI_FOUND=1; BLAS_FOUND=1; MKL_ILP64; WIN32;NDEBUG;_CONSOLE;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<SDLCheck>false</SDLCheck>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<AdditionalOptions>/d2Zi+ /bigobj %(AdditionalOptions) /arch:AVX2</AdditionalOptions>
|
||||
@ -426,6 +426,7 @@
|
||||
<TreatWarningAsError Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</TreatWarningAsError>
|
||||
<TreatWarningAsError Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</TreatWarningAsError>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\3rd_party\half_float\HalfPrecisionFloatTest.cpp" />
|
||||
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\entry_iterator.cpp" />
|
||||
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\errors.cpp" />
|
||||
<ClCompile Include="..\src\3rd_party\pathie-cpp\src\path.cpp" />
|
||||
@ -635,6 +636,8 @@
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\3rd_party\yaml-cpp\binary_renamed.cpp" />
|
||||
<ClCompile Include="..\src\3rd_party\yaml-cpp\yaml-node.cpp" />
|
||||
<ClInclude Include="..\src\3rd_party\any_type.h" />
|
||||
<ClInclude Include="..\src\3rd_party\avx_mathfun.h" />
|
||||
<ClInclude Include="..\src\3rd_party\ExceptionWithCallStack.h" />
|
||||
<ClInclude Include="..\src\3rd_party\fbgemm\include\fbgemm\ConvUtils.h" />
|
||||
<ClInclude Include="..\src\3rd_party\fbgemm\include\fbgemm\Fbgemm.h" />
|
||||
@ -709,6 +712,8 @@
|
||||
<ClInclude Include="..\src\3rd_party\fbgemm\third_party\cpuinfo\src\x86\api.h" />
|
||||
<ClInclude Include="..\src\3rd_party\fbgemm\third_party\cpuinfo\src\x86\cpuid.h" />
|
||||
<ClInclude Include="..\src\3rd_party\fbgemm\third_party\cpuinfo\src\x86\windows\api.h" />
|
||||
<ClInclude Include="..\src\3rd_party\half_float\stdint.h" />
|
||||
<ClInclude Include="..\src\3rd_party\half_float\umHalf.h" />
|
||||
<ClInclude Include="..\src\3rd_party\nccl\src\collectives\collectives.h">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
@ -906,6 +911,7 @@
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\3rd_party\sse_mathfun.h" />
|
||||
<ClInclude Include="..\src\command\marian_decoder.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
@ -1100,6 +1106,8 @@
|
||||
<ClInclude Include="..\src\examples\mnist\model_lenet.h" />
|
||||
<ClInclude Include="..\src\examples\mnist\training.h" />
|
||||
<ClInclude Include="..\src\examples\mnist\validator.h" />
|
||||
<ClInclude Include="..\src\functional\approx.h" />
|
||||
<ClInclude Include="..\src\functional\operators.h" />
|
||||
<ClInclude Include="..\src\layers\loss.h" />
|
||||
<ClInclude Include="..\src\layers\weight.h" />
|
||||
<ClInclude Include="..\src\marian.h" />
|
||||
@ -1369,6 +1377,8 @@
|
||||
<ClInclude Include="..\src\training\communicator_nccl.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\src\3rd_party\half_float\Readme.md" />
|
||||
<None Include="..\src\3rd_party\half_float\umHalf.inl" />
|
||||
<None Include="..\src\3rd_party\nccl\src\bootstrap.cu">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</None>
|
||||
|
@ -712,6 +712,9 @@
|
||||
<ClCompile Include="..\src\common\aliases.cpp">
|
||||
<Filter>common</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\3rd_party\half_float\HalfPrecisionFloatTest.cpp">
|
||||
<Filter>3rd_party\half_float</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\src\marian.h" />
|
||||
@ -2002,6 +2005,27 @@
|
||||
<ClInclude Include="..\src\3rd_party\fbgemm\third_party\cpuinfo\deps\clog\include\clog.h">
|
||||
<Filter>3rd_party\fbgemm\third_party\cpuinfo\deps\clog\include</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\3rd_party\half_float\stdint.h">
|
||||
<Filter>3rd_party\half_float</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\3rd_party\half_float\umHalf.h">
|
||||
<Filter>3rd_party\half_float</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\3rd_party\any_type.h">
|
||||
<Filter>3rd_party</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\3rd_party\avx_mathfun.h">
|
||||
<Filter>3rd_party</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\3rd_party\sse_mathfun.h">
|
||||
<Filter>3rd_party</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\functional\approx.h">
|
||||
<Filter>functional</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\functional\operators.h">
|
||||
<Filter>functional</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Filter Include="3rd_party">
|
||||
@ -2232,6 +2256,9 @@
|
||||
<Filter Include="3rd_party\fbgemm\third_party\cpuinfo\deps\clog\src">
|
||||
<UniqueIdentifier>{8fd74b1e-d3c1-4158-ad46-4a447222934e}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="3rd_party\half_float">
|
||||
<UniqueIdentifier>{defd3aec-3c56-4d70-a4bb-90ba9003d98d}</UniqueIdentifier>
|
||||
</Filter>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\src\3rd_party\nccl\src\bootstrap.cu">
|
||||
@ -2354,6 +2381,12 @@
|
||||
<None Include="..\src\examples\README.md">
|
||||
<Filter>examples</Filter>
|
||||
</None>
|
||||
<None Include="..\src\3rd_party\half_float\Readme.md">
|
||||
<Filter>3rd_party\half_float</Filter>
|
||||
</None>
|
||||
<None Include="..\src\3rd_party\half_float\umHalf.inl">
|
||||
<Filter>3rd_party\half_float</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<Text Include="..\src\3rd_party\sentencepiece\src\CMakeLists.txt">
|
||||
|
Loading…
Reference in New Issue
Block a user