mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
Fix compilation on CPUs that don't support AVX and some white space issues (#561)
* Fix compilation on CPUs that don't support AVX and some white space issues
* Change from ifdef to ifndef
This commit is contained in:
parent
6b3c8f5a8c
commit
34e99da49b
@ -117,6 +117,8 @@ else(MSVC)
|
||||
message(STATUS "AVX support found")
|
||||
set(INTRINSICS "${INTRINSICS} -mavx")
|
||||
list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx)
|
||||
else()
|
||||
add_definitions(-DNO_AVX)
|
||||
endif(AVX_FOUND)
|
||||
if(AVX2_FOUND)
|
||||
message(STATUS "AVX2 support found")
|
||||
|
@ -31,7 +31,7 @@
|
||||
#include <cuda.h> // required to see CUDA_VERSION
|
||||
#if (CUDA_VERSION > 9000)
|
||||
#define COMPILE_FP16 1
|
||||
#else
|
||||
#else
|
||||
#define COMPILE_FP16 0
|
||||
#endif
|
||||
#else
|
||||
@ -174,6 +174,7 @@ public:
|
||||
};
|
||||
|
||||
// @TODO: consider how code can be shared via templating
|
||||
#ifndef NO_AVX
|
||||
struct float32x8 {
|
||||
private:
|
||||
__m256 f_;
|
||||
@ -199,6 +200,11 @@ public:
|
||||
return out;
|
||||
}
|
||||
};
|
||||
#else
|
||||
// Dummy version to get things to compile on older CPUs: this is the #else
// branch of the NO_AVX guard, so float32x8 is only an empty placeholder here
// (no members, no operations) that lets code naming the type still compile.
|
||||
struct float32x8 {
|
||||
};
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Internal to types.h, don't use. Use test functions below.
|
||||
@ -207,7 +213,7 @@ enum class TypeClass : size_t {
|
||||
unsigned_type = 0x200,
|
||||
float_type = 0x400,
|
||||
packed_type = 0x800, // special packed (CPU cache friendly) type class, used in FBGEMM, not meant to be used anywhere else
|
||||
|
||||
|
||||
size_mask = 0x0FF
|
||||
};
|
||||
|
||||
|
@ -319,7 +319,7 @@ struct Ops<float32x4> {
|
||||
|
||||
} // end namespace functional
|
||||
} // end namespace marian
|
||||
|
||||
#ifndef NO_AVX
|
||||
#include "3rd_party/avx_mathfun.h"
|
||||
|
||||
namespace marian {
|
||||
@ -438,7 +438,7 @@ struct Ops<float32x8> {
|
||||
|
||||
} // end namespace functional
|
||||
} // end namespace marian
|
||||
|
||||
#endif
|
||||
#endif // of "#ifndef __CUDACC__"
|
||||
|
||||
#ifdef __CUDACC__
|
||||
@ -600,4 +600,4 @@ BINARY(sPReLU, PReLU, Ops<ElementType>::prelu(x, y));
|
||||
BINARY(sPReLUBack, PReLUback, Ops<ElementType>::preluBack(x, y));
|
||||
|
||||
} // end namespace functional
|
||||
} // end namespace marian
|
||||
} // end namespace marian
|
||||
|
@ -7,8 +7,8 @@
|
||||
namespace marian {
|
||||
namespace functional {
|
||||
|
||||
// By default for single valued types like float do nothing. Usually the number of elements in a tensor
|
||||
// is correctly mirrored in the shape object. Only special multi-element types like float32x4 (4 floats),
|
||||
// By default for single valued types like float do nothing. Usually the number of elements in a tensor
|
||||
// is correctly mirrored in the shape object. Only special multi-element types like float32x4 (4 floats),
|
||||
// float32x8 (8 floats) and half2 (2 half) require special handling done by specializations below.
|
||||
// Similar for multi-element integer types to be added later.
|
||||
template <typename T>
|
||||
@ -31,7 +31,7 @@ inline marian::Shape adapt<float32x4>(const marian::Shape& shape) {
|
||||
x4Shape.set(-1, shape[-1] / 4);
|
||||
return x4Shape;
|
||||
}
|
||||
|
||||
#ifndef NO_AVX
|
||||
template <>
|
||||
inline marian::Shape adapt<float32x8>(const marian::Shape& shape) {
|
||||
ABORT_IF(shape[-1] % 8 != 0,
|
||||
@ -42,7 +42,7 @@ inline marian::Shape adapt<float32x8>(const marian::Shape& shape) {
|
||||
x8Shape.set(-1, shape[-1] / 8);
|
||||
return x8Shape;
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <typename T, const int D>
|
||||
@ -211,4 +211,4 @@ template <typename T>
|
||||
using Tensor = View<T, CONST_SHAPE_DIMS>;
|
||||
|
||||
} // namespace functional
|
||||
} // namespace marian
|
||||
} // namespace marian
|
||||
|
@ -99,8 +99,10 @@ void elementFloat(const Functor& functor, marian::Tensor out, Tensors... tensors
|
||||
|
||||
if(div8) {
|
||||
// std::cerr << "8: " << functor.to_string() << std::endl;
|
||||
#ifndef NO_AVX
|
||||
element<float32x8>(functor, out, tensors...);
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
|
||||
if(div4) {
|
||||
|
@ -425,9 +425,13 @@ void Softmax(Tensor out, Tensor in) {
|
||||
matchOrAbort<float>(out->type());
|
||||
matchOrAbort<float>(in->type());
|
||||
|
||||
#ifndef NO_AVX
|
||||
if(out->shape()[-1] % 8 == 0) {
|
||||
Softmax<float32x8>(out, in);
|
||||
} else if(out->shape()[-1] % 4 == 0) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if(out->shape()[-1] % 4 == 0) {
|
||||
Softmax<float32x4>(out, in);
|
||||
} else {
|
||||
Softmax<float>(out, in);
|
||||
@ -477,9 +481,13 @@ void LogSoftmax(Tensor out, Tensor in) {
|
||||
matchOrAbort<float>(out->type());
|
||||
matchOrAbort<float>(in->type());
|
||||
|
||||
#ifndef NO_AVX
|
||||
if(out->shape()[-1] % 8 == 0) {
|
||||
LogSoftmax<float32x8>(out, in);
|
||||
} else if(out->shape()[-1] % 4 == 0) {
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
if(out->shape()[-1] % 4 == 0) {
|
||||
LogSoftmax<float32x4>(out, in);
|
||||
} else {
|
||||
LogSoftmax<float>(out, in);
|
||||
|
Loading…
Reference in New Issue
Block a user