Fix compilation on CPUs that don't support AVX and some white space issues (#561)

* Fix compilation on CPUs that don't support AVX and some white space issues
* Change from ifdef to ifndef
This commit is contained in:
Nikolay Bogoychev 2019-12-05 16:12:17 +00:00 committed by Marcin Junczys-Dowmunt
parent 6b3c8f5a8c
commit 34e99da49b
6 changed files with 30 additions and 12 deletions

View File

@ -117,6 +117,8 @@ else(MSVC)
message(STATUS "AVX support found")
set(INTRINSICS "${INTRINSICS} -mavx")
list(APPEND INTRINSICS_NVCC -Xcompiler\ -mavx)
else()
add_definitions(-DNO_AVX)
endif(AVX_FOUND)
if(AVX2_FOUND)
message(STATUS "AVX2 support found")

View File

@ -31,7 +31,7 @@
#include <cuda.h> // required to see CUDA_VERSION
#if (CUDA_VERSION > 9000)
#define COMPILE_FP16 1
#else
#else
#define COMPILE_FP16 0
#endif
#else
@ -174,6 +174,7 @@ public:
};
// @TODO: consider how code can be shared via templating
#ifndef NO_AVX
struct float32x8 {
private:
__m256 f_;
@ -199,6 +200,11 @@ public:
return out;
}
};
#else
// Dummy (empty) stand-in for float32x8, selected by the #else branch of the
// `#ifndef NO_AVX` guard above, i.e. when NO_AVX is defined. It only keeps the
// type name declared so that code mentioning float32x8 still compiles on older
// CPUs; it has no members and provides no operations.
// NOTE(review): presumably any code that actually uses float32x8 values must
// itself be guarded by `#ifndef NO_AVX` -- confirm at the call sites.
struct float32x8 {
};
#endif
#endif
// Internal to types.h, don't use. Use test functions below.
@ -207,7 +213,7 @@ enum class TypeClass : size_t {
unsigned_type = 0x200,
float_type = 0x400,
packed_type = 0x800, // special packed (CPU cache friendly) type class, used in FBGEMM, not meant to be used anywhere else
size_mask = 0x0FF
};

View File

@ -319,7 +319,7 @@ struct Ops<float32x4> {
} // end namespace functional
} // end namespace marian
#ifndef NO_AVX
#include "3rd_party/avx_mathfun.h"
namespace marian {
@ -438,7 +438,7 @@ struct Ops<float32x8> {
} // end namespace functional
} // end namespace marian
#endif
#endif // of "#ifndef __CUDACC__"
#ifdef __CUDACC__
@ -600,4 +600,4 @@ BINARY(sPReLU, PReLU, Ops<ElementType>::prelu(x, y));
BINARY(sPReLUBack, PReLUback, Ops<ElementType>::preluBack(x, y));
} // end namespace functional
} // end namespace marian
} // end namespace marian

View File

@ -7,8 +7,8 @@
namespace marian {
namespace functional {
// By default for single valued types like float do nothing. Usually the number of elements in a tensor
// is correctly mirrored in the shape object. Only special multi-element types like float32x4 (4 floats),
// By default for single valued types like float do nothing. Usually the number of elements in a tensor
// is correctly mirrored in the shape object. Only special multi-element types like float32x4 (4 floats),
// float32x8 (8 floats) and half2 (2 half) require special handling done by specializations below.
// Similar for multi-element integer types to be added later.
template <typename T>
@ -31,7 +31,7 @@ inline marian::Shape adapt<float32x4>(const marian::Shape& shape) {
x4Shape.set(-1, shape[-1] / 4);
return x4Shape;
}
#ifndef NO_AVX
template <>
inline marian::Shape adapt<float32x8>(const marian::Shape& shape) {
ABORT_IF(shape[-1] % 8 != 0,
@ -42,7 +42,7 @@ inline marian::Shape adapt<float32x8>(const marian::Shape& shape) {
x8Shape.set(-1, shape[-1] / 8);
return x8Shape;
}
#endif
#endif
template <typename T, const int D>
@ -211,4 +211,4 @@ template <typename T>
using Tensor = View<T, CONST_SHAPE_DIMS>;
} // namespace functional
} // namespace marian
} // namespace marian

View File

@ -99,8 +99,10 @@ void elementFloat(const Functor& functor, marian::Tensor out, Tensors... tensors
if(div8) {
// std::cerr << "8: " << functor.to_string() << std::endl;
#ifndef NO_AVX
element<float32x8>(functor, out, tensors...);
return;
#endif
}
if(div4) {

View File

@ -425,9 +425,13 @@ void Softmax(Tensor out, Tensor in) {
matchOrAbort<float>(out->type());
matchOrAbort<float>(in->type());
#ifndef NO_AVX
if(out->shape()[-1] % 8 == 0) {
Softmax<float32x8>(out, in);
} else if(out->shape()[-1] % 4 == 0) {
return;
}
#endif
if(out->shape()[-1] % 4 == 0) {
Softmax<float32x4>(out, in);
} else {
Softmax<float>(out, in);
@ -477,9 +481,13 @@ void LogSoftmax(Tensor out, Tensor in) {
matchOrAbort<float>(out->type());
matchOrAbort<float>(in->type());
#ifndef NO_AVX
if(out->shape()[-1] % 8 == 0) {
LogSoftmax<float32x8>(out, in);
} else if(out->shape()[-1] % 4 == 0) {
return;
}
#endif
if(out->shape()[-1] % 4 == 0) {
LogSoftmax<float32x4>(out, in);
} else {
LogSoftmax<float>(out, in);