diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e2a40d5..6a1dabf9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Make guided-alignment faster via sparse memory layout, add alignment points for EOS, remove losses other than ce. - Changed minimal C++ standard to C++-17 - Faster LSH top-k search on CPU +- Updated intgemm to the latest upstream version ## [1.11.0] - 2022-02-08 diff --git a/VERSION b/VERSION index 3d461ead..f5f1545d 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -v1.11.3 +v1.11.4 diff --git a/src/3rd_party/intgemm b/src/3rd_party/intgemm index 8abde25b..a05a2e51 160000 --- a/src/3rd_party/intgemm +++ b/src/3rd_party/intgemm @@ -1 +1 @@ -Subproject commit 8abde25b13c3ab210c0dec8e23f4944e3953812d +Subproject commit a05a2e51ab524bcee954a39ee72005193f3adf7c diff --git a/src/tensors/cpu/expression_graph_packable.h b/src/tensors/cpu/expression_graph_packable.h index f5a9cad9..1a233372 100644 --- a/src/tensors/cpu/expression_graph_packable.h +++ b/src/tensors/cpu/expression_graph_packable.h @@ -172,19 +172,19 @@ public: // Hardware-specific conversions which allow to implement memory-mapping and avoid conversion at runtime cpu::integer::passOrAbort(gemmElementType); // Check if the hardware supports the GEMM type if(isSsse3(gemmElementType)) { - intgemm::ssse3::Kernels8::PrepareBTransposed(tmp->data(), /*input*/ + intgemm::SSSE3::Kernels8::PrepareBTransposed(tmp->data(), /*input*/ paramMat->data(), /*output*/ quantMult, /*Quant Mult*/ rows(val), cols(val)); } else if(isAvx2(gemmElementType)) { - intgemm::avx2::Kernels8::PrepareBTransposed(tmp->data(), /*input*/ + intgemm::AVX2::Kernels8::PrepareBTransposed(tmp->data(), /*input*/ paramMat->data(), /*output*/ quantMult, /*Quant Mult*/ rows(val), cols(val)); } else if(isAvx512(gemmElementType)) { - intgemm::avx512bw::Kernels8::PrepareBTransposed(tmp->data(), /*input*/ + intgemm::AVX512BW::Kernels8::PrepareBTransposed(tmp->data(), /*input*/ paramMat->data(), /*output*/ quantMult, /*Quant Mult*/ rows(val), @@ -206,19 +206,19 @@ public: // Hardware-specific conversions which allow to implement memory-mapping and avoid conversion at runtime cpu::integer::passOrAbort(gemmElementType); // Check if the hardware supports the GEMM type if(isSse2(gemmElementType)) { - intgemm::sse2::Kernels16::PrepareBTransposed(tmp->data(), /*input*/ + intgemm::SSE2::Kernels16::PrepareBTransposed(tmp->data(), /*input*/ paramMat->data(), /*output*/ quantMult, /*Quant Mult*/ rows(val), cols(val)); } else if(isAvx2(gemmElementType)) { - intgemm::avx2::Kernels16::PrepareBTransposed(tmp->data(), /*input*/ + intgemm::AVX2::Kernels16::PrepareBTransposed(tmp->data(), /*input*/ paramMat->data(), /*output*/ quantMult, /*Quant Mult*/ rows(val), cols(val)); } else if(isAvx512(gemmElementType)) { - intgemm::avx512bw::Kernels16::PrepareBTransposed(tmp->data(), /*input*/ + intgemm::AVX512BW::Kernels16::PrepareBTransposed(tmp->data(), /*input*/ paramMat->data(), /*output*/ quantMult, /*Quant Mult*/ rows(val), diff --git a/src/tensors/cpu/integer_common.h b/src/tensors/cpu/integer_common.h index cb372a74..f4e632b5 100644 --- a/src/tensors/cpu/integer_common.h +++ b/src/tensors/cpu/integer_common.h @@ -11,21 +11,21 @@ namespace intgemm { struct Int8; struct Int16; - namespace ssse3 { + namespace SSSE3 { struct Kernels8; } - namespace sse2 { + namespace SSE2 { struct Kernels16; } - namespace avx2 { + namespace AVX2 { struct Kernels8; struct Kernels16; } - namespace avx512bw { + namespace AVX512BW { struct Kernels8; struct Kernels16; } - namespace avx512vnni { + namespace AVX512VNNI { struct Kernels8; } } @@ -57,22 +57,22 @@ template <> struct intgemm_ { }; template <> struct intgemm_ { - using width = intgemm::ssse3::Kernels8; + using width = intgemm::SSSE3::Kernels8; using type = int8_t; }; template <> struct intgemm_ { - using width = intgemm::avx2::Kernels8; + using width = intgemm::AVX2::Kernels8; using type = int8_t; }; template <> struct intgemm_ { - using width = intgemm::avx512bw::Kernels8; + using width = intgemm::AVX512BW::Kernels8; using type = int8_t; }; template <> struct intgemm_ { - using width = intgemm::avx512vnni::Kernels8; + using width = intgemm::AVX512VNNI::Kernels8; using type = int8_t; }; @@ -82,17 +82,17 @@ template <> struct intgemm_ { }; template <> struct intgemm_ { - using width = intgemm::sse2::Kernels16; + using width = intgemm::SSE2::Kernels16; using type = int16_t; }; template <> struct intgemm_ { - using width = intgemm::avx2::Kernels16; + using width = intgemm::AVX2::Kernels16; using type = int16_t; }; template <> struct intgemm_ { - using width = intgemm::avx512bw::Kernels16; + using width = intgemm::AVX512BW::Kernels16; using type = int16_t; }; @@ -220,4 +220,4 @@ void prepareAndTransposeB(io::Item& item, const char * input) { } //integer } //cpu -} //marian \ No newline at end of file +} //marian