mirror of
https://github.com/marian-nmt/marian.git
synced 2024-10-26 09:09:10 +03:00
Merged PR 11831: Change the weight matrix quantization to use 7-bit min/max quantization to avoid overflow
1. Change the weight matrix quantization to use 7-bit min/max quantization -> This resolves all the overflow issue, because weight and activations are quantized by min/max range. 2. Clip fp16 quantization to avoid overflow 3. Fix windows build errors (cmake options, vcproj file) 4. int8 pack model (encoder -> fp16)
This commit is contained in:
parent
a5a5c62d4a
commit
d2b4f3803e
@ -841,11 +841,15 @@ Ptr<Options> ConfigParser::parseOptions(int argc, char** argv, bool doValidate){
|
||||
|
||||
auto buildInfo = get<std::string>("build-info");
|
||||
if(!buildInfo.empty() && buildInfo != "false") {
|
||||
#ifndef _MSC_VER // cmake build options are not available on MSVC based build.
|
||||
if(buildInfo == "all")
|
||||
std::cerr << cmakeBuildOptionsAdvanced() << std::endl;
|
||||
else
|
||||
std::cerr << cmakeBuildOptions() << std::endl;
|
||||
exit(0);
|
||||
#else // _MSC_VER
|
||||
ABORT("build-info is not available on MSVC based build.");
|
||||
#endif // _MSC_VER
|
||||
}
|
||||
|
||||
// get paths to extra config files
|
||||
|
@ -35,10 +35,13 @@ public:
|
||||
Tensor val = p.second->val();
|
||||
|
||||
// save as packed format
|
||||
// @TODO Hardcoded to find packable weights - all the weights used for affine op (fp16), all the weights used for affine op and dot op (int8)
|
||||
// @TODO Hardcoded to find packable weights
|
||||
// int8 - quantize decoder only for better quality, all the weights used for affine op and dot op (int8)
|
||||
// fp16 - all the weights used for affine op (fp16)
|
||||
if ((gemmElementType == Type::packed8avx2 || gemmElementType == Type::packed8avx512)
|
||||
&& (pName.find("_W") == pName.length() - 3 || pName.find("_W") == pName.length() - 2)) {
|
||||
#if USE_FBGEMM
|
||||
&& (pName.find("_W") == pName.length() - 3 || pName.find("_W") == pName.length() - 2)
|
||||
&& pName.find("encoder") == std::string::npos) {
|
||||
#if USE_FBGEMM
|
||||
using namespace marian::cpu::variant;
|
||||
// packing information - size
|
||||
int nrow;
|
||||
@ -82,7 +85,10 @@ public:
|
||||
#else
|
||||
ABORT("Packed type {} only supported when compiled with -DUSE_FBGEMM=on", gemmElementType);
|
||||
#endif
|
||||
} else if (gemmElementType == Type::packed16 && pName.find("_W") == pName.length() - 3) {
|
||||
// fp16 quantization option + encoders for int8 quantized models
|
||||
} else if ((gemmElementType == Type::packed16 && pName.find("_W") == pName.length() - 3)
|
||||
|| ((gemmElementType == Type::packed8avx2 || gemmElementType == Type::packed8avx512)
|
||||
&& (pName.find("_W") == pName.length() - 3 || pName.find("_W") == pName.length() - 2))) {
|
||||
#if USE_FBGEMM
|
||||
using namespace marian::cpu::variant;
|
||||
|
||||
@ -123,7 +129,7 @@ public:
|
||||
io::Item item;
|
||||
item.name = pName;
|
||||
item.shape = val->shape();
|
||||
item.type = gemmElementType;
|
||||
item.type = Type::packed16;
|
||||
|
||||
// Use the actual memory as this will be aligned and padded.
|
||||
// When memory mapping this is required. Shape keeps track of
|
||||
|
@ -76,22 +76,31 @@ const int PACK16_PADDING = 1024;
|
||||
// This is a memory space to store auxiliary variables for FBGEMM (e.g. block row, block column, kernel_ncol_blocks and etc.)
|
||||
const int PACK16_SPECIALMEM = 256;
|
||||
|
||||
// This is the maximum value of FP16 type. There is a template type implementation, but it doesn't work on windows.
|
||||
// To keep the consistent result, just use the constant value instead of #ifdef _MSC_VER.
|
||||
// Template type implementation: float FP16_MAX = NumericLimits<float>(Type::float16).max;
|
||||
const float FP16_MAX = 65504.f;
|
||||
|
||||
// This function clips a value into a [min, max] range
|
||||
inline float clip(float value, float min, float max) {
|
||||
return std::max(min, std::min(value, max));
|
||||
}
|
||||
|
||||
// This is copied from FBGEMM code
|
||||
// A better way?
|
||||
// will be removed, when FBGEMM api is changed
|
||||
// blocked row-major format address arithmetic
|
||||
/**
|
||||
* Returns the memory address in the packed (block formatted) matrix array of a specific element
|
||||
* indexed by the original non-packed array.
|
||||
*
|
||||
* @param r_ row index in the original matrix
|
||||
* @param c_ column index in the original matrix
|
||||
* @param brow_ row wide block index
|
||||
* @param bcol_ column wide block index
|
||||
* @param nbrow_ number of blocks in row
|
||||
* @param nbcol_ number of blocks in column
|
||||
* @param last_brow_ row number of the last block
|
||||
*/
|
||||
//
|
||||
// Returns the memory address in the packed (block formatted) matrix array of a specific element
|
||||
// indexed by the original non-packed array.
|
||||
//
|
||||
// @param r_ row index in the original matrix
|
||||
// @param c_ column index in the original matrix
|
||||
// @param brow_ row wide block index
|
||||
// @param bcol_ column wide block index
|
||||
// @param nbrow_ number of blocks in row
|
||||
// @param nbcol_ number of blocks in column
|
||||
// @param last_brow_ row number of the last block
|
||||
inline uint64_t addr(const int r_,
|
||||
const int c_,
|
||||
const int brow_,
|
||||
@ -114,6 +123,15 @@ inline uint64_t addr(const int r_,
|
||||
return index;
|
||||
}
|
||||
|
||||
// Returns a value in 2D array with the row, column index (i, j) and transposed flag.
|
||||
// The number of rows and columns needs to be passed.
|
||||
// The transposed flag indicates if the underlying data needs to be accessed in a tranposed layout or not.
|
||||
inline float getVal2dArr(const float* data, size_t i, size_t j, size_t rows, size_t cols, bool transposed) {
|
||||
ABORT_IF(i >= rows, "Row index {} exceeds the number of rows {}.", i, rows);
|
||||
ABORT_IF(j >= cols, "Column index {} exceeds the number of columns {}.", j, cols);
|
||||
return transposed ? data[j * rows + i] : data[i * cols + j];
|
||||
}
|
||||
|
||||
// Memory blocking factors (parameters) for packing into AVX2 int8
|
||||
static const fbgemm::BlockingFactors Packed8Avx2BlockingFactors = {
|
||||
PackingTraits<int8_t, int32_t, inst_set_t::avx2>::MR,
|
||||
@ -147,6 +165,12 @@ inline const fbgemm::BlockingFactors* getBlockingFactors(marian::Type packType)
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the byte size of packed matrix in fp16. It's calculated by fbgemm's internal logic due to the paddings and different layouts.
|
||||
// Packing with fp16 only targets AVX2 instruction sets for now.
|
||||
// See '3rd_party/fbgemm/include/fbgemm/FbgemmFP16.h'.
|
||||
// shape: shape of the tensor to be packed
|
||||
// transpose: the matrix is transposed
|
||||
// packsize (out): the size of the packed matrix in byte
|
||||
void fbgemmPacked16PackInfo(const marian::Shape& shape,
|
||||
const bool transpose,
|
||||
uint64_t& packsize) {
|
||||
@ -154,6 +178,21 @@ void fbgemmPacked16PackInfo(const marian::Shape& shape,
|
||||
fbgemmPacked16PackInfo(shape, transpose, nrow, ncol, kernel_ncol_blocks, brow, bcol, last_brow, nbrow, nbcol, packsize);
|
||||
}
|
||||
|
||||
// Returns the byte size of packed matrix in fp16. It's calculated by fbgemm's internal logic due to the paddings and different layouts.
|
||||
// This function returns some other extra variables
|
||||
// Packing with fp16 only targets AVX2 instruction sets for now.
|
||||
// See '3rd_party/fbgemm/include/fbgemm/FbgemmFP16.h'.
|
||||
// shape: shape of the tensor to be packed
|
||||
// transpose: the matrix is transposed
|
||||
// nrow (out): the number of rows
|
||||
// ncol (out): the number of columns
|
||||
// kernel_ncol_blocks (out): the number of column blocks
|
||||
// brow (out): the number of rows in a block
|
||||
// bcol (out): the number of columns in a block
|
||||
// last_brow (out): the number of rows in the last block
|
||||
// nbrow (out): row index in a block
|
||||
// nbcol (out): column index in a block
|
||||
// packsize (out): the size of the packed matrix in byte
|
||||
void fbgemmPacked16PackInfo(const marian::Shape& shape,
|
||||
const bool transpose,
|
||||
int& nrow,
|
||||
@ -178,6 +217,14 @@ void fbgemmPacked16PackInfo(const marian::Shape& shape,
|
||||
+ PACK16_SPECIALMEM;
|
||||
}
|
||||
|
||||
// Returns the byte size of packed matrix in int8. It's calculated by fbgemm's internal logic due to the paddings and different layouts.
|
||||
// See '3rd_party/fbgemm/src/PackBMatrix.cc'.
|
||||
// shape: shape of the tensor to be packed
|
||||
// packType: Type to be packed - packed8avx2 or packed8avx512
|
||||
// transpose: the matrix is transposed
|
||||
// nrow (out): the number of rows
|
||||
// ncol (out): the number of columns
|
||||
// packsize (out): the size of the packed matrix in byte
|
||||
void fbgemmPacked8PackInfo(const marian::Shape& shape,
|
||||
const marian::Type packType,
|
||||
const bool transpose,
|
||||
@ -221,6 +268,20 @@ inline void col_offsets_with_zero_pt_s8acc32(
|
||||
}
|
||||
}
|
||||
|
||||
// Pack a matrix (fp16) into cache utilization efficient way (block format) into fp16
|
||||
// out: output tensor - packed format
|
||||
// inData: input tensor data - pointer of float data
|
||||
// transpose: the matrix is transposed
|
||||
// nrow: the number of rows
|
||||
// ncol: the number of columns
|
||||
// kernel_ncol_blocks: the number of column blocks
|
||||
// brow: the number of rows in a block
|
||||
// bcol: the number of columns in a block
|
||||
// last_brow: the number of rows in the last block
|
||||
// nbrow: row index in a block
|
||||
// nbcol: column index in a block
|
||||
// packsize: the size of the packed matrix
|
||||
// (the number of fp16 elements + padding (1024) + extra temporary memory (256))
|
||||
void fbgemmPacked16Pack(marian::Tensor out,
|
||||
const float* inData, // Packing is only available for 2D weight matrix in Marian. Otherwise, it's aborted in expanded_gemm.h.
|
||||
const bool transpose,
|
||||
@ -258,20 +319,37 @@ void fbgemmPacked16Pack(marian::Tensor out,
|
||||
// pack the matrix
|
||||
for(int i = 0; i < nrow; i++) {
|
||||
for(int j = 0; j < ncol; j++) {
|
||||
outmem[addr(i, j, brow, bcol, nbrow, nbcol, last_brow)]
|
||||
= tconv(!transpose ? inData[i * ncol + j] : inData[i + nrow * j], *dummy);
|
||||
float src = clip(transpose ? inData[i + nrow * j] : inData[i * ncol + j], -FP16_MAX, FP16_MAX);
|
||||
outmem[addr(i, j, brow, bcol, nbrow, nbcol, last_brow)] = tconv(src, *dummy);
|
||||
}
|
||||
}
|
||||
delete dummy;
|
||||
}
|
||||
|
||||
// Pack a matrix (int8) into cache utilization efficient way (block format) together with quantization into int8
|
||||
// out: output tensor - packed format and quantized into int8
|
||||
// inData: input tensor data - pointer of float data
|
||||
// packType: Type to be packed - packed8avx2 or packed8avx512
|
||||
// transpose: the matrix is transposed
|
||||
// nrow: the number of rows
|
||||
// ncol: the number of columns
|
||||
// packsize: the size of the packed matrix
|
||||
// (the size of int8 packed B from fbgemm:PackAWithQuantRowOffset + quantization scale, offset and zero point)
|
||||
// quantRangeStdDevs: the range to be quantized for the original float data in multiples standard deviation
|
||||
// the default value is 0.0f which means min/max quantization
|
||||
// only a half range of normal int8 which is [-64, 63] used to avoid overflow
|
||||
// during the accumulation in VPMADDUBSW instruction
|
||||
// https://intel.github.io/mkl-dnn/dev_guide_int8_computations.html
|
||||
// (e.g. 3.f means the original tensor is quantized
|
||||
// from [mean - 3.f * standard deviation, mean + 3.f * standard deviation] to [-64, 63])
|
||||
void fbgemmPacked8Pack(marian::Tensor out,
|
||||
const float* inData,
|
||||
const marian::Type packType,
|
||||
const bool transpose,
|
||||
const int nrow,
|
||||
const int ncol,
|
||||
const uint64_t packsize) {
|
||||
const uint64_t packsize,
|
||||
const float quantRangeStdDevs) {
|
||||
int k = nrow;
|
||||
int n = ncol;
|
||||
int len = k * n;
|
||||
@ -282,46 +360,43 @@ void fbgemmPacked8Pack(marian::Tensor out,
|
||||
|
||||
const float* data = inData;
|
||||
float val = 0;
|
||||
|
||||
// Use half of the quantization range to prevent overflow of VPMADDUBSW
|
||||
constexpr static int quantizedRange = 127;
|
||||
constexpr static int quantizedMax = 63;
|
||||
|
||||
if (transpose) {
|
||||
for (int jj = 0; jj < n; jj++) {
|
||||
float min = std::numeric_limits<float>::max(), max = std::numeric_limits<float>::min();
|
||||
double mean = 0, sqrsum = 0;
|
||||
for (int ii = 0; ii < k; ii++) {
|
||||
val = data[jj * k + ii];
|
||||
mean += val;
|
||||
sqrsum += val * val;
|
||||
}
|
||||
mean /= k;
|
||||
sqrsum /= k;
|
||||
sqrsum -= mean * mean;
|
||||
sqrsum = sqrt(sqrsum);
|
||||
|
||||
min = (float)(mean - 7.0f*sqrsum);
|
||||
max = (float)(mean + 7.0f*sqrsum);
|
||||
bqScale[jj] = (max - min) / 255;
|
||||
bqZeropoint[jj] = (int32_t)(127 - max / bqScale[jj]);
|
||||
}
|
||||
} else {
|
||||
for (int jj = 0; jj < n; jj++) {
|
||||
float min = std::numeric_limits<float>::max(), max = std::numeric_limits<float>::min();
|
||||
double mean = 0, sqrsum = 0;
|
||||
for (int ii = 0; ii < k; ii++) {
|
||||
val = data[jj + ii * n];
|
||||
mean += val;
|
||||
sqrsum += val * val;
|
||||
}
|
||||
mean /= k;
|
||||
sqrsum /= k;
|
||||
sqrsum -= mean * mean;
|
||||
sqrsum = sqrt(sqrsum);
|
||||
|
||||
min = (float)(mean - 7.0f*sqrsum);
|
||||
max = (float)(mean + 7.0f*sqrsum);
|
||||
bqScale[jj] = (max - min) / 255;
|
||||
bqZeropoint[jj] = (int32_t)(127 - max / bqScale[jj]);
|
||||
}
|
||||
}
|
||||
// This routine compute the quantization range for each column - either one of min/max range or quantRangeStdDevs sigma range.
|
||||
for (size_t jj = 0; jj < n; jj++) { // for each column, collect stats (min/max or mean/std.dev.)
|
||||
float min = std::numeric_limits<float>::max(), max = std::numeric_limits<float>::min();
|
||||
double mean = 0, sqrsum = 0;
|
||||
for (size_t ii = 0; ii < k; ii++) { // in a column, go throuhg all the rows and collect stats
|
||||
val = getVal2dArr(data, ii, jj, k, n, transpose);
|
||||
// If quantRangeStdDevs is 0.f, min/max values of the columns is used as a quantization range
|
||||
if(quantRangeStdDevs == 0.f) {
|
||||
if(min > val)
|
||||
min = val;
|
||||
if(max < val)
|
||||
max = val;
|
||||
} else {
|
||||
// Quantize by std.dev. range
|
||||
mean += val;
|
||||
sqrsum += val * val;
|
||||
}
|
||||
}
|
||||
// If a quantization range (in multiples of std. dev.) is given with a non-zero value,
|
||||
// it calculate the range for this column (different quantization scale/offset are used for each column)
|
||||
if(quantRangeStdDevs != 0.f) {
|
||||
mean /= k;
|
||||
sqrsum /= k;
|
||||
sqrsum -= mean * mean;
|
||||
sqrsum = sqrt(sqrsum);
|
||||
min = (float)(mean - quantRangeStdDevs * sqrsum);
|
||||
max = (float)(mean + quantRangeStdDevs * sqrsum);
|
||||
}
|
||||
// based on the quantization range, this computes the scale and offset for the quantization
|
||||
bqScale[jj] = (max - min) / quantizedRange;
|
||||
bqZeropoint[jj] = (int32_t)(quantizedMax - max / bqScale[jj]);
|
||||
}
|
||||
|
||||
// 2. quantize
|
||||
int8_t* quantized = 0;
|
||||
@ -335,7 +410,7 @@ void fbgemmPacked8Pack(marian::Tensor out,
|
||||
TensorQuantizationParams bQuantParam;
|
||||
bQuantParam.scale = bqScale[jj];
|
||||
bQuantParam.zero_point = bqZeropoint[jj];
|
||||
bQuantParam.precision = 8;
|
||||
bQuantParam.precision = 7; // Use half of the quantization range to prevent overflow of VPMADDUBSW
|
||||
|
||||
if (transpose)
|
||||
fbgemm::Quantize<int8_t>(data + jj * k, quantized + jj * k, k, bQuantParam);
|
||||
|
@ -94,13 +94,21 @@ void fbgemmPacked16Pack(marian::Tensor out,
|
||||
// ncol: the number of columns
|
||||
// packsize: the size of the packed matrix
|
||||
// (the size of int8 packed B from fbgemm:PackAWithQuantRowOffset + quantization scale, offset and zero point)
|
||||
// quantRangeStdDevs: the range to be quantized for the original float data in multiples standard deviation
|
||||
// the default value is 0.0f which means min/max quantization
|
||||
// only a half range of normal int8 which is [-64, 63] used to avoid overflow
|
||||
// during the accumulation in VPMADDUBSW instruction
|
||||
// https://intel.github.io/mkl-dnn/dev_guide_int8_computations.html
|
||||
// (e.g. 3.f means the original tensor is quantized
|
||||
// from [mean - 3.f * standard deviation, mean + 3.f * standard deviation] to [-64, 63])
|
||||
void fbgemmPacked8Pack(marian::Tensor out,
|
||||
const float* inData,
|
||||
const marian::Type packType,
|
||||
const bool transpose,
|
||||
const int nrow,
|
||||
const int ncol,
|
||||
const uint64_t packsize); // @TODO: change to size_t where appropriate
|
||||
const uint64_t packsize,
|
||||
const float quantRangeStdDevs = 0.f); // @TODO: change to size_t where appropriate
|
||||
|
||||
// GEMM operation on the packed B matrix
|
||||
// C: output matrix
|
||||
|
@ -56,7 +56,7 @@ public:
|
||||
for(size_t i = 0; i < N; ++i) {
|
||||
int idx = idxs[i];
|
||||
// since idxs is re-used for each batch, add batch offset to each idx to get absolute position
|
||||
h_res_idx[pos] = idx + batchIdx * batchOffset;
|
||||
h_res_idx[pos] = (int) (idx + batchIdx * batchOffset);
|
||||
h_res[pos] = scoresData[idx];
|
||||
++pos;
|
||||
}
|
||||
|
@ -1445,7 +1445,7 @@
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\training\communicator.cpp" />
|
||||
<ClCompile Include="..\src\training\graph_group_multinode_sync.cpp" />
|
||||
<ClCompile Include="..\src\training\graph_group.cpp" />
|
||||
<ClCompile Include="..\src\training\scheduler.cpp" />
|
||||
<ClCompile Include="..\src\translator\history.cpp" />
|
||||
<ClCompile Include="..\src\translator\output_collector.cpp" />
|
||||
@ -1454,10 +1454,8 @@
|
||||
<ClCompile Include="..\src\translator\output_printer.cpp" />
|
||||
<ClCompile Include="..\src\translator\scorers.cpp" />
|
||||
<ClCompile Include="..\src\training\graph_group_async.cpp" />
|
||||
<ClCompile Include="..\src\training\graph_group_async_drop.cpp" />
|
||||
<ClCompile Include="..\src\training\graph_group_sync.cpp" />
|
||||
<ClCompile Include="..\src\training\graph_group_singleton.cpp" />
|
||||
<ClCompile Include="..\src\training\graph_group_multinode.cpp" />
|
||||
<ClCompile Include="..\src\training\validator.cpp" />
|
||||
<ClCompile Include="..\src\3rd_party\yaml-cpp\convert.cpp" />
|
||||
<ClCompile Include="..\src\3rd_party\yaml-cpp\directives.cpp" />
|
||||
@ -1653,7 +1651,6 @@
|
||||
<ClInclude Include="..\src\common\config_parser.h" />
|
||||
<ClInclude Include="..\src\common\definitions.h" />
|
||||
<ClInclude Include="..\src\common\file_stream.h" />
|
||||
<ClInclude Include="..\src\common\keywords.h" />
|
||||
<ClInclude Include="..\src\common\logging.h" />
|
||||
<ClInclude Include="..\src\common\options.h" />
|
||||
<ClInclude Include="..\src\common\regex.h" />
|
||||
@ -1755,18 +1752,12 @@
|
||||
<ClInclude Include="..\src\training\communicator.h" />
|
||||
<ClInclude Include="..\src\training\graph_group.h" />
|
||||
<ClInclude Include="..\src\training\graph_group_async.h" />
|
||||
<ClInclude Include="..\src\training\graph_group_async_drop.h" />
|
||||
<ClInclude Include="..\src\training\graph_group_multinode.h" />
|
||||
<ClInclude Include="..\src\training\graph_group_multinode_sync.h" />
|
||||
<ClInclude Include="..\src\training\graph_group_singleton.h" />
|
||||
<ClInclude Include="..\src\training\graph_group_sync.h" />
|
||||
<ClInclude Include="..\src\training\scheduler.h" />
|
||||
<ClInclude Include="..\src\training\training.h" />
|
||||
<ClInclude Include="..\src\training\training_state.h" />
|
||||
<ClInclude Include="..\src\training\validator.h" />
|
||||
<ClInclude Include="..\src\training\gradient_dropping\dropper.h" />
|
||||
<ClInclude Include="..\src\training\gradient_dropping\sparse_tensor.h" />
|
||||
<ClInclude Include="..\src\training\gradient_dropping\gpu\sparse_algorithm.h" />
|
||||
<ClInclude Include="..\src\translator\beam_search.h" />
|
||||
<ClInclude Include="..\src\translator\helpers.h" />
|
||||
<ClInclude Include="..\src\translator\history.h" />
|
||||
@ -1906,8 +1897,6 @@
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="..\src\training\gradient_dropping\gpu\dropper.cu" />
|
||||
<CudaCompile Include="..\src\training\gradient_dropping\gpu\sparse_algorithm.cu" />
|
||||
<CudaCompile Include="..\src\translator\helpers.cu" />
|
||||
<CudaCompile Include="..\src\translator\nth_element.cu" />
|
||||
</ItemGroup>
|
||||
|
@ -94,18 +94,12 @@
|
||||
<ClCompile Include="..\src\training\graph_group_async.cpp">
|
||||
<Filter>training</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\training\graph_group_async_drop.cpp">
|
||||
<Filter>training</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\training\graph_group_sync.cpp">
|
||||
<Filter>training</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\training\graph_group_singleton.cpp">
|
||||
<Filter>training</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\training\graph_group_multinode.cpp">
|
||||
<Filter>training</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\training\validator.cpp">
|
||||
<Filter>training</Filter>
|
||||
</ClCompile>
|
||||
@ -226,9 +220,6 @@
|
||||
<ClCompile Include="..\src\3rd_party\yaml-cpp\binary_renamed.cpp">
|
||||
<Filter>3rd_party\yaml-cpp</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\training\graph_group_multinode_sync.cpp">
|
||||
<Filter>training</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\command\marian_main.cpp">
|
||||
<Filter>command</Filter>
|
||||
</ClCompile>
|
||||
@ -883,6 +874,9 @@
|
||||
<ClCompile Include="..\src\tensors\cpu\fbgemm\packed_gemm.cpp">
|
||||
<Filter>tensors\cpu\fbgemm</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\src\training\graph_group.cpp">
|
||||
<Filter>training</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\src\marian.h" />
|
||||
@ -1288,9 +1282,6 @@
|
||||
<ClInclude Include="..\src\common\file_stream.h">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\common\keywords.h">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\common\logging.h">
|
||||
<Filter>common</Filter>
|
||||
</ClInclude>
|
||||
@ -1531,12 +1522,6 @@
|
||||
<ClInclude Include="..\src\training\graph_group_async.h">
|
||||
<Filter>training</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\training\graph_group_async_drop.h">
|
||||
<Filter>training</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\training\graph_group_multinode.h">
|
||||
<Filter>training</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\training\graph_group_singleton.h">
|
||||
<Filter>training</Filter>
|
||||
</ClInclude>
|
||||
@ -1555,15 +1540,6 @@
|
||||
<ClInclude Include="..\src\training\validator.h">
|
||||
<Filter>training</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\training\gradient_dropping\dropper.h">
|
||||
<Filter>training\gradient_dropping</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\training\gradient_dropping\sparse_tensor.h">
|
||||
<Filter>training\gradient_dropping</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\training\gradient_dropping\gpu\sparse_algorithm.h">
|
||||
<Filter>training\gradient_dropping\gpu</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\translator\beam_search.h">
|
||||
<Filter>translator</Filter>
|
||||
</ClInclude>
|
||||
@ -1642,9 +1618,6 @@
|
||||
<ClInclude Include="..\src\translator\output_printer.h">
|
||||
<Filter>translator</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\training\graph_group_multinode_sync.h">
|
||||
<Filter>training</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\src\command\marian_vocab.cpp">
|
||||
<Filter>command</Filter>
|
||||
</ClInclude>
|
||||
@ -2373,12 +2346,6 @@
|
||||
<Filter Include="training">
|
||||
<UniqueIdentifier>{880c8f51-3306-4d80-a682-7242341b0098}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="training\gradient_dropping">
|
||||
<UniqueIdentifier>{880c8f51-3306-4d80-a682-7242341b0101}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="training\gradient_dropping\gpu">
|
||||
<UniqueIdentifier>{880c8f51-3306-4d80-a682-7242341b0104}</UniqueIdentifier>
|
||||
</Filter>
|
||||
<Filter Include="translator">
|
||||
<UniqueIdentifier>{880c8f51-3306-4d80-a682-7242341b0107}</UniqueIdentifier>
|
||||
</Filter>
|
||||
@ -2703,11 +2670,5 @@
|
||||
<CudaCompile Include="..\src\tensors\gpu\sparse.cu">
|
||||
<Filter>tensors\gpu</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="..\src\training\gradient_dropping\gpu\dropper.cu">
|
||||
<Filter>training\gradient_dropping\gpu</Filter>
|
||||
</CudaCompile>
|
||||
<CudaCompile Include="..\src\training\gradient_dropping\gpu\sparse_algorithm.cu">
|
||||
<Filter>training\gradient_dropping\gpu</Filter>
|
||||
</CudaCompile>
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
Reference in New Issue
Block a user