mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
add legacy code on gpu
This commit is contained in:
parent
1d96d7b6eb
commit
ce34df4d98
@ -480,46 +480,25 @@ void ProdBatchedTypedLegacy(marian::Tensor C,
|
||||
CUDA_CHECK(cudaSetDevice((int)C->getDeviceId().no));
|
||||
ComputeType alpha = scalar;
|
||||
|
||||
// determine meta-shape of bdot operation. Essentially treat the last two dimensions as single elements
|
||||
// such that (..., m, k) x (..., k, n) -> (..., m, n) where ... is a broadcastable shape as in element-wise kernels.
|
||||
int batchA = A->shape().elements() / (A->shape()[-1] * A->shape()[-2]);
|
||||
int batchB = B->shape().elements() / (B->shape()[-1] * B->shape()[-2]);
|
||||
|
||||
auto aShape = A->shape();
|
||||
auto bShape = B->shape();
|
||||
|
||||
// make sure both shape have the same number of dimensions via broadcasting
|
||||
size_t maxLength = std::max(aShape.size(), bShape.size());
|
||||
if(aShape.size() != bShape.size()) {
|
||||
Shape ones(std::vector<int>(maxLength, 1));
|
||||
aShape = Shape::broadcast({aShape, ones});
|
||||
bShape = Shape::broadcast({bShape, ones});
|
||||
}
|
||||
|
||||
// Create meta-shapes without last 2 dimensions
|
||||
Shape aShapeMeta, bShapeMeta, cShapeMeta;
|
||||
aShapeMeta.resize(maxLength - 2);
|
||||
bShapeMeta.resize(maxLength - 2);
|
||||
for(size_t i = 0; i < maxLength - 2; ++i) {
|
||||
aShapeMeta.set(i, aShape[i]);
|
||||
bShapeMeta.set(i, bShape[i]);
|
||||
}
|
||||
cShapeMeta = Shape::broadcast({aShapeMeta, bShapeMeta});
|
||||
|
||||
size_t m = aShape[-2];
|
||||
size_t k = aShape[-1];
|
||||
int m = A->shape()[-2];
|
||||
int k = A->shape()[-1];
|
||||
if(transA)
|
||||
std::swap(m, k);
|
||||
|
||||
size_t l = bShape[-2];
|
||||
size_t n = bShape[-1];
|
||||
int l = B->shape()[-2];
|
||||
int n = B->shape()[-1];
|
||||
if(transB)
|
||||
std::swap(l, n);
|
||||
|
||||
size_t lda = aShape[-1];
|
||||
size_t ldb = bShape[-1];
|
||||
size_t ldc = bShape[-1];
|
||||
int lda = A->shape()[-1];
|
||||
int ldb = B->shape()[-1];
|
||||
int ldc = B->shape()[-1];
|
||||
|
||||
if(transB)
|
||||
ldc = bShape[-2];
|
||||
ldc = B->shape()[-2];
|
||||
|
||||
cublasOperation_t opA = transA ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
cublasOperation_t opB = transB ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
@ -528,29 +507,18 @@ void ProdBatchedTypedLegacy(marian::Tensor C,
|
||||
auto cublasHandle = backend->getCublasHandle();
|
||||
auto compute = backend->getCudaComputeCapability();
|
||||
|
||||
auto strideA = m * k;
|
||||
auto strideB = n * k;
|
||||
auto strideA = batchA == 1 ? 0 : m * k;
|
||||
auto strideB = batchB == 1 ? 0 : n * k;
|
||||
auto strideC = n * m;
|
||||
|
||||
auto batchC = cShapeMeta.elements();
|
||||
|
||||
// Convert to functional shapes to be able to map dimensions. @TODO merge this
|
||||
functional::Shape aShapeMetaF = aShapeMeta;
|
||||
functional::Shape bShapeMetaF = bShapeMeta;
|
||||
functional::Shape cShapeMetaF = cShapeMeta;
|
||||
auto batchC = std::max(batchA, batchB);
|
||||
|
||||
std::vector<const ElementType*> aptr;
|
||||
std::vector<const ElementType*> bptr;
|
||||
std::vector<ElementType*> cptr;
|
||||
|
||||
functional::Array<int, functional::Shape::size()> dims;
|
||||
for(int i = 0; i < batchC; i++) {
|
||||
cShapeMetaF.dims(i, dims);
|
||||
auto aIndex = aShapeMetaF.bindex(dims);
|
||||
auto bIndex = bShapeMetaF.bindex(dims);
|
||||
|
||||
aptr.push_back(A->data<ElementType>() + aIndex * strideA);
|
||||
bptr.push_back(B->data<ElementType>() + bIndex * strideB);
|
||||
aptr.push_back(A->data<ElementType>() + (i % batchA) * strideA);
|
||||
bptr.push_back(B->data<ElementType>() + (i % batchB) * strideB);
|
||||
cptr.push_back(C->data<ElementType>() + i * strideC);
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user