mirror of
https://github.com/marian-nmt/marian.git
synced 2024-07-14 17:40:36 +03:00
dropout over source and target words
This commit is contained in:
parent
fcd99c49f7
commit
5062f7b230
@ -392,7 +392,6 @@ class GRU {
|
||||
Expr apply1(Expr input) {
|
||||
if(dropMaskX_)
|
||||
input = dropout(input, keywords::mask=dropMaskX_);
|
||||
debug(input, "in");
|
||||
auto xW = dot(input, W_);
|
||||
if(layerNorm_)
|
||||
xW = layer_norm(xW, gamma1_);
|
||||
@ -403,7 +402,6 @@ class GRU {
|
||||
Expr mask = nullptr) {
|
||||
if(dropMaskS_)
|
||||
state = dropout(state, keywords::mask=dropMaskS_);
|
||||
debug(state, "state");
|
||||
|
||||
auto sU = dot(state, U_);
|
||||
|
||||
|
@ -26,12 +26,19 @@ namespace marian {
|
||||
bool skipDepth = options_->get<bool>("skip");
|
||||
size_t encoderLayers = options_->get<size_t>("layers-enc");
|
||||
float dropoutRnn = options_->get<float>("dropout-rnn");
|
||||
float dropoutSrc = options_->get<float>("dropout-src");
|
||||
|
||||
auto xEmb = Embedding("Wemb", dimSrcVoc, dimSrcEmb)(graph);
|
||||
|
||||
Expr x, xMask;
|
||||
std::tie(x, xMask) = prepareSource(xEmb, batch, batchIdx);
|
||||
|
||||
if(dropoutSrc) {
|
||||
int srcWords = x->shape()[2];
|
||||
auto srcWordDrop = graph->dropout(dropoutSrc, {1, 1, srcWords});
|
||||
x = dropout(x, mask=srcWordDrop);
|
||||
}
|
||||
|
||||
auto xFw = RNN<GRU>(graph, "encoder_bi",
|
||||
dimSrcEmb, dimEncState,
|
||||
normalize=layerNorm,
|
||||
@ -45,7 +52,6 @@ namespace marian {
|
||||
dropout_prob=dropoutRnn)
|
||||
(x, mask=xMask);
|
||||
|
||||
debug(xFw, "xFw");
|
||||
if(encoderLayers > 1) {
|
||||
auto xBi = concatenate({xFw, xBw}, axis=1);
|
||||
|
||||
@ -90,9 +96,16 @@ class DecoderGNMT : public DecoderBase {
|
||||
bool skipDepth = options_->get<bool>("skip");
|
||||
size_t decoderLayers = options_->get<size_t>("layers-dec");
|
||||
float dropoutRnn = options_->get<float>("dropout-rnn");
|
||||
float dropoutTrg = options_->get<float>("dropout-trg");
|
||||
|
||||
auto graph = embeddings->graph();
|
||||
|
||||
if(dropoutTrg) {
|
||||
int trgWords = embeddings->shape()[2];
|
||||
auto trgWordDrop = graph->dropout(dropoutTrg, {1, 1, trgWords});
|
||||
embeddings = dropout(embeddings, mask=trgWordDrop);
|
||||
}
|
||||
|
||||
if(!attention_)
|
||||
attention_ = New<GlobalAttention>("decoder",
|
||||
context, dimDecState,
|
||||
|
@ -220,7 +220,7 @@ int main(int argc, char** argv) {
|
||||
target->load("../benchmark/marian32K/train.tok.true.bpe.de.json", 50000);
|
||||
|
||||
auto encdec = New<GNMT>(options);
|
||||
encdec->load(graph, "../benchmark/marian32K/modelML6.200000.npz");
|
||||
encdec->load(graph, "../benchmark/marian32K/model8.110000.npz");
|
||||
|
||||
graph->reserveWorkspaceMB(128);
|
||||
|
||||
|
@ -199,6 +199,10 @@ void Config::addOptions(int argc, char** argv, bool doValidate) {
|
||||
"Enable layer normalization")
|
||||
("dropout-rnn", po::value<float>()->default_value(0),
|
||||
"Scaling dropout along rnn layers and time (0 = no dropout)")
|
||||
("dropout-src", po::value<float>()->default_value(0),
|
||||
"Dropout source words (0 = no dropout)")
|
||||
("dropout-trg", po::value<float>()->default_value(0),
|
||||
"Dropout target words (0 = no dropout)")
|
||||
;
|
||||
|
||||
po::options_description opt("Optimizer options");
|
||||
@ -307,6 +311,8 @@ void Config::addOptions(int argc, char** argv, bool doValidate) {
|
||||
SET_OPTION("no-shuffle", bool);
|
||||
SET_OPTION("normalize", bool);
|
||||
SET_OPTION("dropout-rnn", float);
|
||||
SET_OPTION("dropout-src", float);
|
||||
SET_OPTION("dropout-trg", float);
|
||||
SET_OPTION("skip", bool);
|
||||
|
||||
if(doValidate)
|
||||
|
Loading…
Reference in New Issue
Block a user