dropout over source and target words

Marcin Junczys-Dowmunt 2017-02-26 23:22:46 +00:00
parent fcd99c49f7
commit 5062f7b230
4 changed files with 21 additions and 4 deletions

View File

@@ -392,7 +392,6 @@ class GRU {
Expr apply1(Expr input) {
if(dropMaskX_)
input = dropout(input, keywords::mask=dropMaskX_);
debug(input, "in");
auto xW = dot(input, W_);
if(layerNorm_)
xW = layer_norm(xW, gamma1_);
@@ -403,7 +402,6 @@ class GRU {
Expr mask = nullptr) {
if(dropMaskS_)
state = dropout(state, keywords::mask=dropMaskS_);
debug(state, "state");
auto sU = dot(state, U_);

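A note on the mask arguments above: dropMaskX_ and dropMaskS_ look like precomputed dropout masks passed in via keywords::mask, and the --dropout-rnn help text below ("Scaling dropout along rnn layers and time") suggests the same mask is reused at every time step rather than resampled per call. A minimal standalone sketch of that idea, using plain std::vector instead of marian's Expr; applyMask is an illustrative helper, not marian API, and inverted dropout (kept units scaled by 1/(1-p)) is assumed:

#include <cstdio>
#include <vector>

// Elementwise application of a fixed dropout mask, mimicking what the
// keywords::mask argument above suggests: the mask is built once and then
// applied unchanged to the input (dropMaskX_) or state (dropMaskS_).
std::vector<float> applyMask(std::vector<float> v,
                             const std::vector<float>& mask) {
  for(size_t i = 0; i < v.size(); ++i)
    v[i] *= mask[i];
  return v;
}

int main() {
  // Inverted-dropout mask for p = 0.5: dropped units are 0, kept units 1/(1-p) = 2.
  std::vector<float> mask = {2.f, 0.f, 2.f, 0.f};
  std::vector<std::vector<float>> inputs = {{1.f, 2.f, 3.f, 4.f},
                                            {5.f, 6.f, 7.f, 8.f}};
  for(size_t t = 0; t < inputs.size(); ++t) {   // same mask at every time step
    auto dropped = applyMask(inputs[t], mask);
    std::printf("t=%zu: %.1f %.1f %.1f %.1f\n",
                t, dropped[0], dropped[1], dropped[2], dropped[3]);
  }
  return 0;
}

With p = 0.5 the same two units are zeroed at every step; resampling per call would drop different units each time.
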
View File

@@ -26,12 +26,19 @@ namespace marian {
bool skipDepth = options_->get<bool>("skip");
size_t encoderLayers = options_->get<size_t>("layers-enc");
float dropoutRnn = options_->get<float>("dropout-rnn");
float dropoutSrc = options_->get<float>("dropout-src");
auto xEmb = Embedding("Wemb", dimSrcVoc, dimSrcEmb)(graph);
Expr x, xMask;
std::tie(x, xMask) = prepareSource(xEmb, batch, batchIdx);
if(dropoutSrc) {
int srcWords = x->shape()[2];
auto srcWordDrop = graph->dropout(dropoutSrc, {1, 1, srcWords});
x = dropout(x, mask=srcWordDrop);
}
auto xFw = RNN<GRU>(graph, "encoder_bi",
dimSrcEmb, dimEncState,
normalize=layerNorm,
@@ -45,7 +52,6 @@ namespace marian {
dropout_prob=dropoutRnn)
(x, mask=xMask);
debug(xFw, "xFw");
if(encoderLayers > 1) {
auto xBi = concatenate({xFw, xBw}, axis=1);
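
The dropout block above removes whole source words rather than individual units: graph->dropout(dropoutSrc, {1, 1, srcWords}) apparently builds a mask with one entry per source position (shape()[2]), so a zero entry wipes out that word's entire embedding vector once the mask is broadcast. A rough, self-contained sketch of such a per-word mask, assuming inverted dropout; makeWordDropMask is a hypothetical helper, not marian API:

#include <random>
#include <vector>

// Hypothetical per-word dropout mask: one scale factor per source position,
// 0 with probability p (word dropped), 1/(1-p) otherwise (word kept and
// rescaled so the expected activation is unchanged).
std::vector<float> makeWordDropMask(int nWords, float p, std::mt19937& rng) {
  std::bernoulli_distribution keep(1.0f - p);
  std::vector<float> mask(nWords);
  for(int w = 0; w < nWords; ++w)
    mask[w] = keep(rng) ? 1.0f / (1.0f - p) : 0.0f;
  return mask;
}

Multiplying the source embeddings x with a mask like this zeroes or rescales whole word vectors, which matches the commit title.
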
@@ -90,9 +96,16 @@ class DecoderGNMT : public DecoderBase {
bool skipDepth = options_->get<bool>("skip");
size_t decoderLayers = options_->get<size_t>("layers-dec");
float dropoutRnn = options_->get<float>("dropout-rnn");
float dropoutTrg = options_->get<float>("dropout-trg");
auto graph = embeddings->graph();
if(dropoutTrg) {
int trgWords = embeddings->shape()[2];
auto trgWordDrop = graph->dropout(dropoutTrg, {1, 1, trgWords});
embeddings = dropout(embeddings, mask=trgWordDrop);
}
if(!attention_)
attention_ = New<GlobalAttention>("decoder",
context, dimDecState,

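The decoder hunk mirrors the source side: a {1, 1, trgWords} mask applied to the target embeddings before they enter the decoder. Since the mask has size 1 in its first two dimensions, it is presumably broadcast over the batch and embedding dimensions, so a dropped position loses its whole vector in every sentence of the batch. A small sketch of that broadcast; the [sentence][unit][word] layout is assumed purely for illustration:

#include <vector>

// Broadcast a per-word mask over an embedding tensor laid out as
// x[sentence][embedding unit][word position] (layout assumed for this sketch).
// A zero entry in wordMask zeroes that word's entire embedding vector in
// every sentence; nonzero entries rescale it.
void applyWordMask(std::vector<std::vector<std::vector<float>>>& x,
                   const std::vector<float>& wordMask) {
  for(auto& sentence : x)
    for(auto& row : sentence)
      for(size_t w = 0; w < wordMask.size() && w < row.size(); ++w)
        row[w] *= wordMask[w];
}

With a mask of this shape the same word positions are dropped for all sentences in the batch; per-sentence dropping would need the batch dimension in the mask shape as well.
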
View File

@@ -220,7 +220,7 @@ int main(int argc, char** argv) {
target->load("../benchmark/marian32K/train.tok.true.bpe.de.json", 50000);
auto encdec = New<GNMT>(options);
encdec->load(graph, "../benchmark/marian32K/modelML6.200000.npz");
encdec->load(graph, "../benchmark/marian32K/model8.110000.npz");
graph->reserveWorkspaceMB(128);

View File

@@ -199,6 +199,10 @@ void Config::addOptions(int argc, char** argv, bool doValidate) {
"Enable layer normalization")
("dropout-rnn", po::value<float>()->default_value(0),
"Scaling dropout along rnn layers and time (0 = no dropout)")
("dropout-src", po::value<float>()->default_value(0),
"Dropout source words (0 = no dropout)")
("dropout-trg", po::value<float>()->default_value(0),
"Dropout target words (0 = no dropout)")
;
po::options_description opt("Optimizer options");
@@ -307,6 +311,8 @@ void Config::addOptions(int argc, char** argv, bool doValidate) {
SET_OPTION("no-shuffle", bool);
SET_OPTION("normalize", bool);
SET_OPTION("dropout-rnn", float);
SET_OPTION("dropout-src", float);
SET_OPTION("dropout-trg", float);
SET_OPTION("skip", bool);
if(doValidate)
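
The new options are plain boost::program_options floats defaulting to 0, so word dropout stays disabled unless requested, e.g. with --dropout-src 0.1 --dropout-trg 0.1 on the command line. A minimal, self-contained parsing sketch outside of marian's Config class, reusing only the flag names and defaults defined above:

#include <boost/program_options.hpp>
#include <iostream>

namespace po = boost::program_options;

int main(int argc, char** argv) {
  po::options_description desc("Regularization options");
  desc.add_options()
    ("dropout-rnn", po::value<float>()->default_value(0),
     "Scaling dropout along rnn layers and time (0 = no dropout)")
    ("dropout-src", po::value<float>()->default_value(0),
     "Dropout source words (0 = no dropout)")
    ("dropout-trg", po::value<float>()->default_value(0),
     "Dropout target words (0 = no dropout)");

  po::variables_map vm;
  po::store(po::parse_command_line(argc, argv, desc), vm);
  po::notify(vm);

  // A value of 0 leaves the corresponding dropout path untouched.
  std::cout << "dropout-src: " << vm["dropout-src"].as<float>() << "\n"
            << "dropout-trg: " << vm["dropout-trg"].as<float>() << "\n";
  return 0;
}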