diff --git a/src/bahdanau.h b/src/bahdanau.h
deleted file mode 100644
index f9629f3d..00000000
--- a/src/bahdanau.h
+++ /dev/null
@@ -1,5 +0,0 @@
-#pragma once
-
-#include "bahdanau/model.h"
-#include "bahdanau/encoder.h"
-#include "bahdanau/decoder.h"
diff --git a/src/bahdanau/decoder.h b/src/bahdanau/decoder.h
deleted file mode 100644
index aebedeeb..00000000
--- a/src/bahdanau/decoder.h
+++ /dev/null
@@ -1,256 +0,0 @@
-#pragma once
-
-#include "mblas/matrix.h"
-#include "bahdanau/model.h"
-
-class Decoder {
-  private:
-    template <class Weights>
-    class Embeddings {
-      public:
-        Embeddings(const Weights& model)
-        : w_(model)
-        {}
-
-        void Lookup(mblas::Matrix& Rows, const std::vector<size_t>& ids) {
-          using namespace mblas;
-          Assemble(Rows, w_.E_, ids);
-          Broadcast(_1 + _2, Rows, w_.EB_);
-        }
-
-      private:
-        const Weights& w_;
-    };
-
-    template <class Weights>
-    class RNN {
-      public:
-        RNN(const Weights& model)
-        : w_(model) {}
-
-        void InitializeState(mblas::Matrix& State,
-                             const mblas::Matrix& SourceContext,
-                             const size_t batchSize = 1) {
-          using namespace mblas;
-          CopyRow(Temp1_, SourceContext, 0, 1000);
-          Temp2_.Clear();
-          Temp2_.Resize(batchSize, 1000, 0.0);
-          Broadcast(_1 + _2, Temp2_, Temp1_);
-          Prod(State, Temp2_, w_.Ws_);
-          Broadcast(Tanh(_1 + _2), State, w_.WsB_);
-        }
-
-        mblas::Matrix& GetNextState(mblas::Matrix& State,
-                                    const mblas::Matrix& Embd,
-                                    const mblas::Matrix& PrevState,
-                                    const mblas::Matrix& Context) {
-          using namespace mblas;
-
-          Prod(Z_, Embd, w_.Wz_);
-          Prod(Temp1_, PrevState, w_.Uz_);
-          Prod(Temp2_, Context, w_.Cz_);
-          Element(Logit(_1 + _2 + _3),
-                  Z_, Temp1_, Temp2_);
-
-          Prod(R_, Embd, w_.Wr_);
-          Prod(Temp1_, PrevState, w_.Ur_);
-          Prod(Temp2_, Context, w_.Cr_);
-          Element(Logit(_1 + _2 + _3),
-                  R_, Temp1_, Temp2_);
-
-          Prod(S_, Embd, w_.W_);
-          Broadcast(_1 + _2, S_, w_.B_); // Broadcasting row-wise
-          Prod(Temp1_, Element(_1 * _2, R_, PrevState), w_.U_);
-          Prod(Temp2_, Context, w_.C_);
-
-          Element(Tanh(_1 + _2 + _3), S_, Temp1_, Temp2_);
-
-          Element((1.0 - _1) * _2 + _1 * _3,
-                  Z_, PrevState, S_);
-
-          State.Resize(Z_.Rows(), Z_.Cols());
-          Swap(State, Z_);
-
-          return State;
-        }
-
-      private:
-        // Model matrices
-        const Weights& w_;
-
-        // reused to avoid allocation
-        mblas::Matrix Z_;
-        mblas::Matrix R_;
-        mblas::Matrix S_;
-
-        mblas::Matrix Temp1_;
-        mblas::Matrix Temp2_;
-    };
-
-    template <class Weights>
-    class Alignment {
-      public:
-        Alignment(const Weights& model)
-        : w_(model)
-        {}
-
-        void GetContext(mblas::Matrix& Context,
-                        const mblas::Matrix& SourceContext,
-                        const mblas::Matrix& PrevState) {
-          using namespace mblas;
-
-          Prod(Temp1_, SourceContext, w_.Ua_);
-          Prod(Temp2_, PrevState, w_.Wa_);
-
-          Broadcast(Tanh(_1 + _2), Temp1_, Temp2_);
-
-          Prod(A_, w_.Va_, Temp1_, false, true);
-          size_t rows1 = SourceContext.Rows();
-          size_t rows2 = PrevState.Rows();
-          A_.Reshape(rows2, rows1); // due to broadcasting above
-
-          mblas::Softmax(A_);
-          Prod(Context, A_, SourceContext);
-        }
-
-      private:
-        const Weights& w_;
-
-        mblas::Matrix Temp1_;
-        mblas::Matrix Temp2_;
-        mblas::Matrix A_;
-
-        mblas::Matrix Ones_;
-        mblas::Matrix Sums_;
-    };
-
-    template <class Weights>
-    class Softmax {
-      public:
-        Softmax(const Weights& model)
-        : w_(model), filtered_(false)
-        {}
-
-        void GetProbs(mblas::Matrix& Probs,
-                      const mblas::Matrix& PrevState,
-                      const mblas::Matrix& PrevEmbd,
-                      const mblas::Matrix& Context) {
-
-          using namespace mblas;
-
-          Prod(T_, PrevState, w_.Uo_);
-
-          Prod(Temp1_, PrevEmbd, w_.Vo_);
-          Prod(Temp2_, Context, w_.Co_);
-          Element(_1 + _2 + _3, T_, Temp1_, Temp2_);
-          Broadcast(_1 + _2, T_, w_.UoB_); // Broadcasting row-wise
-          PairwiseReduce(Max(_1, _2), T_);
-
-          if(filtered_) { // use only filtered vocabulary for SoftMax
-            Prod(Probs, T_, FilteredWo_);
-            Broadcast(_1 + _2, Probs, FilteredWoB_); // Broadcasting row-wise
-          }
-          else {
-            Prod(Probs, T_, w_.Wo_);
-            Broadcast(_1 + _2, Probs, w_.WoB_); // Broadcasting row-wise
-          }
-          mblas::Softmax(Probs);
-        }
-
-        void Filter(const std::vector<size_t>& ids) {
-          using namespace mblas;
-
-          Matrix TempWo;
-          Transpose(TempWo, w_.Wo_);
-          Assemble(FilteredWo_, TempWo, ids);
-          Transpose(FilteredWo_);
-
-          Matrix TempWoB;
-          Transpose(TempWoB, w_.WoB_);
-          Assemble(FilteredWoB_, TempWoB, ids);
-          Transpose(FilteredWoB_);
-
-          filtered_ = true;
-        }
-
-      private:
-        const Weights& w_;
-
-        bool filtered_;
-        mblas::Matrix FilteredWo_;
-        mblas::Matrix FilteredWoB_;
-
-        mblas::Matrix T_;
-        mblas::Matrix Temp1_;
-        mblas::Matrix Temp2_;
-
-        mblas::Matrix Ones_;
-        mblas::Matrix Sums_;
-    };
-
-  public:
-    Decoder(const Weights& model)
-    : embeddings_(model.decEmbeddings_),
-      rnn_(model.decRnn_), alignment_(model.decAlignment_),
-      softmax_(model.decSoftmax_)
-    {}
-
-    void EmptyState(mblas::Matrix& State, const mblas::Matrix& SourceContext,
-                    size_t batchSize = 1) {
-      State.Resize(batchSize, 1000);
-      rnn_.InitializeState(State, SourceContext, batchSize);
-    }
-
-    void EmptyEmbedding(mblas::Matrix& Embedding, size_t batchSize = 1) {
-      Embedding.Clear();
-      Embedding.Resize(batchSize, 620, 0);
-    }
-
-    void MakeStep(mblas::Matrix& NextState,
-                  mblas::Matrix& NextEmbeddings,
-                  mblas::Matrix& Probs,
-                  const std::vector<size_t>& batch,
-                  const mblas::Matrix& State,
-                  const mblas::Matrix& Embeddings,
-                  const mblas::Matrix& SourceContext) {
-      GetProbs(Probs, AlignedSourceContext_,
-               State, Embeddings, SourceContext);
-      Lookup(NextEmbeddings, batch);
-      GetNextState(NextState, NextEmbeddings,
-                   State, AlignedSourceContext_);
-    }
-
-    //private:
-
-    void Filter(const std::vector<size_t>& ids) {
-      softmax_.Filter(ids);
-    }
-
-    void GetProbs(mblas::Matrix& Probs,
-                  mblas::Matrix& AlignedSourceContext,
-                  const mblas::Matrix& PrevState,
-                  const mblas::Matrix& PrevEmbedding,
-                  const mblas::Matrix& SourceContext) {
-      alignment_.GetContext(AlignedSourceContext, SourceContext, PrevState);
-      softmax_.GetProbs(Probs, PrevState, PrevEmbedding, AlignedSourceContext);
-    }
-
-    void Lookup(mblas::Matrix& Embedding, const std::vector<size_t>& w) {
-      embeddings_.Lookup(Embedding, w);
-    }
-
-    void GetNextState(mblas::Matrix& State,
-                      const mblas::Matrix& Embedding,
-                      const mblas::Matrix& PrevState,
-                      const mblas::Matrix& AlignedSourceContext) {
-      rnn_.GetNextState(State, Embedding, PrevState, AlignedSourceContext);
-    }
-
-  private:
-    mblas::Matrix AlignedSourceContext_;
-
-    Embeddings<Weights::DecEmbeddings> embeddings_;
-    RNN<Weights::DecRnn> rnn_;
-    Alignment<Weights::DecAlignment> alignment_;
-    Softmax<Weights::DecSoftmax> softmax_;
-};
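Aside: Decoder::RNN::GetNextState above is a GRU conditioned on the attention
context (the Cz_, Cr_ and C_ products), while Alignment::GetContext computes the
Bahdanau attention weights that produce that context. A minimal scalar sketch of
the gated update, with illustrative names only (in the matrix code each term is
a product, and the reset gate scales PrevState before the U_ product):

    #include <cmath>

    // One scalar dimension of the conditional GRU in Decoder::RNN::GetNextState.
    float gruStep(float zPre,   // x*Wz + h*Uz + c*Cz  (update-gate input)
                  float rPre,   // x*Wr + h*Ur + c*Cr  (reset-gate input)
                  float xW,     // x*W + b             (proposal input)
                  float hU,     // h*U                 (recurrent proposal input)
                  float cC,     // c*C                 (context proposal input)
                  float hPrev) {
      auto logit = [](float v) { return 1.0f / (1.0f + std::exp(-v)); };
      float z = logit(zPre);                  // Element(Logit(_1 + _2 + _3), Z_, ...)
      float r = logit(rPre);                  // Element(Logit(_1 + _2 + _3), R_, ...)
      float s = std::tanh(xW + r * hU + cC);  // Element(Tanh(_1 + _2 + _3), S_, ...)
      return (1.0f - z) * hPrev + z * s;      // Element((1.0 - _1) * _2 + _1 * _3, ...)
    }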
diff --git a/src/bahdanau/encoder.h b/src/bahdanau/encoder.h
deleted file mode 100644
index 94533427..00000000
--- a/src/bahdanau/encoder.h
+++ /dev/null
@@ -1,118 +0,0 @@
-#pragma once
-
-#include "mblas/matrix.h"
-#include "bahdanau/model.h"
-
-class Encoder {
-  private:
-    template <class Weights>
-    class Embeddings {
-      public:
-        Embeddings(const Weights& model)
-        : w_(model)
-        {}
-
-        void Lookup(mblas::Matrix& Row, size_t i) {
-          using namespace mblas;
-          CopyRow(Row, w_.E_, i);
-          Element(_1 + _2,
-                  Row, w_.EB_);
-        }
-
-      private:
-        const Weights& w_;
-    };
-
-    template <class Weights>
-    class RNN {
-      public:
-        RNN(const Weights& model)
-        : w_(model) {}
-
-        void InitializeState(size_t batchSize = 1) {
-          State_.Clear();
-          State_.Resize(batchSize, 1000, 0.0);
-        }
-
-        void GetNextState(mblas::Matrix& State,
-                          const mblas::Matrix& Embd,
-                          const mblas::Matrix& PrevState) {
-          using namespace mblas;
-
-          Prod(Za_, Embd, w_.Wz_);
-          Prod(Temp_, PrevState, w_.Uz_);
-          Element(Logit(_1 + _2), Za_, Temp_);
-
-          Prod(Ra_, Embd, w_.Wr_);
-          Prod(Temp_, PrevState, w_.Ur_);
-          Element(Logit(_1 + _2), Ra_, Temp_);
-
-          Prod(Ha_, Embd, w_.W_);
-          Prod(Temp_, Element(_1 * _2, Ra_, PrevState), w_.U_);
-          Element(_1 + _2, Ha_, w_.B_); // Broadcasting row-wise
-          Element(Tanh(_1 + _2), Ha_, Temp_);
-
-          Element((1.0 - _1) * _2 + _1 * _3, Za_, PrevState, Ha_);
-
-          Swap(State, Za_);
-        }
-
-        template <class It>
-        void GetContext(It it, It end,
-                        mblas::Matrix& Context, bool invert) {
-          InitializeState();
-
-          size_t n = std::distance(it, end);
-          size_t i = 0;
-          while(it != end) {
-            GetNextState(State_, *it++, State_);
-            if(invert)
-              mblas::PasteRow(Context, State_, n - i - 1, 1000);
-            else
-              mblas::PasteRow(Context, State_, i, 0);
-            ++i;
-          }
-        }
-
-      private:
-        // Model matrices
-        const Weights& w_;
-
-        // reused to avoid allocation
-        mblas::Matrix Za_;
-        mblas::Matrix Ra_;
-        mblas::Matrix Ha_;
-        mblas::Matrix Temp_;
-        mblas::Matrix State_;
-    };
-
-  public:
-    Encoder(const Weights& model)
-    : embeddings_(model.encEmbeddings_),
-      forwardRnn_(model.encForwardRnn_),
-      backwardRnn_(model.encBackwardRnn_)
-    {}
-
-    void GetContext(const std::vector<size_t>& words,
-                    mblas::Matrix& Context) {
-      std::vector<mblas::Matrix> embeddedWords;
-
-      Context.Resize(words.size(), 2000);
-      for(auto& w : words) {
-        embeddedWords.emplace_back();
-        embeddings_.Lookup(embeddedWords.back(), w);
-      }
-
-      forwardRnn_.GetContext(embeddedWords.begin(),
-                             embeddedWords.end(),
-                             Context, false);
-      backwardRnn_.GetContext(embeddedWords.rbegin(),
-                              embeddedWords.rend(),
-                              Context, true);
-    }
-
-  private:
-    Embeddings<Weights::EncEmbeddings> embeddings_;
-    RNN<Weights::EncForwardRnn> forwardRnn_;
-    RNN<Weights::EncBackwardRnn> backwardRnn_;
-};
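Aside: Encoder::GetContext lays the two RNN passes side by side in one
words x 2000 matrix: the forward pass pastes its states at column offset 0, the
reversed pass at offset 1000 into row n - i - 1, so row i holds both directions'
states for source word i. A CPU sketch of that layout, simplified to nested
vectors (illustrative names, not from the original sources):

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Row i of the result holds [forward_state(i) ; backward_state(i)].
    using Vec = std::vector<float>;

    std::vector<Vec> BuildContext(const std::vector<Vec>& fwd,   // left-to-right states
                                  const std::vector<Vec>& bwd) { // right-to-left states
      std::size_t n = fwd.size(), dim = fwd[0].size();
      std::vector<Vec> context(n, Vec(2 * dim));
      for (std::size_t i = 0; i < n; ++i) {
        // PasteRow(Context, State_, i, 0): forward half at column offset 0.
        std::copy(fwd[i].begin(), fwd[i].end(), context[i].begin());
        // PasteRow(Context, State_, n - i - 1, 1000): state j of the reversed
        // pass describes source word n - j - 1, so row i takes bwd[n - i - 1].
        std::copy(bwd[n - i - 1].begin(), bwd[n - i - 1].end(),
                  context[i].begin() + dim);
      }
      return context;
    }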
diff --git a/src/bahdanau/model.h b/src/bahdanau/model.h
deleted file mode 100644
index ae4445d9..00000000
--- a/src/bahdanau/model.h
+++ /dev/null
@@ -1,169 +0,0 @@
-#pragma once
-
-#include <string>
-#include <vector>
-
-#include "mblas/matrix.h"
-#include "npz_converter.h"
-
-struct Weights {
-
-  //////////////////////////////////////////////////////////////////////////////
-
-  struct EncEmbeddings {
-    EncEmbeddings(const NpzConverter& model)
-    : E_(model["W_0_enc_approx_embdr"]),
-      EB_(model("b_0_enc_approx_embdr", true))
-    {}
-
-    const mblas::Matrix E_;
-    const mblas::Matrix EB_;
-  };
-
-  struct EncForwardRnn {
-    EncForwardRnn(const NpzConverter& model)
-    : W_(model["W_0_enc_input_embdr_0"]),
-      B_(model("b_0_enc_input_embdr_0", true)),
-      U_(model["W_enc_transition_0"]),
-      Wz_(model["W_0_enc_update_embdr_0"]),
-      Uz_(model["G_enc_transition_0"]),
-      Wr_(model["W_0_enc_reset_embdr_0"]),
-      Ur_(model["R_enc_transition_0"])
-    {}
-
-    const mblas::Matrix W_;
-    const mblas::Matrix B_;
-    const mblas::Matrix U_;
-    const mblas::Matrix Wz_;
-    const mblas::Matrix Uz_;
-    const mblas::Matrix Wr_;
-    const mblas::Matrix Ur_;
-  };
-
-  struct EncBackwardRnn {
-    EncBackwardRnn(const NpzConverter& model)
-    : W_(model["W_0_back_enc_input_embdr_0"]),
-      B_(model("b_0_back_enc_input_embdr_0", true)),
-      U_(model["W_back_enc_transition_0"]),
-      Wz_(model["W_0_back_enc_update_embdr_0"]),
-      Uz_(model["G_back_enc_transition_0"]),
-      Wr_(model["W_0_back_enc_reset_embdr_0"]),
-      Ur_(model["R_back_enc_transition_0"])
-    {}
-
-    const mblas::Matrix W_;
-    const mblas::Matrix B_;
-    const mblas::Matrix U_;
-    const mblas::Matrix Wz_;
-    const mblas::Matrix Uz_;
-    const mblas::Matrix Wr_;
-    const mblas::Matrix Ur_;
-  };
-
-  //////////////////////////////////////////////////////////////////////////////
-
-  struct DecEmbeddings {
-    DecEmbeddings(const NpzConverter& model)
-    : E_(model["W_0_dec_approx_embdr"]),
-      EB_(model("b_0_dec_approx_embdr", true))
-    {}
-
-    const mblas::Matrix E_;
-    const mblas::Matrix EB_;
-  };
-
-  struct DecRnn {
-    DecRnn(const NpzConverter& model)
-    : Ws_(model["W_0_dec_initializer_0"]),
-      WsB_(model("b_0_dec_initializer_0", true)),
-
-      W_(model["W_0_dec_input_embdr_0"]),
-      B_(model("b_0_dec_input_embdr_0", true)),
-      U_(model["W_dec_transition_0"]),
-      C_(model["W_0_dec_dec_inputter_0"]),
-
-      Wz_(model["W_0_dec_update_embdr_0"]),
-      Uz_(model["G_dec_transition_0"]),
-      Cz_(model["W_0_dec_dec_updater_0"]),
-
-      Wr_(model["W_0_dec_reset_embdr_0"]),
-      Ur_(model["R_dec_transition_0"]),
-      Cr_(model["W_0_dec_dec_reseter_0"])
-    {}
-
-    const mblas::Matrix Ws_;
-    const mblas::Matrix WsB_;
-    const mblas::Matrix W_;
-    const mblas::Matrix B_;
-    const mblas::Matrix U_;
-    const mblas::Matrix C_;
-    const mblas::Matrix Wz_;
-    const mblas::Matrix Uz_;
-    const mblas::Matrix Cz_;
-    const mblas::Matrix Wr_;
-    const mblas::Matrix Ur_;
-    const mblas::Matrix Cr_;
-  };
-
-  struct DecAlignment {
-    DecAlignment(const NpzConverter& model)
-    : Va_(model("D_dec_transition_0", true)),
-      Wa_(model["B_dec_transition_0"]),
-      Ua_(model["A_dec_transition_0"])
-    {}
-
-    const mblas::Matrix Va_;
-    const mblas::Matrix Wa_;
-    const mblas::Matrix Ua_;
-  };
-
-  struct DecSoftmax {
-    DecSoftmax(const NpzConverter& model)
-    : WoB_(model("b_dec_deep_softmax", true)),
-      Uo_(model["W_0_dec_hid_readout_0"]),
-      UoB_(model("b_0_dec_hid_readout_0", true)),
-      Vo_(model["W_0_dec_prev_readout_0"]),
-      Co_(model["W_0_dec_repr_readout"])
-    {
-      const mblas::Matrix Wo1_(model["W1_dec_deep_softmax"]);
-      const mblas::Matrix Wo2_(model["W2_dec_deep_softmax"]);
-      mblas::Prod(const_cast<mblas::Matrix&>(Wo_), Wo1_, Wo2_);
-    }
-
-    const mblas::Matrix Wo_;
-    const mblas::Matrix WoB_;
-    const mblas::Matrix Uo_;
-    const mblas::Matrix UoB_;
-    const mblas::Matrix Vo_;
-    const mblas::Matrix Co_;
-  };
-
-  Weights(const std::string& npzFile, size_t device = 0)
-  : Weights(NpzConverter(npzFile), device)
-  {}
-
-  Weights(const NpzConverter& model, size_t device = 0)
-  : encEmbeddings_(model),
-    decEmbeddings_(model),
-    encForwardRnn_(model),
-    encBackwardRnn_(model),
-    decRnn_(model),
-    decAlignment_(model),
-    decSoftmax_(model),
-    device_(device)
-  {}
-
-  size_t GetDevice() {
-    return device_;
-  }
-
-  const EncEmbeddings encEmbeddings_;
-  const DecEmbeddings decEmbeddings_;
-  const EncForwardRnn encForwardRnn_;
-  const EncBackwardRnn encBackwardRnn_;
-  const DecRnn decRnn_;
-  const DecAlignment decAlignment_;
-  const DecSoftmax decSoftmax_;
-
-  const size_t device_;
-};
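Aside: DecSoftmax folds the two-layer deep-output projection into a single
matrix at load time. Since no nonlinearity sits between the two products,
T * (Wo1 * Wo2) = (T * Wo1) * Wo2 by associativity, so precomputing
Wo_ = Wo1_ * Wo2_ once in the constructor saves one GEMM on every decoding
step at the cost of a single GEMM at model load.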
diff --git a/src/decoder.bah/decoder_main.cu b/src/decoder.bah/decoder_main.cu
deleted file mode 100644
index 360ee02f..00000000
--- a/src/decoder.bah/decoder_main.cu
+++ /dev/null
@@ -1,95 +0,0 @@
-#include <cstdlib>
-#include <cstdio>
-#include <string>
-#include <iostream>
-#include <fstream>
-#include <algorithm>
-#include <memory>
-#include <vector>
-#include <boost/timer/timer.hpp>
-#include <boost/program_options.hpp>
-
-#include "bahdanau/model.h"
-#include "vocab.h"
-#include "decoder/nmt_decoder.h"
-
-
-void ProgramOptions(int argc, char *argv[],
-                    std::string& modelPath,
-                    std::string& svPath,
-                    std::string& tvPath,
-                    size_t& beamsize,
-                    size_t& device) {
-  bool help = false;
-
-  namespace po = boost::program_options;
-  po::options_description cmdline_options("Allowed options");
-  cmdline_options.add_options()
-    ("beamsize,b", po::value<size_t>(&beamsize)->default_value(10),
-     "Beam size")
-    ("device,d", po::value<size_t>(&device)->default_value(0),
-     "CUDA Device")
-    ("model,m", po::value<std::string>(&modelPath)->required(),
-     "Path to a model")
-    ("source,s", po::value<std::string>(&svPath)->required(),
-     "Path to a source vocab file.")
-    ("target,t", po::value<std::string>(&tvPath)->required(),
-     "Path to a target vocab file.")
-    ("help,h", po::value<bool>(&help)->zero_tokens()->default_value(false),
-     "Print this help message and exit.")
-  ;
-  po::variables_map vm;
-  try {
-    po::store(po::command_line_parser(argc, argv).
-              options(cmdline_options).run(), vm);
-    po::notify(vm);
-  } catch (std::exception& e) {
-    std::cout << "Error: " << e.what() << std::endl << std::endl;
-
-    std::cout << "Usage: " + std::string(argv[0]) + " [options]" << std::endl;
-    std::cout << cmdline_options << std::endl;
-    exit(0);
-  }
-
-  if (help) {
-    std::cout << "Usage: " + std::string(argv[0]) + " [options]" << std::endl;
-    std::cout << cmdline_options << std::endl;
-    exit(0);
-  }
-}
-
-int main(int argc, char* argv[]) {
-  std::string modelPath, srcVocabPath, trgVocabPath;
-  size_t device = 0;
-  size_t beamsize = 10;
-  ProgramOptions(argc, argv, modelPath, srcVocabPath, trgVocabPath, beamsize, device);
-  std::cerr << "Using device GPU" << device << std::endl;
-  cudaSetDevice(device);
-  std::cerr << "Loading model... ";
-  std::shared_ptr<Weights> model(new Weights(modelPath));
-  std::shared_ptr<Vocab> srcVocab(new Vocab(srcVocabPath));
-  std::shared_ptr<Vocab> trgVocab(new Vocab(trgVocabPath));
-  std::cerr << "done." << std::endl;
-
-  NMTDecoder decoder(model, srcVocab, trgVocab, beamsize);
-
-  std::cerr << "Start translating...\n";
-
-  std::ios_base::sync_with_stdio(false);
-
-  std::string line;
-  boost::timer::cpu_timer timer;
-  while(std::getline(std::cin, line)) {
-    auto result = decoder.translate(line);
-    for (auto it = result.rbegin(); it != result.rend(); ++it) {
-      std::string word = (*trgVocab)[*it];
-      if(it != result.rbegin())
-        std::cout << " ";
-      if(word != "</s>")
-        std::cout << word;
-    }
-    std::cout << std::endl;
-  }
-  std::cerr << timer.format() << std::endl;
-  return 0;
-}
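Aside: a typical invocation of this main, reading source sentences from stdin
and writing translations to stdout (binary name and paths are illustrative; the
flags are the ones registered in ProgramOptions):

    ./decoder_main -m search_model.npz -s vocab.en.txt -t vocab.de.txt -b 10 -d 0 < input.en > output.txt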
("source,s", po::value(&svPath)->required(), - "Path to a source vocab file.") - ("target,t", po::value(&tvPath)->required(), - "Path to a target vocab file.") - ("help,h", po::value(&help)->zero_tokens()->default_value(false), - "Print this help message and exit.") - ; - po::variables_map vm; - try { - po::store(po::command_line_parser(argc, argv). - options(cmdline_options).run(), vm); - po::notify(vm); - } catch (std::exception& e) { - std::cout << "Error: " << e.what() << std::endl << std::endl; - - std::cout << "Usage: " + std::string(argv[0]) + " [options]" << std::endl; - std::cout << cmdline_options << std::endl; - exit(0); - } - - if (help) { - std::cout << "Usage: " + std::string(argv[0]) + " [options]" << std::endl; - std::cout << cmdline_options << std::endl; - exit(0); - } -} - -int main(int argc, char* argv[]) { - std::string modelPath, srcVocabPath, trgVocabPath; - size_t device = 0; - size_t beamsize = 10; - ProgramOptions(argc, argv, modelPath, srcVocabPath, trgVocabPath, beamsize, device); - std::cerr << "Using device GPU" << device << std::endl;; - cudaSetDevice(device); - std::cerr << "Loading model... "; - std::shared_ptr model(new Weights(modelPath)); - std::shared_ptr srcVocab(new Vocab(srcVocabPath)); - std::shared_ptr trgVocab(new Vocab(trgVocabPath)); - std::cerr << "done." << std::endl; - - NMTDecoder decoder(model, srcVocab, trgVocab, beamsize); - - std::cerr << "Start translating...\n"; - - std::ios_base::sync_with_stdio(false); - - std::string line; - boost::timer::cpu_timer timer; - while(std::getline(std::cin, line)) { - auto result = decoder.translate(line); - for (auto it = result.rbegin(); it != result.rend(); ++it) { - std::string word = (*trgVocab)[*it]; - if(it != result.rbegin()) - std::cout << " "; - if(word != "") - std::cout << word; - } - std::cout << std::endl; - } - std::cerr << timer.format() << std::endl; - return 0; -} diff --git a/src/decoder.bah/hypothesis.h b/src/decoder.bah/hypothesis.h deleted file mode 100644 index 92a14c42..00000000 --- a/src/decoder.bah/hypothesis.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once - -#include -#include -#include - -class Hypothesis { - public: - Hypothesis(size_t word, size_t prev, float cost) - : prev_(prev), - word_(word), - cost_(cost) { - } - - size_t GetWord() const { - return word_; - } - - size_t GetPrevStateIndex() const { - return prev_; - } - - float GetCost() const { - return cost_; - } - - private: - const size_t prev_; - const size_t word_; - const float cost_; -}; - diff --git a/src/decoder.bah/hypothesis_manager.h b/src/decoder.bah/hypothesis_manager.h deleted file mode 100644 index f81ebece..00000000 --- a/src/decoder.bah/hypothesis_manager.h +++ /dev/null @@ -1,75 +0,0 @@ -#pragma once - -#include -#include - -#include "decoder/hypothesis.h" - -class HypothesisManager { - using Hypotheses = std::vector; - public: - HypothesisManager(size_t beamSize, size_t EOSIndex) - : beamSize_(beamSize), - EOSIndex_(EOSIndex), - baseIndex_(0) { - hypotheses_.emplace_back(0, 0, 0); - } - - void AddHypotheses(const Hypotheses& hypos) { - size_t nextBaseIndex = hypotheses_.size(); - for (const auto& hypo : hypos) { - if (hypo.GetWord() == EOSIndex_) { - completedHypotheses_.emplace_back(hypo.GetWord(), - hypo.GetPrevStateIndex() + baseIndex_, - hypo.GetCost()); - } else { - hypotheses_.emplace_back(hypo.GetWord(), hypo.GetPrevStateIndex() + baseIndex_, - hypo.GetCost()); - } - } - baseIndex_ = nextBaseIndex; - } - - std::vector GetBestTranslation() { - size_t bestHypoId = 0; - for (size_t i = 0; i < 
diff --git a/src/decoder.bah/nmt_decoder.h b/src/decoder.bah/nmt_decoder.h
deleted file mode 100644
index 4bcaa316..00000000
--- a/src/decoder.bah/nmt_decoder.h
+++ /dev/null
@@ -1,175 +0,0 @@
-#pragma once
-
-#include <vector>
-#include <string>
-#include <memory>
-#include <algorithm>
-#include <limits>
-
-#include <thrust/device_vector.h>
-#include <thrust/host_vector.h>
-#include <thrust/functional.h>
-#include <thrust/sequence.h>
-#include <thrust/sort.h>
-#include <thrust/copy.h>
-#include <thrust/execution_policy.h>
-
-#include "common/vocab.h"
-#include "bahdanau/encoder.h"
-#include "bahdanau/decoder.h"
-#include "bahdanau/model.h"
-#include "common/utils.h"
-#include "mblas/matrix.h"
-#include "decoder/hypothesis_manager.h"
-
-
-using namespace thrust::placeholders;
-
-class NMTDecoder {
-  using Words = std::vector<size_t>;
-  using Hypotheses = std::vector<Hypothesis>;
-  public:
-    NMTDecoder(
-        std::shared_ptr<Weights> model,
-        std::shared_ptr<Vocab> srcVocab,
-        std::shared_ptr<Vocab> trgVocab,
-        const size_t beamSize=1)
-    : model_(model),
-      srcVocab_(srcVocab),
-      trgVocab_(trgVocab),
-      encoder_(new Encoder(*model_)),
-      decoder_(new Decoder(*model_)),
-      beamSize_(beamSize),
-      Costs_() {
-    }
-
-    Words translate(std::string& sentence) {
-      size_t sourceSentenceLength = prepareSourceSentence(sentence);
-      prepareDecoder();
-
-      size_t batchSize = beamSize_;
-      Costs_.Clear();
-      Costs_.Resize(batchSize, 1, 0.0);
-      HypothesisManager hypoManager(batchSize, (*trgVocab_)["</s>"]);
-
-      mblas::Matrix Probs;
-
-      for(size_t len = 0; len < 3 * sourceSentenceLength; ++len) {
-        std::vector<size_t> bestWordIndices, bestWordHyps;
-        decoder_->GetProbs(Probs, AlignedSourceContext_,
-                           PrevState_, PrevEmbedding_, SourceContext_);
-
-        // Moved here; maybe the decoder should do this.
-        Element(Log(_1), Probs);
-
-        // Ugly, but GH has this too. Helps a bit with larger beam
-        // sizes, but there is still a problem somewhere.
-        if(len < sourceSentenceLength * 0.5) {
-          size_t eol = (*trgVocab_)["</s>"];
-          for(size_t i = 0; i < Probs.Rows(); ++i) {
-            Probs.Set(i, eol, std::numeric_limits<float>::lowest());
-          }
-        }
-
-        auto bestHypos = GetBestExtensions(Probs, batchSize);
-        hypoManager.AddHypotheses(bestHypos);
-
-        size_t cidx = 0;
-        std::vector<size_t> costIndeces;
-        for (auto& best: bestHypos) {
-          if (best.GetWord() != (*trgVocab_)["</s>"]) {
-            bestWordIndices.push_back(best.GetWord());
-            bestWordHyps.push_back(best.GetPrevStateIndex());
-            costIndeces.push_back(cidx);
-          } else {
-            //std::cerr << "Finished at " << Costs_(0, cidx) << std::endl;
-            --batchSize;
-          }
-          cidx++;
-        }
-
-        if (batchSize <= 0)
-          break;
-
-        // Make this conditional.
-        mblas::Matrix CostsTemp;
-        mblas::Assemble(CostsTemp, Costs_, costIndeces);
-        mblas::Swap(Costs_, CostsTemp);
-        //mblas::debug1(Costs_);
-
-        decoder_->Lookup(Embedding_, bestWordIndices);
-        Assemble(BestState_, PrevState_, bestWordHyps);
-        decoder_->GetNextState(State_, Embedding_,
-                               BestState_, AlignedSourceContext_);
-
-        mblas::Swap(State_, PrevState_);
-        mblas::Swap(Embedding_, PrevEmbedding_);
-      }
-
-      return hypoManager.GetBestTranslation();
-    }
-
-  private:
-    size_t prepareSourceSentence(std::string& sentence) {
-      Trim(sentence);
-      std::vector<std::string> tokens;
-      Split(sentence, tokens, " ");
-      auto encoded_tokens = srcVocab_->Encode(tokens, true);
-      encoder_->GetContext(encoded_tokens, SourceContext_);
-      return encoded_tokens.size();
-    }
-
-    Hypotheses GetBestExtensions(mblas::Matrix& Probs, size_t batchSize) {
-      Hypotheses hypos;
-
-      // One kernel. Certainly not transposing the huge
-      // batchSize * vocab matrix twice.
-      Costs_.Reshape(1, batchSize);
-      Broadcast(_1 + _2, Transpose(Probs), Costs_);
-      Costs_.Reshape(batchSize, 1);
-      Transpose(Probs);
-
-      size_t probSize = Probs.Cols() * Probs.Rows();
-      thrust::device_vector<size_t> keys(probSize);
-      thrust::sequence(keys.begin(), keys.end());
-
-      // Sorting in reverse order would be worthwhile; it would save the
-      // shuffling below.
-      thrust::sort_by_key(Probs.begin(), Probs.end(), keys.begin());
-      // OK, this probably uses thrust::copy? Check.
-      thrust::host_vector<size_t> bestKeys(keys.end() - batchSize, keys.end());
-
-      HypothesisManager hypoManager(batchSize, (*trgVocab_)["</s>"]);
-
-      // Use thrust::copy to make two copies, one into Costs, one into a vector
-      // on the CPU; then do the second step on the CPU.
-      for (size_t i = 0; i < bestKeys.size(); ++i) {
-        Costs_.GetVec()[i] = Probs.GetVec()[probSize - batchSize + i];
-        hypos.emplace_back(bestKeys[i] % Probs.Cols(), bestKeys[i] / Probs.Cols(),
-                           Probs.GetVec()[probSize - batchSize + i]);
-      }
-
-      return hypos;
-
-    }
-
-    void prepareDecoder() {
-      decoder_->EmptyState(PrevState_, SourceContext_, 1);
-      decoder_->EmptyEmbedding(PrevEmbedding_, 1);
-    }
-
-  protected:
-    std::shared_ptr<Weights> model_;
-    std::shared_ptr<Vocab> srcVocab_;
-    std::shared_ptr<Vocab> trgVocab_;
-    std::shared_ptr<Encoder> encoder_;
-    std::shared_ptr<Decoder> decoder_;
-    const size_t beamSize_;
-    mblas::Matrix SourceContext_;
-    mblas::Matrix PrevState_;
-    mblas::Matrix PrevEmbedding_;
-    mblas::Matrix BestState_;
-    mblas::Matrix Costs_;
-
-    mblas::Matrix AlignedSourceContext_;
-
-    mblas::Matrix State_;
-    mblas::Matrix Embedding_;
-
-};
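Aside: GetBestExtensions adds each live hypothesis's running cost to its row of
log-probabilities, flattens the batchSize x vocab matrix, and keeps the
batchSize largest entries; key % vocab then recovers the word and key / vocab
the parent hypothesis. A CPU sketch of the same selection with illustrative
names (the deleted code does it on the GPU with thrust::sort_by_key):

    #include <algorithm>
    #include <cstddef>
    #include <numeric>
    #include <vector>

    // Pick the beam best (hypothesis, word) pairs from a flattened
    // [batch x vocab] matrix of per-step log-probabilities.
    struct Ext { std::size_t hyp, word; float cost; };

    std::vector<Ext> BestExtensions(const std::vector<float>& costs,    // per live hypothesis
                                    const std::vector<float>& logProbs, // batch * vocab entries
                                    std::size_t vocab, std::size_t beam) {
      std::vector<std::size_t> keys(logProbs.size());
      std::iota(keys.begin(), keys.end(), 0);   // like thrust::sequence over indices
      auto total = [&](std::size_t k) { return costs[k / vocab] + logProbs[k]; };
      std::partial_sort(keys.begin(), keys.begin() + beam, keys.end(),
                        [&](std::size_t a, std::size_t b) { return total(a) > total(b); });
      std::vector<Ext> best;
      for (std::size_t i = 0; i < beam; ++i)    // key -> (parent, word), as in
        best.push_back({keys[i] / vocab,        // bestKeys[i] / and % Probs.Cols()
                        keys[i] % vocab, total(keys[i])});
      return best;
    }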
diff --git a/src/decoder.bah/result.h b/src/decoder.bah/result.h
deleted file mode 100644
index dda0e1f2..00000000
--- a/src/decoder.bah/result.h
+++ /dev/null
@@ -1,15 +0,0 @@
-#pragma once
-
-#include <cstddef>
-
-struct Result {
-  Result(const size_t state, const size_t word, const float score)
-    : state(state),
-      word(word),
-      score(score) {
-  }
-
-  size_t state;
-  size_t word;
-  float score;
-};
diff --git a/src/decoder/search.h b/src/decoder/search.h
index 66f5b18f..b1603302 100644
--- a/src/decoder/search.h
+++ b/src/decoder/search.h
@@ -106,7 +106,9 @@ class Search {
       return history;
     }
 
-    void BestHyps(Beam& bestHyps, const Beam& prevHyps, std::vector<mblas::Matrix*>& ProbsEnsemble, const size_t beamSize) {
+    void BestHyps(Beam& bestHyps, const Beam& prevHyps,
+                  std::vector<mblas::Matrix*>& ProbsEnsemble,
+                  const size_t beamSize) {
       using namespace mblas;
 
       Matrix& Probs = *ProbsEnsemble[0];
diff --git a/src/test/test.cu b/src/test/test.cu
deleted file mode 100644
index e00dd952..00000000
--- a/src/test/test.cu
+++ /dev/null
@@ -1,102 +0,0 @@
-#include <cstdlib>
-#include <iostream>
-#include <string>
-#include <algorithm>
-#include <boost/timer/timer.hpp>
-#include <boost/algorithm/string.hpp>
-
-#include "mblas/matrix.h"
-#include "bahdanau.h"
-#include "vocab.h"
-
-#include "states.h"
-
-using namespace mblas;
-
-int main(int argc, char** argv) {
-  size_t device = 0;
-
-  if(argc > 1) {
-    if(std::string(argv[1]) == "1")
-      device = 1;
-    else if(std::string(argv[1]) == "2")
-      device = 2;
-  }
-
-  std::cerr << device << std::endl;
-  cudaSetDevice(device);
-
-  std::string source = "thank you .";
-  std::string target = "vielen dank .";
-  //std::string source = "you know , one of the intense pleasures of travel and one of the delights of ethnographic research is the opportunity to live amongst those who have not forgotten the old ways , who still feel their past in the wind , touch it in stones polished by rain , taste it in the bitter leaves of plants .";
-  //std::string target = "wissen sie , eine der intensiven freuden des reisens und eine der freuden der ethnografischen forschung ist die chance zu leben unter jenen , die die alten wege nicht vergessen haben , die immer noch ihre vergangenheit im wind spüren , berühren sie in steine poliert durch regen , schmecken sie in den bitteren blätter der pflanzen .";
-
-  std::cerr << "Loading model" << std::endl;
-  Weights weights("/home/marcinj/Badania/best_nmt/search_model.npz", device);
-  Vocab svcb("/home/marcinj/Badania/best_nmt/vocab/en_de.en.txt");
-  Vocab tvcb("/home/marcinj/Badania/best_nmt/vocab/en_de.de.txt");
-
-  std::cerr << "Creating encoder" << std::endl;
-  Encoder encoder(weights);
-  std::cerr << "Creating decoder" << std::endl;
-  Decoder decoder(weights);
-
-  std::vector<std::string> sourceSplit;
-  boost::split(sourceSplit, source, boost::is_any_of(" "),
-               boost::token_compress_on);
-
-  std::cerr << "Source: " << std::endl;
-  std::vector<size_t> sWords(sourceSplit.size());
-  std::transform(sourceSplit.begin(), sourceSplit.end(), sWords.begin(),
-                 [&](const std::string& w) { std::cerr << svcb[w] << ", "; return svcb[w]; });
-  sWords.push_back(svcb["</s>"]);
-  std::cerr << svcb["</s>"] << std::endl;
-
-  typedef std::vector<size_t> Batch;
-
-  std::vector<std::string> targetSplit;
-  boost::split(targetSplit, target, boost::is_any_of(" "),
-               boost::token_compress_on);
-
-  std::cerr << "Target: " << std::endl;
-  size_t bs = 1000;
-  std::vector<std::vector<size_t>> tWordsBatch(targetSplit.size());
-  std::transform(targetSplit.begin(), targetSplit.end(), tWordsBatch.begin(),
-                 [&](const std::string& w) { std::cerr << tvcb[w] << ", "; return Batch(bs, tvcb[w]); });
-  tWordsBatch.push_back(Batch(bs, tvcb["</s>"]));
-  std::cerr << tvcb["</s>"] << std::endl;
-
-  mblas::Matrix SourceContext;
-  encoder.GetContext(sWords, SourceContext);
-
-  mblas::Matrix State, NextState;
-  mblas::Matrix Embeddings, NextEmbeddings;
-  mblas::Matrix Probs;
-
-  std::cerr << "Testing" << std::endl;
-  boost::timer::auto_cpu_timer timer;
-  size_t batchSize = tWordsBatch[0].size();
-
-  for(size_t i = 0; i < 1; ++i) {
-    decoder.EmptyState(State, SourceContext, batchSize);
-    decoder.EmptyEmbedding(Embeddings, batchSize);
-
-    float sum = 0;
-    for(auto batch : tWordsBatch) {
-      decoder.MakeStep(NextState, NextEmbeddings, Probs,
-                       batch, State, Embeddings, SourceContext);
-
-      for(size_t i = 0; i < 1; ++i) {
-        float p = Probs(i, batch[i]);
-        std::cerr << log(p) << " ";
-        if(i == 0) {
-          sum += log(p);
-        }
-      }
-
-      mblas::Swap(Embeddings, NextEmbeddings);
-      mblas::Swap(State, NextState);
-    }
-    std::cerr << i << " " << sum << std::endl;
-  }
-}
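Aside: both test programs force-decode a fixed reference translation: at each
step the decoder is fed the reference token, and the log-probability the model
assigned to that token is accumulated, so the printed sum is the model's score
for the sentence pair, presumably a quick regression check against a known-good
value. The batch of 1000 identical rows exercises the batched code path while
only row 0 contributes to the sum.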
std::cerr << "Creating encoder" << std::endl; - Encoder encoder(weights); - std::cerr << "Creating decoder" << std::endl; - Decoder decoder(weights); - - std::vector sourceSplit; - boost::split(sourceSplit, source, boost::is_any_of(" "), - boost::token_compress_on); - - std::cerr << "Source: " << std::endl; - std::vector sWords(sourceSplit.size()); - std::transform(sourceSplit.begin(), sourceSplit.end(), sWords.begin(), - [&](const std::string& w) { std::cerr << svcb[w] << ", "; return svcb[w]; }); - sWords.push_back(svcb[""]); - std::cerr << svcb[""] << std::endl; - - typedef std::vector Batch; - - std::vector targetSplit; - boost::split(targetSplit, target, boost::is_any_of(" "), - boost::token_compress_on); - - std::cerr << "Target: " << std::endl; - size_t bs = 1000; - std::vector> tWordsBatch(targetSplit.size()); - std::transform(targetSplit.begin(), targetSplit.end(), tWordsBatch.begin(), - [&](const std::string& w) { std::cerr << tvcb[w] << ", "; return Batch(bs, tvcb[w]); }); - tWordsBatch.push_back(Batch(bs, tvcb[""])); - std::cerr << tvcb[""] << std::endl; - - mblas::Matrix SourceContext; - encoder.GetContext(sWords, SourceContext); - - mblas::Matrix State, NextState; - mblas::Matrix Embeddings, NextEmbeddings; - mblas::Matrix Probs; - - std::cerr << "Testing" << std::endl; - boost::timer::auto_cpu_timer timer; - size_t batchSize = tWordsBatch[0].size(); - - for(size_t i = 0; i < 1; ++i) { - decoder.EmptyState(State, SourceContext, batchSize); - decoder.EmptyEmbedding(Embeddings, batchSize); - - float sum = 0; - for(auto batch : tWordsBatch) { - decoder.MakeStep(NextState, NextEmbeddings, Probs, - batch, State, Embeddings, SourceContext); - - for(size_t i = 0; i < 1; ++i) { - float p = Probs(i, batch[i]); - std:: cerr << log(p) << " "; - if(i == 0) { - sum += log(p); - } - } - - mblas::Swap(Embeddings, NextEmbeddings); - mblas::Swap(State, NextState); - } - std::cerr << i << " " << sum << std::endl; - } -} diff --git a/src/test/test.dl4mt.cu b/src/test/test.dl4mt.cu deleted file mode 100644 index d0224135..00000000 --- a/src/test/test.dl4mt.cu +++ /dev/null @@ -1,100 +0,0 @@ -#include -#include -#include -#include -#include -#include - -#include "mblas/matrix.h" -#include "dl4mt.h" -#include "vocab.h" - -using namespace mblas; - -int main(int argc, char** argv) { - size_t device = 0; - - if(argc > 1) { - if(std::string(argv[1]) == "1") - device = 1; - else if(std::string(argv[1]) == "2") - device = 2; - } - - std::cerr << device << std::endl; - cudaSetDevice(device); - - std::string source = "thank you ."; - std::string target = "vielen Dank ."; - - std::cerr << "Loading model" << std::endl; - Weights weights("testmodel/model.npz", device); - - Vocab svcb("testmodel/vocab.en.txt"); - Vocab tvcb("testmodel/vocab.de.txt"); - - std::cerr << "Creating encoder" << std::endl; - Encoder encoder(weights); - - std::cerr << "Creating decoder" << std::endl; - Decoder decoder(weights); - - std::vector sourceSplit; - boost::split(sourceSplit, source, boost::is_any_of(" "), - boost::token_compress_on); - - std::cerr << "Source: " << std::endl; - std::vector sWords(sourceSplit.size()); - std::transform(sourceSplit.begin(), sourceSplit.end(), sWords.begin(), - [&](const std::string& w) { std::cerr << svcb[w] << ", "; return svcb[w]; }); - sWords.push_back(svcb[""]); - std::cerr << svcb[""] << std::endl; - - typedef std::vector Batch; - - std::vector targetSplit; - boost::split(targetSplit, target, boost::is_any_of(" "), - boost::token_compress_on); - - std::cerr << "Target: " << std::endl; - 
-  size_t bs = 1000;
-
-  std::vector<std::vector<size_t>> tWordsBatch(targetSplit.size());
-  std::transform(targetSplit.begin(), targetSplit.end(), tWordsBatch.begin(),
-                 [&](const std::string& w) { std::cerr << tvcb[w] << ", "; return Batch(bs, tvcb[w]); });
-  tWordsBatch.push_back(Batch(bs, tvcb["</s>"]));
-  std::cerr << tvcb["</s>"] << std::endl;
-
-  mblas::Matrix SourceContext;
-  encoder.GetContext(sWords, SourceContext);
-
-  mblas::Matrix State, NextState;
-  mblas::Matrix Embeddings, NextEmbeddings;
-  mblas::Matrix Probs;
-
-  std::cerr << "Testing" << std::endl;
-  boost::timer::auto_cpu_timer timer;
-  size_t batchSize = tWordsBatch[0].size();
-
-  for(size_t i = 0; i < 1; ++i) {
-    decoder.EmptyState(State, SourceContext, batchSize);
-    decoder.EmptyEmbedding(Embeddings, batchSize);
-
-    float sum = 0;
-    for(auto batch : tWordsBatch) {
-      decoder.MakeStep(NextState, Probs,
-                       State, Embeddings, SourceContext);
-      decoder.Lookup(NextEmbeddings, batch);
-      for(size_t i = 0; i < 1; ++i) {
-        float p = Probs(i, batch[i]);
-        if(i == 0) {
-          sum += log(p);
-        }
-      }
-
-      mblas::Swap(Embeddings, NextEmbeddings);
-      mblas::Swap(State, NextState);
-    }
-    std::cerr << i << " " << sum << std::endl;
-  }
-}