diff --git a/src/chainable.h b/src/chainable.h index 885efdbd..a1683966 100644 --- a/src/chainable.h +++ b/src/chainable.h @@ -18,7 +18,7 @@ struct Chainable { virtual void allocate(size_t) = 0; virtual std::string graphviz() = 0; - + virtual const std::string &name() const = 0; virtual const Shape& shape() = 0; virtual DataType &val() = 0; @@ -33,4 +33,4 @@ typedef std::shared_ptr ChainableStackPtr; typedef std::shared_ptr> ChainPtr; -} \ No newline at end of file +} diff --git a/src/expression_graph.cu b/src/expression_graph.cu index 61f8d2b5..22de1c89 100644 --- a/src/expression_graph.cu +++ b/src/expression_graph.cu @@ -37,5 +37,14 @@ std::string Expr::Debug() const strm << marian::Debug(shape); return strm.str(); } - + +/////////////////////////////////////////////////////// +ExpressionGraph::ExpressionGraph(int cudaDevice) +: stack_(new ChainableStack) +{ + std::srand (time(NULL)); + cudaSetDevice(0); + +} + } diff --git a/src/expression_graph.h b/src/expression_graph.h index 7a766679..02ee0117 100644 --- a/src/expression_graph.h +++ b/src/expression_graph.h @@ -38,9 +38,7 @@ class Expr { class ExpressionGraph { public: - ExpressionGraph() - : stack_(new ChainableStack) - {} + ExpressionGraph(int cudaDevice); void forward(size_t batchSize) { for(auto&& v : *stack_) { @@ -54,10 +52,10 @@ class ExpressionGraph { std::stringstream ss; ss << "digraph ExpressionGraph {" << std::endl; ss << "rankdir=BT" << std::endl; - typedef typename ChainableStack::reverse_iterator It; - for(It it = stack_->rbegin(); it != stack_->rend(); ++it) + for(It it = stack_->rbegin(); it != stack_->rend(); ++it) { ss << (*it)->graphviz(); + } ss << "}" << std::endl; return ss.str(); } diff --git a/src/node.h b/src/node.h index 29d240cd..dfdaca00 100644 --- a/src/node.h +++ b/src/node.h @@ -67,6 +67,8 @@ class Node : public Chainable, virtual const Shape& shape() { return shape_; } + + const std::string &name() const { return name_; } protected: Shape shape_; diff --git a/src/sgd.cu b/src/sgd.cu index 5fe69138..598d9f6b 100644 --- a/src/sgd.cu +++ b/src/sgd.cu @@ -23,8 +23,6 @@ SGD::SGD(ExpressionGraph& g, float eta, void SGD::Run() { - std::srand ( unsigned ( std::time(0) ) ); - size_t numExamples = xData_.size()/ numFeatures_; Tensor xt({(int)maxBatchSize_, (int)numExamples}, 0.0f); Tensor yt({(int)maxBatchSize_, (int)numClasses_}, 0.0f); diff --git a/src/test.cu b/src/test.cu index 70b50007..8d7073f4 100644 --- a/src/test.cu +++ b/src/test.cu @@ -4,7 +4,6 @@ #include "vocab.h" int main(int argc, char** argv) { - cudaSetDevice(0); using namespace std; using namespace marian; @@ -22,7 +21,7 @@ int main(int argc, char** argv) { std::vector Y; std::vector H; - ExpressionGraph g; + ExpressionGraph g(0); for (int t = 0; t < num_inputs; ++t) { X.emplace_back(g.input(shape={batch_size, input_size})); diff --git a/src/train_mnist.cu b/src/train_mnist.cu index 64ccf564..09e08d15 100644 --- a/src/train_mnist.cu +++ b/src/train_mnist.cu @@ -16,7 +16,7 @@ int main(int argc, char** argv) { using namespace marian; using namespace keywords; - ExpressionGraph g; + ExpressionGraph g(0); Expr x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x"); Expr y = named(g.input(shape={whatevs, LABEL_SIZE}), "y"); diff --git a/src/validate_encoder_decoder.cu b/src/validate_encoder_decoder.cu index d8eea261..ded9982e 100644 --- a/src/validate_encoder_decoder.cu +++ b/src/validate_encoder_decoder.cu @@ -1,6 +1,8 @@ #include "marian.h" #include "mnist.h" +#include "vocab.h" +#include using namespace marian; using namespace keywords; @@ -32,10 +34,10 @@ ExpressionGraph build_graph(int cuda_device) { Y.emplace_back(named(g.input(shape={batch_size, output_size}), ss.str())); } - Expr Wxh = g.param(shape={input_size, hidden_size}, init=uniform(), name="Wxh"); - Expr Whh = g.param(shape={hidden_size, hidden_size}, init=uniform(), name="Whh"); - Expr bh = g.param(shape={1, hidden_size}, init=uniform(), name="bh"); - Expr h0 = g.param(shape={1, hidden_size}, init=uniform(), name="h0"); + Expr Wxh = named(g.param(shape={input_size, hidden_size}, init=uniform()), "Wxh"); + Expr Whh = named(g.param(shape={hidden_size, hidden_size}, init=uniform()), "Whh"); + Expr bh = named(g.param(shape={1, hidden_size}, init=uniform()), "bh"); + Expr h0 = named(g.param(shape={1, hidden_size}, init=uniform()), "h0"); std::cerr << "Building encoder RNN..." << std::endl; H.emplace_back(tanh(dot(X[0], Wxh) + dot(h0, Whh) + bh)); @@ -43,9 +45,9 @@ ExpressionGraph build_graph(int cuda_device) { H.emplace_back(tanh(dot(X[t], Wxh) + dot(H[t-1], Whh) + bh)); } - Expr Wxh_d = g.param(shape={output_size, hidden_size}, init=uniform(), name="Wxh_d"); - Expr Whh_d = g.param(shape={hidden_size, hidden_size}, init=uniform(), name="Whh_d"); - Expr bh_d = g.param(shape={1, hidden_size}, init=uniform(), name="bh_d"); + Expr Wxh_d = named(g.param(shape={output_size, hidden_size}, init=uniform()), "Wxh_d"); + Expr Whh_d = named(g.param(shape={hidden_size, hidden_size}, init=uniform()), "Whh_d"); + Expr bh_d = named(g.param(shape={1, hidden_size}, init=uniform()), "bh_d"); std::cerr << "Building decoder RNN..." << std::endl; auto h0_d = H[num_inputs]; @@ -54,8 +56,8 @@ ExpressionGraph build_graph(int cuda_device) { S.emplace_back(tanh(dot(Y[t], Wxh_d) + dot(S[t-1], Whh_d) + bh_d)); } - Expr Why = g.param(shape={hidden_size, output_size}, init=uniform(), name="Why"); - Expr by = g.param(shape={1, output_size}, init=uniform(), name="by"); + Expr Why = named(g.param(shape={hidden_size, output_size}, init=uniform()), "Why"); + Expr by = named(g.param(shape={1, output_size}, init=uniform()), "by"); std::cerr << "Building output layer..." << std::endl; std::vector Yp; @@ -66,29 +68,40 @@ ExpressionGraph build_graph(int cuda_device) { Yp.emplace_back(named(softmax_fast(dot(S[t-1], Why) + by), "pred")); cross_entropy = cross_entropy + sum(Y[t] * log(Yp[t]), axis=1); } - auto graph = -mean(cross_entropy, axis=0, name="cost"); + auto cost = named(-mean(cross_entropy, axis=0), "cost"); std::cerr << "Done." << std::endl; return g; } -#if 0 + + +int main(int argc, char** argv) { +#if 1 + std::cerr << "Loading the data... "; + Vocab sourceVocab, targetVocab; + // read parallel corpus from file std::fstream sourceFile("../examples/mt/dev/newstest2013.de"); std::fstream targetFile("../examples/mt/dev/newstest2013.en"); + std::vector > source_sentences, target_sentences; std::string sourceLine, targetLine; while (getline(sourceFile, sourceLine)) { getline(targetFile, targetLine); std::vector sourceIds = sourceVocab.ProcessSentence(sourceLine); - std::vector targetIds = sourceVocab.ProcessSentence(targetLine); + std::vector targetIds = targetVocab.ProcessSentence(targetLine); + source_sentences.push_back(sourceIds); + target_sentences.push_back(targetIds); } + std::cerr << "Done." << std::endl; + std::cerr << source_sentences.size() + << " sentence pairs read." << std::endl; + std::cerr << "Source vocabulary size: " << sourceVocab.Size() << std::endl; + std::cerr << "Target vocabulary size: " << targetVocab.Size() << std::endl; #endif - -int main(int argc, char** argv) { - ExpressionGraph g = build_graph(0); // For the stop symbol. @@ -109,6 +122,7 @@ int main(int argc, char** argv) { std::stringstream ss; ss << "X" << t; + if (!g.has_node(ss.str())) std::cerr << "No node " << ss.str() << "!!!" << std::endl; g[ss.str()] = Xt; } @@ -128,6 +142,7 @@ int main(int argc, char** argv) { std::stringstream ss; ss << "Y" << t; + if (!g.has_node(ss.str())) std::cerr << "No node " << ss.str() << "!!!" << std::endl; g[ss.str()] = Yt; } @@ -140,18 +155,18 @@ int main(int argc, char** argv) { g.backward(); std::cerr << "Done" << std::endl; - std::cerr << g["graph"].val().Debug() << std::endl; + std::cerr << g["cost"].val().Debug() << std::endl; std::cerr << g["X0"].val().Debug() << std::endl; std::cerr << g["Y0"].val().Debug() << std::endl; -#if 0 - std::cerr << Whh.grad().Debug() << std::endl; - std::cerr << bh.grad().Debug() << std::endl; - std::cerr << Why.grad().Debug() << std::endl; - std::cerr << by.grad().Debug() << std::endl; - std::cerr << Wxh.grad().Debug() << std::endl; - std::cerr << h0.grad().Debug() << std::endl; +#if 1 + std::cerr << g["Whh"].grad().Debug() << std::endl; + std::cerr << g["bh"].grad().Debug() << std::endl; + std::cerr << g["Why"].grad().Debug() << std::endl; + std::cerr << g["by"].grad().Debug() << std::endl; + std::cerr << g["Wxh"].grad().Debug() << std::endl; + std::cerr << g["h0"].grad().Debug() << std::endl; #endif return 0; diff --git a/src/validate_mnist.cu b/src/validate_mnist.cu index f71be921..01fb4c50 100644 --- a/src/validate_mnist.cu +++ b/src/validate_mnist.cu @@ -10,7 +10,7 @@ const size_t IMAGE_SIZE = 784; const size_t LABEL_SIZE = 10; int BATCH_SIZE = 10000; -ExpressionGraph build_graph() { +ExpressionGraph build_graph(int cudaDevice) { std::cerr << "Loading model params..."; NpzConverter converter("../scripts/test_model_single/model.npz"); @@ -22,7 +22,7 @@ ExpressionGraph build_graph() { std::cerr << "Building model..."; - ExpressionGraph g; + ExpressionGraph g(cudaDevice); auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x"); auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y"); @@ -46,15 +46,13 @@ ExpressionGraph build_graph() { } int main(int argc, char** argv) { - - cudaSetDevice(0); - + std::cerr << "Loading test set..."; std::vector testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE); std::vector testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE); std::cerr << "Done." << std::endl; - ExpressionGraph g = build_graph(); + ExpressionGraph g = build_graph(0); Tensor xt({BATCH_SIZE, IMAGE_SIZE}); Tensor yt({BATCH_SIZE, LABEL_SIZE}); diff --git a/src/validate_mnist_batch.cu b/src/validate_mnist_batch.cu index 50ab97b5..754d254c 100644 --- a/src/validate_mnist_batch.cu +++ b/src/validate_mnist_batch.cu @@ -7,9 +7,7 @@ using namespace marian; using namespace keywords; int main(int argc, char** argv) { - - cudaSetDevice(0); - + const size_t IMAGE_SIZE = 784; const size_t LABEL_SIZE = 10; const size_t BATCH_SIZE = 24; @@ -59,7 +57,7 @@ int main(int argc, char** argv) { std::cerr << "\tDone." << std::endl; - ExpressionGraph g; + ExpressionGraph g(0); auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X"); auto y = g.input(shape={whatevs, LABEL_SIZE}, name="Y"); diff --git a/src/vocab.cpp b/src/vocab.cpp index 705c21b2..25c28386 100644 --- a/src/vocab.cpp +++ b/src/vocab.cpp @@ -1,3 +1,4 @@ +#include #include "vocab.h" using namespace std; @@ -23,7 +24,6 @@ inline std::vector Tokenize(const std::string& str, return tokens; } -//////////////////////////////////////////////////////// size_t Vocab::GetOrCreate(const std::string &word) { @@ -39,6 +39,12 @@ size_t Vocab::GetOrCreate(const std::string &word) return id; } +size_t Vocab::Get(const std::string &word) const +{ + Coll::const_iterator iter = coll_.find(word); + return iter->second; +} + std::vector Vocab::ProcessSentence(const std::string &sentence) { vector toks = Tokenize(sentence); diff --git a/src/vocab.h b/src/vocab.h index 5e055511..3127083d 100644 --- a/src/vocab.h +++ b/src/vocab.h @@ -7,9 +7,22 @@ class Vocab { public: - size_t GetOrCreate(const std::string &word); + Vocab() { + GetOrCreate("__UNK__"); + GetOrCreate("__PAD__"); + GetOrCreate("__EOS__"); + } + virtual ~Vocab() {} + +public: + size_t Size() const { return coll_.size(); } + size_t Get(const std::string &word) const; + size_t GetOrCreate(const std::string &word); std::vector ProcessSentence(const std::string &sentence); + size_t GetUNK() const { return Get("__UNK__"); } + size_t GetPAD() const { return Get("__PAD__"); } + size_t GetEOS() const { return Get("__EOS__"); } protected: typedef std::unordered_map Coll; Coll coll_;