mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
Separated graph computation from data in the e-d.
This commit is contained in:
commit
5ecd222d82
@ -18,7 +18,7 @@ struct Chainable {
|
|||||||
|
|
||||||
virtual void allocate(size_t) = 0;
|
virtual void allocate(size_t) = 0;
|
||||||
virtual std::string graphviz() = 0;
|
virtual std::string graphviz() = 0;
|
||||||
|
virtual const std::string &name() const = 0;
|
||||||
|
|
||||||
virtual const Shape& shape() = 0;
|
virtual const Shape& shape() = 0;
|
||||||
virtual DataType &val() = 0;
|
virtual DataType &val() = 0;
|
||||||
@ -33,4 +33,4 @@ typedef std::shared_ptr<ChainableStack> ChainableStackPtr;
|
|||||||
typedef std::shared_ptr<Chainable<Tensor>> ChainPtr;
|
typedef std::shared_ptr<Chainable<Tensor>> ChainPtr;
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -37,5 +37,14 @@ std::string Expr::Debug() const
|
|||||||
strm << marian::Debug(shape);
|
strm << marian::Debug(shape);
|
||||||
return strm.str();
|
return strm.str();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////
|
||||||
|
ExpressionGraph::ExpressionGraph(int cudaDevice)
|
||||||
|
: stack_(new ChainableStack)
|
||||||
|
{
|
||||||
|
std::srand (time(NULL));
|
||||||
|
cudaSetDevice(0);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -38,9 +38,7 @@ class Expr {
|
|||||||
|
|
||||||
class ExpressionGraph {
|
class ExpressionGraph {
|
||||||
public:
|
public:
|
||||||
ExpressionGraph()
|
ExpressionGraph(int cudaDevice);
|
||||||
: stack_(new ChainableStack)
|
|
||||||
{}
|
|
||||||
|
|
||||||
void forward(size_t batchSize) {
|
void forward(size_t batchSize) {
|
||||||
for(auto&& v : *stack_) {
|
for(auto&& v : *stack_) {
|
||||||
@ -54,10 +52,10 @@ class ExpressionGraph {
|
|||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << "digraph ExpressionGraph {" << std::endl;
|
ss << "digraph ExpressionGraph {" << std::endl;
|
||||||
ss << "rankdir=BT" << std::endl;
|
ss << "rankdir=BT" << std::endl;
|
||||||
|
|
||||||
typedef typename ChainableStack::reverse_iterator It;
|
typedef typename ChainableStack::reverse_iterator It;
|
||||||
for(It it = stack_->rbegin(); it != stack_->rend(); ++it)
|
for(It it = stack_->rbegin(); it != stack_->rend(); ++it) {
|
||||||
ss << (*it)->graphviz();
|
ss << (*it)->graphviz();
|
||||||
|
}
|
||||||
ss << "}" << std::endl;
|
ss << "}" << std::endl;
|
||||||
return ss.str();
|
return ss.str();
|
||||||
}
|
}
|
||||||
|
@ -67,6 +67,8 @@ class Node : public Chainable<Tensor>,
|
|||||||
virtual const Shape& shape() {
|
virtual const Shape& shape() {
|
||||||
return shape_;
|
return shape_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const std::string &name() const { return name_; }
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
Shape shape_;
|
Shape shape_;
|
||||||
|
@ -23,8 +23,6 @@ SGD::SGD(ExpressionGraph& g, float eta,
|
|||||||
|
|
||||||
void SGD::Run()
|
void SGD::Run()
|
||||||
{
|
{
|
||||||
std::srand ( unsigned ( std::time(0) ) );
|
|
||||||
|
|
||||||
size_t numExamples = xData_.size()/ numFeatures_;
|
size_t numExamples = xData_.size()/ numFeatures_;
|
||||||
Tensor xt({(int)maxBatchSize_, (int)numExamples}, 0.0f);
|
Tensor xt({(int)maxBatchSize_, (int)numExamples}, 0.0f);
|
||||||
Tensor yt({(int)maxBatchSize_, (int)numClasses_}, 0.0f);
|
Tensor yt({(int)maxBatchSize_, (int)numClasses_}, 0.0f);
|
||||||
|
@ -4,7 +4,6 @@
|
|||||||
#include "vocab.h"
|
#include "vocab.h"
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
cudaSetDevice(0);
|
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
using namespace marian;
|
using namespace marian;
|
||||||
@ -22,7 +21,7 @@ int main(int argc, char** argv) {
|
|||||||
std::vector<Expr> Y;
|
std::vector<Expr> Y;
|
||||||
std::vector<Expr> H;
|
std::vector<Expr> H;
|
||||||
|
|
||||||
ExpressionGraph g;
|
ExpressionGraph g(0);
|
||||||
|
|
||||||
for (int t = 0; t < num_inputs; ++t) {
|
for (int t = 0; t < num_inputs; ++t) {
|
||||||
X.emplace_back(g.input(shape={batch_size, input_size}));
|
X.emplace_back(g.input(shape={batch_size, input_size}));
|
||||||
|
@ -16,7 +16,7 @@ int main(int argc, char** argv) {
|
|||||||
using namespace marian;
|
using namespace marian;
|
||||||
using namespace keywords;
|
using namespace keywords;
|
||||||
|
|
||||||
ExpressionGraph g;
|
ExpressionGraph g(0);
|
||||||
|
|
||||||
Expr x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
|
Expr x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
|
||||||
Expr y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
|
Expr y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
|
||||||
|
@ -1,6 +1,8 @@
|
|||||||
|
|
||||||
#include "marian.h"
|
#include "marian.h"
|
||||||
#include "mnist.h"
|
#include "mnist.h"
|
||||||
|
#include "vocab.h"
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
using namespace marian;
|
using namespace marian;
|
||||||
using namespace keywords;
|
using namespace keywords;
|
||||||
@ -32,10 +34,10 @@ ExpressionGraph build_graph(int cuda_device) {
|
|||||||
Y.emplace_back(named(g.input(shape={batch_size, output_size}), ss.str()));
|
Y.emplace_back(named(g.input(shape={batch_size, output_size}), ss.str()));
|
||||||
}
|
}
|
||||||
|
|
||||||
Expr Wxh = g.param(shape={input_size, hidden_size}, init=uniform(), name="Wxh");
|
Expr Wxh = named(g.param(shape={input_size, hidden_size}, init=uniform()), "Wxh");
|
||||||
Expr Whh = g.param(shape={hidden_size, hidden_size}, init=uniform(), name="Whh");
|
Expr Whh = named(g.param(shape={hidden_size, hidden_size}, init=uniform()), "Whh");
|
||||||
Expr bh = g.param(shape={1, hidden_size}, init=uniform(), name="bh");
|
Expr bh = named(g.param(shape={1, hidden_size}, init=uniform()), "bh");
|
||||||
Expr h0 = g.param(shape={1, hidden_size}, init=uniform(), name="h0");
|
Expr h0 = named(g.param(shape={1, hidden_size}, init=uniform()), "h0");
|
||||||
|
|
||||||
std::cerr << "Building encoder RNN..." << std::endl;
|
std::cerr << "Building encoder RNN..." << std::endl;
|
||||||
H.emplace_back(tanh(dot(X[0], Wxh) + dot(h0, Whh) + bh));
|
H.emplace_back(tanh(dot(X[0], Wxh) + dot(h0, Whh) + bh));
|
||||||
@ -43,9 +45,9 @@ ExpressionGraph build_graph(int cuda_device) {
|
|||||||
H.emplace_back(tanh(dot(X[t], Wxh) + dot(H[t-1], Whh) + bh));
|
H.emplace_back(tanh(dot(X[t], Wxh) + dot(H[t-1], Whh) + bh));
|
||||||
}
|
}
|
||||||
|
|
||||||
Expr Wxh_d = g.param(shape={output_size, hidden_size}, init=uniform(), name="Wxh_d");
|
Expr Wxh_d = named(g.param(shape={output_size, hidden_size}, init=uniform()), "Wxh_d");
|
||||||
Expr Whh_d = g.param(shape={hidden_size, hidden_size}, init=uniform(), name="Whh_d");
|
Expr Whh_d = named(g.param(shape={hidden_size, hidden_size}, init=uniform()), "Whh_d");
|
||||||
Expr bh_d = g.param(shape={1, hidden_size}, init=uniform(), name="bh_d");
|
Expr bh_d = named(g.param(shape={1, hidden_size}, init=uniform()), "bh_d");
|
||||||
|
|
||||||
std::cerr << "Building decoder RNN..." << std::endl;
|
std::cerr << "Building decoder RNN..." << std::endl;
|
||||||
auto h0_d = H[num_inputs];
|
auto h0_d = H[num_inputs];
|
||||||
@ -54,8 +56,8 @@ ExpressionGraph build_graph(int cuda_device) {
|
|||||||
S.emplace_back(tanh(dot(Y[t], Wxh_d) + dot(S[t-1], Whh_d) + bh_d));
|
S.emplace_back(tanh(dot(Y[t], Wxh_d) + dot(S[t-1], Whh_d) + bh_d));
|
||||||
}
|
}
|
||||||
|
|
||||||
Expr Why = g.param(shape={hidden_size, output_size}, init=uniform(), name="Why");
|
Expr Why = named(g.param(shape={hidden_size, output_size}, init=uniform()), "Why");
|
||||||
Expr by = g.param(shape={1, output_size}, init=uniform(), name="by");
|
Expr by = named(g.param(shape={1, output_size}, init=uniform()), "by");
|
||||||
|
|
||||||
std::cerr << "Building output layer..." << std::endl;
|
std::cerr << "Building output layer..." << std::endl;
|
||||||
std::vector<Expr> Yp;
|
std::vector<Expr> Yp;
|
||||||
@ -66,29 +68,40 @@ ExpressionGraph build_graph(int cuda_device) {
|
|||||||
Yp.emplace_back(named(softmax_fast(dot(S[t-1], Why) + by), "pred"));
|
Yp.emplace_back(named(softmax_fast(dot(S[t-1], Why) + by), "pred"));
|
||||||
cross_entropy = cross_entropy + sum(Y[t] * log(Yp[t]), axis=1);
|
cross_entropy = cross_entropy + sum(Y[t] * log(Yp[t]), axis=1);
|
||||||
}
|
}
|
||||||
auto graph = -mean(cross_entropy, axis=0, name="cost");
|
auto cost = named(-mean(cross_entropy, axis=0), "cost");
|
||||||
|
|
||||||
std::cerr << "Done." << std::endl;
|
std::cerr << "Done." << std::endl;
|
||||||
|
|
||||||
return g;
|
return g;
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
|
|
||||||
|
int main(int argc, char** argv) {
|
||||||
|
#if 1
|
||||||
|
std::cerr << "Loading the data... ";
|
||||||
|
Vocab sourceVocab, targetVocab;
|
||||||
|
|
||||||
// read parallel corpus from file
|
// read parallel corpus from file
|
||||||
std::fstream sourceFile("../examples/mt/dev/newstest2013.de");
|
std::fstream sourceFile("../examples/mt/dev/newstest2013.de");
|
||||||
std::fstream targetFile("../examples/mt/dev/newstest2013.en");
|
std::fstream targetFile("../examples/mt/dev/newstest2013.en");
|
||||||
|
|
||||||
|
std::vector<std::vector<size_t> > source_sentences, target_sentences;
|
||||||
std::string sourceLine, targetLine;
|
std::string sourceLine, targetLine;
|
||||||
while (getline(sourceFile, sourceLine)) {
|
while (getline(sourceFile, sourceLine)) {
|
||||||
getline(targetFile, targetLine);
|
getline(targetFile, targetLine);
|
||||||
std::vector<size_t> sourceIds = sourceVocab.ProcessSentence(sourceLine);
|
std::vector<size_t> sourceIds = sourceVocab.ProcessSentence(sourceLine);
|
||||||
std::vector<size_t> targetIds = sourceVocab.ProcessSentence(targetLine);
|
std::vector<size_t> targetIds = targetVocab.ProcessSentence(targetLine);
|
||||||
|
source_sentences.push_back(sourceIds);
|
||||||
|
target_sentences.push_back(targetIds);
|
||||||
}
|
}
|
||||||
|
std::cerr << "Done." << std::endl;
|
||||||
|
std::cerr << source_sentences.size()
|
||||||
|
<< " sentence pairs read." << std::endl;
|
||||||
|
std::cerr << "Source vocabulary size: " << sourceVocab.Size() << std::endl;
|
||||||
|
std::cerr << "Target vocabulary size: " << targetVocab.Size() << std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
|
||||||
|
|
||||||
ExpressionGraph g = build_graph(0);
|
ExpressionGraph g = build_graph(0);
|
||||||
|
|
||||||
// For the stop symbol.
|
// For the stop symbol.
|
||||||
@ -109,6 +122,7 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << "X" << t;
|
ss << "X" << t;
|
||||||
|
if (!g.has_node(ss.str())) std::cerr << "No node " << ss.str() << "!!!" << std::endl;
|
||||||
g[ss.str()] = Xt;
|
g[ss.str()] = Xt;
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -128,6 +142,7 @@ int main(int argc, char** argv) {
|
|||||||
|
|
||||||
std::stringstream ss;
|
std::stringstream ss;
|
||||||
ss << "Y" << t;
|
ss << "Y" << t;
|
||||||
|
if (!g.has_node(ss.str())) std::cerr << "No node " << ss.str() << "!!!" << std::endl;
|
||||||
g[ss.str()] = Yt;
|
g[ss.str()] = Yt;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -140,18 +155,18 @@ int main(int argc, char** argv) {
|
|||||||
g.backward();
|
g.backward();
|
||||||
std::cerr << "Done" << std::endl;
|
std::cerr << "Done" << std::endl;
|
||||||
|
|
||||||
std::cerr << g["graph"].val().Debug() << std::endl;
|
std::cerr << g["cost"].val().Debug() << std::endl;
|
||||||
|
|
||||||
std::cerr << g["X0"].val().Debug() << std::endl;
|
std::cerr << g["X0"].val().Debug() << std::endl;
|
||||||
std::cerr << g["Y0"].val().Debug() << std::endl;
|
std::cerr << g["Y0"].val().Debug() << std::endl;
|
||||||
|
|
||||||
#if 0
|
#if 1
|
||||||
std::cerr << Whh.grad().Debug() << std::endl;
|
std::cerr << g["Whh"].grad().Debug() << std::endl;
|
||||||
std::cerr << bh.grad().Debug() << std::endl;
|
std::cerr << g["bh"].grad().Debug() << std::endl;
|
||||||
std::cerr << Why.grad().Debug() << std::endl;
|
std::cerr << g["Why"].grad().Debug() << std::endl;
|
||||||
std::cerr << by.grad().Debug() << std::endl;
|
std::cerr << g["by"].grad().Debug() << std::endl;
|
||||||
std::cerr << Wxh.grad().Debug() << std::endl;
|
std::cerr << g["Wxh"].grad().Debug() << std::endl;
|
||||||
std::cerr << h0.grad().Debug() << std::endl;
|
std::cerr << g["h0"].grad().Debug() << std::endl;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -10,7 +10,7 @@ const size_t IMAGE_SIZE = 784;
|
|||||||
const size_t LABEL_SIZE = 10;
|
const size_t LABEL_SIZE = 10;
|
||||||
int BATCH_SIZE = 10000;
|
int BATCH_SIZE = 10000;
|
||||||
|
|
||||||
ExpressionGraph build_graph() {
|
ExpressionGraph build_graph(int cudaDevice) {
|
||||||
std::cerr << "Loading model params...";
|
std::cerr << "Loading model params...";
|
||||||
NpzConverter converter("../scripts/test_model_single/model.npz");
|
NpzConverter converter("../scripts/test_model_single/model.npz");
|
||||||
|
|
||||||
@ -22,7 +22,7 @@ ExpressionGraph build_graph() {
|
|||||||
|
|
||||||
std::cerr << "Building model...";
|
std::cerr << "Building model...";
|
||||||
|
|
||||||
ExpressionGraph g;
|
ExpressionGraph g(cudaDevice);
|
||||||
auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
|
auto x = named(g.input(shape={whatevs, IMAGE_SIZE}), "x");
|
||||||
auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
|
auto y = named(g.input(shape={whatevs, LABEL_SIZE}), "y");
|
||||||
|
|
||||||
@ -46,15 +46,13 @@ ExpressionGraph build_graph() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
cudaSetDevice(0);
|
|
||||||
|
|
||||||
std::cerr << "Loading test set...";
|
std::cerr << "Loading test set...";
|
||||||
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
|
std::vector<float> testImages = datasets::mnist::ReadImages("../examples/mnist/t10k-images-idx3-ubyte", BATCH_SIZE, IMAGE_SIZE);
|
||||||
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
|
std::vector<float> testLabels = datasets::mnist::ReadLabels("../examples/mnist/t10k-labels-idx1-ubyte", BATCH_SIZE, LABEL_SIZE);
|
||||||
std::cerr << "Done." << std::endl;
|
std::cerr << "Done." << std::endl;
|
||||||
|
|
||||||
ExpressionGraph g = build_graph();
|
ExpressionGraph g = build_graph(0);
|
||||||
|
|
||||||
Tensor xt({BATCH_SIZE, IMAGE_SIZE});
|
Tensor xt({BATCH_SIZE, IMAGE_SIZE});
|
||||||
Tensor yt({BATCH_SIZE, LABEL_SIZE});
|
Tensor yt({BATCH_SIZE, LABEL_SIZE});
|
||||||
|
@ -7,9 +7,7 @@ using namespace marian;
|
|||||||
using namespace keywords;
|
using namespace keywords;
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
|
|
||||||
cudaSetDevice(0);
|
|
||||||
|
|
||||||
const size_t IMAGE_SIZE = 784;
|
const size_t IMAGE_SIZE = 784;
|
||||||
const size_t LABEL_SIZE = 10;
|
const size_t LABEL_SIZE = 10;
|
||||||
const size_t BATCH_SIZE = 24;
|
const size_t BATCH_SIZE = 24;
|
||||||
@ -59,7 +57,7 @@ int main(int argc, char** argv) {
|
|||||||
std::cerr << "\tDone." << std::endl;
|
std::cerr << "\tDone." << std::endl;
|
||||||
|
|
||||||
|
|
||||||
ExpressionGraph g;
|
ExpressionGraph g(0);
|
||||||
|
|
||||||
auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X");
|
auto x = g.input(shape={whatevs, IMAGE_SIZE}, name="X");
|
||||||
auto y = g.input(shape={whatevs, LABEL_SIZE}, name="Y");
|
auto y = g.input(shape={whatevs, LABEL_SIZE}, name="Y");
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
#include <limits>
|
||||||
#include "vocab.h"
|
#include "vocab.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
@ -23,7 +24,6 @@ inline std::vector<std::string> Tokenize(const std::string& str,
|
|||||||
|
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
size_t Vocab::GetOrCreate(const std::string &word)
|
size_t Vocab::GetOrCreate(const std::string &word)
|
||||||
{
|
{
|
||||||
@ -39,6 +39,12 @@ size_t Vocab::GetOrCreate(const std::string &word)
|
|||||||
return id;
|
return id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
size_t Vocab::Get(const std::string &word) const
|
||||||
|
{
|
||||||
|
Coll::const_iterator iter = coll_.find(word);
|
||||||
|
return iter->second;
|
||||||
|
}
|
||||||
|
|
||||||
std::vector<size_t> Vocab::ProcessSentence(const std::string &sentence)
|
std::vector<size_t> Vocab::ProcessSentence(const std::string &sentence)
|
||||||
{
|
{
|
||||||
vector<string> toks = Tokenize(sentence);
|
vector<string> toks = Tokenize(sentence);
|
||||||
|
15
src/vocab.h
15
src/vocab.h
@ -7,9 +7,22 @@
|
|||||||
class Vocab
|
class Vocab
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
size_t GetOrCreate(const std::string &word);
|
Vocab() {
|
||||||
|
GetOrCreate("__UNK__");
|
||||||
|
GetOrCreate("__PAD__");
|
||||||
|
GetOrCreate("__EOS__");
|
||||||
|
}
|
||||||
|
virtual ~Vocab() {}
|
||||||
|
|
||||||
|
public:
|
||||||
|
size_t Size() const { return coll_.size(); }
|
||||||
|
size_t Get(const std::string &word) const;
|
||||||
|
size_t GetOrCreate(const std::string &word);
|
||||||
std::vector<size_t> ProcessSentence(const std::string &sentence);
|
std::vector<size_t> ProcessSentence(const std::string &sentence);
|
||||||
|
|
||||||
|
size_t GetUNK() const { return Get("__UNK__"); }
|
||||||
|
size_t GetPAD() const { return Get("__PAD__"); }
|
||||||
|
size_t GetEOS() const { return Get("__EOS__"); }
|
||||||
protected:
|
protected:
|
||||||
typedef std::unordered_map<std::string, size_t> Coll;
|
typedef std::unordered_map<std::string, size_t> Coll;
|
||||||
Coll coll_;
|
Coll coll_;
|
||||||
|
Loading…
Reference in New Issue
Block a user