mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
parse parallel data
This commit is contained in:
parent
383b82c6f9
commit
1833ea1eab
@ -9,6 +9,7 @@ cuda_add_library(marian_lib
|
||||
tensor.cu
|
||||
tensor_operators.cu
|
||||
expression_operators.cu
|
||||
vocab.cpp
|
||||
)
|
||||
|
||||
target_link_libraries(marian_lib)
|
||||
|
18
src/test.cu
18
src/test.cu
@ -1,13 +1,17 @@
|
||||
|
||||
#include <fstream>
|
||||
#include "marian.h"
|
||||
#include "mnist.h"
|
||||
#include "vocab.h"
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
cudaSetDevice(0);
|
||||
|
||||
using namespace std;
|
||||
using namespace marian;
|
||||
using namespace keywords;
|
||||
|
||||
Vocab sourceVocab, targetVocab;
|
||||
|
||||
int input_size = 10;
|
||||
int output_size = 2;
|
||||
int batch_size = 25;
|
||||
@ -30,6 +34,18 @@ int main(int argc, char** argv) {
|
||||
Expr bh = g.param(shape={1, hidden_size}, init=uniform(), name="bh");
|
||||
Expr h0 = g.param(shape={1, hidden_size}, init=uniform(), name="h0");
|
||||
|
||||
// read parallel corpus from file
|
||||
std::fstream sourceFile("../examples/mt/dev/newstest2013.de");
|
||||
std::fstream targetFile("../examples/mt/dev/newstest2013.en");
|
||||
|
||||
string sourceLine, targetLine;
|
||||
while (getline(sourceFile, sourceLine)) {
|
||||
getline(targetFile, targetLine);
|
||||
|
||||
std::vector<size_t> sourceIds = sourceVocab.ProcessSentence(sourceLine);
|
||||
std::vector<size_t> targetIds = sourceVocab.ProcessSentence(targetLine);
|
||||
}
|
||||
|
||||
std::cerr << "Building RNN..." << std::endl;
|
||||
H.emplace_back(tanh(dot(X[0], Wxh) + dot(h0, Whh) + bh));
|
||||
for (int t = 1; t < num_inputs; ++t) {
|
||||
|
Loading…
Reference in New Issue
Block a user