mirror of
https://github.com/marian-nmt/marian.git
synced 2024-09-17 09:47:34 +03:00
parse parallel data
This commit is contained in:
parent
383b82c6f9
commit
1833ea1eab
@ -9,6 +9,7 @@ cuda_add_library(marian_lib
|
|||||||
tensor.cu
|
tensor.cu
|
||||||
tensor_operators.cu
|
tensor_operators.cu
|
||||||
expression_operators.cu
|
expression_operators.cu
|
||||||
|
vocab.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(marian_lib)
|
target_link_libraries(marian_lib)
|
||||||
|
18
src/test.cu
18
src/test.cu
@ -1,13 +1,17 @@
|
|||||||
|
#include <fstream>
|
||||||
#include "marian.h"
|
#include "marian.h"
|
||||||
#include "mnist.h"
|
#include "mnist.h"
|
||||||
|
#include "vocab.h"
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
cudaSetDevice(0);
|
cudaSetDevice(0);
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
using namespace marian;
|
using namespace marian;
|
||||||
using namespace keywords;
|
using namespace keywords;
|
||||||
|
|
||||||
|
Vocab sourceVocab, targetVocab;
|
||||||
|
|
||||||
int input_size = 10;
|
int input_size = 10;
|
||||||
int output_size = 2;
|
int output_size = 2;
|
||||||
int batch_size = 25;
|
int batch_size = 25;
|
||||||
@ -30,6 +34,18 @@ int main(int argc, char** argv) {
|
|||||||
Expr bh = g.param(shape={1, hidden_size}, init=uniform(), name="bh");
|
Expr bh = g.param(shape={1, hidden_size}, init=uniform(), name="bh");
|
||||||
Expr h0 = g.param(shape={1, hidden_size}, init=uniform(), name="h0");
|
Expr h0 = g.param(shape={1, hidden_size}, init=uniform(), name="h0");
|
||||||
|
|
||||||
|
// read parallel corpus from file
|
||||||
|
std::fstream sourceFile("../examples/mt/dev/newstest2013.de");
|
||||||
|
std::fstream targetFile("../examples/mt/dev/newstest2013.en");
|
||||||
|
|
||||||
|
string sourceLine, targetLine;
|
||||||
|
while (getline(sourceFile, sourceLine)) {
|
||||||
|
getline(targetFile, targetLine);
|
||||||
|
|
||||||
|
std::vector<size_t> sourceIds = sourceVocab.ProcessSentence(sourceLine);
|
||||||
|
std::vector<size_t> targetIds = sourceVocab.ProcessSentence(targetLine);
|
||||||
|
}
|
||||||
|
|
||||||
std::cerr << "Building RNN..." << std::endl;
|
std::cerr << "Building RNN..." << std::endl;
|
||||||
H.emplace_back(tanh(dot(X[0], Wxh) + dot(h0, Whh) + bh));
|
H.emplace_back(tanh(dot(X[0], Wxh) + dot(h0, Whh) + bh));
|
||||||
for (int t = 1; t < num_inputs; ++t) {
|
for (int t = 1; t < num_inputs; ++t) {
|
||||||
|
Loading…
Reference in New Issue
Block a user