2016-04-27 11:42:07 +03:00
|
|
|
/*
|
|
|
|
* Sentence.cpp
|
|
|
|
*
|
|
|
|
* Created on: 14 Dec 2015
|
|
|
|
* Author: hieu
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include "Sentence.h"
|
2016-08-27 01:08:21 +03:00
|
|
|
#include "../System.h"
|
2016-04-27 11:42:07 +03:00
|
|
|
|
|
|
|
using namespace std;
|
|
|
|
|
|
|
|
namespace Moses2
|
|
|
|
{
|
|
|
|
namespace SCFG
|
|
|
|
{
|
2016-04-27 21:59:03 +03:00
|
|
|
/**
 * Build an SCFG input sentence from one line of input text.
 *
 * If system.options.input.xml_policy is set, the line is delegated to
 * CreateFromStringXML(). Otherwise the line is tokenized and the words are
 * created directly from the tokens. The Sentence object itself is
 * placement-constructed in storage obtained from `pool`.
 *
 * @param pool           memory pool that provides storage for the sentence
 * @param vocab          factor collection used when creating the words
 * @param system         global system object; supplies the input options
 * @param str            the raw input line
 * @param translationId  not used by this function
 * @return the newly created sentence
 */
Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab,
    const System &system, const std::string &str, long translationId)
{
  //cerr << "SCFG Sentence" << endl;

  if (system.options.input.xml_policy) {
    // input may carry XML markup - use the XML-aware factory
    Sentence *xmlSentence = CreateFromStringXML(pool, vocab, system, str);
    //cerr << "ret=" << xmlSentence->Debug(system) << endl;
    return xmlSentence;
  }

  // plain text input
  std::vector<std::string> words = Tokenize(str);

  // 2 more slots than tokens - presumably for the begin/end-of-sentence words
  // requested by the trailing `true` below (confirm in PhraseImplTemplate)
  size_t numSlots = words.size() + 2;

  Sentence *plainSentence =
    new (pool.Allocate<SCFG::Sentence>()) Sentence(pool, numSlots);
  plainSentence->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system,
      words, true);

  return plainSentence;
}
|
2016-04-27 11:42:07 +03:00
|
|
|
|
2016-08-27 01:08:21 +03:00
|
|
|
/**
 * Build an SCFG input sentence from a line that may contain XML markup.
 *
 * The line is wrapped in a synthetic <xml> root element and parsed with
 * pugixml. XMLParse() then walks the tree, collecting the text tokens and one
 * XMLOption per element. After the words have been created:
 *  - an <ne> element sets the placeholder factor of the single word it covers
 *    to the factor created from its entity string;
 *  - every other element is handed to AddXMLOption().
 *
 * @param pool    memory pool that provides storage for the sentence and options
 * @param vocab   factor collection used for words and placeholder factors
 * @param system  global system object; supplies the input options
 * @param str     the raw input line (without the synthetic root element)
 * @return the newly created sentence
 *
 * Throws (via UTIL_THROW_IF2) when pugixml reports a parse error, when an
 * <ne> element is present but no placeholder factor is configured, or when an
 * <ne> element does not cover exactly one word.
 */
Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab,
    const System &system, const std::string &str)
{
  vector<XMLOption*> xmlOptions;
  pugi::xml_document doc;

  // wrap the line in a single root element so it forms one document
  string str2 = "<xml>" + str + "</xml>";
  pugi::xml_parse_result result = doc.load(str2.c_str(),
      pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments);

  // pugixml keeps the part of the tree it managed to parse before an error.
  // Without this check a malformed line would silently become a truncated
  // sentence, so fail loudly instead.
  UTIL_THROW_IF2(!result,
      "Malformed XML in input: " << result.description());

  pugi::xml_node topNode = doc.child("xml");

  std::vector<std::string> toks;
  XMLParse(pool, system, 0, topNode, toks, xmlOptions);

  // debug
  /*
  cerr << "xmloptions:" << endl;
  for (size_t i = 0; i < xmlOptions.size(); ++i) {
    cerr << xmlOptions[i]->Debug(system) << endl;
  }
  */

  // create words
  // 2 more slots than tokens - presumably for the begin/end-of-sentence words
  // requested by the trailing `true` below (confirm in PhraseImplTemplate)
  size_t size = toks.size() + 2;
  Sentence *ret = new (pool.Allocate<Sentence>()) Sentence(pool, size);
  ret->PhraseImplTemplate<SCFG::Word>::CreateFromString(vocab, system, toks, true);

  // xml
  for (size_t i = 0; i < xmlOptions.size(); i++) {
    const XMLOption *xmlOption = xmlOptions[i];

    if (strcmp(xmlOption->GetNodeName(), "ne") == 0) {
      // placeholder
      FactorType placeholderFactor = system.options.input.placeholder_factor;
      UTIL_THROW_IF2(placeholderFactor == NOT_FOUND,
                     "Placeholder XML in input. Must have argument -placeholder-factor [NUM]");
      UTIL_THROW_IF2(xmlOption->phraseSize != 1,
                     "Placeholder must only cover 1 word");

      const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false);

      // startPos counts tokens only; the sentence index is shifted by 1
      (*ret)[xmlOption->startPos + 1][placeholderFactor] = factor;
    }
    else {
      // default - forced translation. Add to class variable
      ret->AddXMLOption(system, xmlOption);
    }
  }

  //cerr << "ret=" << ret->Debug(system) << endl;
  return ret;
}
|
|
|
|
|
|
|
|
/**
 * Recursively walk the children of `parentNode`, collecting text tokens and
 * XML options.
 *
 * For every child node:
 *  - a non-empty node value is tokenized and its tokens appended to `toks`;
 *  - a non-empty node name produces a new XMLOption (allocated from `pool`)
 *    that starts at the current token count. Its optional "translation",
 *    "entity" and "prob" attributes are copied over, it is appended to
 *    `xmlOptions`, its children are processed recursively, and finally its
 *    phraseSize is set to the number of tokens added since its start.
 *
 * An option is appended to `xmlOptions` before its descendants are visited,
 * so enclosing elements come before the elements nested inside them.
 *
 * @param pool        memory pool that provides storage for the options
 * @param system      global system object; only forwarded to the recursion
 * @param depth       nesting level; only forwarded (incremented) to the recursion
 * @param parentNode  node whose children are processed
 * @param toks        output: tokens collected so far, appended to
 * @param xmlOptions  output: options collected so far, appended to
 */
void Sentence::XMLParse(
  MemPool &pool,
  const System &system,
  size_t depth,
  const pugi::xml_node &parentNode,
  std::vector<std::string> &toks,
  vector<XMLOption*> &xmlOptions)
{
  // pugixml
  pugi::xml_node childNode = parentNode.first_child();
  for (; childNode; childNode = childNode.next_sibling()) {
    string nodeName = childNode.name();
    //cerr << depth << " nodeName=" << nodeName << endl;

    // token index at which this node begins
    int startPos = toks.size();

    string text = childNode.value();
    if (!text.empty()) {
      //cerr << depth << "childNode text=" << text << endl;
      std::vector<std::string> textToks = Tokenize(text);
      toks.insert(toks.end(), textToks.begin(), textToks.end());
    }

    if (nodeName.empty()) {
      // unnamed node - nothing more to record for it
      continue;
    }

    XMLOption *option =
      new (pool.Allocate<XMLOption>()) XMLOption(pool, nodeName, startPos);

    pugi::xml_attribute translationAttr = childNode.attribute("translation");
    if (!translationAttr.empty()) {
      option->SetTranslation(pool, translationAttr.as_string());
    }

    pugi::xml_attribute entityAttr = childNode.attribute("entity");
    if (!entityAttr.empty()) {
      option->SetEntity(pool, entityAttr.as_string());
    }

    pugi::xml_attribute probAttr = childNode.attribute("prob");
    if (!probAttr.empty()) {
      option->prob = probAttr.as_float();
    }

    xmlOptions.push_back(option);

    // recursively call this function. For proper recursive trees
    XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions);

    // the span is only known once all descendants have added their tokens
    size_t endPos = toks.size();
    option->phraseSize = endPos - startPos;

    /*
    cerr << "xmlOptions=";
    option->Debug(cerr, system);
    cerr << endl;
    */
  }
}
|
|
|
|
|
2016-04-27 11:42:07 +03:00
|
|
|
}
|
|
|
|
} /* namespace Moses2 */
|
|
|
|
|