From 16e4220f17133b93c3a99f4e2cbd8b36690130f7 Mon Sep 17 00:00:00 2001 From: Nicola Bertoldi Date: Wed, 14 Aug 2013 12:20:51 +0200 Subject: [PATCH] functions to handle with Document-Level Translation tags --- moses/Util.cpp | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++ moses/Util.h | 3 ++ 2 files changed, 77 insertions(+) diff --git a/moses/Util.cpp b/moses/Util.cpp index f92c32dbb..2c6540a55 100644 --- a/moses/Util.cpp +++ b/moses/Util.cpp @@ -107,6 +107,80 @@ double GetUserTime() return g_timer.get_elapsed_time(); } +std::vector< std::map > ProcessAndStripDLT(std::string &line) +{ + std::vector< std::map > meta; + std::string lline = ToLower(line); + bool check_dlt = true; + + std::cerr << "GLOBAL START" << endl; + while (check_dlt) + { +// std::cerr << "index:|" << index << "|" << endl; + size_t start = lline.find(""); + if (close == std::string::npos) + { // error: dlt tag is not ended + check_dlt = false; + continue; + } + std::string dlt = Trim(lline.substr(start+4, close-start-4)); +// std::cerr << "dlt:|" << dlt << "|" << endl; + line.erase(start,close+2); + lline.erase(start,close+2); + + if (dlt == "") continue; + + std::map tmp_meta; + for (size_t i = 1; i < dlt.size(); i++) { + if (dlt[i] == '=') { + std::string label = dlt.substr(0, i); + std::string val = dlt.substr(i+1); +// std::cerr << "label:|" << label << "|" << endl; +// std::cerr << "val:|" << val << "|" << endl; + if (val[0] == '"') { + val = val.substr(1); + // it admits any double quotation mark in the value of the attribute + // it assumes that just one attribute is present in the tag, + // it assumes that the value starts and ends with double quotation mark + size_t close = val.rfind('"'); + if (close == std::string::npos) { + TRACE_ERR("SGML parse error: missing \"\n"); + dlt = ""; + i = 0; + } else { + dlt = val.substr(close+1); + val = val.substr(0, close); + i = 0; + } + } else { + size_t close = val.find(' '); + if (close == std::string::npos) { + dlt = ""; + i = 0; + } else { + dlt = val.substr(close+1); + val = val.substr(0, close); + } + } + label = Trim(label); + dlt = Trim(dlt); + + tmp_meta[label] = val; + } + } + + meta.push_back(tmp_meta); + } + std::cerr << "GLOBAL END" << endl; + return meta; +} + std::map ProcessAndStripSGML(std::string &line) { std::map meta; diff --git a/moses/Util.h b/moses/Util.h index 5517598f1..b5031b7a7 100644 --- a/moses/Util.h +++ b/moses/Util.h @@ -372,6 +372,9 @@ void ResetUserTime(); void PrintUserTime(const std::string &message); double GetUserTime(); +// dump SGML parser for tags +std::vector< std::map > ProcessAndStripDLT(std::string &line); + // dump SGML parser for tags std::map ProcessAndStripSGML(std::string &line);