Merge remote-tracking branch 'upstream/master'

Prashant Mathur 2016-11-08 12:09:10 +01:00
commit a31fe2e80c
473 changed files with 54719 additions and 1855 deletions

@ -7,5 +7,3 @@ into the source tree from elsewhere:
* "bjam-files" is taken from Boost.
* "util" and "lm" are taken from KenLM: https://github.com/kpu/kenlm

@ -316,6 +316,8 @@ rephraser
contrib/c++tokenizer//tokenizer
contrib/expected-bleu-training//train-expected-bleu
contrib/expected-bleu-training//prepare-expected-bleu-training
contrib/moses2//programs
;
@ -339,3 +341,5 @@ if [ path.exists $(TOP)/dist ] && $(prefix) != dist {
local temp = [ _shell "mkdir -p $(TOP)/bin" ] ;
local temp = [ _shell "rm -f $(TOP)/bin/moses_chart" ] ;
local temp = [ _shell "cd $(TOP)/bin && ln -s moses moses_chart" ] ;
local temp = [ _shell "cd $(TOP)/bin && ln -s CreateProbingPT CreateProbingPT2" ] ;

@ -23,7 +23,7 @@
#include <sys/stat.h>
#include <string>
#include "OnDiskWrapper.h"
#include "moses/Factor.h"
#include "moses/Util.h"
#include "util/exception.hh"
#include "util/string_stream.hh"
@ -219,42 +219,5 @@ uint64_t OnDiskWrapper::GetMisc(const std::string &key) const
return iter->second;
}
Word *OnDiskWrapper::ConvertFromMoses(const std::vector<Moses::FactorType> &factorsVec
, const Moses::Word &origWord) const
{
bool isNonTerminal = origWord.IsNonTerminal();
Word *newWord = new Word(isNonTerminal);
util::StringStream strme;
size_t factorType = factorsVec[0];
const Moses::Factor *factor = origWord.GetFactor(factorType);
UTIL_THROW_IF2(factor == NULL, "Expecting factor " << factorType);
strme << factor->GetString();
for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) {
size_t factorType = factorsVec[ind];
const Moses::Factor *factor = origWord.GetFactor(factorType);
if (factor == NULL) {
// can have fewer factors than factorsVec.size()
break;
}
UTIL_THROW_IF2(factor == NULL,
"Expecting factor " << factorType << " at position " << ind);
strme << "|" << factor->GetString();
} // for (size_t factorType
bool found;
uint64_t vocabId = m_vocab.GetVocabId(strme.str(), found);
if (!found) {
// factor not in phrase table -> phrase definitely not in. exit
delete newWord;
return NULL;
} else {
newWord->SetVocabId(vocabId);
return newWord;
}
}
}

@ -22,7 +22,6 @@
#include <fstream>
#include "Vocab.h"
#include "PhraseNode.h"
#include "moses/Word.h"
namespace OnDiskPt
{
@ -107,9 +106,6 @@ public:
uint64_t GetMisc(const std::string &key) const;
Word *ConvertFromMoses(const std::vector<Moses::FactorType> &factorsVec
, const Moses::Word &origWord) const;
};
}

@ -21,8 +21,6 @@
#include <algorithm>
#include <iostream>
#include "moses/Util.h"
#include "moses/TargetPhrase.h"
#include "moses/TranslationModel/PhraseDictionary.h"
#include "TargetPhrase.h"
#include "OnDiskWrapper.h"
#include "util/exception.hh"
@ -251,74 +249,6 @@ size_t TargetPhrase::WriteScoresToMemory(char *mem) const
return memUsed;
}
Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::FactorType> & inputFactors
, const std::vector<Moses::FactorType> &outputFactors
, const Vocab &vocab
, const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT
, bool isSyntax) const
{
Moses::TargetPhrase *ret = new Moses::TargetPhrase(&phraseDict);
// words
size_t phraseSize = GetSize();
UTIL_THROW_IF2(phraseSize == 0, "Target phrase cannot be empty"); // last word is lhs
if (isSyntax) {
--phraseSize;
}
for (size_t pos = 0; pos < phraseSize; ++pos) {
GetWord(pos).ConvertToMoses(outputFactors, vocab, ret->AddWord());
}
// alignments
// int index = 0;
Moses::AlignmentInfo::CollType alignTerm, alignNonTerm;
std::set<std::pair<size_t, size_t> > alignmentInfo;
const PhrasePtr sp = GetSourcePhrase();
for (size_t ind = 0; ind < m_align.size(); ++ind) {
const std::pair<size_t, size_t> &entry = m_align[ind];
alignmentInfo.insert(entry);
size_t sourcePos = entry.first;
size_t targetPos = entry.second;
if (GetWord(targetPos).IsNonTerminal()) {
alignNonTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
} else {
alignTerm.insert(std::pair<size_t,size_t>(sourcePos, targetPos));
}
}
ret->SetAlignTerm(alignTerm);
ret->SetAlignNonTerm(alignNonTerm);
if (isSyntax) {
Moses::Word *lhsTarget = new Moses::Word(true);
GetWord(GetSize() - 1).ConvertToMoses(outputFactors, vocab, *lhsTarget);
ret->SetTargetLHS(lhsTarget);
}
// set source phrase
Moses::Phrase mosesSP(Moses::Input);
for (size_t pos = 0; pos < sp->GetSize(); ++pos) {
sp->GetWord(pos).ConvertToMoses(inputFactors, vocab, mosesSP.AddWord());
}
// scores
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
// sparse features
ret->GetScoreBreakdown().Assign(&phraseDict, m_sparseFeatures);
// property
ret->SetProperties(m_property);
ret->EvaluateInIsolation(mosesSP, phraseDict.GetFeaturesToApply());
return ret;
}
uint64_t TargetPhrase::ReadOtherInfoFromFile(uint64_t filePos, std::fstream &fileTPColl)
{
assert(filePos == (uint64_t)fileTPColl.tellg());

@ -102,21 +102,23 @@ public:
return m_scores[ind];
}
Moses::TargetPhrase *ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
, const std::vector<Moses::FactorType> &outputFactors
, const Vocab &vocab
, const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT
, bool isSyntax) const;
uint64_t ReadOtherInfoFromFile(uint64_t filePos, std::fstream &fileTPColl);
uint64_t ReadFromFile(std::fstream &fileTP);
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
const std::string &GetProperty() const {
return m_property;
}
void SetProperty(const std::string &value) {
m_property = value;
}
const std::string &GetSparseFeatures() const {
return m_sparseFeatures;
}
void SetSparseFeatures(const std::string &value) {
m_sparseFeatures = value;
}

@ -21,8 +21,6 @@
#include <algorithm>
#include <iostream>
#include "moses/Util.h"
#include "moses/TargetPhraseCollection.h"
#include "moses/TranslationModel/PhraseDictionary.h"
#include "TargetPhraseCollection.h"
#include "Vocab.h"
#include "OnDiskWrapper.h"
@ -114,39 +112,6 @@ void TargetPhraseCollection::Save(OnDiskWrapper &onDiskWrapper)
}
Moses::TargetPhraseCollection::shared_ptr TargetPhraseCollection::ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
, const std::vector<Moses::FactorType> &outputFactors
, const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT
, Vocab &vocab
, bool isSyntax) const
{
Moses::TargetPhraseCollection::shared_ptr ret;
ret.reset(new Moses::TargetPhraseCollection);
CollType::const_iterator iter;
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
const TargetPhrase &tp = **iter;
Moses::TargetPhrase *mosesPhrase
= tp.ConvertToMoses(inputFactors, outputFactors, vocab,
phraseDict, weightT, isSyntax);
/*
// debugging output
stringstream strme;
strme << filePath << " " << *mosesPhrase;
mosesPhrase->SetDebugOutput(strme.str());
*/
ret->Add(mosesPhrase);
}
ret->Sort(true, phraseDict.GetTableLimit());
return ret;
}
void TargetPhraseCollection::ReadFromFile(size_t tableLimit, uint64_t filePos, OnDiskWrapper &onDiskWrapper)
{
fstream &fileTPColl = onDiskWrapper.GetFileTargetColl();

@ -21,7 +21,6 @@
#include "TargetPhrase.h"
#include "Vocab.h"
#include "moses/TargetPhraseCollection.h"
#include <boost/shared_ptr.hpp>
namespace Moses
@ -74,12 +73,6 @@ public:
uint64_t GetFilePos() const;
Moses::TargetPhraseCollection::shared_ptr ConvertToMoses(const std::vector<Moses::FactorType> &inputFactors
, const std::vector<Moses::FactorType> &outputFactors
, const Moses::PhraseDictionary &phraseDict
, const std::vector<float> &weightT
, Vocab &vocab
, bool isSyntax) const;
void ReadFromFile(size_t tableLimit, uint64_t filePos, OnDiskWrapper &onDiskWrapper);
const std::string GetDebugStr() const;

@ -21,6 +21,7 @@
#include <fstream>
#include "OnDiskWrapper.h"
#include "Vocab.h"
#include "moses/Util.h"
#include "util/exception.hh"
using namespace std;

@ -19,9 +19,7 @@
***********************************************************************/
#include <boost/algorithm/string/predicate.hpp>
#include "moses/FactorCollection.h"
#include "moses/Util.h"
#include "moses/Word.h"
#include "Word.h"
#include "util/tokenize_piece.hh"
@ -98,29 +96,6 @@ size_t Word::ReadFromFile(std::fstream &file)
return memAlloc;
}
void Word::ConvertToMoses(
const std::vector<Moses::FactorType> &outputFactorsVec,
const Vocab &vocab,
Moses::Word &overwrite) const
{
Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
overwrite = Moses::Word(m_isNonTerminal);
if (m_isNonTerminal) {
const std::string &tok = vocab.GetString(m_vocabId);
overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
} else {
// TODO: this conversion should have been done at load time.
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
}
UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
}
int Word::Compare(const Word &compare) const
{
int ret;

@ -67,14 +67,13 @@ public:
size_t ReadFromMemory(const char *mem);
size_t ReadFromFile(std::fstream &file);
void SetVocabId(uint32_t vocabId) {
m_vocabId = vocabId;
uint64_t GetVocabId() const {
return m_vocabId;
}
void ConvertToMoses(
const std::vector<Moses::FactorType> &outputFactorsVec,
const Vocab &vocab,
Moses::Word &overwrite) const;
void SetVocabId(uint64_t vocabId) {
m_vocabId = vocabId;
}
void DebugPrint(std::ostream &out, const Vocab &vocab) const;
inline const std::string &GetString(const Vocab &vocab) const {

@ -0,0 +1,93 @@
# goshen
Goshen is a Chrome extension that duplicates the utility of the Google Translate Chrome extension for on-page website translation, using the Goshen JavaScript library with Moses as the backend translator. (It can also swap in an arbitrary translation engine, provided the appropriate adapters are written.)
## 1. The Goshen.js Library
As Google Translate is the current go-to machine-translation system for developers, I intend to make Moses a viable alternative for even the non-savvy developer. This is in large part simplified by having an easily deployed (perhaps Dockerized) Moses server, as mentioned in the section above. However, it is also greatly simplified by exposing a comprehensive and well-formed JavaScript API that allows the same level of flexibility as the existing Google API.
Instead of trying to duplicate the Google Translate API, I chose to write a wrapper for *any* translation engine. An engine with an exposed HTTP endpoint can be added to the Goshen translation library by implementing `GoshenAdapter`, for which I have provided a complete `moses-mt-server` implementation (`MosesGoshenAdapter`) and a partially complete proof of concept for Google Translate (`GoogleTranslateGoshenAdapter`). This illustrates that the engines can be used interchangeably for simple translation tasks, while the full range of Moses functionality remains accessible in cases where Google Translate's public API cannot accommodate the more technical tasks.
The library, both commented and minified, is available in the `goshenlib/` directory [here](https://github.com/j6k4m8/goshen-moses); the unminified, importable version can also be found in `goshenlib/dist`. The complete documentation, along with usage examples and implementation explanations and justifications, is available in `goshenlib/docs` in the same repository.
## 2. Chrome Extension
This directory contains a Chrome extension that uses the CASMACAT moses-mt-server/Moses backend to provide a frontend website translation service. The extension automatically detects the relevant content of most articles or body text on the page and, at the user's request, translates it into the requested language. Usage is explained below, and also inside the extension popup after installation, for quick reference.
### Usage
1. **Install the unpacked extension.** Go to `chrome://extensions` and click <kbd>Load Unpacked Extension</kbd>. Navigate to this `goshen-chrome/` directory, and load.
2. This adds a Goshen icon to your Chrome toolbar. Clicking it brings up a simple modal that allows switching languages.
3. Use the <kbd>Alt</kbd>+<kbd>T</kbd> key-chord ("T" for "Translate") to begin text selection. The Goshen-translate extension highlights elements of text in cyan as you mouse over them; click to translate whatever is currently highlighted.
## Goshen.js Documentation
### Overview
The Goshen library provides a web-developer-facing library for handling machine translation. It allows interaction with arbitrary machine translation services, agnostic of the technology or algorithm stack.
### Usage
A very brief tutorial is provided here:
- Create a new Goshen object. Use the MosesGoshenAdapter, so that translations are handled by a Moses MT server.
```JavaScript
g = new Goshen('localhost:3000', 'http', MosesGoshenAdapter);
```
- Use the Goshen object to pass a translation job to the Moses adapter. The adapter will pass back a completed translation once the job completes.
```JavaScript
g.translate('This is a simple sentence.', Languages.ENGLISH, Languages.SPANISH);
```
- You can also optionally pass a callback function to the .translate method:
```JavaScript
g.translate('This is a simple sentence.',
Languages.ENGLISH,
Languages.SPANISH,
function(err, val) {
if (!!err) {
console.warn("Encountered an error: " + err);
} else {
console.info("Translated to: " + val);
}
});
```
If a callback is supplied, the request is made asynchronously and the call does not block. If one is not supplied, the return value of the function contains the translated text; `undefined` is returned if the translation fails.
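For instance, a minimal sketch of the blocking style (assuming a Moses MT server at `localhost:3000`, as in the tutorial above):
```JavaScript
// Moses MT server address from the tutorial above; substitute your own.
var g = new Goshen('localhost:3000', 'http', MosesGoshenAdapter);

// No callback, so the call blocks and returns the translated string,
// or undefined if the translation fails.
var result = g.translate('This is a simple sentence.',
                         Languages.ENGLISH,
                         Languages.SPANISH);
if (result === undefined) {
  console.warn('Translation failed; is the Moses server reachable?');
} else {
  console.info('Translated to: ' + result);
}
```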
### `Goshen`
The generic class for a Goshen.js object, the object that handles translation with an arbitrary translation backend. In order to specify a backend, pass a `type` parameter to the constructor. (Default is Moses, of course!)
- `Goshen`
- Arguments:
- `hostname`: A string hostname, such as `localhost:8000`. This is the base URL for formulating the RESTful API endpoint.
- `protocol`: The HTTP protocol. Either `http` or `https`.
- `type`: What type of GoshenAdapter to use. Options are currently `GoogleTranslateGoshenAdapter` or `MosesGoshenAdapter`.
- `opts`: A dictionary of options to pass to the adapter constructor. Currently, none are required for existing adapters.
- function `url`
Generate a complete URI. If `hostname` is `localhost:8000` and `protocol` is `https`, then `this.url('foo')` returns `https://localhost:8000/foo`.
- Arguments:
- `suffix`: A suffix to concatenate onto the end of a well-formed URI.
- Returns:
- String: The complete web-accessible URL.
- function `translate`
Translate a text from a source language to a target language.
- Arguments:
- `text`: The text to translate. If this is too long, it is split into shorter pieces (on sentence delimiters where possible), each of which is translated.
- `source`: An item from the `LANGUAGES` set (e.g. `'en-us'`)
- `target`: An item from the `LANGUAGES` set (e.g. `'en-us'`)
- `callback`: Optional. If supplied, must be a function (or be of a callable type) that will be run with `errors` and `value` as its two arguments.
- Returns:
- String: The translated text. All supplementary data, such as alignments or language detections, are ignored by this function.
### `GoshenAdapter`
The `Goshen` class secretly outsources all of its computation to a GoshenAdapter class attribute, which is responsible for performing the machine translation. `GoshenAdapter`s should expose `url` and `translate` functions unambiguously, with the same signatures as those in the `Goshen` class. Other functions may be optionally exposed.
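As a sketch of what a new adapter might look like (the `EchoGoshenAdapter` below is purely illustrative and not part of the library; it simply echoes text back while following the documented `url`/`translate` signatures):
```JavaScript
// Illustrative only: a trivial adapter that satisfies the GoshenAdapter contract
// by echoing the input text back rather than calling a real translation engine.
class EchoGoshenAdapter {
  constructor(hostname, protocol, opts) {
    this.hostname = hostname;
    this.protocol = protocol || 'http';
  }

  // Build a complete URL from a suffix, mirroring Goshen's url function.
  url(suffix) {
    return `${this.protocol}://${this.hostname}/${suffix || ''}`;
  }

  // "Translate" by returning the text unchanged; the optional callback
  // receives (errors, value) as documented above.
  translate(text, source, target, callback) {
    if (callback) callback(null, text);
    return text;
  }
}

// Any such adapter can then be swapped in for MosesGoshenAdapter:
// var g = new Goshen('localhost:3000', 'http', EchoGoshenAdapter);
```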
#### `MosesGoshenAdapter`
This is one implementation of the `GoshenAdapter` type that uses the `moses-mt-server` backend as its translation-engine API endpoint. It splits text into manageable chunks when translating, to avoid crashing the underlying Moses server with an over-large RAM allocation, as sketched below.
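A rough, illustrative sketch of that chunking idea (not the adapter's actual implementation):
```JavaScript
// Split long text on sentence boundaries, translate each piece separately,
// and rejoin the results, so no single request overwhelms the Moses server.
function translateInChunks(goshen, text, source, target) {
  var sentences = text.split('. ');
  var translated = sentences.map(function(sentence) {
    // Blocking form: returns the translated string (or undefined on failure).
    return goshen.translate(sentence, source, target);
  });
  return translated.join('. ');
}
```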
#### `GoogleTranslateGoshenAdapter`
This is another implementation of the `GoshenAdapter` type, one that uses the Google Translate API as its translation-engine endpoint. Because Google handles arbitrarily long text, this adapter does not split text the way `MosesGoshenAdapter` does.
For more information, see [this full report](https://github.com/j6k4m8/goshen-moses/blob/master/report/report.md), or contact Jordan Matelsky (@j6k4m8).

Binary image file added (644 B; not shown)

@ -0,0 +1,29 @@
{
"manifest_version": 2,
"name": "Goshen Web Translator",
"description": "Translate entire webpages with a casmacat-moses backend",
"version": "1.0",
"browser_action": {
"default_icon": "icon.png",
"default_popup": "popup/popup.html"
},
"permissions": [
"activeTab",
"storage",
"https://ajax.googleapis.com/"
],
"options_page" : "options/index.html",
"content_scripts": [{
"matches": ["http://*/*", "https://*/*", "file:///*"],
"css": ["onpage/onpage.css"],
"js": [
"onpage/onpage.js",
"onpage/goshen.js",
"onpage/chromegoshen.js"
],
"all_frames": true
}]
}

@ -0,0 +1,166 @@
(function(window) {
var demo_url = "ec2-52-23-242-15.compute-1.amazonaws.com:8081";
var _goshen = window._goshen;
on = function(event, cb) {
window.addEventListener(event, cb);
}
off = function(event, cb) {
window.removeEventListener(event, cb);
}
class ChromeGoshen {
constructor() {
this.G = new _goshen.Goshen(demo_url);
console.info("Goshenjs engine loaded successfully.")
}
/**
* Begin interactive dom node selection.
*/
selectMode() {
var self = this;
var selection = [];
var previousElement = null;
var showSelection = function() {
var olds = document.querySelectorAll('._goshen-selected');
for (var i = 0; i < olds.length; i++) {
olds[i].classList.remove('_goshen-selected');
}
for (var i = 0; i < selection.length; i++) {
selection[i].classList.add('_goshen-selected');
}
};
var setSelection = function(sel) {
selection = sel;
showSelection();
};
var validParents = [
"DIV", "ARTICLE", "BLOCKQUOTE", "MAIN",
"SECTION", "UL", "OL", "DL"
];
var validChildren = [
"P", "H1", "H2", "H3", "H4", "H5", "H6", "SPAN", "DL",
"OL", "UL", "BLOCKQUOTE", "SECTION"
];
var selectSiblings = function(el) {
var firstChild = el;
var parent = el.parentNode;
while (parent && !~validParents.indexOf(parent.tagName)) {
firstChild = parent;
parent = firstChild.parentNode;
}
if (parent) {
var kids = parent.childNodes,
len = kids.length,
result = [],
i = 0;
while (kids[i] !== firstChild) { i++; }
for (; i < len; i++) {
var kid = kids[i];
if (!!~validChildren.indexOf(kid.tagName)) {
result.push(kid);
}
}
return result;
} else { return [el]; }
};
var stop = function() {
off("mouseover", mouseoverHandler);
off("mousemove", moveHandler);
off("keydown", keydownHandler);
off("keyup", keyupHandler);
off("click", clickHandler);
self.performSelectTranslation(selection);
};
var mouseoverHandler = function(ev) {
previousElement = ev.target;
if (ev.altKey) {
setSelection([ev.target]);
} else {
setSelection(selectSiblings(ev.target));
}
};
var clickHandler = function(ev) {
stop();
};
var moveHandler = function(ev) {
mouseoverHandler(ev);
off("mousemove", moveHandler);
};
var keydownHandler = function(ev) {
if (ev.keyCode === 27) {
stop();
} else if (ev.altKey && selection.length > 1) {
setSelection([selection[0]]);
}
};
var keyupHandler = function(ev) {
if (!ev.altKey && selection.length === 1) {
setSelection(selectSiblings(selection[0]));
}
};
on("mouseover", mouseoverHandler);
on("click", clickHandler);
on("mousemove", moveHandler);
on("keydown", keydownHandler);
on("keyup", keyupHandler);
}
select(contextData) {
var text;
if (contextData === undefined) {
text = window.getSelection().toString();
} else {
text = contextData.selectionText;
}
if (text.trim().length > 0) {
this.init(this.parse.string(text));
window.getSelection().removeAllRanges();
} else {
this.selectMode();
}
};
_chunkedTranslation(text) {
// We need to find a way to split on sentences, or long things.
var texts = text.split('.');
for (var i = 0; i < texts.length; i++) {
texts[i] = this.G.translate(texts[i]);
}
return texts.join('.');
}
performSelectTranslation(selection) {
for (var i = 0; i < selection.length; i++) {
selection[i].classList.add('_goshen-active');
selection[i].innerText = this._chunkedTranslation(selection[i].innerText);
selection[i].classList.remove('_goshen-active');
selection[i].classList.remove('_goshen-selected');
}
}
};
_goshen._cg = new ChromeGoshen();
})(this);

@ -0,0 +1,107 @@
(function (root) {
var _goshen = root._goshen;
LANGUAGES = {
English: 'en',
en: 'en',
German: 'de',
de: 'de'
}
LOCALES = {
English: 'en-US',
en: 'en-US',
German: 'de',
de: 'de'
}
serialize = function(obj) {
var str = [];
for (var p in obj) {
str.push(encodeURIComponent(p) + "=" + encodeURIComponent(obj[p]));
}
return str.join("&");
};
class MosesGoshenAdapter {
constructor(hostname, protocol, opts) {
this.hostname = hostname;
this.protocol = protocol || 'http';
}
url(suffix) {
suffix = suffix || '';
return `${this.protocol}://${this.hostname}/translate?${suffix}`;
}
translate(text, target, source, callback) {
/* Translate a string `text`, using `opts` as corequisite options.
Arguments:
text (str): The text to translate.
target (str): The language to translate to
source (str): The language to translate from
callback (function): The function to call on the translated text
Returns:
str: The translated text
*/
var requestURL = this.url(serialize({
q: text,
key: 'x',
target: target || LANGUAGES.en,
source: source || LANGUAGES.de
}));
if (!!root.Meteor && !!root.HTTP) {
var response = HTTP.call('GET', requestURL, {});
var translated = response.data;
if (callback) callback(text, translated);
} else if (!!root.XMLHttpRequest) {
var request = new XMLHttpRequest();
request.open('GET', requestURL, false);
request.send(null);
if (request.status === 200) {
var translated = root.JSON.parse(request.responseText);
if (callback) callback(text, translated);
}
}
return translated.data.translations[0].translatedText
}
}
_goshen.Goshen = class Goshen {
constructor(hostname, protocol, type, opts) {
/* Create a new Goshen object.
Arguments:
hostname (str): A protocol-less URI such as `255.255.0.0:3000`
protocol (str: 'http'): An http protocol (either 'http' or 'https')
type (class): The type of adapter to use by default.
opts (dict): Options for configuration.
The options configuration dictionary can contain
*/
type = type || MosesGoshenAdapter;
this.ga = new type(hostname, protocol, opts);
}
url(suffix) {
return this.ga.url(suffix);
}
translate(text, target, source, callback) {
/* Calls the local GoshenAdapter#translate. */
return this.ga.translate(text, target, source, callback);
}
};
})(this);

@ -0,0 +1,7 @@
._goshen-selected {
background-color: rgba(100, 250, 250, 0.2);
}
._goshen-selected._goshen-active {
background-color: rgba(250, 100, 250, 0.2);
}

@ -0,0 +1,25 @@
// This is run inside the scope of a page, and so we have direct access to the
// page's HTML from here.
(function (window) {
if (typeof window._goshen !== 'undefined') {
console.warn("_goshen unable to initialize!");
return;
} else {
window._goshen = {};
}
// We can now request the contents of window.
window.addEventListener('keyup', function(ev) {
// This is a bit heavy-handed, and we almost assuredly don't need to be
// capturing every keyup event. But it's lightweight, and serves as a
// decent proof of concept.
if (ev.altKey && ev.keyCode == 84) {
// They pressed Alt+T. Call _goshen's get-text function!
window._goshen._cg.selectMode();
}
});
})(this);

@ -0,0 +1,10 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Goshen Options</title>
</head>
<body>
</body>
</html>

@ -0,0 +1,44 @@
<html>
<head>
<script src="../vendor/ustr.min.js"></script>
<!-- <script src="https://ajax.googleapis.com/ajax/libs/jquery/1.10.2/jquery.min.js"></script> -->
<script src="popup.js"></script>
<link rel="stylesheet/less" type="text/css" href="style.less" />
<script src="../vendor/less.js"></script>
</head>
<body style="width: 400px">
<div class="container">
<h1>Goshen Translator</h1>
<p>
Goshen uses <code>python-mt-server</code> and <code>moses</code> to
translate webpage text. For more information, see the repository
on <a href="https://github.com/j6k4m8/en600.468-final/">GitHub</a>.
</p>
<hr>
<div class="dropdown-container from-container">
<h2>Translate from:</h2>
<select class="from-select">
<!-- <option value="English">English</option> -->
<!-- <option value="French">French</option> -->
<option value="German">German</option>
</select>
</div>
<div class="dropdown-container to-container">
<h2>Translate to:</h2>
<select class="to-select">
<option value="English">English</option>
<!-- <option value="French">French</option> -->
<!-- <option value="German">German</option> -->
</select>
</div>
<hr>
<p>
To translate the webpage, press the <kbd>Alt</kbd>+<kbd>T</kbd>
keychord and mouse over the element(s) that you want to queue for
translation. Click to begin the translation &mdash; the selected
elements will turn blue to indicate that they're queued.
</p>
<!-- <button type="button" style="float: right;" class="js-translate">Translate</button> -->
</div>
</body>
</html>

@ -0,0 +1,22 @@
* {
box-sizing: border-box;
}
.container {
width: 100%;
.dropdown-container, .arrow-container {
display: inline-block;
width: 40%;
}
button {
border-radius: 0;
background: #09f;
color: white;
text-transform: uppercase;
padding: 1em;
border: none;
cursor: pointer;
letter-spacing: 0.1em;
font-size: 1.1em;
}
}

contrib/goshen-chrome/vendor/less.js (new vendored file, 21 lines)

File diff suppressed because one or more lines are too long

@ -0,0 +1,174 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.597260676">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.597260676" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.597260676" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.597260676." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1894543739" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.607512381" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/moses2-cmd}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.219597164" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2087910158" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1546967275" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.826148068" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1303802900" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.368826329" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include/"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.758438174" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.123491630" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.848723608" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1977842293" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.322285470" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1011859741" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1706155110" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.24079646" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.libs.587418382" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses2"/>
<listOptionValue builtIn="false" value="xmlrpc_xmltok"/>
<listOptionValue builtIn="false" value="xmlrpc_xmlparse"/>
<listOptionValue builtIn="false" value="xmlrpc_util++"/>
<listOptionValue builtIn="false" value="xmlrpc_util"/>
<listOptionValue builtIn="false" value="xmlrpc_server_abyss++"/>
<listOptionValue builtIn="false" value="xmlrpc_server_abyss"/>
<listOptionValue builtIn="false" value="xmlrpc_server++"/>
<listOptionValue builtIn="false" value="xmlrpc_server"/>
<listOptionValue builtIn="false" value="xmlrpc_abyss"/>
<listOptionValue builtIn="false" value="xmlrpc++"/>
<listOptionValue builtIn="false" value="xmlrpc"/>
<listOptionValue builtIn="false" value="cmph"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<option id="gnu.cpp.link.option.paths.1920945405" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../moses2/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="/opt/local/lib"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1508244207" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.994919684" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.2015973846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.347900682">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.347900682" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.347900682" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.347900682." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.19950210" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.201761026" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/moses2-cmd}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.249336616" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.475854190" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1047605391" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.881009789" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.695719104" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2077834205" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.534514015" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.301062410" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.1891262877" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.176623232" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1762742642" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.563722476" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1771116495" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.167166289" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.659838834" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="moses2-cmd.cdt.managedbuild.target.gnu.exe.1380079855" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.597260676;cdt.managedbuild.config.gnu.exe.debug.597260676.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1546967275;cdt.managedbuild.tool.gnu.cpp.compiler.input.123491630">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.597260676;cdt.managedbuild.config.gnu.exe.debug.597260676.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.848723608;cdt.managedbuild.tool.gnu.c.compiler.input.1011859741">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.347900682;cdt.managedbuild.config.gnu.exe.release.347900682.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.534514015;cdt.managedbuild.tool.gnu.c.compiler.input.176623232">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.347900682;cdt.managedbuild.config.gnu.exe.release.347900682.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1047605391;cdt.managedbuild.tool.gnu.cpp.compiler.input.2077834205">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/moses2-cmd"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/moses2-cmd"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
</cproject>

@ -0,0 +1,43 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>moses2-cmd</name>
<comment></comment>
<projects>
<project>lm</project>
<project>moses</project>
<project>moses2</project>
<project>util</project>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>Main.cpp</name>
<type>1</type>
<locationURI>PARENT-1-PROJECT_LOC/moses2/Main.cpp</locationURI>
</link>
<link>
<name>Main.h</name>
<type>1</type>
<locationURI>PARENT-1-PROJECT_LOC/moses2/Main.h</locationURI>
</link>
</linkedResources>
</projectDescription>

contrib/moses2/.cproject (new file, 180 lines)

@ -0,0 +1,180 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings>
<externalSetting>
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/moses2"/>
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/moses2/Debug"/>
<entry flags="RESOLVED" kind="libraryFile" name="moses2" srcPrefixMapping="" srcRootPath=""/>
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.329828208" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.targetPlatform.gnu.cross.389137927" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/moses2}/Debug" id="cdt.managedbuild.builder.gnu.cross.2144359329" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1430831084" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.354944414" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.639588389" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<option id="gnu.c.compiler.option.include.paths.7696150" name="Include paths (-I)" superClass="gnu.c.compiler.option.include.paths" useByScannerDiscovery="false"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1538601099" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1686613508" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.299605809" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.769854045" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1502531988" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" useByScannerDiscovery="false" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../xmlrpc-c/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/darts-clone&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.1025143565" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" useByScannerDiscovery="false" valueType="definedSymbols">
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="HAVE_CMPH"/>
<listOptionValue builtIn="false" value="HAVE_PROBINGPT"/>
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
<listOptionValue builtIn="false" value="WITH_THREADS"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1439481930" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.351063004" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.paths.1260140770" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../DALM/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="/opt/local/lib"/>
</option>
<option id="gnu.cpp.link.option.libs.1671854463" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="cmph"/>
<listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="boost_serialization"/>
<listOptionValue builtIn="false" value="boost_system"/>
<listOptionValue builtIn="false" value="boost_thread"/>
<listOptionValue builtIn="false" value="boost_filesystem"/>
<listOptionValue builtIn="false" value="boost_program_options"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1955045545" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1028669671" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.917359146" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.254745364" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041.1123771618" name="HypothesisColl.h" rcbsApplicability="disable" resourcePath="HypothesisColl.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="LM/LanguageModelDALM.cpp|defer|Main.cpp|CreateProbingPT2.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1445209421">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1445209421" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe,org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1445209421" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1445209421." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.662721996" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF;org.eclipse.cdt.core.MachO64" id="cdt.managedbuild.targetPlatform.gnu.cross.895874625" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/moses2}/Release" id="cdt.managedbuild.builder.gnu.cross.468799862" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1943249236" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1011693969" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1339551360" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1175448562" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.2103617063" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.13836904" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.763147930" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.946001537" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1462232829" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1359778241" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.89443491" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.762494367" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.140795725" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.95131148" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="moses2.cdt.managedbuild.target.gnu.cross.exe.1741914059" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1445209421;cdt.managedbuild.config.gnu.cross.exe.release.1445209421.;cdt.managedbuild.tool.gnu.cross.c.compiler.1943249236;cdt.managedbuild.tool.gnu.c.compiler.input.1175448562">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1445209421;cdt.managedbuild.config.gnu.cross.exe.release.1445209421.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.2103617063;cdt.managedbuild.tool.gnu.cpp.compiler.input.946001537">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041;cdt.managedbuild.config.gnu.cross.exe.debug.1097293041.;cdt.managedbuild.tool.gnu.cross.c.compiler.1430831084;cdt.managedbuild.tool.gnu.c.compiler.input.1538601099">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1097293041;cdt.managedbuild.config.gnu.cross.exe.debug.1097293041.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1686613508;cdt.managedbuild.tool.gnu.cpp.compiler.input.2101942464">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/moses2"/>
</configuration>
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/moses2"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
</cproject>

contrib/moses2/.project Normal file
View File

@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>moses2</name>
<comment></comment>
<projects>
<project>moses</project>
<project>util</project>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
</projectDescription>

View File

@ -0,0 +1,176 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <algorithm>
#include <set>
#include <sstream>
#include "AlignmentInfo.h"
#include "legacy/Util2.h"
#include "util/exception.hh"
namespace Moses2
{
AlignmentInfo::AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs)
: m_collection(pairs)
{
BuildNonTermIndexMaps();
}
AlignmentInfo::AlignmentInfo(const std::vector<unsigned char> &aln)
{
assert(aln.size()%2==0);
for (size_t i = 0; i < aln.size(); i+= 2)
m_collection.insert(std::make_pair(size_t(aln[i]),size_t(aln[i+1])));
BuildNonTermIndexMaps();
}
AlignmentInfo::AlignmentInfo(const std::string &str)
{
std::vector<std::string> points = Tokenize(str, " ");
std::vector<std::string>::const_iterator iter;
for (iter = points.begin(); iter != points.end(); iter++) {
std::vector<size_t> point = Tokenize<size_t>(*iter, "-");
UTIL_THROW_IF2(point.size() != 2, "Bad format of word alignment point: " << *iter);
Add(point[0], point[1]);
}
}
void AlignmentInfo::BuildNonTermIndexMaps()
{
if (m_collection.empty()) {
return;
}
const_iterator p = begin();
size_t maxIndex = p->second;
for (++p; p != end(); ++p) {
if (p->second > maxIndex) {
maxIndex = p->second;
}
}
m_nonTermIndexMap.resize(maxIndex+1, NOT_FOUND);
m_nonTermIndexMap2.resize(maxIndex+1, NOT_FOUND);
size_t i = 0;
for (p = begin(); p != end(); ++p) {
if (m_nonTermIndexMap[p->second] != NOT_FOUND) {
// 1-to-many. Definitely a set of terminals. Don't bother storing 1-to-1 index map
m_nonTermIndexMap.clear();
m_nonTermIndexMap2.clear();
return;
}
m_nonTermIndexMap[p->second] = i++;
m_nonTermIndexMap2[p->second] = p->first;
}
}
std::set<size_t> AlignmentInfo::GetAlignmentsForSource(size_t sourcePos) const
{
std::set<size_t> ret;
CollType::const_iterator iter;
for (iter = begin(); iter != end(); ++iter) {
// const std::pair<size_t,size_t> &align = *iter;
if (iter->first == sourcePos) {
ret.insert(iter->second);
}
}
return ret;
}
std::set<size_t> AlignmentInfo::GetAlignmentsForTarget(size_t targetPos) const
{
std::set<size_t> ret;
CollType::const_iterator iter;
for (iter = begin(); iter != end(); ++iter) {
// const std::pair<size_t,size_t> &align = *iter;
if (iter->second == targetPos) {
ret.insert(iter->first);
}
}
return ret;
}
bool
compare_target(std::pair<size_t,size_t> const* a,
std::pair<size_t,size_t> const* b)
{
if(a->second < b->second) return true;
if(a->second == b->second) return (a->first < b->first);
return false;
}
std::vector< const std::pair<size_t,size_t>* >
AlignmentInfo::
GetSortedAlignments(WordAlignmentSort SortOrder) const
{
std::vector< const std::pair<size_t,size_t>* > ret;
CollType::const_iterator iter;
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
const std::pair<size_t,size_t> &alignPair = *iter;
ret.push_back(&alignPair);
}
switch (SortOrder) {
case NoSort:
break;
case TargetOrder:
std::sort(ret.begin(), ret.end(), compare_target);
break;
default:
UTIL_THROW(util::Exception, "Unknown word alignment sort option: "
<< SortOrder);
}
return ret;
}
std::vector<size_t> AlignmentInfo::GetSourceIndex2PosMap() const
{
std::set<size_t> sourcePoses;
CollType::const_iterator iter;
for (iter = m_collection.begin(); iter != m_collection.end(); ++iter) {
size_t sourcePos = iter->first;
sourcePoses.insert(sourcePos);
}
std::vector<size_t> ret(sourcePoses.begin(), sourcePoses.end());
return ret;
}
std::string AlignmentInfo::Debug(const System &system) const
{
std::stringstream out;
out << *this;
return out.str();
}
std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj)
{
AlignmentInfo::const_iterator iter;
for (iter = obj.begin(); iter != obj.end(); ++iter) {
out << iter->first << "-" << iter->second << " ";
}
return out;
}
}
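The string constructor above expects alignment points of the form "sourcePos-targetPos", separated by spaces. Below is a minimal standalone sketch (C++11, standard library only) of that parsing and of the GetAlignmentsForSource() lookup; the real class additionally interns the result via AlignmentInfoCollection and builds the non-terminal index maps.

#include <iostream>
#include <set>
#include <sstream>
#include <string>
#include <utility>

int main()
{
  // Parse "0-0 1-2 2-1" into (source, target) pairs, mirroring
  // AlignmentInfo(const std::string &str) above.
  std::set<std::pair<size_t, size_t> > collection;
  std::istringstream in("0-0 1-2 2-1");
  std::string point;
  while (in >> point) {
    size_t dash = point.find('-');
    size_t src = std::stoul(point.substr(0, dash));
    size_t tgt = std::stoul(point.substr(dash + 1));
    collection.insert(std::make_pair(src, tgt));
  }

  // Equivalent of GetAlignmentsForSource(1): collect every target
  // position aligned to source position 1.
  std::set<std::pair<size_t, size_t> >::const_iterator it;
  for (it = collection.begin(); it != collection.end(); ++it) {
    if (it->first == 1) {
      std::cout << "source 1 -> target " << it->second << std::endl;
    }
  }
  return 0;
}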

View File

@ -0,0 +1,148 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include <ostream>
#include <set>
#include <vector>
#include <cstdlib>
#include <boost/functional/hash.hpp>
#include "TypeDef.h"
namespace Moses2
{
class AlignmentInfoCollection;
class System;
/** Collection of non-terminal alignment pairs, ordered by source index.
* Usually held by a TargetPhrase to map non-terms in hierarchical/syntax models
*/
class AlignmentInfo
{
friend struct AlignmentInfoOrderer;
friend struct AlignmentInfoHasher;
friend class AlignmentInfoCollection;
friend class VW;
friend std::ostream& operator<<(std::ostream& out, const AlignmentInfo& obj);
public:
typedef std::set<std::pair<size_t,size_t> > CollType;
typedef std::vector<size_t> NonTermIndexMap;
typedef CollType::const_iterator const_iterator;
const_iterator begin() const {
return m_collection.begin();
}
const_iterator end() const {
return m_collection.end();
}
void Add(size_t sourcePos, size_t targetPos) {
m_collection.insert(std::pair<size_t, size_t>(sourcePos, targetPos));
}
/** Provides a map from target-side to source-side non-terminal indices.
* The target-side index should be the rule symbol index (COUNTING terminals).
* The index returned is the rule non-terminal index (IGNORING terminals).
*/
const NonTermIndexMap &GetNonTermIndexMap() const {
return m_nonTermIndexMap;
}
/** Like GetNonTermIndexMap but the return value is the symbol index (i.e.
* the index counting both terminals and non-terminals) */
const NonTermIndexMap &GetNonTermIndexMap2() const {
return m_nonTermIndexMap2;
}
const CollType &GetAlignments() const {
return m_collection;
}
std::set<size_t> GetAlignmentsForSource(size_t sourcePos) const;
std::set<size_t> GetAlignmentsForTarget(size_t targetPos) const;
size_t GetSize() const {
return m_collection.size();
}
std::vector< const std::pair<size_t,size_t>* >
GetSortedAlignments(Moses2::WordAlignmentSort SortOrder) const;
std::vector<size_t> GetSourceIndex2PosMap() const;
bool operator==(const AlignmentInfo& rhs) const {
return m_collection == rhs.m_collection &&
m_nonTermIndexMap == rhs.m_nonTermIndexMap;
}
std::string Debug(const System &system) const;
private:
//! AlignmentInfo objects should only be created by an AlignmentInfoCollection
explicit AlignmentInfo(const std::set<std::pair<size_t,size_t> > &pairs);
explicit AlignmentInfo(const std::vector<unsigned char> &aln);
// used only by VW to load word alignment between sentences
explicit AlignmentInfo(const std::string &str);
void BuildNonTermIndexMaps();
CollType m_collection;
NonTermIndexMap m_nonTermIndexMap;
NonTermIndexMap m_nonTermIndexMap2;
};
/** Define an arbitrary strict weak ordering between AlignmentInfo objects
* for use by AlignmentInfoCollection.
*/
struct AlignmentInfoOrderer {
bool operator()(const AlignmentInfo &a, const AlignmentInfo &b) const {
if (a.m_collection == b.m_collection) {
return a.m_nonTermIndexMap < b.m_nonTermIndexMap;
} else {
return a.m_collection < b.m_collection;
}
}
};
/**
* Hashing functoid
**/
struct AlignmentInfoHasher {
size_t operator()(const AlignmentInfo& a) const {
size_t seed = 0;
boost::hash_combine(seed,a.m_collection);
boost::hash_combine(seed,a.m_nonTermIndexMap);
return seed;
}
};
inline size_t hash_value(const AlignmentInfo& a)
{
static AlignmentInfoHasher hasher;
return hasher(a);
}
}
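A worked reading of the two index maps above, using a hypothetical rule that is not taken from the source: suppose the source side is "X of X" with non-terminals at source symbol positions 0 and 2, the target side is "X 's X" with non-terminals at target symbol positions 0 and 2, and the non-terminal alignments are (0,2) and (2,0). The collection is ordered by source position, so BuildNonTermIndexMaps() yields GetNonTermIndexMap()[2] == 0 and GetNonTermIndexMap()[0] == 1 (the aligned source non-terminal index, ignoring terminals), while GetNonTermIndexMap2()[2] == 0 and GetNonTermIndexMap2()[0] == 2 (the aligned source symbol position, counting terminals).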

View File

@ -0,0 +1,62 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "AlignmentInfoCollection.h"
using namespace std;
namespace Moses2
{
AlignmentInfoCollection AlignmentInfoCollection::s_instance;
AlignmentInfoCollection::AlignmentInfoCollection()
{
std::set<std::pair<size_t,size_t> > pairs;
m_emptyAlignmentInfo = Add(pairs);
}
AlignmentInfoCollection::~AlignmentInfoCollection()
{}
const AlignmentInfo &AlignmentInfoCollection::GetEmptyAlignmentInfo() const
{
return *m_emptyAlignmentInfo;
}
AlignmentInfo const *
AlignmentInfoCollection::
Add(AlignmentInfo const& ainfo)
{
#ifdef WITH_THREADS
{
boost::shared_lock<boost::shared_mutex> read_lock(m_accessLock);
AlignmentInfoSet::const_iterator i = m_collection.find(ainfo);
if (i != m_collection.end())
return &*i;
}
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
#endif
std::pair<AlignmentInfoSet::iterator, bool> ret = m_collection.insert(ainfo);
return &(*ret.first);
}
}
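Add() above follows a check-then-insert locking discipline: probe the set under a shared (read) lock, and only take the exclusive (write) lock when an insertion is actually needed. A minimal sketch of the same pattern, with the cached type simplified to std::string and assuming Boost.Thread is available (as the original does under WITH_THREADS):

#include <boost/thread/shared_mutex.hpp>
#include <boost/thread/locks.hpp>
#include <set>
#include <string>

class StringCache
{
public:
  const std::string *Add(const std::string &s) {
    {
      // fast path: shared lock, many readers may probe concurrently
      boost::shared_lock<boost::shared_mutex> read_lock(m_mutex);
      std::set<std::string>::const_iterator i = m_coll.find(s);
      if (i != m_coll.end()) return &*i;
    }
    // slow path: exclusive lock; insert() is a no-op if another
    // writer got there first, so the returned iterator is always valid
    boost::unique_lock<boost::shared_mutex> write_lock(m_mutex);
    return &*m_coll.insert(s).first;
  }

private:
  boost::shared_mutex m_mutex;
  std::set<std::string> m_coll;
};

As in the original, this relies on std::set iterators remaining valid across insertions, so the returned pointer stays stable for the lifetime of the cache.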

View File

@ -0,0 +1,81 @@
/***********************************************************************
Moses - statistical machine translation system
Copyright (C) 2006-2011 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include "AlignmentInfo.h"
#include <set>
#ifdef WITH_THREADS
#include <boost/thread/shared_mutex.hpp>
#include <boost/thread/locks.hpp>
#endif
namespace Moses2
{
/** Singleton collection of all AlignmentInfo objects.
* Used as a cache of all alignment info to save space.
*/
class AlignmentInfoCollection
{
public:
static AlignmentInfoCollection &Instance() {
return s_instance;
}
/** Returns a pointer to an AlignmentInfo object with the same source-target
* alignment pairs as given in the argument. If the collection already
* contains such an object then returns a pointer to it; otherwise a new
* one is inserted.
*/
private:
const AlignmentInfo* Add(AlignmentInfo const& ainfo);
public:
template<typename ALNREP>
AlignmentInfo const *
Add(ALNREP const & aln) {
return this->Add(AlignmentInfo(aln));
}
//! Returns a pointer to an empty AlignmentInfo object.
const AlignmentInfo &GetEmptyAlignmentInfo() const;
private:
typedef std::set<AlignmentInfo, AlignmentInfoOrderer> AlignmentInfoSet;
//! Only a single static variable should be created.
AlignmentInfoCollection();
~AlignmentInfoCollection();
static AlignmentInfoCollection s_instance;
#ifdef WITH_THREADS
//reader-writer lock
mutable boost::shared_mutex m_accessLock;
#endif
AlignmentInfoSet m_collection;
const AlignmentInfo *m_emptyAlignmentInfo;
};
}

contrib/moses2/ArcLists.cpp Normal file
View File

@ -0,0 +1,129 @@
/*
* ArcList.cpp
*
* Created on: 26 Oct 2015
* Author: hieu
*/
#include <iostream>
#include <sstream>
#include <algorithm>
#include <boost/foreach.hpp>
#include "ArcLists.h"
#include "HypothesisBase.h"
#include "util/exception.hh"
using namespace std;
namespace Moses2
{
ArcLists::ArcLists()
{
// TODO Auto-generated constructor stub
}
ArcLists::~ArcLists()
{
BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
const ArcList *arcList = collPair.second;
delete arcList;
}
}
void ArcLists::AddArc(bool added, const HypothesisBase *currHypo,
const HypothesisBase *otherHypo)
{
//cerr << added << " " << currHypo << " " << otherHypo << endl;
ArcList *arcList;
if (added) {
// we're winners!
if (otherHypo) {
// there was an existing losing hypo
arcList = &GetAndDetachArcList(otherHypo);
}
else {
// there was no existing hypo
arcList = new ArcList;
}
m_coll[currHypo] = arcList;
}
else {
// we're losers!
// there should be a winner, we're not doing beam pruning
UTIL_THROW_IF2(otherHypo == NULL, "There must have been a winning hypo");
arcList = &GetArcList(otherHypo);
}
// in any case, add the curr hypo
arcList->push_back(currHypo);
}
ArcList &ArcLists::GetArcList(const HypothesisBase *hypo)
{
Coll::iterator iter = m_coll.find(hypo);
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
ArcList &arcList = *iter->second;
return arcList;
}
const ArcList &ArcLists::GetArcList(const HypothesisBase *hypo) const
{
Coll::const_iterator iter = m_coll.find(hypo);
if (iter == m_coll.end()) {
cerr << "looking for:" << hypo << " have " << m_coll.size() << " :";
BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
const HypothesisBase *hypo = collPair.first;
cerr << hypo << " ";
}
}
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list for " << hypo);
ArcList &arcList = *iter->second;
return arcList;
}
ArcList &ArcLists::GetAndDetachArcList(const HypothesisBase *hypo)
{
Coll::iterator iter = m_coll.find(hypo);
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
ArcList &arcList = *iter->second;
m_coll.erase(iter);
return arcList;
}
void ArcLists::Sort()
{
BOOST_FOREACH(Coll::value_type &collPair, m_coll){
ArcList &list = *collPair.second;
std::sort(list.begin(), list.end(), HypothesisFutureScoreOrderer() );
}
}
void ArcLists::Delete(const HypothesisBase *hypo)
{
//cerr << "hypo=" << hypo->Debug() << endl;
//cerr << "m_coll=" << m_coll.size() << endl;
Coll::iterator iter = m_coll.find(hypo);
UTIL_THROW_IF2(iter == m_coll.end(), "Can't find arc list");
ArcList *arcList = iter->second;
m_coll.erase(iter);
delete arcList;
}
std::string ArcLists::Debug(const System &system) const
{
stringstream strm;
BOOST_FOREACH(const Coll::value_type &collPair, m_coll){
const ArcList *arcList = collPair.second;
strm << arcList << "(" << arcList->size() << ") ";
}
return strm.str();
}
}
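For illustration with hypothetical hypotheses A, B and C: AddArc(true, A, NULL) creates the list [A]; if B later recombines with A and loses, AddArc(false, B, A) appends the loser, giving [A, B]; if C then displaces A in the stack, AddArc(true, C, A) detaches A's list, re-keys it to C and appends C, giving [A, B, C]. The list attached to the surviving hypothesis therefore always contains the hypothesis itself plus everything that was recombined into it, which is what Sort() later orders with HypothesisFutureScoreOrderer.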

contrib/moses2/ArcLists.h Normal file
View File

@ -0,0 +1,43 @@
/*
* ArcList.h
*
* Created on: 26 Oct 2015
* Author: hieu
*/
#pragma once
#include <vector>
#include <boost/unordered_map.hpp>
namespace Moses2
{
class System;
class HypothesisBase;
typedef std::vector<const HypothesisBase*> ArcList;
class ArcLists
{
public:
ArcLists();
virtual ~ArcLists();
void AddArc(bool added, const HypothesisBase *currHypo,
const HypothesisBase *otherHypo);
void Sort();
void Delete(const HypothesisBase *hypo);
const ArcList &GetArcList(const HypothesisBase *hypo) const;
std::string Debug(const System &system) const;
protected:
typedef boost::unordered_map<const HypothesisBase*, ArcList*> Coll;
Coll m_coll;
ArcList &GetArcList(const HypothesisBase *hypo);
ArcList &GetAndDetachArcList(const HypothesisBase *hypo);
};
}

contrib/moses2/Array.h Normal file
View File

@ -0,0 +1,91 @@
#pragma once
#include <cassert>
#include <boost/functional/hash.hpp>
#include "MemPool.h"
namespace Moses2
{
template<typename T>
class Array
{
public:
typedef T* iterator;
typedef const T* const_iterator;
//! iterators
const_iterator begin() const
{
return m_arr;
}
const_iterator end() const
{
return m_arr + m_size;
}
iterator begin()
{
return m_arr;
}
iterator end()
{
return m_arr + m_size;
}
Array(MemPool &pool, size_t size = 0, const T &val = T())
{
m_size = size;
m_maxSize = size;
m_arr = pool.Allocate<T>(size);
for (size_t i = 0; i < size; ++i) {
m_arr[i] = val;
}
}
size_t size() const
{
return m_size;
}
const T& operator[](size_t ind) const
{
return m_arr[ind];
}
T& operator[](size_t ind)
{
return m_arr[ind];
}
size_t hash() const
{
size_t seed = 0;
for (size_t i = 0; i < m_size; ++i) {
boost::hash_combine(seed, m_arr[i]);
}
return seed;
}
int Compare(const Array &compare) const
{
int cmp = memcmp(m_arr, compare.m_arr, sizeof(T) * m_size);
return cmp;
}
bool operator==(const Array &compare) const
{
int cmp = Compare(compare);
return cmp == 0;
}
void resize(size_t newSize)
{
assert(newSize <= m_maxSize);
m_size = newSize;
}
protected:
size_t m_size, m_maxSize;
T *m_arr;
};
}
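A minimal usage sketch for the pool-backed array above. It assumes the MemPool class declared in MemPool.h (already included by Array.h) is default-constructible; the element values are made up for illustration:

#include <cstddef>
#include <iostream>
#include "MemPool.h"
#include "Array.h"

void Demo()
{
  Moses2::MemPool pool;
  Moses2::Array<int> arr(pool, 5, 0);   // 5 ints allocated from the pool, initialised to 0
  for (size_t i = 0; i < arr.size(); ++i) {
    arr[i] = static_cast<int>(i * i);
  }
  arr.resize(3);                        // shrink the logical size; the pool storage is untouched
  std::cout << arr.size() << " " << arr.hash() << std::endl;
}

Note that the array never grows: resize() only moves the logical end within the storage allocated at construction time.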

View File

@ -0,0 +1,117 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <string>
#include <iostream>
#include "EstimatedScores.h"
using namespace std;
namespace Moses2
{
/**
* Calculate future score estimate for a given coverage bitmap
*
* \param bitmap coverage bitmap
*/
float EstimatedScores::CalcEstimatedScore(Bitmap const &bitmap) const
{
const size_t notInGap = numeric_limits<size_t>::max();
size_t startGap = notInGap;
float estimatedScore = 0.0f;
for (size_t currPos = 0; currPos < bitmap.GetSize(); currPos++) {
// start of a new gap?
if (bitmap.GetValue(currPos) == false && startGap == notInGap) {
startGap = currPos;
}
// end of a gap?
else if (bitmap.GetValue(currPos) == true && startGap != notInGap) {
estimatedScore += GetValue(startGap, currPos - 1);
startGap = notInGap;
}
}
// coverage ending with gap?
if (startGap != notInGap) {
estimatedScore += GetValue(startGap, bitmap.GetSize() - 1);
}
return estimatedScore;
}
/**
* Calculate the future score estimate for a given coverage bitmap
* and an additional span that is also covered. This function is used
* to compute future score estimates for hypotheses that we may want
* to build, but first want to check.
*
* Note: this function is implemented in a slightly more involved way than
* the basic one (without the additional phrase) for speed reasons,
* which is probably overkill.
*
* \param bitmap coverage bitmap
* \param startPos start of the span that is added to the coverage
* \param endPos end of the span that is added to the coverage
*/
float EstimatedScores::CalcEstimatedScore(Bitmap const &bitmap, size_t startPos,
size_t endPos) const
{
const size_t notInGap = numeric_limits<size_t>::max();
float estimatedScore = 0.0f;
size_t startGap = bitmap.GetFirstGapPos();
if (startGap == NOT_FOUND) return estimatedScore; // everything filled
// start loop at first gap
size_t startLoop = startGap + 1;
if (startPos == startGap) { // unless covered by phrase
startGap = notInGap;
startLoop = endPos + 1; // -> postpone start
}
size_t lastCovered = bitmap.GetLastPos();
if (endPos > lastCovered || lastCovered == NOT_FOUND) lastCovered = endPos;
for (size_t currPos = startLoop; currPos <= lastCovered; currPos++) {
// start of a new gap?
if (startGap == notInGap && bitmap.GetValue(currPos) == false
&& (currPos < startPos || currPos > endPos)) {
startGap = currPos;
}
// end of a gap?
else if (startGap != notInGap
&& (bitmap.GetValue(currPos) == true
|| (startPos <= currPos && currPos <= endPos))) {
estimatedScore += GetValue(startGap, currPos - 1);
startGap = notInGap;
}
}
// coverage ending with gap?
if (lastCovered != bitmap.GetSize() - 1) {
estimatedScore += GetValue(lastCovered + 1, bitmap.GetSize() - 1);
}
return estimatedScore;
}
}
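The scan in CalcEstimatedScore(bitmap) walks the coverage vector once, opening a gap at each uncovered position and closing it when coverage resumes, then adds the stored estimate for every gap. A standalone sketch of that scan, with the Bitmap reduced to a std::vector<bool> and the Matrix lookup replaced by a made-up per-word cost:

#include <iostream>
#include <limits>
#include <vector>

float EstimateForSpan(size_t start, size_t end)
{
  // stand-in for Matrix<float>::GetValue(start, end):
  // pretend each uncovered word costs -1.0
  return -1.0f * float(end - start + 1);
}

float CalcEstimatedScore(const std::vector<bool> &covered)
{
  const size_t notInGap = std::numeric_limits<size_t>::max();
  size_t startGap = notInGap;
  float estimatedScore = 0.0f;
  for (size_t pos = 0; pos < covered.size(); ++pos) {
    if (!covered[pos] && startGap == notInGap) {
      startGap = pos;                                        // start of a new gap
    } else if (covered[pos] && startGap != notInGap) {
      estimatedScore += EstimateForSpan(startGap, pos - 1);  // gap closed
      startGap = notInGap;
    }
  }
  if (startGap != notInGap) {                                // coverage ends with a gap
    estimatedScore += EstimateForSpan(startGap, covered.size() - 1);
  }
  return estimatedScore;
}

int main()
{
  bool c[] = { true, false, false, true, false };
  std::vector<bool> covered(c, c + 5);
  std::cout << CalcEstimatedScore(covered) << std::endl;     // -3: gaps [1,2] and [4,4]
  return 0;
}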

View File

@ -0,0 +1,61 @@
// $Id$
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <iostream>
#include "legacy/Util2.h"
#include "legacy/Bitmap.h"
#include "legacy/Matrix.h"
namespace Moses2
{
class MemPool;
class System;
//! A square array of floats to store future costs in the phrase-based decoder
class EstimatedScores: public Matrix<float>
{
public:
EstimatedScores(MemPool &pool, size_t size) :
Matrix<float>(pool, size, size)
{
}
~EstimatedScores(); // not implemented
float CalcEstimatedScore(Bitmap const&) const;
float CalcEstimatedScore(Bitmap const&, size_t startPos, size_t endPos) const;
std::ostream &Debug(std::ostream &out, const System &system) const
{
for (size_t endPos = 0; endPos < GetSize(); endPos++) {
for (size_t startPos = 0; startPos < GetSize(); startPos++)
out << GetValue(startPos, endPos) << " ";
out << std::endl;
}
return out;
}
};
}

View File

@ -0,0 +1,189 @@
/*
* Distortion.cpp
*
* Created on: 28 Oct 2015
* Author: hieu
*/
#include <sstream>
#include "Distortion.h"
#include "../PhraseBased/Hypothesis.h"
#include "../PhraseBased/Manager.h"
#include "../legacy/Range.h"
#include "../legacy/Bitmap.h"
using namespace std;
namespace Moses2
{
struct DistortionState_traditional: public FFState
{
Range range;
int first_gap;
DistortionState_traditional() :
range()
{
// uninitialised
}
void Set(const Range& wr, int fg)
{
range = wr;
first_gap = fg;
}
size_t hash() const
{
return range.GetEndPos();
}
virtual bool operator==(const FFState& other) const
{
const DistortionState_traditional& o =
static_cast<const DistortionState_traditional&>(other);
return range.GetEndPos() == o.range.GetEndPos();
}
virtual std::string ToString() const
{
stringstream sb;
sb << first_gap << " " << range;
return sb.str();
}
};
///////////////////////////////////////////////////////////////////////
Distortion::Distortion(size_t startInd, const std::string &line) :
StatefulFeatureFunction(startInd, line)
{
ReadParameters();
}
Distortion::~Distortion()
{
// TODO Auto-generated destructor stub
}
FFState* Distortion::BlankState(MemPool &pool, const System &sys) const
{
return new (pool.Allocate<DistortionState_traditional>()) DistortionState_traditional();
}
void Distortion::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const
{
DistortionState_traditional &stateCast =
static_cast<DistortionState_traditional&>(state);
// fake previous translated phrase start and end
size_t start = NOT_FOUND;
size_t end = NOT_FOUND;
/*
if (input.m_frontSpanCoveredLength > 0) {
// can happen with --continue-partial-translation
start = 0;
end = input.m_frontSpanCoveredLength -1;
}
*/
stateCast.range = Range(start, end);
stateCast.first_gap = NOT_FOUND;
}
void Distortion::EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
void Distortion::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
void Distortion::EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const
{
const DistortionState_traditional &prev =
static_cast<const DistortionState_traditional&>(prevState);
SCORE distortionScore = CalculateDistortionScore(prev.range,
hypo.GetInputPath().range, prev.first_gap);
//cerr << "distortionScore=" << distortionScore << endl;
scores.PlusEquals(mgr.system, *this, distortionScore);
DistortionState_traditional &stateCast =
static_cast<DistortionState_traditional&>(state);
stateCast.Set(hypo.GetInputPath().range, hypo.GetBitmap().GetFirstGapPos());
}
SCORE Distortion::CalculateDistortionScore(const Range &prev, const Range &curr,
const int FirstGap) const
{
bool useEarlyDistortionCost = false;
if (!useEarlyDistortionCost) {
return -(SCORE) ComputeDistortionDistance(prev, curr);
}
else {
/* Pay distortion score as soon as possible, from Moore and Quirk MT Summit 2007
Definitions:
S : current source range
S' : last translated source phrase range
S'' : longest fully-translated initial segment
*/
int prefixEndPos = (int) FirstGap - 1;
if ((int) FirstGap == -1) prefixEndPos = -1;
// case1: S is adjacent to S'' => return 0
if ((int) curr.GetStartPos() == prefixEndPos + 1) {
//IFVERBOSE(4) std::cerr<< "MQ07disto:case1" << std::endl;
return 0;
}
// case2: S is to the left of S' => return 2(length(S))
if ((int) curr.GetEndPos() < (int) prev.GetEndPos()) {
//IFVERBOSE(4) std::cerr<< "MQ07disto:case2" << std::endl;
return (float) -2 * (int) curr.GetNumWordsCovered();
}
// case3: S' is a subsequence of S'' => return 2(nbWordBetween(S,S'')+length(S))
if ((int) prev.GetEndPos() <= prefixEndPos) {
//IFVERBOSE(4) std::cerr<< "MQ07disto:case3" << std::endl;
int z = (int) curr.GetStartPos() - prefixEndPos - 1;
return (float) -2 * (z + (int) curr.GetNumWordsCovered());
}
// case4: otherwise => return 2(nbWordBetween(S,S')+length(S))
//IFVERBOSE(4) std::cerr<< "MQ07disto:case4" << std::endl;
return (float) -2
* ((int) curr.GetNumWordsBetween(prev) + (int) curr.GetNumWordsCovered());
}
}
int Distortion::ComputeDistortionDistance(const Range& prev,
const Range& current) const
{
int dist = 0;
if (prev.GetNumWordsCovered() == 0) {
dist = current.GetStartPos();
}
else {
dist = (int) prev.GetEndPos() - (int) current.GetStartPos() + 1;
}
return abs(dist);
}
void Distortion::EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const
{
UTIL_THROW2("Not implemented");
}
}
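ComputeDistortionDistance() measures how far the decoder jumps between the previously translated source span and the current one; the feature score is simply the negated distance. A standalone sketch with Range reduced to explicit start/end positions (the empty-range convention below, prevEnd < prevStart, is a stand-in for GetNumWordsCovered() == 0):

#include <cstdlib>
#include <iostream>

int ComputeDistortionDistance(int prevStart, int prevEnd, int currStart)
{
  int dist;
  if (prevEnd < prevStart) {   // nothing translated yet
    dist = currStart;
  } else {
    dist = prevEnd - currStart + 1;
  }
  return std::abs(dist);
}

int main()
{
  // Previous phrase covered source words [2,3]; the next phrase starts at 6:
  // distance = |3 - 6 + 1| = 2, so the distortion score is -2.
  std::cout << ComputeDistortionDistance(2, 3, 6) << std::endl;  // prints 2
  return 0;
}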

View File

@ -0,0 +1,60 @@
/*
* Distortion.h
*
* Created on: 28 Oct 2015
* Author: hieu
*/
#ifndef DISTORTION_H_
#define DISTORTION_H_
#include "StatefulFeatureFunction.h"
#include "../legacy/Range.h"
#include "../TypeDef.h"
namespace Moses2
{
class Distortion: public StatefulFeatureFunction
{
public:
Distortion(size_t startInd, const std::string &line);
virtual ~Distortion();
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const std::deque<Hypothesis*> &hypos) const
{
}
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const;
protected:
SCORE CalculateDistortionScore(const Range &prev, const Range &curr,
const int FirstGap) const;
int ComputeDistortionDistance(const Range& prev, const Range& current) const;
};
}
#endif /* DISTORTION_H_ */

View File

@ -0,0 +1,55 @@
#pragma once
#include <vector>
#include <stddef.h>
#include "util/exception.hh"
namespace Moses2
{
class FFState
{
public:
virtual ~FFState()
{
}
virtual size_t hash() const = 0;
virtual bool operator==(const FFState& other) const = 0;
virtual bool operator!=(const FFState& other) const
{
return !(*this == other);
}
virtual std::string ToString() const = 0;
};
////////////////////////////////////////////////////////////////////////////////////////
inline std::ostream& operator<<(std::ostream& out, const FFState& obj)
{
out << obj.ToString();
return out;
}
////////////////////////////////////////////////////////////////////////////////////////
class DummyState: public FFState
{
public:
DummyState()
{
}
virtual size_t hash() const
{
return 0;
}
virtual bool operator==(const FFState& other) const
{
return true;
}
};
}

View File

@ -0,0 +1,85 @@
/*
* FeatureFunction.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#include <string>
#include <vector>
#include "FeatureFunction.h"
#include "../System.h"
#include "../legacy/Util2.h"
#include "util/exception.hh"
using namespace std;
namespace Moses2
{
FeatureFunction::FeatureFunction(size_t startInd, const std::string &line)
:m_startInd(startInd)
,m_numScores(1)
,m_PhraseTableInd(NOT_FOUND)
,m_tuneable(true)
{
ParseLine(line);
//cerr << GetName() << " " << m_startInd << "-" << (m_startInd + m_numScores - 1) << endl;
}
FeatureFunction::~FeatureFunction()
{
// TODO Auto-generated destructor stub
}
void FeatureFunction::ParseLine(const std::string &line)
{
vector<string> toks = Tokenize(line);
UTIL_THROW_IF2(toks.empty(), "Empty line");
string nameStub = toks[0];
set<string> keys;
for (size_t i = 1; i < toks.size(); ++i) {
vector<string> args = TokenizeFirstOnly(toks[i], "=");
UTIL_THROW_IF2(args.size() != 2,
"Incorrect format for feature function arg: " << toks[i]);
pair<set<string>::iterator, bool> ret = keys.insert(args[0]);
UTIL_THROW_IF2(!ret.second, "Duplicate key in line " << line);
if (args[0] == "num-features") {
m_numScores = Scan<size_t>(args[1]);
}
else if (args[0] == "name") {
m_name = args[1];
}
else {
m_args.push_back(args);
}
}
}
void FeatureFunction::ReadParameters()
{
while (!m_args.empty()) {
const vector<string> &args = m_args[0];
SetParameter(args[0], args[1]);
m_args.erase(m_args.begin());
}
}
void FeatureFunction::SetParameter(const std::string& key,
const std::string& value)
{
if (key == "tuneable") {
m_tuneable = Scan<bool>(value);
}
else {
UTIL_THROW2(GetName() << ": Unknown argument " << key << "=" << value);
}
}
}
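For example, a hypothetical feature line such as

  KENLM name=LM0 factor=0 order=5 path=lm.bin

is split by ParseLine() into the stub "KENLM" plus key=value pairs: the reserved keys "name" and "num-features" are consumed immediately (here setting m_name to "LM0"), while the remaining pairs are stored in m_args and handed to the derived class's SetParameter() when ReadParameters() runs. The base class itself only understands "tuneable"; anything left unconsumed triggers the "Unknown argument" exception. The keys factor, order and path above are illustrative and belong to the concrete feature, not to this base class.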

View File

@ -0,0 +1,127 @@
/*
* FeatureFunction.h
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#pragma once
#include <cstddef>
#include <string>
#include <vector>
#include "../TypeDef.h"
#include "../Phrase.h"
namespace Moses2
{
template<typename WORD>
class TargetPhrase;
class System;
class PhraseImpl;
class TargetPhrases;
class TargetPhraseImpl;
class Scores;
class ManagerBase;
class MemPool;
namespace SCFG
{
class TargetPhrase;
class TargetPhrases;
class Word;
}
class FeatureFunction
{
public:
FeatureFunction(size_t startInd, const std::string &line);
virtual ~FeatureFunction();
virtual void Load(System &system)
{
}
size_t GetStartInd() const
{
return m_startInd;
}
size_t GetNumScores() const
{
return m_numScores;
}
const std::string &GetName() const
{
return m_name;
}
void SetName(const std::string &val)
{
m_name = val;
}
virtual size_t HasPhraseTableInd() const
{
return false;
}
void SetPhraseTableInd(size_t ind)
{
m_PhraseTableInd = ind;
}
size_t GetPhraseTableInd() const
{
return m_PhraseTableInd;
}
//! if false, then this feature is not displayed in the n-best list.
// use with care
virtual bool IsTuneable() const
{
return m_tuneable;
}
// the source phrase may have more factors than are actually needed, but this is not guaranteed.
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const = 0;
// For SCFG decoding, the source can contain non-terminals, NOT the raw
// source from the input sentence
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const = 0;
// used by lexicalised reordering model to add scores to tp data structures
virtual void EvaluateAfterTablePruning(MemPool &pool,
const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
{
}
virtual void EvaluateAfterTablePruning(MemPool &pool,
const SCFG::TargetPhrases &tps, const Phrase<SCFG::Word> &sourcePhrase) const
{
}
// clean up temporary memory, called after processing each sentence
virtual void CleanUpAfterSentenceProcessing() const
{
}
protected:
size_t m_startInd;
size_t m_numScores;
size_t m_PhraseTableInd;
std::string m_name;
std::vector<std::vector<std::string> > m_args;
bool m_tuneable;
virtual void SetParameter(const std::string& key, const std::string& value);
virtual void ReadParameters();
void ParseLine(const std::string &line);
};
}

View File

@ -0,0 +1,238 @@
/*
* FeatureFunctions.cpp
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "FeatureFunctions.h"
#include "StatefulFeatureFunction.h"
#include "../System.h"
#include "../Scores.h"
#include "../MemPool.h"
#include "../TranslationModel/PhraseTable.h"
#include "../TranslationModel/UnknownWordPenalty.h"
#include "../SCFG/TargetPhraseImpl.h"
#include "../SCFG/Word.h"
#include "../PhraseBased/TargetPhraseImpl.h"
#include "util/exception.hh"
using namespace std;
namespace Moses2
{
FeatureFunctions::FeatureFunctions(System &system) :
m_system(system), m_ffStartInd(0)
{
//m_registry.PrintFF();
}
FeatureFunctions::~FeatureFunctions()
{
RemoveAllInColl(m_featureFunctions);
}
void FeatureFunctions::Load()
{
// load, everything but pts
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){
FeatureFunction *nonConstFF = const_cast<FeatureFunction*>(ff);
PhraseTable *pt = dynamic_cast<PhraseTable*>(nonConstFF);
if (pt) {
// do nothing. load pt last
}
else {
cerr << "Loading " << nonConstFF->GetName() << endl;
nonConstFF->Load(m_system);
cerr << "Finished loading " << nonConstFF->GetName() << endl;
}
}
// load pt
BOOST_FOREACH(const PhraseTable *pt, m_phraseTables) {
PhraseTable *nonConstPT = const_cast<PhraseTable*>(pt);
cerr << "Loading " << nonConstPT->GetName() << endl;
nonConstPT->Load(m_system);
cerr << "Finished loading " << nonConstPT->GetName() << endl;
}
}
void FeatureFunctions::Create()
{
const Parameter &params = m_system.params;
const PARAM_VEC *ffParams = params.GetParam("feature");
UTIL_THROW_IF2(ffParams == NULL, "Must have [feature] section");
BOOST_FOREACH(const std::string &line, *ffParams){
//cerr << "line=" << line << endl;
FeatureFunction *ff = Create(line);
m_featureFunctions.push_back(ff);
StatefulFeatureFunction *sfff = dynamic_cast<StatefulFeatureFunction*>(ff);
if (sfff) {
sfff->SetStatefulInd(m_statefulFeatureFunctions.size());
m_statefulFeatureFunctions.push_back(sfff);
}
if (ff->HasPhraseTableInd()) {
ff->SetPhraseTableInd(m_withPhraseTableInd.size());
m_withPhraseTableInd.push_back(ff);
}
PhraseTable *pt = dynamic_cast<PhraseTable*>(ff);
if (pt) {
pt->SetPtInd(m_phraseTables.size());
m_phraseTables.push_back(pt);
}
const UnknownWordPenalty *unkWP = dynamic_cast<const UnknownWordPenalty *>(pt);
if (unkWP) {
m_unkWP = unkWP;
}
}
}
FeatureFunction *FeatureFunctions::Create(const std::string &line)
{
vector<string> toks = Tokenize(line);
FeatureFunction *ff = m_registry.Construct(m_ffStartInd, toks[0], line);
UTIL_THROW_IF2(ff == NULL, "Feature function not created");
// name
if (ff->GetName() == "") {
ff->SetName(GetDefaultName(toks[0]));
}
m_ffStartInd += ff->GetNumScores();
return ff;
}
std::string FeatureFunctions::GetDefaultName(const std::string &stub)
{
size_t ind;
boost::unordered_map<std::string, size_t>::iterator iter =
m_defaultNames.find(stub);
if (iter == m_defaultNames.end()) {
m_defaultNames[stub] = 0;
ind = 0;
}
else {
ind = ++(iter->second);
}
return stub + SPrint(ind);
}
const FeatureFunction *FeatureFunctions::FindFeatureFunction(
const std::string &name) const
{
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){
if (ff->GetName() == name) {
return ff;
}
}
return NULL;
}
const PhraseTable *FeatureFunctions::GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd)
{
// assume there is only one unknown word penalty (unk wp)
std::vector<const PhraseTable*> tmpVec(m_phraseTables);
std::vector<const PhraseTable*>::iterator iter;
for (iter = tmpVec.begin(); iter != tmpVec.end(); ++iter) {
const PhraseTable *pt = *iter;
if (pt == m_unkWP) {
tmpVec.erase(iter);
break;
}
}
const PhraseTable *pt = tmpVec[ptInd];
return pt;
}
void FeatureFunctions::EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<Moses2::Word> &source, TargetPhraseImpl &targetPhrase) const
{
SCORE estimatedScore = 0;
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){
Scores& scores = targetPhrase.GetScores();
ff->EvaluateInIsolation(pool, system, source, targetPhrase, scores, estimatedScore);
}
targetPhrase.SetEstimatedScore(estimatedScore);
}
void FeatureFunctions::EvaluateInIsolation(
MemPool &pool,
const System &system,
const Phrase<SCFG::Word> &source,
SCFG::TargetPhraseImpl &targetPhrase) const
{
SCORE estimatedScore = 0;
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions){
Scores& scores = targetPhrase.GetScores();
ff->EvaluateInIsolation(pool, system, source, targetPhrase, scores, estimatedScore);
}
targetPhrase.SetEstimatedScore(estimatedScore);
}
void FeatureFunctions::EvaluateAfterTablePruning(MemPool &pool,
const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
{
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
ff->EvaluateAfterTablePruning(pool, tps, sourcePhrase);
}
}
void FeatureFunctions::EvaluateAfterTablePruning(MemPool &pool, const SCFG::TargetPhrases &tps,
const Phrase<SCFG::Word> &sourcePhrase) const
{
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
ff->EvaluateAfterTablePruning(pool, tps, sourcePhrase);
}
}
void FeatureFunctions::EvaluateWhenAppliedBatch(const Batch &batch) const
{
BOOST_FOREACH(const StatefulFeatureFunction *ff, m_statefulFeatureFunctions) {
ff->EvaluateWhenAppliedBatch(m_system, batch);
}
}
void FeatureFunctions::CleanUpAfterSentenceProcessing() const
{
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
ff->CleanUpAfterSentenceProcessing();
}
}
void FeatureFunctions::ShowWeights(const Weights &allWeights)
{
BOOST_FOREACH(const FeatureFunction *ff, m_featureFunctions) {
cout << ff->GetName();
if (ff->IsTuneable()) {
cout << "=";
vector<SCORE> weights = allWeights.GetWeights(*ff);
for (size_t i = 0; i < weights.size(); ++i) {
cout << " " << weights[i];
}
cout << endl;
} else {
cout << " UNTUNEABLE" << endl;
}
}
}
}
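As an illustration (a hypothetical configuration, not taken from the source), a [feature] section such as

  [feature]
  WordPenalty
  Distortion
  PhraseDictionaryMemory path=pt1.gz
  PhraseDictionaryMemory path=pt2.gz

would make Create() construct four feature functions. None of the lines carries a name= key, so GetDefaultName() assigns WordPenalty0, Distortion0, PhraseDictionaryMemory0 and PhraseDictionaryMemory1; the two phrase tables are additionally appended to m_phraseTables and given consecutive table indices via SetPtInd(). Load() then loads every non-phrase-table feature first and the phrase tables last, as coded above.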

View File

@ -0,0 +1,105 @@
/*
* FeatureFunctions.h
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#pragma once
#include <vector>
#include <string>
#include "../legacy/Parameter.h"
#include "FeatureRegistry.h"
#include "../Phrase.h"
namespace Moses2
{
template<typename WORD>
class TargetPhrase;
class System;
class FeatureFunction;
class StatefulFeatureFunction;
class PhraseTable;
class Manager;
class MemPool;
class PhraseImpl;
class TargetPhrases;
class TargetPhraseImpl;
class Scores;
class Hypothesis;
class UnknownWordPenalty;
class Weights;
namespace SCFG
{
class TargetPhraseImpl;
class TargetPhrases;
class Word;
}
class FeatureFunctions
{
public:
std::vector<const PhraseTable*> m_phraseTables;
FeatureFunctions(System &system);
virtual ~FeatureFunctions();
const std::vector<const FeatureFunction*> &GetFeatureFunctions() const
{ return m_featureFunctions; }
const std::vector<const StatefulFeatureFunction*> &GetStatefulFeatureFunctions() const
{ return m_statefulFeatureFunctions; }
const std::vector<const FeatureFunction*> &GetWithPhraseTableInd() const
{ return m_withPhraseTableInd; }
size_t GetNumScores() const
{ return m_ffStartInd; }
void Create();
void Load();
const FeatureFunction *FindFeatureFunction(const std::string &name) const;
const PhraseTable *GetPhraseTableExcludeUnknownWordPenalty(size_t ptInd);
const UnknownWordPenalty *GetUnknownWordPenalty() const
{ return m_unkWP; }
// the pool here must be the system pool if the rule was loaded during load, or the mgr pool if it was loaded on demand
void EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<Moses2::Word> &source, TargetPhraseImpl &targetPhrase) const;
void EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<SCFG::Word> &source, SCFG::TargetPhraseImpl &targetPhrase) const;
void EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps,
const Phrase<Moses2::Word> &sourcePhrase) const;
void EvaluateAfterTablePruning(MemPool &pool, const SCFG::TargetPhrases &tps,
const Phrase<SCFG::Word> &sourcePhrase) const;
void EvaluateWhenAppliedBatch(const Batch &batch) const;
void CleanUpAfterSentenceProcessing() const;
void ShowWeights(const Weights &allWeights);
protected:
std::vector<const FeatureFunction*> m_featureFunctions;
std::vector<const StatefulFeatureFunction*> m_statefulFeatureFunctions;
std::vector<const FeatureFunction*> m_withPhraseTableInd;
const UnknownWordPenalty *m_unkWP;
boost::unordered_map<std::string, size_t> m_defaultNames;
System &m_system;
size_t m_ffStartInd;
FeatureFunction *Create(const std::string &line);
std::string GetDefaultName(const std::string &stub);
FeatureRegistry m_registry;
};
}

View File

@ -0,0 +1,127 @@
#include "FeatureRegistry.h"
#include "../TranslationModel/Memory/PhraseTableMemory.h"
#include "../TranslationModel/CompactPT/PhraseTableCompact.h"
#include "../TranslationModel/ProbingPT/ProbingPT.h"
#include "../TranslationModel/UnknownWordPenalty.h"
#include "../TranslationModel/Transliteration.h"
#include "../LM/KENLM.h"
#include "../LM/KENLMBatch.h"
#include "../LM/LanguageModel.h"
#include "../LM/GPULM.h"
#include "Distortion.h"
#include "LexicalReordering/LexicalReordering.h"
#include "PhrasePenalty.h"
#include "WordPenalty.h"
#include "OSM/OpSequenceModel.h"
#include "SkeletonStatefulFF.h"
#include "SkeletonStatelessFF.h"
using namespace std;
namespace Moses2
{
template<class F>
class DefaultFeatureFactory: public FeatureFactory
{
public:
FeatureFunction *Create(size_t startInd, const std::string &line)
{
return new F(startInd, line);
}
};
////////////////////////////////////////////////////////////////////
class KenFactory: public FeatureFactory
{
public:
FeatureFunction *Create(size_t startInd, const std::string &line)
{
return ConstructKenLM(startInd, line);
}
};
////////////////////////////////////////////////////////////////////
FeatureRegistry::FeatureRegistry()
{
// Feature with same name as class
#define MOSES_FNAME(name) Add(#name, new DefaultFeatureFactory< name >());
// Feature with different name than class.
#define MOSES_FNAME2(name, type) Add(name, new DefaultFeatureFactory< type >());
MOSES_FNAME2("PhraseDictionaryCompact", PhraseTableCompact);
MOSES_FNAME2("PhraseDictionaryMemory", PhraseTableMemory);
MOSES_FNAME(ProbingPT);
MOSES_FNAME2("PhraseDictionaryTransliteration", Transliteration);
MOSES_FNAME(UnknownWordPenalty);
Add("KENLM", new KenFactory());
MOSES_FNAME(KENLMBatch);
MOSES_FNAME(GPULM);
MOSES_FNAME(LanguageModel);
MOSES_FNAME(Distortion);
MOSES_FNAME(LexicalReordering);
MOSES_FNAME(PhrasePenalty);
MOSES_FNAME(WordPenalty);
MOSES_FNAME(OpSequenceModel);
MOSES_FNAME(SkeletonStatefulFF);
MOSES_FNAME(SkeletonStatelessFF);
}
FeatureRegistry::~FeatureRegistry()
{
}
void FeatureRegistry::Add(const std::string &name, FeatureFactory *factory)
{
std::pair<std::string, boost::shared_ptr<FeatureFactory> > to_ins(name,
boost::shared_ptr<FeatureFactory>(factory));
if (!registry_.insert(to_ins).second) {
cerr << "Duplicate feature name " << name << endl;
abort();
}
}
FeatureFunction *FeatureRegistry::Construct(size_t startInd,
const std::string &name, const std::string &line)
{
Map::iterator i = registry_.find(name);
if (i == registry_.end()) {
cerr << "Feature name " << name << " is not registered.";
abort();
}
FeatureFactory *fact = i->second.get();
FeatureFunction *ff = fact->Create(startInd, line);
return ff;
}
void FeatureRegistry::PrintFF() const
{
std::vector<std::string> ffs;
std::cerr << "Available feature functions:" << std::endl;
Map::const_iterator iter;
for (iter = registry_.begin(); iter != registry_.end(); ++iter) {
const std::string &ffName = iter->first;
ffs.push_back(ffName);
}
std::vector<std::string>::const_iterator iterVec;
std::sort(ffs.begin(), ffs.end());
for (iterVec = ffs.begin(); iterVec != ffs.end(); ++iterVec) {
const std::string &ffName = *iterVec;
std::cerr << ffName << " ";
}
std::cerr << std::endl;
}
}
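Wiring in a new feature only requires one line in the constructor above. A hypothetical sketch (MyFeature is not part of the source; it must derive from FeatureFunction and expose the (size_t, const std::string &) constructor that DefaultFeatureFactory expects):

  #include "MyFeature.h"
  ...
  MOSES_FNAME(MyFeature);                    // registered under the class name
  MOSES_FNAME2("MyFeatureAlias", MyFeature); // or under a different name

Once registered, any line in the [feature] section beginning with that name is routed through Construct(), which passes the running score index and the raw configuration line to the factory.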

View File

@ -0,0 +1,48 @@
#pragma once
#include <boost/unordered_map.hpp>
#include <boost/shared_ptr.hpp>
namespace Moses2
{
class FeatureFunction;
////////////////////////////////////////////////////////////////////
class FeatureFactory
{
public:
virtual ~FeatureFactory()
{
}
virtual FeatureFunction *Create(size_t startInd, const std::string &line) = 0;
protected:
FeatureFactory()
{
}
};
////////////////////////////////////////////////////////////////////
class FeatureRegistry
{
public:
FeatureRegistry();
~FeatureRegistry();
FeatureFunction *Construct(size_t startInd, const std::string &name,
const std::string &line);
void PrintFF() const;
private:
void Add(const std::string &name, FeatureFactory *factory);
typedef boost::unordered_map<std::string, boost::shared_ptr<FeatureFactory> > Map;
Map registry_;
};
////////////////////////////////////////////////////////////////////
}

View File

@ -0,0 +1,79 @@
/*
* BidirectionalReorderingState.cpp
*
* Created on: 22 Mar 2016
* Author: hieu
*/
#include <boost/functional/hash_fwd.hpp>
#include "BidirectionalReorderingState.h"
#include "../../legacy/Util2.h"
#include "../../PhraseBased/Manager.h"
using namespace std;
namespace Moses2
{
BidirectionalReorderingState::BidirectionalReorderingState(
const LRModel &config, LRState *bw, LRState *fw, size_t offset) :
LRState(config, LRModel::Bidirectional, offset), m_backward(bw), m_forward(
fw)
{
}
BidirectionalReorderingState::~BidirectionalReorderingState()
{
// TODO Auto-generated destructor stub
}
void BidirectionalReorderingState::Init(const LRState *prev,
const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
const Bitmap *coverage)
{
if (m_backward) {
m_backward->Init(prev, topt, path, first, coverage);
}
if (m_forward) {
m_forward->Init(prev, topt, path, first, coverage);
}
}
std::string BidirectionalReorderingState::ToString() const
{
return "BidirectionalReorderingState " + SPrint(this) + " "
+ SPrint(m_backward) + " " + SPrint(m_forward);
}
size_t BidirectionalReorderingState::hash() const
{
size_t ret = m_backward->hash();
boost::hash_combine(ret, m_forward->hash());
return ret;
}
bool BidirectionalReorderingState::operator==(const FFState& o) const
{
if (&o == this) return true;
BidirectionalReorderingState const &other =
static_cast<BidirectionalReorderingState const&>(o);
bool ret = (*m_backward == *other.m_backward)
&& (*m_forward == *other.m_forward);
return ret;
}
void BidirectionalReorderingState::Expand(const ManagerBase &mgr,
const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
Scores &scores, FFState &state) const
{
BidirectionalReorderingState &stateCast =
static_cast<BidirectionalReorderingState&>(state);
m_backward->Expand(mgr, ff, hypo, phraseTableInd, scores,
*stateCast.m_backward);
m_forward->Expand(mgr, ff, hypo, phraseTableInd, scores,
*stateCast.m_forward);
}
} /* namespace Moses2 */

View File

@ -0,0 +1,40 @@
/*
* BidirectionalReorderingState.h
*
* Created on: 22 Mar 2016
* Author: hieu
*/
#pragma once
#include "LRState.h"
namespace Moses2
{
class BidirectionalReorderingState: public LRState
{
public:
BidirectionalReorderingState(const LRModel &config, LRState *bw, LRState *fw,
size_t offset);
virtual ~BidirectionalReorderingState();
void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
const InputPathBase &path, bool first, const Bitmap *coverage);
size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual std::string ToString() const;
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
FFState &state) const;
protected:
LRState *m_backward;
LRState *m_forward;
};
} /* namespace Moses2 */

View File

@ -0,0 +1,71 @@
/*
* HReorderingBackwardState.cpp
*
* Created on: 22 Mar 2016
* Author: hieu
*/
#include "HReorderingBackwardState.h"
#include "../../PhraseBased/Hypothesis.h"
#include "../../PhraseBased/Manager.h"
namespace Moses2
{
HReorderingBackwardState::HReorderingBackwardState(MemPool &pool,
const LRModel &config, size_t offset) :
LRState(config, LRModel::Backward, offset), reoStack(pool)
{
// TODO Auto-generated constructor stub
}
HReorderingBackwardState::~HReorderingBackwardState()
{
// TODO Auto-generated destructor stub
}
void HReorderingBackwardState::Init(const LRState *prev,
const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
const Bitmap *coverage)
{
prevTP = &topt;
reoStack.Init();
}
size_t HReorderingBackwardState::hash() const
{
size_t ret = reoStack.hash();
return ret;
}
bool HReorderingBackwardState::operator==(const FFState& o) const
{
const HReorderingBackwardState& other =
static_cast<const HReorderingBackwardState&>(o);
bool ret = reoStack == other.reoStack;
return ret;
}
std::string HReorderingBackwardState::ToString() const
{
return "HReorderingBackwardState " + SPrint(m_offset);
}
void HReorderingBackwardState::Expand(const ManagerBase &mgr,
const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
Scores &scores, FFState &state) const
{
HReorderingBackwardState &nextState =
static_cast<HReorderingBackwardState&>(state);
nextState.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
NULL);
nextState.reoStack = reoStack;
const Range &swrange = hypo.GetInputPath().range;
int reoDistance = nextState.reoStack.ShiftReduce(swrange);
ReorderingType reoType = m_configuration.GetOrientation(reoDistance);
CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
}
} /* namespace Moses2 */

View File

@ -0,0 +1,37 @@
/*
* HReorderingBackwardState.h
*
* Created on: 22 Mar 2016
* Author: hieu
*/
#pragma once
#include "LRState.h"
#include "ReorderingStack.h"
namespace Moses2
{
class HReorderingBackwardState: public LRState
{
private:
ReorderingStack reoStack;
public:
HReorderingBackwardState(MemPool &pool, const LRModel &config, size_t offset);
virtual void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
const InputPathBase &path, bool first, const Bitmap *coverage);
virtual ~HReorderingBackwardState();
size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual std::string ToString() const;
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
FFState &state) const;
};
} /* namespace Moses2 */

View File

@ -0,0 +1,87 @@
/*
* HReorderingForwardState.cpp
*
* Created on: 22 Mar 2016
* Author: hieu
*/
#include "HReorderingForwardState.h"
#include "../../InputPathBase.h"
#include "../../PhraseBased/Manager.h"
#include "../../PhraseBased/Hypothesis.h"
namespace Moses2
{
HReorderingForwardState::HReorderingForwardState(const LRModel &config,
size_t offset) :
LRState(config, LRModel::Forward, offset), m_first(true)
{
prevPath = NULL;
m_coverage = NULL;
}
HReorderingForwardState::~HReorderingForwardState()
{
// TODO Auto-generated destructor stub
}
void HReorderingForwardState::Init(const LRState *prev,
const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
const Bitmap *coverage)
{
prevTP = &topt;
prevPath = &path;
m_first = first;
m_coverage = coverage;
}
size_t HReorderingForwardState::hash() const
{
size_t ret;
ret = hash_value(prevPath->range);
return ret;
}
bool HReorderingForwardState::operator==(const FFState& o) const
{
if (&o == this) return true;
HReorderingForwardState const& other =
static_cast<HReorderingForwardState const&>(o);
int compareScores = (
(prevPath->range == other.prevPath->range) ?
ComparePrevScores(other.prevTP) :
(prevPath->range < other.prevPath->range) ? -1 : 1);
return compareScores == 0;
}
std::string HReorderingForwardState::ToString() const
{
return "HReorderingForwardState " + SPrint(m_offset);
}
void HReorderingForwardState::Expand(const ManagerBase &mgr,
const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
Scores &scores, FFState &state) const
{
const Range &cur = hypo.GetInputPath().range;
// keep track of the current coverage ourselves so we don't need the hypothesis
Manager &mgrCast = const_cast<Manager&>(static_cast<const Manager&>(mgr));
Bitmaps &bms = mgrCast.GetBitmaps();
const Bitmap &cov = bms.GetBitmap(*m_coverage, cur);
if (!m_first) {
LRModel::ReorderingType reoType;
reoType = m_configuration.GetOrientation(prevPath->range, cur, cov);
CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
}
HReorderingForwardState &stateCast =
static_cast<HReorderingForwardState&>(state);
stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
&cov);
}
} /* namespace Moses2 */

View File

@ -0,0 +1,41 @@
/*
* HReorderingForwardState.h
*
* Created on: 22 Mar 2016
* Author: hieu
*/
#pragma once
#include "LRState.h"
namespace Moses2
{
class Range;
class Bitmap;
class InputPathBase;
class HReorderingForwardState: public LRState
{
public:
HReorderingForwardState(const LRModel &config, size_t offset);
virtual ~HReorderingForwardState();
void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
const InputPathBase &path, bool first, const Bitmap *coverage);
size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual std::string ToString() const;
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
FFState &state) const;
protected:
bool m_first;
//const Range &m_prevRange;
const InputPathBase *prevPath;
const Bitmap *m_coverage;
};
} /* namespace Moses2 */

View File

@ -0,0 +1,209 @@
/*
* LRModel.cpp
*
* Created on: 23 Mar 2016
* Author: hieu
*/
#include "LRModel.h"
#include "../../legacy/Util2.h"
#include "../../legacy/Range.h"
#include "../../legacy/Bitmap.h"
#include "../../MemPool.h"
#include "util/exception.hh"
#include "PhraseBasedReorderingState.h"
#include "BidirectionalReorderingState.h"
#include "HReorderingBackwardState.h"
#include "HReorderingForwardState.h"
using namespace std;
namespace Moses2
{
bool IsMonotonicStep(Range const& prev, // words range of last source phrase
Range const& cur, // words range of current source phrase
Bitmap const& cov) // coverage bitmap
{
size_t e = prev.GetEndPos() + 1;
size_t s = cur.GetStartPos();
return (s == e || (s >= e && !cov.GetValue(e)));
}
bool IsSwap(Range const& prev, Range const& cur, Bitmap const& cov)
{
size_t s = prev.GetStartPos();
size_t e = cur.GetEndPos();
return (e + 1 == s || (e < s && !cov.GetValue(s - 1)));
}
LRModel::LRModel(const std::string &modelType, LexicalReordering &ff) :
m_modelType(None), m_phraseBased(true), m_collapseScores(false), m_direction(
Backward), m_scoreProducer(&ff)
{
std::vector<std::string> config = Tokenize(modelType, "-");
for (size_t i = 0; i < config.size(); ++i) {
if (config[i] == "hier") {
m_phraseBased = false;
}
else if (config[i] == "phrase") {
m_phraseBased = true;
}
else if (config[i] == "wbe") {
m_phraseBased = true;
}
// no word-based decoding available, fall-back to phrase-based
// This is the old lexical reordering model combination of moses
else if (config[i] == "msd") {
m_modelType = MSD;
}
else if (config[i] == "mslr") {
m_modelType = MSLR;
}
else if (config[i] == "monotonicity") {
m_modelType = Monotonic;
}
else if (config[i] == "leftright") {
m_modelType = LeftRight;
}
// unidirectional is deprecated, use backward instead
else if (config[i] == "unidirectional") {
m_direction = Backward;
}
else if (config[i] == "backward") {
m_direction = Backward;
}
else if (config[i] == "forward") {
m_direction = Forward;
}
else if (config[i] == "bidirectional") {
m_direction = Bidirectional;
}
else if (config[i] == "f") {
m_condition = F;
}
else if (config[i] == "fe") {
m_condition = FE;
}
else if (config[i] == "collapseff") {
m_collapseScores = true;
}
else if (config[i] == "allff") {
m_collapseScores = false;
}
else {
std::cerr
<< "Illegal part in the lexical reordering configuration string: "
<< config[i] << std::endl;
exit(1);
}
}
if (m_modelType == None) {
std::cerr << "You need to specify the type of the reordering model "
<< "(msd, monotonicity,...)" << std::endl;
exit(1);
}
}
LRModel::~LRModel()
{
// TODO Auto-generated destructor stub
}
size_t LRModel::GetNumberOfTypes() const
{
return ((m_modelType == MSD) ? 3 : (m_modelType == MSLR) ? 4 : 2);
}
/// return orientation for the first phrase
LRModel::ReorderingType LRModel::GetOrientation(Range const& cur) const
{
UTIL_THROW_IF2(m_modelType == None, "Reordering Model Type is None");
return ((m_modelType == LeftRight) ? R : (cur.GetStartPos() == 0) ? M :
(m_modelType == MSD) ? D : (m_modelType == MSLR) ? DR : NM);
}
LRModel::ReorderingType LRModel::GetOrientation(Range const& prev,
Range const& cur) const
{
UTIL_THROW_IF2(m_modelType == None, "No reordering model type specified");
return (
(m_modelType == LeftRight) ? prev.GetEndPos() <= cur.GetStartPos() ? R : L
: (cur.GetStartPos() == prev.GetEndPos() + 1) ? M :
(m_modelType == Monotonic) ? NM :
(prev.GetStartPos() == cur.GetEndPos() + 1) ? S :
(m_modelType == MSD) ? D :
(cur.GetStartPos() > prev.GetEndPos()) ? DR : DL);
}
LRModel::ReorderingType LRModel::GetOrientation(int const reoDistance) const
{
// this one is for HierarchicalReorderingBackwardState
return ((m_modelType == LeftRight) ? (reoDistance >= 1) ? R : L
: (reoDistance == 1) ? M : (m_modelType == Monotonic) ? NM :
(reoDistance == -1) ? S : (m_modelType == MSD) ? D :
(reoDistance > 1) ? DR : DL);
}
LRState *LRModel::CreateLRState(MemPool &pool) const
{
LRState *bwd = NULL, *fwd = NULL;
size_t offset = 0;
switch (m_direction) {
case Backward:
case Bidirectional:
if (m_phraseBased) {
bwd =
new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
*this, Backward, offset);
//cerr << "bwd=" << bwd << bwd->ToString() << endl;
}
else {
bwd =
new (pool.Allocate<HReorderingBackwardState>()) HReorderingBackwardState(
pool, *this, offset);
}
offset += m_collapseScores ? 1 : GetNumberOfTypes();
if (m_direction == Backward) return bwd; // else fall through
case Forward:
if (m_phraseBased) {
fwd =
new (pool.Allocate<PhraseBasedReorderingState>()) PhraseBasedReorderingState(
*this, Forward, offset);
//cerr << "fwd=" << fwd << fwd->ToString() << endl;
}
else {
fwd =
new (pool.Allocate<HReorderingForwardState>()) HReorderingForwardState(
*this, offset);
}
offset += m_collapseScores ? 1 : GetNumberOfTypes();
if (m_direction == Forward) return fwd;
}
//cerr << "LRStates:" << *bwd << endl << *fwd << endl;
BidirectionalReorderingState *ret =
new (pool.Allocate<BidirectionalReorderingState>()) BidirectionalReorderingState(
*this, bwd, fwd, 0);
return ret;
}
LRModel::ReorderingType LRModel::GetOrientation(Range const& prev,
Range const& cur, Bitmap const& cov) const
{
return (
(m_modelType == LeftRight) ? cur.GetStartPos() > prev.GetEndPos() ? R : L
: IsMonotonicStep(prev, cur, cov) ? M : (m_modelType == Monotonic) ? NM :
IsSwap(prev, cur, cov) ? S : (m_modelType == MSD) ? D :
cur.GetStartPos() > prev.GetEndPos() ? DR : DL);
}
} /* namespace Moses2 */
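Editor's note: the GetOrientation(int reoDistance) chain above is easiest to sanity-check in isolation. The following is a minimal standalone sketch (not part of the commit) that reproduces only the MSD branch of that chain, assuming the enum values M=0, S=1, D=2 from the accompanying LRModel.h:
// Standalone illustration (assumed simplification, not part of the commit):
// mirrors LRModel::GetOrientation(int reoDistance) for an MSD model.
#include <iostream>
enum ReoType { M = 0, S = 1, D = 2 };
ReoType MsdOrientation(int reoDistance)
{
  if (reoDistance == 1) return M;   // current phrase directly follows the previous one
  if (reoDistance == -1) return S;  // current phrase is swapped with the previous one
  return D;                         // anything else is discontinuous
}
int main()
{
  // distances as returned by ReorderingStack::ShiftReduce
  int sample[] = { 1, -1, 3, -4 };
  for (int d : sample)
    std::cout << d << " -> " << MsdOrientation(d) << '\n';  // prints 0, 1, 2, 2
  return 0;
}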

View File

@ -0,0 +1,109 @@
/*
* LRModel.h
*
* Created on: 23 Mar 2016
* Author: hieu
*/
#pragma once
#include <string>
namespace Moses2
{
class MemPool;
class Range;
class Bitmap;
class LRState;
class LexicalReordering;
class LRModel
{
public:
enum ModelType
{
Monotonic, MSD, MSLR, LeftRight, None
};
enum Direction
{
Forward, Backward, Bidirectional
};
enum Condition
{
F, E, FE
};
enum ReorderingType
{
M = 0, // monotonic
NM = 1, // non-monotonic
S = 1, // swap
D = 2, // discontinuous
DL = 2, // discontinuous, left
DR = 3, // discontinuous, right
R = 0, // right
L = 1, // left
MAX = 3, // largest possible
NONE = 4 // largest possible
};
LRModel(const std::string &modelType, LexicalReordering &ff);
virtual ~LRModel();
ModelType GetModelType() const
{
return m_modelType;
}
Direction GetDirection() const
{
return m_direction;
}
Condition GetCondition() const
{
return m_condition;
}
bool IsPhraseBased() const
{
return m_phraseBased;
}
bool CollapseScores() const
{
return m_collapseScores;
}
size_t GetNumberOfTypes() const;
LexicalReordering*
GetScoreProducer() const
{
return m_scoreProducer;
}
LRState *CreateLRState(MemPool &pool) const;
ReorderingType // for first phrase in phrase-based
GetOrientation(Range const& cur) const;
ReorderingType // for non-first phrases in phrase-based
GetOrientation(Range const& prev, Range const& cur) const;
ReorderingType // for HReorderingForwardState
GetOrientation(Range const& prev, Range const& cur, Bitmap const& cov) const;
ReorderingType // for HReorderingBackwardState
GetOrientation(int const reoDistance) const;
protected:
ModelType m_modelType;
bool m_phraseBased;
bool m_collapseScores;
Direction m_direction;
Condition m_condition;
LexicalReordering *m_scoreProducer;
};
} /* namespace Moses2 */

View File

@ -0,0 +1,93 @@
/*
* LRState.cpp
*
* Created on: 22 Mar 2016
* Author: hieu
*/
#include "LRState.h"
#include "LexicalReordering.h"
#include "../../Scores.h"
#include "../../TargetPhrase.h"
using namespace std;
namespace Moses2
{
class InputType;
LRState::LRState(const LRModel &config, LRModel::Direction dir, size_t offset) :
m_configuration(config), m_direction(dir), m_offset(offset)
{
}
int LRState::ComparePrevScores(const TargetPhrase<Moses2::Word> *other) const
{
LexicalReordering* producer = m_configuration.GetScoreProducer();
size_t phraseTableInd = producer->GetPhraseTableInd();
const SCORE *myScores = (const SCORE*) prevTP->ffData[phraseTableInd]; //producer->
const SCORE *yrScores = (const SCORE*) other->ffData[phraseTableInd]; //producer->
if (myScores == yrScores) return 0;
// The pointers are NULL if a phrase pair isn't found in the reordering table.
if (yrScores == NULL) return -1;
if (myScores == NULL) return 1;
size_t stop = m_offset + m_configuration.GetNumberOfTypes();
for (size_t i = m_offset; i < stop; i++) {
if ((myScores)[i] < (yrScores)[i]) return -1;
if ((myScores)[i] > (yrScores)[i]) return 1;
}
return 0;
}
void LRState::CopyScores(const System &system, Scores &accum,
const TargetPhrase<Moses2::Word> &topt, ReorderingType reoType) const
{
// don't call this on a bidirectional object
UTIL_THROW_IF2(
m_direction != LRModel::Backward && m_direction != LRModel::Forward,
"Unknown direction: " << m_direction);
TargetPhrase<Moses2::Word> const* relevantOpt = (
(m_direction == LRModel::Backward) ? &topt : prevTP);
LexicalReordering* producer = m_configuration.GetScoreProducer();
size_t phraseTableInd = producer->GetPhraseTableInd();
const SCORE *cached = (const SCORE*) relevantOpt->ffData[phraseTableInd]; //producer->
if (cached == NULL) {
return;
}
size_t off_remote = m_offset + reoType;
size_t off_local = m_configuration.CollapseScores() ? m_offset : off_remote;
UTIL_THROW_IF2(off_local >= producer->GetNumScores(),
"offset out of vector bounds!");
// look up applicable score from vector of scores
//UTIL_THROW_IF2(off_remote >= cached->size(), "offset out of vector bounds!");
//Scores scores(producer->GetNumScoreComponents(),0);
SCORE score = cached[off_remote];
accum.PlusEquals(system, *producer, score, off_local);
// else: use default scores (if specified)
/*
else if (producer->GetHaveDefaultScores()) {
Scores scores(producer->GetNumScoreComponents(),0);
scores[off_local] = producer->GetDefaultScore(off_remote);
accum->PlusEquals(m_configuration.GetScoreProducer(), scores);
}
*/
// note: if no default score, no cost
/*
const SparseReordering* sparse = m_configuration.GetSparseReordering();
if (sparse) sparse->CopyScores(*relevantOpt, m_prevOption, input, reoType,
m_direction, accum);
*/
}
}

View File

@ -0,0 +1,48 @@
#pragma once
#include "../FFState.h"
#include "LRModel.h"
namespace Moses2
{
template<typename WORD>
class TargetPhrase;
class LexicalReordering;
class Hypothesis;
class System;
class Scores;
class Bitmap;
class ManagerBase;
class InputType;
class InputPathBase;
class Word;
class LRState: public FFState
{
public:
typedef LRModel::ReorderingType ReorderingType;
const TargetPhrase<Moses2::Word> *prevTP;
LRState(const LRModel &config, LRModel::Direction dir, size_t offset);
virtual void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
const InputPathBase &path, bool first, const Bitmap *coverage) = 0;
virtual void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
FFState &state) const = 0;
void CopyScores(const System &system, Scores &accum, const TargetPhrase<Moses2::Word> &topt,
ReorderingType reoType) const;
protected:
const LRModel& m_configuration;
LRModel::Direction m_direction;
size_t m_offset;
int
ComparePrevScores(const TargetPhrase<Moses2::Word> *other) const;
};
}

View File

@ -0,0 +1,222 @@
/*
* LexicalReordering.cpp
*
* Created on: 15 Dec 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "util/exception.hh"
#include "LexicalReordering.h"
#include "LRModel.h"
#include "PhraseBasedReorderingState.h"
#include "BidirectionalReorderingState.h"
#include "../../TranslationModel/PhraseTable.h"
#include "../../TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
#include "../../System.h"
#include "../../PhraseBased/PhraseImpl.h"
#include "../../PhraseBased/Manager.h"
#include "../../PhraseBased/Hypothesis.h"
#include "../../PhraseBased/TargetPhrases.h"
#include "../../PhraseBased/TargetPhraseImpl.h"
#include "../../legacy/InputFileStream.h"
#include "../../legacy/Util2.h"
using namespace std;
namespace Moses2
{
///////////////////////////////////////////////////////////////////////
LexicalReordering::LexicalReordering(size_t startInd, const std::string &line) :
StatefulFeatureFunction(startInd, line), m_compactModel(NULL), m_blank(
NULL), m_propertyInd(-1), m_coll(NULL), m_configuration(NULL)
{
ReadParameters();
assert(m_configuration);
//assert(m_numScores == 6);
}
LexicalReordering::~LexicalReordering()
{
delete m_compactModel;
delete m_coll;
delete m_configuration;
}
void LexicalReordering::Load(System &system)
{
MemPool &pool = system.GetSystemPool();
if (m_propertyInd >= 0) {
// Using integrated Lex RO. No loading needed
}
else if (FileExists(m_path + ".minlexr")) {
m_compactModel = new LexicalReorderingTableCompact(m_path + ".minlexr",
m_FactorsF, m_FactorsE, m_FactorsC);
m_blank = new (pool.Allocate<PhraseImpl>()) PhraseImpl(pool, 0);
}
else {
m_coll = new Coll();
InputFileStream file(m_path);
string line;
size_t lineNum = 0;
while (getline(file, line)) {
if (++lineNum % 1000000 == 0) {
cerr << lineNum << " ";
}
std::vector<std::string> toks = TokenizeMultiCharSeparator(line, "|||");
assert(toks.size() == 3);
PhraseImpl *source = PhraseImpl::CreateFromString(pool, system.GetVocab(),
system, toks[0]);
PhraseImpl *target = PhraseImpl::CreateFromString(pool, system.GetVocab(),
system, toks[1]);
std::vector<SCORE> scores = Tokenize<SCORE>(toks[2]);
std::transform(scores.begin(), scores.end(), scores.begin(),
TransformScore);
std::transform(scores.begin(), scores.end(), scores.begin(), FloorScore);
Key key(source, target);
(*m_coll)[key] = scores;
}
}
}
void LexicalReordering::SetParameter(const std::string& key,
const std::string& value)
{
if (key == "path") {
m_path = value;
}
else if (key == "type") {
m_configuration = new LRModel(value, *this);
}
else if (key == "input-factor") {
m_FactorsF = Tokenize<FactorType>(value);
}
else if (key == "output-factor") {
m_FactorsE = Tokenize<FactorType>(value);
}
else if (key == "property-index") {
m_propertyInd = Scan<int>(value);
}
else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
FFState* LexicalReordering::BlankState(MemPool &pool, const System &sys) const
{
FFState *ret = m_configuration->CreateLRState(pool);
return ret;
}
void LexicalReordering::EmptyHypothesisState(FFState &state,
const ManagerBase &mgr, const InputType &input,
const Hypothesis &hypo) const
{
BidirectionalReorderingState &stateCast =
static_cast<BidirectionalReorderingState&>(state);
stateCast.Init(NULL, hypo.GetTargetPhrase(), hypo.GetInputPath(), true,
&hypo.GetBitmap());
}
void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
void LexicalReordering::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
UTIL_THROW2("Don't use with SCFG models");
}
void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
const TargetPhrases &tps, const Phrase<Moses2::Word> &sourcePhrase) const
{
BOOST_FOREACH(const TargetPhraseImpl *tp, tps){
EvaluateAfterTablePruning(pool, *tp, sourcePhrase);
}
}
void LexicalReordering::EvaluateAfterTablePruning(MemPool &pool,
const TargetPhraseImpl &targetPhrase, const Phrase<Moses2::Word> &sourcePhrase) const
{
if (m_propertyInd >= 0) {
SCORE *scoreArr = targetPhrase.GetScoresProperty(m_propertyInd);
targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
}
else if (m_compactModel) {
// using external compact binary model
const Values values = m_compactModel->GetScore(sourcePhrase, targetPhrase,
*m_blank);
if (values.size()) {
assert(values.size() == m_numScores);
SCORE *scoreArr = pool.Allocate<SCORE>(m_numScores);
for (size_t i = 0; i < m_numScores; ++i) {
scoreArr[i] = values[i];
}
targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
}
else {
targetPhrase.ffData[m_PhraseTableInd] = NULL;
}
}
else if (m_coll) {
// using external memory model
// cache data in target phrase
const Values *values = GetValues(sourcePhrase, targetPhrase);
if (values) {
assert(values->size() == m_numScores);
SCORE *scoreArr = pool.Allocate<SCORE>(m_numScores);
for (size_t i = 0; i < m_numScores; ++i) {
scoreArr[i] = (*values)[i];
}
targetPhrase.ffData[m_PhraseTableInd] = scoreArr;
}
else {
targetPhrase.ffData[m_PhraseTableInd] = NULL;
}
}
}
void LexicalReordering::EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const
{
const LRState &prevStateCast = static_cast<const LRState&>(prevState);
prevStateCast.Expand(mgr, *this, hypo, m_PhraseTableInd, scores, state);
}
const LexicalReordering::Values *LexicalReordering::GetValues(
const Phrase<Moses2::Word> &source, const Phrase<Moses2::Word> &target) const
{
Key key(&source, &target);
Coll::const_iterator iter;
iter = m_coll->find(key);
if (iter == m_coll->end()) {
return NULL;
}
else {
return &iter->second;
}
}
void LexicalReordering::EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const
{
UTIL_THROW2("Not implemented");
}
} /* namespace Moses2 */
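Editor's note: Load() above assumes one phrase pair per line, with fields separated by "|||" and whitespace-separated probabilities in the third field, which are then log-transformed. Below is a self-contained sketch of that parsing step with a made-up table line; it is an illustration only, not the actual loader (which uses TokenizeMultiCharSeparator, TransformScore and FloorScore):
// Sketch only: hypothetical reordering-table line and a minimal parse of it.
#include <cassert>
#include <cmath>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>
int main()
{
  // made-up line: source ||| target ||| p(M) p(S) p(D) p(M) p(S) p(D)
  std::string line = "das haus ||| the house ||| 0.6 0.3 0.1 0.5 0.4 0.1";
  // split on the literal separator "|||"
  std::vector<std::string> fields;
  std::string::size_type start = 0, pos;
  while ((pos = line.find("|||", start)) != std::string::npos) {
    fields.push_back(line.substr(start, pos - start));
    start = pos + 3;
  }
  fields.push_back(line.substr(start));
  assert(fields.size() == 3);
  // convert each probability to a log score, as the loader does
  std::istringstream scoreStream(fields[2]);
  std::vector<float> scores;
  float p;
  while (scoreStream >> p)
    scores.push_back(std::log(p));
  std::cout << "parsed " << scores.size() << " scores\n";  // 6
  return 0;
}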

View File

@ -0,0 +1,116 @@
/*
* LexicalReordering.h
*
* Created on: 15 Dec 2015
* Author: hieu
*/
#pragma once
#include <vector>
#include <boost/unordered_map.hpp>
#include "../StatefulFeatureFunction.h"
#include "../../TypeDef.h"
#include "../../Phrase.h"
#include "../../legacy/Range.h"
namespace Moses2
{
class LexicalReorderingTableCompact;
class LRModel;
class TargetPhraseImpl;
class LexicalReordering: public StatefulFeatureFunction
{
public:
LexicalReordering(size_t startInd, const std::string &line);
virtual ~LexicalReordering();
virtual void Load(System &system);
virtual void SetParameter(const std::string& key, const std::string& value);
virtual size_t HasPhraseTableInd() const
{
return true;
}
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateAfterTablePruning(MemPool &pool, const TargetPhrases &tps,
const Phrase<Moses2::Word> &sourcePhrase) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const;
protected:
std::string m_path;
FactorList m_FactorsF;
FactorList m_FactorsE;
FactorList m_FactorsC;
LRModel *m_configuration;
virtual void
EvaluateAfterTablePruning(MemPool &pool, const TargetPhraseImpl &targetPhrase,
const Phrase<Moses2::Word> &sourcePhrase) const;
// PROPERTY IN PT
int m_propertyInd;
// COMPACT MODEL
LexicalReorderingTableCompact *m_compactModel;
Phrase<Moses2::Word> *m_blank;
// MEMORY MODEL
typedef std::pair<const Phrase<Moses2::Word>*, const Phrase<Moses2::Word>* > Key;
typedef std::vector<SCORE> Values;
struct KeyComparer
{
size_t operator()(const Key &obj) const
{
size_t seed = obj.first->hash();
boost::hash_combine(seed, obj.second->hash());
return seed;
}
bool operator()(const Key& a, const Key& b) const
{
if ((*a.first) != (*b.first)) {
return false;
}
if ((*a.second) != (*b.second)) {
return false;
}
return true;
}
};
typedef boost::unordered_map<Key, Values, KeyComparer, KeyComparer> Coll;
Coll *m_coll;
const Values *GetValues(const Phrase<Moses2::Word> &source, const Phrase<Moses2::Word> &target) const;
};
} /* namespace Moses2 */
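Editor's note: Coll above reuses a single functor type, KeyComparer, as both the hasher and the equality predicate of a boost::unordered_map. A small self-contained sketch of the same idiom follows, using a simplified string-pair key; the real Key holds phrase pointers and hashes via Phrase::hash():
// Sketch of the combined hash/equality functor idiom (simplified key).
#include <boost/functional/hash.hpp>
#include <boost/unordered_map.hpp>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
typedef std::pair<std::string, std::string> Key;
struct KeyComparer
{
  // hasher: combine the hashes of both members
  size_t operator()(const Key &k) const
  {
    size_t seed = boost::hash<std::string>()(k.first);
    boost::hash_combine(seed, k.second);
    return seed;
  }
  // equality predicate: both members must match
  bool operator()(const Key &a, const Key &b) const
  {
    return a.first == b.first && a.second == b.second;
  }
};
int main()
{
  boost::unordered_map<Key, std::vector<float>, KeyComparer, KeyComparer> coll;
  coll[Key("das haus", "the house")] = std::vector<float>(6, -0.5f);
  boost::unordered_map<Key, std::vector<float>, KeyComparer, KeyComparer>::const_iterator
      it = coll.find(Key("das haus", "the house"));
  std::cout << (it != coll.end() ? it->second.size() : 0) << '\n';  // 6
  return 0;
}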

View File

@ -0,0 +1,86 @@
/*
* PhraseBasedReorderingState.cpp
*
* Created on: 22 Mar 2016
* Author: hieu
*/
#include "PhraseBasedReorderingState.h"
#include "LexicalReordering.h"
#include "../../PhraseBased/Hypothesis.h"
#include "../../InputPathBase.h"
#include "../../PhraseBased/Manager.h"
using namespace std;
namespace Moses2
{
PhraseBasedReorderingState::PhraseBasedReorderingState(const LRModel &config,
LRModel::Direction dir, size_t offset) :
LRState(config, dir, offset)
{
// uninitialised
prevPath = NULL;
prevTP = NULL;
}
void PhraseBasedReorderingState::Init(const LRState *prev,
const TargetPhrase<Moses2::Word> &topt, const InputPathBase &path, bool first,
const Bitmap *coverage)
{
prevTP = &topt;
prevPath = &path;
m_first = first;
}
size_t PhraseBasedReorderingState::hash() const
{
size_t ret;
ret = (size_t) &prevPath->range;
boost::hash_combine(ret, m_direction);
return ret;
}
bool PhraseBasedReorderingState::operator==(const FFState& o) const
{
if (&o == this) return true;
const PhraseBasedReorderingState &other =
static_cast<const PhraseBasedReorderingState&>(o);
if (&prevPath->range == &other.prevPath->range) {
if (m_direction == LRModel::Forward) {
int compareScore = ComparePrevScores(other.prevTP);
return compareScore == 0;
}
else {
return true;
}
}
else {
return false;
}
}
void PhraseBasedReorderingState::Expand(const ManagerBase &mgr,
const LexicalReordering &ff, const Hypothesis &hypo, size_t phraseTableInd,
Scores &scores, FFState &state) const
{
if ((m_direction != LRModel::Forward) || !m_first) {
LRModel const& lrmodel = m_configuration;
Range const &cur = hypo.GetInputPath().range;
LRModel::ReorderingType reoType = (
m_first ?
lrmodel.GetOrientation(cur) :
lrmodel.GetOrientation(prevPath->range, cur));
CopyScores(mgr.system, scores, hypo.GetTargetPhrase(), reoType);
}
PhraseBasedReorderingState &stateCast =
static_cast<PhraseBasedReorderingState&>(state);
stateCast.Init(this, hypo.GetTargetPhrase(), hypo.GetInputPath(), false,
NULL);
}
} /* namespace Moses2 */

View File

@ -0,0 +1,45 @@
/*
* PhraseBasedReorderingState.h
*
* Created on: 22 Mar 2016
* Author: hieu
*/
#pragma once
#include "LRState.h"
namespace Moses2
{
class InputPathBase;
class PhraseBasedReorderingState: public LRState
{
public:
const InputPathBase *prevPath;
bool m_first;
PhraseBasedReorderingState(const LRModel &config, LRModel::Direction dir,
size_t offset);
void Init(const LRState *prev, const TargetPhrase<Moses2::Word> &topt,
const InputPathBase &path, bool first, const Bitmap *coverage);
size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual std::string ToString() const
{
return "PhraseBasedReorderingState";
}
void Expand(const ManagerBase &mgr, const LexicalReordering &ff,
const Hypothesis &hypo, size_t phraseTableInd, Scores &scores,
FFState &state) const;
protected:
};
} /* namespace Moses2 */

View File

@ -0,0 +1,104 @@
/*
* ReorderingStack.cpp
** Author: Ankit K. Srivastava
** Date: Jan 26, 2010
*/
#include <vector>
#include "ReorderingStack.h"
#include "../../MemPool.h"
namespace Moses2
{
ReorderingStack::ReorderingStack(MemPool &pool) :
m_stack(pool)
{
}
void ReorderingStack::Init()
{
m_stack.clear();
}
size_t ReorderingStack::hash() const
{
std::size_t ret = boost::hash_range(m_stack.begin(), m_stack.end());
return ret;
}
bool ReorderingStack::operator==(const ReorderingStack& o) const
{
const ReorderingStack& other = static_cast<const ReorderingStack&>(o);
return m_stack == other.m_stack;
}
// Method to push (shift element into the stack and reduce if reqd)
int ReorderingStack::ShiftReduce(const Range &input_span)
{
int distance; // value to return: the initial distance between this and previous span
// stack is empty
if (m_stack.empty()) {
m_stack.push_back(input_span);
return input_span.GetStartPos() + 1; // i.e. GetStartPos() - (-1): distance from an imaginary previous span ending at -1
}
// stack is non-empty
Range prev_span = m_stack.back(); //access last element added
//calculate the distance we are returning
if (input_span.GetStartPos() > prev_span.GetStartPos()) {
distance = input_span.GetStartPos() - prev_span.GetEndPos();
}
else {
distance = input_span.GetEndPos() - prev_span.GetStartPos();
}
if (distance == 1) { //monotone
m_stack.pop_back();
Range new_span(prev_span.GetStartPos(), input_span.GetEndPos());
Reduce(new_span);
}
else if (distance == -1) { //swap
m_stack.pop_back();
Range new_span(input_span.GetStartPos(), prev_span.GetEndPos());
Reduce(new_span);
}
else { // discontinuous
m_stack.push_back(input_span);
}
return distance;
}
// Method to reduce, if possible the spans
void ReorderingStack::Reduce(Range current)
{
bool cont_loop = true;
while (cont_loop && m_stack.size() > 0) {
Range previous = m_stack.back();
if (current.GetStartPos() - previous.GetEndPos() == 1) { //mono&merge
m_stack.pop_back();
Range t(previous.GetStartPos(), current.GetEndPos());
current = t;
}
else if (previous.GetStartPos() - current.GetEndPos() == 1) { //swap&merge
m_stack.pop_back();
Range t(current.GetStartPos(), previous.GetEndPos());
current = t;
}
else { // discontinuous, no more merging
cont_loop = false;
}
} // finished reducing, exit
// add to stack
m_stack.push_back(current);
}
}
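Editor's note: the distance returned by ShiftReduce is what LRModel::GetOrientation(int) consumes in the hierarchical backward state. Below is a standalone restatement of the same shift-reduce bookkeeping on plain (start,end) pairs, showing the distances produced by a monotone step, a forward jump and a gap-filling step; it is an illustrative sketch, not the pool-backed class above:
// Sketch: same shift-reduce logic on std::pair spans, for inspection.
#include <iostream>
#include <utility>
#include <vector>
typedef std::pair<int, int> Span;  // [start, end], both inclusive
std::vector<Span> stack_;
void Reduce(Span current)
{
  while (!stack_.empty()) {
    Span prev = stack_.back();
    if (current.first - prev.second == 1) {         // monotone neighbour: merge
      stack_.pop_back();
      current = Span(prev.first, current.second);
    } else if (prev.first - current.second == 1) {  // swapped neighbour: merge
      stack_.pop_back();
      current = Span(current.first, prev.second);
    } else {
      break;                                        // discontinuous: stop merging
    }
  }
  stack_.push_back(current);
}
int ShiftReduce(Span input)
{
  if (stack_.empty()) {
    stack_.push_back(input);
    return input.first + 1;  // distance from an imaginary span ending at -1
  }
  Span prev = stack_.back();
  int distance = (input.first > prev.first) ? input.first - prev.second
                                            : input.second - prev.first;
  if (distance == 1 || distance == -1) {
    stack_.pop_back();
    Reduce(distance == 1 ? Span(prev.first, input.second)
                         : Span(input.first, prev.second));
  } else {
    stack_.push_back(input);
  }
  return distance;
}
int main()
{
  // cover source spans [0,1], [4,5], [2,3] in that order
  std::cout << ShiftReduce(Span(0, 1)) << '\n';  // 1  (monotone, first phrase)
  std::cout << ShiftReduce(Span(4, 5)) << '\n';  // 3  (discontinuous jump forward)
  std::cout << ShiftReduce(Span(2, 3)) << '\n';  // -1 (swap w.r.t. [4,5]; stack reduces to [0,5])
  return 0;
}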

View File

@ -0,0 +1,41 @@
/*
* ReorderingStack.h
** Author: Ankit K. Srivastava
** Date: Jan 26, 2010
*/
#pragma once
//#include <string>
#include <vector>
//#include "Factor.h"
//#include "Phrase.h"
//#include "TypeDef.h"
//#include "Util.h"
#include "../../legacy/Range.h"
#include "../../Vector.h"
namespace Moses2
{
class MemPool;
class ReorderingStack
{
private:
Vector<Range> m_stack;
public:
ReorderingStack(MemPool &pool);
size_t hash() const;
bool operator==(const ReorderingStack& other) const;
void Init();
int ShiftReduce(const Range &input_span);
private:
void Reduce(Range input_span);
};
}

View File

@ -0,0 +1,33 @@
#include "KenOSM.h"
namespace Moses2
{
OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method)
{
lm::ngram::ModelType model_type;
lm::ngram::Config config;
config.load_method = load_method;
if (lm::ngram::RecognizeBinary(file, model_type)) {
switch(model_type) {
case lm::ngram::PROBING:
return new KenOSM<lm::ngram::ProbingModel>(file, config);
case lm::ngram::REST_PROBING:
return new KenOSM<lm::ngram::RestProbingModel>(file, config);
case lm::ngram::TRIE:
return new KenOSM<lm::ngram::TrieModel>(file, config);
case lm::ngram::QUANT_TRIE:
return new KenOSM<lm::ngram::QuantTrieModel>(file, config);
case lm::ngram::ARRAY_TRIE:
return new KenOSM<lm::ngram::ArrayTrieModel>(file, config);
case lm::ngram::QUANT_ARRAY_TRIE:
return new KenOSM<lm::ngram::QuantArrayTrieModel>(file, config);
default:
UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
} else {
return new KenOSM<lm::ngram::ProbingModel>(file, config);
}
}
} // namespace

View File

@ -0,0 +1,53 @@
#pragma once
#include <string>
#include "lm/model.hh"
namespace Moses2
{
class KenOSMBase
{
public:
virtual ~KenOSMBase() {}
virtual float Score(const lm::ngram::State&, StringPiece,
lm::ngram::State&) const = 0;
virtual const lm::ngram::State &BeginSentenceState() const = 0;
virtual const lm::ngram::State &NullContextState() const = 0;
};
template <class KenModel>
class KenOSM : public KenOSMBase
{
public:
KenOSM(const char *file, const lm::ngram::Config &config)
: m_kenlm(file, config) {}
float Score(const lm::ngram::State &in_state,
StringPiece word,
lm::ngram::State &out_state) const {
return m_kenlm.Score(in_state, m_kenlm.GetVocabulary().Index(word),
out_state);
}
const lm::ngram::State &BeginSentenceState() const {
return m_kenlm.BeginSentenceState();
}
const lm::ngram::State &NullContextState() const {
return m_kenlm.NullContextState();
}
private:
KenModel m_kenlm;
};
typedef KenOSMBase OSMLM;
OSMLM* ConstructOSMLM(const char *file, util::LoadMethod load_method);
} // namespace
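Editor's note: a usage sketch of the OSMLM wrapper declared above, scoring a short operation sequence by threading the KenLM state through successive Score() calls, as calculateOSMProb does. The model file name and the operation strings are illustrative placeholders, not part of the commit:
// Sketch: score a two-operation sequence with an OSM language model.
#include <iostream>
#include <string>
#include <vector>
#include "util/mmap.hh"
#include "KenOSM.h"
int main()
{
  // "osm.binlm" is a hypothetical KenLM model over operation sequences
  Moses2::OSMLM *osm = Moses2::ConstructOSMLM("osm.binlm", util::READ);
  std::vector<std::string> ops;
  ops.push_back("_TRANS_the_TO_das");
  ops.push_back("_TRANS_house_TO_haus");
  // thread the KenLM state through the sequence
  lm::ngram::State in = osm->BeginSentenceState(), out;
  float logProb = 0.0f;
  for (size_t i = 0; i < ops.size(); ++i) {
    logProb += osm->Score(in, ops[i], out);
    in = out;
  }
  std::cout << "sequence log-prob: " << logProb << '\n';
  delete osm;
  return 0;
}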

View File

@ -0,0 +1,248 @@
#include <sstream>
#include "OpSequenceModel.h"
#include "osmHyp.h"
#include "lm/state.hh"
#include "../../PhraseBased/Manager.h"
#include "../../PhraseBased/Hypothesis.h"
#include "../../PhraseBased/TargetPhraseImpl.h"
#include "../../PhraseBased/Sentence.h"
#include "../../TranslationModel/UnknownWordPenalty.h"
#include "../../System.h"
using namespace std;
namespace Moses2
{
////////////////////////////////////////////////////////////////////////////////////////
OpSequenceModel::OpSequenceModel(size_t startInd, const std::string &line) :
StatefulFeatureFunction(startInd, line)
{
sFactor = 0;
tFactor = 0;
numFeatures = 5;
load_method = util::READ;
ReadParameters();
}
OpSequenceModel::~OpSequenceModel()
{
// TODO Auto-generated destructor stub
}
void OpSequenceModel::Load(System &system)
{
readLanguageModel(m_lmPath.c_str());
}
FFState* OpSequenceModel::BlankState(MemPool &pool, const System &sys) const
{
return new (pool.Allocate<osmState>()) osmState();
}
void OpSequenceModel::EmptyHypothesisState(FFState &state,
const ManagerBase &mgr, const InputType &input,
const Hypothesis &hypo) const
{
lm::ngram::State startState = OSM->BeginSentenceState();
osmState &stateCast = static_cast<osmState&>(state);
stateCast.setState(startState);
}
void OpSequenceModel::EvaluateInIsolation(MemPool &pool,
const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
osmHypothesis obj;
obj.setState(OSM->NullContextState());
Bitmap myBitmap (pool, source.GetSize());
myBitmap.Init(std::vector<bool>());
vector <string> mySourcePhrase;
vector <string> myTargetPhrase;
vector<float> scoresVec;
vector <int> alignments;
int startIndex = 0;
int endIndex = source.GetSize();
const AlignmentInfo &align = targetPhrase.GetAlignTerm();
AlignmentInfo::const_iterator iter;
for (iter = align.begin(); iter != align.end(); ++iter) {
alignments.push_back(iter->first);
alignments.push_back(iter->second);
}
for (size_t i = 0; i < targetPhrase.GetSize(); i++) {
if (&targetPhrase.pt == system.featureFunctions.GetUnknownWordPenalty() && sFactor == 0 && tFactor == 0)
myTargetPhrase.push_back("_TRANS_SLF_");
else
myTargetPhrase.push_back(targetPhrase[i][tFactor]->GetString().as_string());
}
for (size_t i = 0; i < source.GetSize(); i++) {
mySourcePhrase.push_back(source[i][sFactor]->GetString().as_string());
}
obj.setPhrases(mySourcePhrase , myTargetPhrase);
obj.constructCepts(alignments,startIndex,endIndex-1,targetPhrase.GetSize());
obj.computeOSMFeature(startIndex,myBitmap);
obj.calculateOSMProb(*OSM);
obj.populateScores(scoresVec,numFeatures);
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
scoresVec.data());
estimatedScore += weightedScore;
}
void OpSequenceModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
UTIL_THROW2("Not implemented");
}
void OpSequenceModel::EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const
{
const TargetPhrase<Moses2::Word> &target = hypo.GetTargetPhrase();
const Bitmap &bitmap = hypo.GetBitmap();
Bitmap myBitmap(bitmap);
const ManagerBase &manager = hypo.GetManager();
const InputType &source = manager.GetInput();
const Sentence &sourceSentence = static_cast<const Sentence&>(source);
osmHypothesis obj;
vector <string> mySourcePhrase;
vector <string> myTargetPhrase;
vector<float> scoresVec;
//target.GetWord(0)
//cerr << target <<" --- "<<target.GetSourcePhrase()<< endl; // English ...
//cerr << align << endl; // Alignments ...
//cerr << cur_hypo.GetCurrSourceWordsRange() << endl;
//cerr << source <<endl;
// int a = sourceRange.GetStartPos();
// cerr << source.GetWord(a);
//cerr <<a<<endl;
//const Sentence &sentence = static_cast<const Sentence&>(curr_hypo.GetManager().GetSource());
const Range & sourceRange = hypo.GetInputPath().range;
int startIndex = sourceRange.GetStartPos();
int endIndex = sourceRange.GetEndPos();
const AlignmentInfo &align = hypo.GetTargetPhrase().GetAlignTerm();
// osmState * statePtr;
vector <int> alignments;
AlignmentInfo::const_iterator iter;
for (iter = align.begin(); iter != align.end(); ++iter) {
//cerr << iter->first << "----" << iter->second << " ";
alignments.push_back(iter->first);
alignments.push_back(iter->second);
}
//cerr<<bitmap<<endl;
//cerr<<startIndex<<" "<<endIndex<<endl;
for (int i = startIndex; i <= endIndex; i++) {
myBitmap.SetValue(i,0); // resetting coverage of this phrase ...
mySourcePhrase.push_back(sourceSentence[i][sFactor]->GetString().as_string());
// cerr<<mySourcePhrase[i]<<endl;
}
for (size_t i = 0; i < target.GetSize(); i++) {
if (&target.pt == mgr.system.featureFunctions.GetUnknownWordPenalty() && sFactor == 0 && tFactor == 0)
myTargetPhrase.push_back("_TRANS_SLF_");
else
myTargetPhrase.push_back(target[i][tFactor]->GetString().as_string());
}
//cerr<<myBitmap<<endl;
obj.setState(&prevState);
obj.constructCepts(alignments,startIndex,endIndex,target.GetSize());
obj.setPhrases(mySourcePhrase , myTargetPhrase);
obj.computeOSMFeature(startIndex,myBitmap);
obj.calculateOSMProb(*OSM);
obj.populateScores(scoresVec,numFeatures);
//obj.print();
scores.PlusEquals(mgr.system, *this, scoresVec);
osmState &stateCast = static_cast<osmState&>(state);
obj.saveState(stateCast);
}
void OpSequenceModel::EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const
{
UTIL_THROW2("Not implemented");
}
void OpSequenceModel::SetParameter(const std::string& key, const std::string& value)
{
if (key == "path") {
m_lmPath = value;
} else if (key == "support-features") {
if(value == "no")
numFeatures = 1;
else
numFeatures = 5;
} else if (key == "input-factor") {
sFactor = Scan<int>(value);
} else if (key == "output-factor") {
tFactor = Scan<int>(value);
} else if (key == "load") {
if (value == "lazy") {
load_method = util::LAZY;
} else if (value == "populate_or_lazy") {
load_method = util::POPULATE_OR_LAZY;
} else if (value == "populate_or_read" || value == "populate") {
load_method = util::POPULATE_OR_READ;
} else if (value == "read") {
load_method = util::READ;
} else if (value == "parallel_read") {
load_method = util::PARALLEL_READ;
} else {
UTIL_THROW2("Unknown KenLM load method " << value);
}
} else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
void OpSequenceModel :: readLanguageModel(const char *lmFile)
{
string unkOp = "_TRANS_SLF_";
OSM = ConstructOSMLM(m_lmPath.c_str(), load_method);
lm::ngram::State startState = OSM->NullContextState();
lm::ngram::State endState;
unkOpProb = OSM->Score(startState,unkOp,endState);
}
}

View File

@ -0,0 +1,57 @@
#include "../StatefulFeatureFunction.h"
#include "util/mmap.hh"
#include "KenOSM.h"
namespace Moses2
{
class OpSequenceModel : public StatefulFeatureFunction
{
public:
OSMLM* OSM;
float unkOpProb;
int numFeatures; // Number of features used ...
int sFactor; // Source Factor ...
int tFactor; // Target Factor ...
util::LoadMethod load_method; // method to load model
OpSequenceModel(size_t startInd, const std::string &line);
virtual ~OpSequenceModel();
virtual void Load(System &system);
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const;
void SetParameter(const std::string& key, const std::string& value);
protected:
std::string m_lmPath;
void readLanguageModel(const char *);
};
}

View File

@ -0,0 +1,601 @@
#include "osmHyp.h"
#include <sstream>
using namespace std;
using namespace lm::ngram;
namespace Moses2
{
void osmState::setState(const lm::ngram::State & val)
{
j = 0;
E = 0;
lmState = val;
}
void osmState::saveState(int jVal, int eVal, map <int , string> & gapVal)
{
gap.clear();
gap = gapVal;
j = jVal;
E = eVal;
}
size_t osmState::hash() const
{
size_t ret = j;
boost::hash_combine(ret, E);
boost::hash_combine(ret, gap);
boost::hash_combine(ret, lmState.length);
return ret;
}
bool osmState::operator==(const FFState& otherBase) const
{
const osmState &other = static_cast<const osmState&>(otherBase);
if (j != other.j)
return false;
if (E != other.E)
return false;
if (gap != other.gap)
return false;
if (lmState.length != other.lmState.length)
return false;
return true;
}
std::string osmState :: getName() const
{
return "done";
}
//////////////////////////////////////////////////
osmHypothesis :: osmHypothesis()
{
opProb = 0;
gapWidth = 0;
gapCount = 0;
openGapCount = 0;
deletionCount = 0;
gapCount = 0;
j = 0;
E = 0;
gap.clear();
}
void osmHypothesis :: setState(const FFState* prev_state)
{
if(prev_state != NULL) {
j = static_cast <const osmState *> (prev_state)->getJ();
E = static_cast <const osmState *> (prev_state)->getE();
gap = static_cast <const osmState *> (prev_state)->getGap();
lmState = static_cast <const osmState *> (prev_state)->getLMState();
}
}
void osmHypothesis :: saveState(osmState &state)
{
state.setState(lmState);
state.saveState(j,E,gap);
}
int osmHypothesis :: isTranslationOperation(int x)
{
if (operations[x].find("_JMP_BCK_") != -1)
return 0;
if (operations[x].find("_JMP_FWD_") != -1)
return 0;
if (operations[x].find("_CONT_CEPT_") != -1)
return 0;
if (operations[x].find("_INS_GAP_") != -1)
return 0;
return 1;
}
void osmHypothesis :: removeReorderingOperations()
{
gapCount = 0;
deletionCount = 0;
openGapCount = 0;
gapWidth = 0;
std::vector <std::string> tupleSequence;
for (int x = 0; x < operations.size(); x++) {
// cout<<operations[x]<<endl;
if(isTranslationOperation(x) == 1) {
tupleSequence.push_back(operations[x]);
}
}
operations.clear();
operations = tupleSequence;
}
void osmHypothesis :: calculateOSMProb(OSMLM& ptrOp)
{
opProb = 0;
State currState = lmState;
State temp;
for (size_t i = 0; i<operations.size(); i++) {
temp = currState;
opProb += ptrOp.Score(temp,operations[i],currState);
}
lmState = currState;
//print();
}
int osmHypothesis :: firstOpenGap(vector <int> & coverageVector)
{
int firstOG =-1;
for(int nd = 0; nd < coverageVector.size(); nd++) {
if(coverageVector[nd]==0) {
firstOG = nd;
return firstOG;
}
}
return firstOG;
}
string osmHypothesis :: intToString(int num)
{
return SPrint(num);
}
void osmHypothesis :: generateOperations(int & startIndex , int j1 , int contFlag , Bitmap & coverageVector , string english , string german , set <int> & targetNullWords , vector <string> & currF)
{
int gFlag = 0;
int gp = 0;
int ans;
if ( j < j1) { // j1 is the index of the source word we are about to generate ...
//if(coverageVector[j]==0) // if source word at j is not generated yet ...
if(coverageVector.GetValue(j)==0) { // if source word at j is not generated yet ...
operations.push_back("_INS_GAP_");
gFlag++;
gap[j]="Unfilled";
}
if (j == E) {
j = j1;
} else {
operations.push_back("_JMP_FWD_");
j=E;
}
}
if (j1 < j) {
// if(j < E && coverageVector[j]==0)
if(j < E && coverageVector.GetValue(j)==0) {
operations.push_back("_INS_GAP_");
gFlag++;
gap[j]="Unfilled";
}
j=closestGap(gap,j1,gp);
operations.push_back("_JMP_BCK_"+ intToString(gp));
//cout<<"I am j "<<j<<endl;
//cout<<"I am j1 "<<j1<<endl;
if(j==j1)
gap[j]="Filled";
}
if (j < j1) {
operations.push_back("_INS_GAP_");
gap[j] = "Unfilled";
gFlag++;
j=j1;
}
if(contFlag == 0) { // First words of the multi-word cept ...
if(english == "_TRANS_SLF_") { // Unknown word ...
operations.push_back("_TRANS_SLF_");
} else {
operations.push_back("_TRANS_" + english + "_TO_" + german);
}
//ans = firstOpenGap(coverageVector);
ans = coverageVector.GetFirstGapPos();
if (ans != -1)
gapWidth += j - ans;
} else if (contFlag == 2) {
operations.push_back("_INS_" + german);
ans = coverageVector.GetFirstGapPos();
if (ans != -1)
gapWidth += j - ans;
deletionCount++;
} else {
operations.push_back("_CONT_CEPT_");
}
//coverageVector[j]=1;
coverageVector.SetValue(j,1);
j+=1;
if(E<j)
E=j;
if (gFlag > 0)
gapCount++;
openGapCount += getOpenGaps();
//if (coverageVector[j] == 0 && targetNullWords.find(j) != targetNullWords.end())
if (j < coverageVector.GetSize()) {
if (coverageVector.GetValue(j) == 0 && targetNullWords.find(j) != targetNullWords.end()) {
j1 = j;
german = currF[j1-startIndex];
english = "_INS_";
generateOperations(startIndex, j1, 2 , coverageVector , english , german , targetNullWords , currF);
}
}
}
void osmHypothesis :: print()
{
for (int i = 0; i< operations.size(); i++) {
cerr<<operations[i]<<" ";
}
cerr<<endl<<endl;
cerr<<"Operation Probability "<<opProb<<endl;
cerr<<"Gap Count "<<gapCount<<endl;
cerr<<"Open Gap Count "<<openGapCount<<endl;
cerr<<"Gap Width "<<gapWidth<<endl;
cerr<<"Deletion Count "<<deletionCount<<endl;
cerr<<"_______________"<<endl;
}
int osmHypothesis :: closestGap(map <int,string> gap, int j1, int & gp)
{
int dist=1172;
int value=-1;
int temp=0;
gp=0;
int opGap=0;
map <int,string> :: iterator iter;
iter=gap.end();
do {
iter--;
//cout<<"Trapped "<<iter->first<<endl;
if(iter->first==j1 && iter->second== "Unfilled") {
opGap++;
gp = opGap;
return j1;
}
if(iter->second =="Unfilled") {
opGap++;
temp = iter->first - j1;
if(temp<0)
temp=temp * -1;
if(dist>temp && iter->first < j1) {
dist=temp;
value=iter->first;
gp=opGap;
}
}
} while(iter!=gap.begin());
return value;
}
int osmHypothesis :: getOpenGaps()
{
map <int,string> :: iterator iter;
int nd = 0;
for (iter = gap.begin(); iter!=gap.end(); iter++) {
if(iter->second == "Unfilled")
nd++;
}
return nd;
}
void osmHypothesis :: generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes)
{
operations.push_back("_DEL_" + english);
currTargetIndex++;
while(doneTargetIndexes.find(currTargetIndex) != doneTargetIndexes.end()) {
currTargetIndex++;
}
if (sourceNullWords.find(currTargetIndex) != sourceNullWords.end()) {
english = currE[currTargetIndex];
generateDeleteOperations(english,currTargetIndex,doneTargetIndexes);
}
}
void osmHypothesis :: computeOSMFeature(int startIndex , Bitmap & coverageVector)
{
set <int> doneTargetIndexes;
set <int> eSide;
set <int> fSide;
set <int> :: iterator iter;
string english;
string source;
int j1;
int targetIndex = 0;
doneTargetIndexes.clear();
if (targetNullWords.size() != 0) { // Source words to be deleted in the start of this phrase ...
iter = targetNullWords.begin();
if (*iter == startIndex) {
j1 = startIndex;
source = currF[j1-startIndex];
english = "_INS_";
generateOperations(startIndex, j1, 2 , coverageVector , english , source , targetNullWords , currF);
}
}
if (sourceNullWords.find(targetIndex) != sourceNullWords.end()) { // first word has to be deleted ...
english = currE[targetIndex];
generateDeleteOperations(english,targetIndex, doneTargetIndexes);
}
for (size_t i = 0; i < ceptsInPhrase.size(); i++) {
source = "";
english = "";
fSide = ceptsInPhrase[i].first;
eSide = ceptsInPhrase[i].second;
iter = eSide.begin();
targetIndex = *iter;
english += currE[*iter];
iter++;
for (; iter != eSide.end(); iter++) {
if(*iter == targetIndex+1)
targetIndex++;
else
doneTargetIndexes.insert(*iter);
english += "^_^";
english += currE[*iter];
}
iter = fSide.begin();
source += currF[*iter];
iter++;
for (; iter != fSide.end(); iter++) {
source += "^_^";
source += currF[*iter];
}
iter = fSide.begin();
j1 = *iter + startIndex;
iter++;
generateOperations(startIndex, j1, 0 , coverageVector , english , source , targetNullWords , currF);
for (; iter != fSide.end(); iter++) {
j1 = *iter + startIndex;
generateOperations(startIndex, j1, 1 , coverageVector , english , source , targetNullWords , currF);
}
targetIndex++; // Check whether the next target word is unaligned ...
while(doneTargetIndexes.find(targetIndex) != doneTargetIndexes.end()) {
targetIndex++;
}
if(sourceNullWords.find(targetIndex) != sourceNullWords.end()) {
english = currE[targetIndex];
generateDeleteOperations(english,targetIndex, doneTargetIndexes);
}
}
//removeReorderingOperations();
//print();
}
void osmHypothesis :: getMeCepts ( set <int> & eSide , set <int> & fSide , map <int , vector <int> > & tS , map <int , vector <int> > & sT)
{
set <int> :: iterator iter;
int sz = eSide.size();
vector <int> t;
for (iter = eSide.begin(); iter != eSide.end(); iter++) {
t = tS[*iter];
for (size_t i = 0; i < t.size(); i++) {
fSide.insert(t[i]);
}
}
for (iter = fSide.begin(); iter != fSide.end(); iter++) {
t = sT[*iter];
for (size_t i = 0 ; i<t.size(); i++) {
eSide.insert(t[i]);
}
}
if (eSide.size () > sz) {
getMeCepts(eSide,fSide,tS,sT);
}
}
void osmHypothesis :: constructCepts(vector <int> & align , int startIndex , int endIndex, int targetPhraseLength)
{
std::map <int , vector <int> > sT;
std::map <int , vector <int> > tS;
std::set <int> eSide;
std::set <int> fSide;
std::set <int> :: iterator iter;
std :: map <int , vector <int> > :: iterator iter2;
std :: pair < set <int> , set <int> > cept;
int src;
int tgt;
for (size_t i = 0; i < align.size(); i+=2) {
src = align[i];
tgt = align[i+1];
tS[tgt].push_back(src);
sT[src].push_back(tgt);
}
for (int i = startIndex; i<= endIndex; i++) { // What are unaligned source words in this phrase ...
if (sT.find(i-startIndex) == sT.end()) {
targetNullWords.insert(i);
}
}
for (int i = 0; i < targetPhraseLength; i++) { // What are unaligned target words in this phrase ...
if (tS.find(i) == tS.end()) {
sourceNullWords.insert(i);
}
}
while (tS.size() != 0 && sT.size() != 0) {
iter2 = tS.begin();
eSide.clear();
fSide.clear();
eSide.insert (iter2->first);
getMeCepts(eSide, fSide, tS , sT);
for (iter = eSide.begin(); iter != eSide.end(); iter++) {
iter2 = tS.find(*iter);
tS.erase(iter2);
}
for (iter = fSide.begin(); iter != fSide.end(); iter++) {
iter2 = sT.find(*iter);
sT.erase(iter2);
}
cept = make_pair (fSide , eSide);
ceptsInPhrase.push_back(cept);
}
/*
cerr<<"Extracted Cepts "<<endl;
for (int i = 0; i < ceptsInPhrase.size(); i++)
{
fSide = ceptsInPhrase[i].first;
eSide = ceptsInPhrase[i].second;
for (iter = eSide.begin(); iter != eSide.end(); iter++)
{
cerr<<*iter<<" ";
}
cerr<<"<---> ";
for (iter = fSide.begin(); iter != fSide.end(); iter++)
{
cerr<<*iter<<" ";
}
cerr<<endl;
}
cerr<<endl;
cerr<<"Unaligned Target Words"<<endl;
for (iter = sourceNullWords.begin(); iter != sourceNullWords.end(); iter++)
cerr<<*iter<<"<--->"<<endl;
cerr<<"Unaligned Source Words"<<endl;
for (iter = targetNullWords.begin(); iter != targetNullWords.end(); iter++)
cerr<<*iter<<"<--->"<<endl;
*/
}
void osmHypothesis :: populateScores(vector <float> & scores , const int numFeatures)
{
scores.clear();
scores.push_back(opProb);
if (numFeatures == 1)
return;
scores.push_back(gapWidth);
scores.push_back(gapCount);
scores.push_back(openGapCount);
scores.push_back(deletionCount);
}
} // namespace
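Editor's note: populateScores above fixes the order of the OSM scores that OpSequenceModel passes to Scores::PlusEquals. The sketch below simply names the five slots; the enum is an editorial aid, not part of the commit, and with support-features=no only the first slot is emitted:
// Sketch: index names for the score vector filled by populateScores.
#include <cassert>
#include <vector>
enum OsmScoreIndex {
  OpProb = 0,        // log-prob of the operation sequence under the OSM LM
  GapWidth = 1,      // accumulated distance back to the first open gap
  GapCount = 2,      // number of gaps inserted
  OpenGapCount = 3,  // open gaps, summed after each generated cept
  DeletionCount = 4  // incremented once per _INS_ operation in generateOperations
};
int main()
{
  std::vector<float> scores;
  // mimic populateScores(scores, 5) with made-up values
  scores.push_back(-12.3f);  // OpProb
  scores.push_back(2.0f);    // GapWidth
  scores.push_back(1.0f);    // GapCount
  scores.push_back(1.0f);    // OpenGapCount
  scores.push_back(0.0f);    // DeletionCount
  assert(scores.size() == 5 && scores[GapCount] == 1.0f);
  return 0;
}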

View File

@ -0,0 +1,111 @@
#pragma once
#include <set>
#include <map>
#include <string>
#include <vector>
#include "KenOSM.h"
#include "../FFState.h"
#include "../../legacy/Bitmap.h"
namespace Moses2
{
class osmState : public FFState
{
public:
osmState()
{}
void setState(const lm::ngram::State & val);
virtual size_t hash() const;
virtual bool operator==(const FFState& other) const;
virtual std::string ToString() const
{ return "osmState"; }
void saveState(int jVal, int eVal, std::map <int , std::string> & gapVal);
int getJ()const {
return j;
}
int getE()const {
return E;
}
std::map <int , std::string> getGap() const {
return gap;
}
lm::ngram::State getLMState() const {
return lmState;
}
void print() const;
std::string getName() const;
protected:
int j, E;
std::map <int,std::string> gap;
lm::ngram::State lmState;
};
class osmHypothesis
{
private:
std::vector <std::string> operations; // List of operations required to generated this hyp ...
std::map <int,std::string> gap; // Maintains gap history ...
int j; // Position after the last source word generated ...
int E; // Position after the right most source word so far generated ...
lm::ngram::State lmState; // KenLM's Model State ...
int gapCount; // Number of gaps inserted ...
int deletionCount;
int openGapCount;
int gapWidth;
double opProb;
std::vector <std::string> currE;
std::vector <std::string> currF;
std::vector < std::pair < std::set <int> , std::set <int> > > ceptsInPhrase;
std::set <int> targetNullWords;
std::set <int> sourceNullWords;
int closestGap(std::map <int,std::string> gap,int j1, int & gp);
int firstOpenGap(std::vector <int> & coverageVector);
std::string intToString(int);
int getOpenGaps();
int isTranslationOperation(int j);
void removeReorderingOperations();
void getMeCepts ( std::set <int> & eSide , std::set <int> & fSide , std::map <int , std::vector <int> > & tS , std::map <int , std::vector <int> > & sT);
public:
osmHypothesis();
~osmHypothesis() {};
void generateOperations(int & startIndex, int j1 , int contFlag , Bitmap & coverageVector , std::string english , std::string german , std::set <int> & targetNullWords , std::vector <std::string> & currF);
void generateDeleteOperations(std::string english, int currTargetIndex, std::set <int> doneTargetIndexes);
void calculateOSMProb(OSMLM& ptrOp);
void computeOSMFeature(int startIndex , Bitmap & coverageVector);
void constructCepts(std::vector <int> & align , int startIndex , int endIndex, int targetPhraseLength);
void setPhrases(std::vector <std::string> & val1 , std::vector <std::string> & val2) {
currF = val1;
currE = val2;
}
void setState(const FFState* prev_state);
void saveState(osmState &state);
void print();
void populateScores(std::vector <float> & scores , const int numFeatures);
void setState(const lm::ngram::State & val) {
lmState = val;
}
};
} // namespace

View File

@ -0,0 +1,40 @@
/*
* PhrasePenalty.cpp
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#include "PhrasePenalty.h"
#include "../Scores.h"
namespace Moses2
{
PhrasePenalty::PhrasePenalty(size_t startInd, const std::string &line) :
StatelessFeatureFunction(startInd, line)
{
ReadParameters();
}
PhrasePenalty::~PhrasePenalty()
{
// TODO Auto-generated destructor stub
}
void PhrasePenalty::EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
scores.PlusEquals(system, *this, 1);
}
void PhrasePenalty::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
scores.PlusEquals(system, *this, 1);
}
}

View File

@ -0,0 +1,34 @@
/*
* PhrasePenalty.h
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#pragma once
#include "StatelessFeatureFunction.h"
namespace Moses2
{
class PhrasePenalty: public StatelessFeatureFunction
{
public:
PhrasePenalty(size_t startInd, const std::string &line);
virtual ~PhrasePenalty();
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
};
}

View File

View File

@ -0,0 +1,42 @@
#pragma once
#include <sstream>
#include "FFState.h"
namespace Moses2
{
struct PointerState: public FFState
{
const void* lmstate;
explicit PointerState()
{
// uninitialised
}
PointerState(const void* lms)
{
lmstate = lms;
}
virtual size_t hash() const
{
return (size_t) lmstate;
}
virtual bool operator==(const FFState& other) const
{
const PointerState& o = static_cast<const PointerState&>(other);
return lmstate == o.lmstate;
}
virtual std::string ToString() const
{
std::stringstream sb;
sb << lmstate;
return sb.str();
}
};
}

View File

@ -0,0 +1,100 @@
/*
* SkeletonStatefulFF.cpp
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#include <sstream>
#include "SkeletonStatefulFF.h"
#include "../PhraseBased/Manager.h"
#include "../PhraseBased/Hypothesis.h"
using namespace std;
namespace Moses2
{
class SkeletonState: public FFState
{
public:
int targetLen;
SkeletonState()
{
// uninitialised
}
virtual size_t hash() const
{
return (size_t) targetLen;
}
virtual bool operator==(const FFState& o) const
{
const SkeletonState& other = static_cast<const SkeletonState&>(o);
return targetLen == other.targetLen;
}
virtual std::string ToString() const
{
stringstream sb;
sb << targetLen;
return sb.str();
}
};
////////////////////////////////////////////////////////////////////////////////////////
SkeletonStatefulFF::SkeletonStatefulFF(size_t startInd, const std::string &line) :
StatefulFeatureFunction(startInd, line)
{
ReadParameters();
}
SkeletonStatefulFF::~SkeletonStatefulFF()
{
// TODO Auto-generated destructor stub
}
FFState* SkeletonStatefulFF::BlankState(MemPool &pool, const System &sys) const
{
return new (pool.Allocate<SkeletonState>()) SkeletonState();
}
void SkeletonStatefulFF::EmptyHypothesisState(FFState &state,
const ManagerBase &mgr, const InputType &input,
const Hypothesis &hypo) const
{
SkeletonState &stateCast = static_cast<SkeletonState&>(state);
stateCast.targetLen = 0;
}
void SkeletonStatefulFF::EvaluateInIsolation(MemPool &pool,
const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
void SkeletonStatefulFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
void SkeletonStatefulFF::EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const
{
SkeletonState &stateCast = static_cast<SkeletonState&>(state);
stateCast.targetLen = hypo.GetTargetPhrase().GetSize();
}
void SkeletonStatefulFF::EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const
{
UTIL_THROW2("Not implemented");
}
}

View File

@ -0,0 +1,48 @@
/*
* SkeletonStatefulFF.h
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#ifndef SKELETONSTATEFULFF_H_
#define SKELETONSTATEFULFF_H_
#include "StatefulFeatureFunction.h"
namespace Moses2
{
class SkeletonStatefulFF: public StatefulFeatureFunction
{
public:
SkeletonStatefulFF(size_t startInd, const std::string &line);
virtual ~SkeletonStatefulFF();
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const;
};
}
#endif /* SKELETONSTATEFULFF_H_ */

View File

@ -0,0 +1,40 @@
/*
 * SkeletonStatelessFF.cpp
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#include "../Scores.h"
#include "SkeletonStatelessFF.h"
namespace Moses2
{
SkeletonStatelessFF::SkeletonStatelessFF(size_t startInd,
const std::string &line) :
StatelessFeatureFunction(startInd, line)
{
ReadParameters();
}
SkeletonStatelessFF::~SkeletonStatelessFF()
{
// TODO Auto-generated destructor stub
}
void SkeletonStatelessFF::EvaluateInIsolation(MemPool &pool,
const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
void SkeletonStatelessFF::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
}

View File

@ -0,0 +1,34 @@
/*
 * SkeletonStatelessFF.h
*
* Created on: 27 Oct 2015
* Author: hieu
*/
#pragma once
#include "StatelessFeatureFunction.h"
namespace Moses2
{
class SkeletonStatelessFF: public StatelessFeatureFunction
{
public:
SkeletonStatelessFF(size_t startInd, const std::string &line);
virtual ~SkeletonStatelessFF();
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
};
}

View File

@ -0,0 +1,67 @@
/*
* StatefulFeatureFunction.cpp
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <boost/foreach.hpp>
#include "StatefulFeatureFunction.h"
#include "../PhraseBased/Hypothesis.h"
using namespace std;
namespace Moses2
{
StatefulFeatureFunction::StatefulFeatureFunction(size_t startInd,
const std::string &line) :
FeatureFunction(startInd, line)
{
}
StatefulFeatureFunction::~StatefulFeatureFunction()
{
// TODO Auto-generated destructor stub
}
void StatefulFeatureFunction::EvaluateWhenAppliedBatch(
const System &system,
const Batch &batch) const
{
//cerr << "EvaluateWhenAppliedBatch:" << m_name << endl;
#ifdef __linux
/*
pthread_t handle;
handle = pthread_self();
int s;
cpu_set_t cpusetOrig, cpuset;
s = pthread_getaffinity_np(handle, sizeof(cpu_set_t), &cpusetOrig);
CPU_ZERO(&cpuset);
int core = handle % 8;
core += 24;
CPU_SET(core, &cpuset);
s = pthread_setaffinity_np(handle, sizeof(cpu_set_t), &cpuset);
*/
#endif
for (size_t i = 0; i < batch.size(); ++i) {
Hypothesis *hypo = batch[i];
hypo->EvaluateWhenApplied(*this);
}
#ifdef __linux
// s = pthread_setaffinity_np(handle, sizeof(cpu_set_t), &cpusetOrig);
#endif
}
}

View File

@ -0,0 +1,68 @@
/*
* StatefulFeatureFunction.h
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#ifndef STATEFULFEATUREFUNCTION_H_
#define STATEFULFEATUREFUNCTION_H_
#include "FeatureFunction.h"
#include "FFState.h"
#include "../MemPool.h"
namespace Moses2
{
class Hypothesis;
class InputType;
namespace SCFG
{
class Hypothesis;
class Manager;
}
class StatefulFeatureFunction: public FeatureFunction
{
public:
StatefulFeatureFunction(size_t startInd, const std::string &line);
virtual ~StatefulFeatureFunction();
void SetStatefulInd(size_t ind)
{
m_statefulInd = ind;
}
size_t GetStatefulInd() const
{
return m_statefulInd;
}
//! return uninitialise state
virtual FFState* BlankState(MemPool &pool, const System &sys) const = 0;
//! return the state associated with the empty hypothesis for a given sentence
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const = 0;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const = 0;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const = 0;
virtual void EvaluateWhenAppliedBatch(
const System &system,
const Batch &batch) const;
protected:
size_t m_statefulInd;
};
}
#endif /* STATEFULFEATUREFUNCTION_H_ */

View File

@ -0,0 +1,27 @@
/*
* StatelessFeatureFunction.cpp
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#include "StatelessFeatureFunction.h"
namespace Moses2
{
StatelessFeatureFunction::StatelessFeatureFunction(size_t startInd,
const std::string &line) :
FeatureFunction(startInd, line)
{
// TODO Auto-generated constructor stub
}
StatelessFeatureFunction::~StatelessFeatureFunction()
{
// TODO Auto-generated destructor stub
}
}

View File

@ -0,0 +1,25 @@
/*
* StatelessFeatureFunction.h
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#ifndef STATELESSFEATUREFUNCTION_H_
#define STATELESSFEATUREFUNCTION_H_
#include "FeatureFunction.h"
namespace Moses2
{
class StatelessFeatureFunction: public FeatureFunction
{
public:
StatelessFeatureFunction(size_t startInd, const std::string &line);
virtual ~StatelessFeatureFunction();
};
}
#endif /* STATELESSFEATUREFUNCTION_H_ */

View File

@ -0,0 +1,53 @@
/*
* WordPenalty.cpp
*
* Created on: 28 Oct 2015
* Author: hieu
*/
#include "WordPenalty.h"
#include "../TypeDef.h"
#include "../Scores.h"
#include "../Phrase.h"
#include "../TargetPhrase.h"
#include "../SCFG/Word.h"
#include "../PhraseBased/TargetPhraseImpl.h"
namespace Moses2
{
WordPenalty::WordPenalty(size_t startInd, const std::string &line) :
StatelessFeatureFunction(startInd, line)
{
ReadParameters();
}
WordPenalty::~WordPenalty()
{
// TODO Auto-generated destructor stub
}
void WordPenalty::EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
SCORE score = -(SCORE) targetPhrase.GetSize();
scores.PlusEquals(system, *this, score);
}
void WordPenalty::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
size_t count = 0;
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const SCFG::Word &word = targetPhrase[i];
if (!word.isNonTerminal) {
++count;
}
}
scores.PlusEquals(system, *this, -(SCORE) count);
}
}

View File

@ -0,0 +1,37 @@
/*
* WordPenalty.h
*
* Created on: 28 Oct 2015
* Author: hieu
*/
#ifndef WORDPENALTY_H_
#define WORDPENALTY_H_
#include "StatelessFeatureFunction.h"
namespace Moses2
{
class WordPenalty: public StatelessFeatureFunction
{
public:
WordPenalty(size_t startInd, const std::string &line);
virtual ~WordPenalty();
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
};
}
#endif /* WORDPENALTY_H_ */

View File

@ -0,0 +1,81 @@
/*
 * HypothesisBase.cpp
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include <stdlib.h>
#include <deque>
#include "HypothesisBase.h"
#include "System.h"
#include "Scores.h"
#include "ManagerBase.h"
#include "MemPool.h"
#include "FF/StatefulFeatureFunction.h"
using namespace std;
namespace Moses2
{
//size_t g_numHypos = 0;
HypothesisBase::HypothesisBase(MemPool &pool, const System &system)
{
m_scores = new (pool.Allocate<Scores>()) Scores(system, pool,
system.featureFunctions.GetNumScores());
// FF states
const std::vector<const StatefulFeatureFunction*> &sfffs =
system.featureFunctions.GetStatefulFeatureFunctions();
size_t numStatefulFFs = sfffs.size();
m_ffStates = (FFState **) pool.Allocate(sizeof(FFState*) * numStatefulFFs);
BOOST_FOREACH(const StatefulFeatureFunction *sfff, sfffs){
size_t statefulInd = sfff->GetStatefulInd();
FFState *state = sfff->BlankState(pool, system);
m_ffStates[statefulInd] = state;
}
}
size_t HypothesisBase::hash() const
{
return hash(0);
}
size_t HypothesisBase::hash(size_t seed) const
{
size_t numStatefulFFs =
GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
// states
for (size_t i = 0; i < numStatefulFFs; ++i) {
const FFState *state = m_ffStates[i];
size_t hash = state->hash();
boost::hash_combine(seed, hash);
}
return seed;
}
bool HypothesisBase::operator==(const HypothesisBase &other) const
{
size_t numStatefulFFs =
GetManager().system.featureFunctions.GetStatefulFeatureFunctions().size();
// states
for (size_t i = 0; i < numStatefulFFs; ++i) {
const FFState &thisState = *m_ffStates[i];
const FFState &otherState = *other.m_ffStates[i];
if (thisState != otherState) {
return false;
}
}
return true;
}
}
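The hash()/operator== pair defined above is what stack recombination keys on; a rough sketch of the check (the helper function is hypothetical):
// Sketch only -- not part of the commit.
#include "HypothesisBase.h"
bool CanRecombine(const Moses2::HypothesisBase &a, const Moses2::HypothesisBase &b)
{
  // Equal iff every stateful feature's FFState compares equal; the hash is a
  // cheap pre-filter built by combining each state's hash().
  return a.hash() == b.hash() && a == b;
}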

View File

@ -0,0 +1,74 @@
/*
 * HypothesisBase.h
*
* Created on: 24 Oct 2015
* Author: hieu
*/
#pragma once
#include <iostream>
#include <cstddef>
#include "FF/FFState.h"
#include "Scores.h"
namespace Moses2
{
class ManagerBase;
class Scores;
class HypothesisBase
{
public:
virtual ~HypothesisBase()
{
}
inline ManagerBase &GetManager() const
{
return *m_mgr;
}
template<typename T>
const T &Cast() const
{ return static_cast<const T&>(*this); }
const Scores &GetScores() const
{ return *m_scores; }
Scores &GetScores()
{ return *m_scores; }
const FFState *GetState(size_t ind) const
{ return m_ffStates[ind]; }
FFState *GetState(size_t ind)
{ return m_ffStates[ind]; }
virtual size_t hash() const;
virtual size_t hash(size_t seed) const;
virtual bool operator==(const HypothesisBase &other) const;
virtual SCORE GetFutureScore() const = 0;
virtual void EvaluateWhenApplied() = 0;
virtual std::string Debug(const System &system) const = 0;
protected:
ManagerBase *m_mgr;
Scores *m_scores;
FFState **m_ffStates;
HypothesisBase(MemPool &pool, const System &system);
};
////////////////////////////////////////////////////////////////////////////////////
class HypothesisFutureScoreOrderer
{
public:
bool operator()(const HypothesisBase* a, const HypothesisBase* b) const
{
return a->GetFutureScore() > b->GetFutureScore();
}
};
}

View File

@ -0,0 +1,189 @@
/*
* HypothesisColl.cpp
*
* Created on: 26 Feb 2016
* Author: hieu
*/
#include <iostream>
#include <sstream>
#include <algorithm>
#include <boost/foreach.hpp>
#include "HypothesisColl.h"
#include "ManagerBase.h"
#include "System.h"
#include "MemPoolAllocator.h"
using namespace std;
namespace Moses2
{
HypothesisColl::HypothesisColl(const ManagerBase &mgr) :
m_coll(MemPoolAllocator<const HypothesisBase*>(mgr.GetPool())), m_sortedHypos(
NULL)
{
}
const HypothesisBase *HypothesisColl::GetBestHypo() const
{
if (GetSize() == 0) {
return NULL;
}
if (m_sortedHypos) {
return (*m_sortedHypos)[0];
}
SCORE bestScore = -std::numeric_limits<SCORE>::infinity();
  const HypothesisBase *bestHypo = NULL;
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
if (hypo->GetFutureScore() > bestScore) {
bestScore = hypo->GetFutureScore();
bestHypo = hypo;
}
}
return bestHypo;
}
void HypothesisColl::Add(
const System &system,
HypothesisBase *hypo,
Recycler<HypothesisBase*> &hypoRecycle,
ArcLists &arcLists)
{
StackAdd added = Add(hypo);
size_t nbestSize = system.options.nbest.nbest_size;
if (nbestSize) {
arcLists.AddArc(added.added, hypo, added.other);
}
else {
if (!added.added) {
hypoRecycle.Recycle(hypo);
}
else if (added.other) {
hypoRecycle.Recycle(added.other);
}
}
}
StackAdd HypothesisColl::Add(const HypothesisBase *hypo)
{
std::pair<_HCType::iterator, bool> addRet = m_coll.insert(hypo);
// CHECK RECOMBINATION
if (addRet.second) {
// equiv hypo doesn't exists
return StackAdd(true, NULL);
}
else {
HypothesisBase *hypoExisting = const_cast<HypothesisBase*>(*addRet.first);
if (hypo->GetFutureScore() > hypoExisting->GetFutureScore()) {
// incoming hypo is better than the one we have
const HypothesisBase * const &hypoExisting1 = *addRet.first;
const HypothesisBase *&hypoExisting2 =
const_cast<const HypothesisBase *&>(hypoExisting1);
hypoExisting2 = hypo;
return StackAdd(true, hypoExisting);
}
else {
// already storing the best hypo. discard incoming hypo
return StackAdd(false, hypoExisting);
}
}
assert(false);
}
const Hypotheses &HypothesisColl::GetSortedAndPruneHypos(
const ManagerBase &mgr,
ArcLists &arcLists) const
{
if (m_sortedHypos == NULL) {
// create sortedHypos first
MemPool &pool = mgr.GetPool();
m_sortedHypos = new (pool.Allocate<Hypotheses>()) Hypotheses(pool,
m_coll.size());
size_t ind = 0;
BOOST_FOREACH(const HypothesisBase *hypo, m_coll){
(*m_sortedHypos)[ind] = hypo;
++ind;
}
SortAndPruneHypos(mgr, arcLists);
}
return *m_sortedHypos;
}
const Hypotheses &HypothesisColl::GetSortedAndPrunedHypos() const
{
UTIL_THROW_IF2(m_sortedHypos == NULL, "m_sortedHypos must be sorted beforehand");
return *m_sortedHypos;
}
void HypothesisColl::SortAndPruneHypos(const ManagerBase &mgr,
ArcLists &arcLists) const
{
size_t stackSize = mgr.system.options.search.stack_size;
Recycler<HypothesisBase*> &recycler = mgr.GetHypoRecycle();
/*
cerr << "UNSORTED hypos: ";
BOOST_FOREACH(const HypothesisBase *hypo, m_coll) {
cerr << hypo << "(" << hypo->GetFutureScore() << ")" << " ";
}
cerr << endl;
*/
Hypotheses::iterator iterMiddle;
iterMiddle =
(stackSize == 0 || m_sortedHypos->size() < stackSize) ?
m_sortedHypos->end() : m_sortedHypos->begin() + stackSize;
std::partial_sort(m_sortedHypos->begin(), iterMiddle, m_sortedHypos->end(),
HypothesisFutureScoreOrderer());
// prune
if (stackSize && m_sortedHypos->size() > stackSize) {
for (size_t i = stackSize; i < m_sortedHypos->size(); ++i) {
HypothesisBase *hypo = const_cast<HypothesisBase*>((*m_sortedHypos)[i]);
recycler.Recycle(hypo);
// delete from arclist
if (mgr.system.options.nbest.nbest_size) {
arcLists.Delete(hypo);
}
}
m_sortedHypos->resize(stackSize);
}
/*
cerr << "sorted hypos: ";
for (size_t i = 0; i < m_sortedHypos->size(); ++i) {
const HypothesisBase *hypo = (*m_sortedHypos)[i];
cerr << hypo << " ";
}
cerr << endl;
*/
}
void HypothesisColl::Clear()
{
m_sortedHypos = NULL;
m_coll.clear();
}
std::string HypothesisColl::Debug(const System &system) const
{
stringstream out;
BOOST_FOREACH (const HypothesisBase *hypo, m_coll) {
out << hypo->Debug(system);
out << std::endl << std::endl;
}
return out.str();
}
} /* namespace Moses2 */
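The StackAdd returned by the protected Add() overload encodes three outcomes; a sketch of how a caller interprets them (hypothetical helper, assuming StackAdd with members added/other as used above is visible through HypothesisColl.h):
// Sketch only -- not part of the commit.
void DescribeAdd(const Moses2::StackAdd &added)
{
  if (added.added && added.other == NULL) {
    // no equivalent hypothesis existed; the new hypothesis was inserted
  } else if (added.added) {
    // the new hypothesis displaced a worse equivalent; added.other is the loser
  } else {
    // an equal-or-better equivalent is already stored; the new one was discarded
  }
}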

View File

@ -0,0 +1,69 @@
/*
* HypothesisColl.h
*
* Created on: 26 Feb 2016
* Author: hieu
*/
#pragma once
#include <boost/unordered_set.hpp>
#include "HypothesisBase.h"
#include "MemPoolAllocator.h"
#include "Recycler.h"
#include "Array.h"
#include "legacy/Util2.h"
namespace Moses2
{
class ManagerBase;
class ArcLists;
typedef Array<const HypothesisBase*> Hypotheses;
class HypothesisColl
{
public:
HypothesisColl(const ManagerBase &mgr);
void Add(const System &system,
HypothesisBase *hypo,
Recycler<HypothesisBase*> &hypoRecycle,
ArcLists &arcLists);
size_t GetSize() const
{ return m_coll.size(); }
void Clear();
const Hypotheses &GetSortedAndPruneHypos(
const ManagerBase &mgr,
ArcLists &arcLists) const;
const Hypotheses &GetSortedAndPrunedHypos() const;
const HypothesisBase *GetBestHypo() const;
template<typename T>
const T *GetBestHypo() const
{
const HypothesisBase *hypo = GetBestHypo();
return hypo ? &hypo->Cast<T>() : NULL;
}
std::string Debug(const System &system) const;
protected:
typedef boost::unordered_set<const HypothesisBase*,
UnorderedComparer<HypothesisBase>, UnorderedComparer<HypothesisBase>,
MemPoolAllocator<const HypothesisBase*> > _HCType;
_HCType m_coll;
mutable Hypotheses *m_sortedHypos;
StackAdd Add(const HypothesisBase *hypo);
void SortAndPruneHypos(const ManagerBase &mgr, ArcLists &arcLists) const;
};
} /* namespace Moses2 */

View File

@ -0,0 +1,21 @@
/*
 * InputPathBase.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include "InputPathBase.h"
#include "TranslationModel/PhraseTable.h"
namespace Moses2
{
InputPathBase::InputPathBase(MemPool &pool,
const Range &range, size_t numPt, const InputPathBase *prefixPath) :
range(range), prefixPath(prefixPath)
{
}
}

View File

@ -0,0 +1,32 @@
/*
 * InputPathBase.h
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#pragma once
#include <iostream>
#include <vector>
#include "SubPhrase.h"
#include "legacy/Range.h"
namespace Moses2
{
class PhraseTable;
class InputPathBase
{
public:
const InputPathBase *prefixPath;
Range range;
InputPathBase(MemPool &pool, const Range &range,
size_t numPt, const InputPathBase *prefixPath);
};
}

View File

@ -0,0 +1,20 @@
/*
 * InputPathsBase.cpp
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#include <iostream>
#include "InputPathsBase.h"
using namespace std;
namespace Moses2
{
InputPathsBase::~InputPathsBase()
{
}
}

View File

@ -0,0 +1,59 @@
/*
 * InputPathsBase.h
*
* Created on: 23 Oct 2015
* Author: hieu
*/
#pragma once
#include <vector>
#include "MemPool.h"
namespace Moses2
{
class InputType;
class System;
class ManagerBase;
class InputPathBase;
class InputPathsBase
{
typedef std::vector<InputPathBase*> Coll;
public:
InputPathsBase()
{
}
virtual ~InputPathsBase();
//! iterators
typedef Coll::iterator iterator;
typedef Coll::const_iterator const_iterator;
const_iterator begin() const
{
return m_inputPaths.begin();
}
const_iterator end() const
{
return m_inputPaths.end();
}
iterator begin()
{
return m_inputPaths.begin();
}
iterator end()
{
return m_inputPaths.end();
}
virtual void Init(const InputType &input, const ManagerBase &mgr) = 0;
protected:
Coll m_inputPaths;
};
}

View File

@ -0,0 +1,92 @@
/*
* InputType.cpp
*
* Created on: 14 Dec 2015
* Author: hieu
*/
#include "InputType.h"
#include "System.h"
namespace Moses2
{
//////////////////////////////////////////////////////////////////////////////
InputType::XMLOption::XMLOption(MemPool &pool, const std::string &nodeName, size_t vStartPos)
  :startPos(vStartPos)
  ,prob(0)
  ,m_translation(NULL)
  ,m_entity(NULL)
{
m_nodeName = pool.Allocate<char>(nodeName.size() + 1);
strcpy(m_nodeName, nodeName.c_str());
}
void InputType::XMLOption::SetTranslation(MemPool &pool, const std::string &val)
{
m_translation = pool.Allocate<char>(val.size() + 1);
strcpy(m_translation, val.c_str());
}
void InputType::XMLOption::SetEntity(MemPool &pool, const std::string &val)
{
m_entity = pool.Allocate<char>(val.size() + 1);
strcpy(m_entity, val.c_str());
}
std::string InputType::XMLOption::Debug(const System &system) const
{
std::stringstream out;
out << "[" << startPos << "," << phraseSize << "]="
<< m_nodeName << ","
<< m_translation << ","
<< prob;
if (m_entity) {
out << "," << m_entity;
}
return out.str();
}
//////////////////////////////////////////////////////////////////////////////
InputType::InputType(MemPool &pool)
:m_reorderingConstraint(pool)
,m_xmlOptions(pool)
,m_xmlCoverageMap(pool)
{
}
InputType::~InputType()
{
// TODO Auto-generated destructor stub
}
void InputType::Init(const System &system, size_t size, int max_distortion)
{
m_reorderingConstraint.InitializeWalls(size, max_distortion);
if (system.options.input.xml_policy != XmlPassThrough) {
m_xmlCoverageMap.assign(size, false);
}
}
void InputType::AddXMLOption(const System &system, const XMLOption *xmlOption)
{
m_xmlOptions.push_back(xmlOption);
if (system.options.input.xml_policy != XmlPassThrough) {
for(size_t j = xmlOption->startPos; j < xmlOption->startPos + xmlOption->phraseSize; ++j) {
m_xmlCoverageMap[j]=true;
}
}
}
bool InputType::XmlOverlap(size_t startPos, size_t endPos) const
{
for (size_t pos = startPos; pos <= endPos ; pos++) {
if (pos < m_xmlCoverageMap.size() && m_xmlCoverageMap[pos]) {
return true;
}
}
return false;
}
} /* namespace Moses2 */

View File

@ -0,0 +1,78 @@
/*
* InputType.h
*
* Created on: 14 Dec 2015
* Author: hieu
*/
#pragma once
#include "PhraseBased/ReorderingConstraint.h"
#include "TypeDef.h"
namespace Moses2
{
class InputType
{
public:
//////////////////////////////////////////////////////////////////////////////
class XMLOption
{
public:
size_t startPos, phraseSize;
SCORE prob;
XMLOption(MemPool &pool, const std::string &nodeName, size_t vStartPos);
const char *GetNodeName() const
{ return m_nodeName; }
const char *GetTranslation() const
{ return m_translation; }
const char *GetEntity() const
{ return m_entity; }
void SetTranslation(MemPool &pool, const std::string &val);
void SetEntity(MemPool &pool, const std::string &val);
std::string Debug(const System &system) const;
public:
char *m_nodeName;
char *m_translation;
char *m_entity;
};
//////////////////////////////////////////////////////////////////////////////
InputType(MemPool &pool);
virtual ~InputType();
virtual void Init(const System &system, size_t size, int max_distortion);
ReorderingConstraint &GetReorderingConstraint()
{ return m_reorderingConstraint; }
const ReorderingConstraint &GetReorderingConstraint() const
{ return m_reorderingConstraint; }
const Vector<const XMLOption*> &GetXMLOptions() const
{ return m_xmlOptions; }
void AddXMLOption(const System &system, const XMLOption *xmlOption);
//! Returns true if there were any XML tags parsed that at least partially covered the range passed
bool XmlOverlap(size_t startPos, size_t endPos) const;
protected:
ReorderingConstraint m_reorderingConstraint; /**< limits on reordering specified either by "-mp" switch or xml tags */
Vector<const XMLOption*> m_xmlOptions;
Vector<bool> m_xmlCoverageMap;
};
} /* namespace Moses2 */
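A rough usage sketch of the XML-option API above (node name, positions and translation are illustrative), mirroring what a sentence parser would do to force a translation over two source words:
// Sketch only -- not part of the commit.
void AddForcedTranslation(Moses2::MemPool &pool, const Moses2::System &system,
                          Moses2::InputType &input)
{
  Moses2::InputType::XMLOption *opt =
      new (pool.Allocate<Moses2::InputType::XMLOption>())
          Moses2::InputType::XMLOption(pool, "np", /*vStartPos=*/3);
  opt->phraseSize = 2;                     // covers source positions 3 and 4
  opt->prob = 1.0;
  opt->SetTranslation(pool, "Berlin");
  input.AddXMLOption(system, opt);         // also marks coverage unless XmlPassThrough
  bool overlap = input.XmlOverlap(4, 6);   // true: position 4 is covered
  (void) overlap;
}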

contrib/moses2/Jamfile
View File

@ -0,0 +1,185 @@
alias deps : ../..//z ../..//boost_iostreams ../..//boost_filesystem ../../moses/TranslationModel/CompactPT//cmph ../../moses//moses ;
lib moses2_lib :
AlignmentInfo.cpp
AlignmentInfoCollection.cpp
ArcLists.cpp
EstimatedScores.cpp
HypothesisBase.cpp
HypothesisColl.cpp
InputPathBase.cpp
InputPathsBase.cpp
InputType.cpp
ManagerBase.cpp
MemPool.cpp
Phrase.cpp
pugixml.cpp
Scores.cpp
SubPhrase.cpp
System.cpp
TargetPhrase.cpp
TranslationTask.cpp
TrellisPaths.cpp
TypeDef.cpp
Vector.cpp
Weights.cpp
Word.cpp
FF/Distortion.cpp
FF/FeatureFunction.cpp
FF/FeatureFunctions.cpp
FF/FeatureRegistry.cpp
FF/PhrasePenalty.cpp
FF/SkeletonStatefulFF.cpp
FF/SkeletonStatelessFF.cpp
FF/StatefulFeatureFunction.cpp
FF/StatelessFeatureFunction.cpp
FF/WordPenalty.cpp
FF/LexicalReordering/BidirectionalReorderingState.cpp
FF/LexicalReordering/HReorderingBackwardState.cpp
FF/LexicalReordering/HReorderingForwardState.cpp
FF/LexicalReordering/LexicalReordering.cpp
FF/LexicalReordering/LRModel.cpp
FF/LexicalReordering/LRState.cpp
FF/LexicalReordering/PhraseBasedReorderingState.cpp
FF/LexicalReordering/ReorderingStack.cpp
FF/OSM/OpSequenceModel.cpp
FF/OSM/KenOSM.cpp
FF/OSM/osmHyp.cpp
# LM/LanguageModelDALM.cpp
LM/LanguageModel.cpp
LM/KENLM.cpp
LM/KENLMBatch.cpp
LM/GPULM.cpp
TranslationModel/PhraseTable.cpp
TranslationModel/Transliteration.cpp
TranslationModel/UnknownWordPenalty.cpp
TranslationModel/Memory/PhraseTableMemory.cpp
TranslationModel/CompactPT/PhraseTableCompact.cpp
TranslationModel/CompactPT/BlockHashIndex.cpp
TranslationModel/CompactPT/CmphStringVectorAdapter.cpp
TranslationModel/CompactPT/LexicalReorderingTableCompact.cpp
TranslationModel/CompactPT/MurmurHash3.cpp
TranslationModel/CompactPT/PhraseDecoder.cpp
TranslationModel/CompactPT/TargetPhraseCollectionCache.cpp
TranslationModel/CompactPT/ThrowingFwrite.cpp
TranslationModel/ProbingPT/ProbingPT.cpp
TranslationModel/ProbingPT/hash.cpp
TranslationModel/ProbingPT/line_splitter.cpp
TranslationModel/ProbingPT/probing_hash_utils.cpp
TranslationModel/ProbingPT/querying.cpp
TranslationModel/ProbingPT/storing.cpp
TranslationModel/ProbingPT/StoreVocab.cpp
TranslationModel/ProbingPT/StoreTarget.cpp
TranslationModel/ProbingPT/vocabid.cpp
parameters/AllOptions.cpp
parameters/BookkeepingOptions.cpp
parameters/ContextParameters.cpp
parameters/CubePruningOptions.cpp
parameters/InputOptions.cpp
parameters/LMBR_Options.cpp
parameters/MBR_Options.cpp
parameters/NBestOptions.cpp
parameters/OOVHandlingOptions.cpp
parameters/OptionsBaseClass.cpp
parameters/ReorderingOptions.cpp
parameters/ReportingOptions.cpp
parameters/SearchOptions.cpp
parameters/ServerOptions.cpp
parameters/SyntaxOptions.cpp
PhraseBased/Hypothesis.cpp
PhraseBased/InputPath.cpp
PhraseBased/InputPaths.cpp
PhraseBased/Manager.cpp
PhraseBased/PhraseImpl.cpp
PhraseBased/ReorderingConstraint.cpp
PhraseBased/TargetPhrases.cpp
PhraseBased/Search.cpp
PhraseBased/Sentence.cpp
PhraseBased/TargetPhraseImpl.cpp
PhraseBased/TrellisPath.cpp
PhraseBased/Normal/Search.cpp
PhraseBased/Normal/Stack.cpp
PhraseBased/Normal/Stacks.cpp
PhraseBased/CubePruningMiniStack/Misc.cpp
PhraseBased/CubePruningMiniStack/Search.cpp
PhraseBased/CubePruningMiniStack/Stack.cpp
PhraseBased/Batch/Search.cpp
PhraseBased/Batch/Stack.cpp
PhraseBased/Batch/Stacks.cpp
# PhraseBased/CubePruningCardinalStack/Misc.cpp
# PhraseBased/CubePruningCardinalStack/Search.cpp
# PhraseBased/CubePruningCardinalStack/Stack.cpp
# PhraseBased/CubePruningBitmapStack/Misc.cpp
# PhraseBased/CubePruningBitmapStack/Search.cpp
# PhraseBased/CubePruningBitmapStack/Stack.cpp
# PhraseBased/CubePruningPerBitmap/Misc.cpp
# PhraseBased/CubePruningPerBitmap/Search.cpp
# PhraseBased/CubePruningPerBitmap/Stacks.cpp
# PhraseBased/CubePruningPerMiniStack/Misc.cpp
# PhraseBased/CubePruningPerMiniStack/Search.cpp
# PhraseBased/CubePruningPerMiniStack/Stacks.cpp
legacy/Bitmap.cpp
legacy/Bitmaps.cpp
legacy/Factor.cpp
legacy/FactorCollection.cpp
legacy/InputFileStream.cpp
legacy/Matrix.cpp
legacy/OutputFileStream.cpp
legacy/Parameter.cpp
legacy/Range.cpp
legacy/ThreadPool.cpp
legacy/Timer.cpp
legacy/Util2.cpp
SCFG/ActiveChart.cpp
SCFG/Hypothesis.cpp
SCFG/InputPath.cpp
SCFG/InputPaths.cpp
SCFG/Manager.cpp
SCFG/Misc.cpp
SCFG/PhraseImpl.cpp
SCFG/Sentence.cpp
SCFG/Stack.cpp
SCFG/Stacks.cpp
SCFG/TargetPhraseImpl.cpp
SCFG/TargetPhrases.cpp
SCFG/Word.cpp
SCFG/nbest/KBestExtractor.cpp
SCFG/nbest/NBest.cpp
SCFG/nbest/NBests.cpp
SCFG/nbest/NBestColl.cpp
server/Server.cpp
server/Translator.cpp
server/TranslationRequest.cpp
deps ;
exe moses2 : Main.cpp moses2_lib ;
if [ xmlrpc ] {
echo "Building Moses2" ;
alias programs : moses2 ;
}
else {
echo "Not building Moses2" ;
alias programs : ;
}

contrib/moses2/LM/GPULM.cpp
View File

@ -0,0 +1,249 @@
/*
* GPULM.cpp
*
* Created on: 4 Nov 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include <sstream>
#include <vector>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include "GPULM.h"
#include "../Phrase.h"
#include "../Scores.h"
#include "../System.h"
#include "../PhraseBased/Hypothesis.h"
#include "../PhraseBased/Manager.h"
#include "../PhraseBased/TargetPhraseImpl.h"
#include "util/exception.hh"
#include "../legacy/FactorCollection.h"
using namespace std;
namespace Moses2
{
struct GPULMState: public FFState
{
virtual std::string ToString() const
{
return "GPULMState";
}
virtual size_t hash() const
{
return boost::hash_value(lastWords);
}
virtual bool operator==(const FFState& other) const
{
const GPULMState &otherCast = static_cast<const GPULMState&>(other);
bool ret = lastWords == otherCast.lastWords;
return ret;
}
void SetContext(const Context &context)
{
lastWords = context;
if (lastWords.size()) {
lastWords.resize(lastWords.size() - 1);
}
}
Context lastWords;
};
/////////////////////////////////////////////////////////////////
GPULM::GPULM(size_t startInd, const std::string &line)
:StatefulFeatureFunction(startInd, line)
{
cerr << "GPULM::GPULM" << endl;
ReadParameters();
}
GPULM::~GPULM()
{
// TODO Auto-generated destructor stub
}
void GPULM::Load(System &system)
{
cerr << "GPULM::Load" << endl;
FactorCollection &fc = system.GetVocab();
m_bos = fc.AddFactor(BOS_, system, false);
m_eos = fc.AddFactor(EOS_, system, false);
FactorCollection &collection = system.GetVocab();
}
FFState* GPULM::BlankState(MemPool &pool, const System &sys) const
{
GPULMState *ret = new (pool.Allocate<GPULMState>()) GPULMState();
return ret;
}
//! return the state associated with the empty hypothesis for a given sentence
void GPULM::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const
{
GPULMState &stateCast = static_cast<GPULMState&>(state);
stateCast.lastWords.push_back(m_bos);
}
void GPULM::EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
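  // Note: scoring is not wired up yet -- the Score() calls below are commented
  // out, so this loop only builds the n-gram context and adds nothing to 'scores'.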
if (targetPhrase.GetSize() == 0) {
return;
}
SCORE score = 0;
SCORE nonFullScore = 0;
Context context;
// context.push_back(m_bos);
context.reserve(m_order);
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Factor *factor = targetPhrase[i][m_factorType];
ShiftOrPush(context, factor);
if (context.size() == m_order) {
//std::pair<SCORE, void*> fromScoring = Score(context);
//score += fromScoring.first;
}
else {
//std::pair<SCORE, void*> fromScoring = Score(context);
//nonFullScore += fromScoring.first;
}
}
}
void GPULM::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
UTIL_THROW2("Not implemented");
}
void GPULM::EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const
{
UTIL_THROW2("Not implemented");
}
void GPULM::SetParameter(const std::string& key,
const std::string& value)
{
//cerr << "key=" << key << " " << value << endl;
if (key == "path") {
m_path = value;
}
else if (key == "order") {
m_order = Scan<size_t>(value);
}
else if (key == "factor") {
m_factorType = Scan<FactorType>(value);
}
else {
StatefulFeatureFunction::SetParameter(key, value);
}
//cerr << "SetParameter done" << endl;
}
void GPULM::EvaluateWhenAppliedBatch(
const System &system,
const Batch &batch) const
{
// create list of ngrams
std::vector<std::pair<Hypothesis*, Context> > contexts;
for (size_t i = 0; i < batch.size(); ++i) {
Hypothesis *hypo = batch[i];
CreateNGram(contexts, *hypo);
}
// score ngrams
for (size_t i = 0; i < contexts.size(); ++i) {
const Context &context = contexts[i].second;
Hypothesis *hypo = contexts[i].first;
SCORE score = Score(context);
Scores &scores = hypo->GetScores();
scores.PlusEquals(system, *this, score);
}
}
void GPULM::CreateNGram(std::vector<std::pair<Hypothesis*, Context> > &contexts, Hypothesis &hypo) const
{
const TargetPhrase<Moses2::Word> &tp = hypo.GetTargetPhrase();
if (tp.GetSize() == 0) {
return;
}
const Hypothesis *prevHypo = hypo.GetPrevHypo();
assert(prevHypo);
const FFState *prevState = prevHypo->GetState(GetStatefulInd());
assert(prevState);
const GPULMState &prevStateCast = static_cast<const GPULMState&>(*prevState);
Context context = prevStateCast.lastWords;
context.reserve(m_order);
for (size_t i = 0; i < tp.GetSize(); ++i) {
const Word &word = tp[i];
const Factor *factor = word[m_factorType];
ShiftOrPush(context, factor);
std::pair<Hypothesis*, Context> ele(&hypo, context);
contexts.push_back(ele);
}
FFState *state = hypo.GetState(GetStatefulInd());
GPULMState &stateCast = static_cast<GPULMState&>(*state);
stateCast.SetContext(context);
}
void GPULM::ShiftOrPush(std::vector<const Factor*> &context,
const Factor *factor) const
{
if (context.size() < m_order) {
context.resize(context.size() + 1);
}
assert(context.size());
for (size_t i = context.size() - 1; i > 0; --i) {
context[i] = context[i - 1];
}
context[0] = factor;
}
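// Worked trace of ShiftOrPush (illustrative), with m_order == 3:
//   push a -> [a]
//   push b -> [b, a]
//   push c -> [c, b, a]
//   push d -> [d, c, b]   (context is full; the oldest factor drops off the end)
// i.e. context[0] always holds the most recent factor.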
SCORE GPULM::Score(const Context &context) const
{
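  // Placeholder score -- real (batched GPU) LM scoring is not implemented yet.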
return 444;
}
void GPULM::EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const
{
UTIL_THROW2("Not implemented");
}
}

contrib/moses2/LM/GPULM.h
View File

@ -0,0 +1,91 @@
/*
 * GPULM.h
*
* Created on: 4 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/shared_ptr.hpp>
#include <boost/bind.hpp>
#include <boost/thread.hpp>
#include <pthread.h>
#include "../FF/StatefulFeatureFunction.h"
#include "lm/model.hh"
#include "../legacy/Factor.h"
#include "../legacy/Util2.h"
#include "../Word.h"
#include "../TypeDef.h"
namespace Moses2
{
class Word;
class GPULM: public StatefulFeatureFunction
{
public:
GPULM(size_t startInd, const std::string &line);
virtual ~GPULM();
virtual void Load(System &system);
void SetParameter(const std::string& key,
const std::string& value);
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
//! return the state associated with the empty hypothesis for a given sentence
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenAppliedBatch(
const System &system,
const Batch &batch) const;
protected:
std::string m_path;
FactorType m_factorType;
util::LoadMethod m_load_method;
const Factor *m_bos;
const Factor *m_eos;
size_t m_order;
inline lm::WordIndex TranslateID(const Word &word) const
{
std::size_t factor = word[m_factorType]->GetId();
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
}
std::vector<lm::WordIndex> m_lmIdLookup;
// batch
void CreateNGram(std::vector<std::pair<Hypothesis*, Context> > &contexts, Hypothesis &hypo) const;
void ShiftOrPush(std::vector<const Factor*> &context,
const Factor *factor) const;
SCORE Score(const Context &context) const;
};
}

contrib/moses2/LM/KENLM.cpp
View File

@ -0,0 +1,601 @@
/*
* KENLM.cpp
*
* Created on: 4 Nov 2015
* Author: hieu
*/
#include <sstream>
#include <vector>
#include "KENLM.h"
#include "../Phrase.h"
#include "../Scores.h"
#include "../System.h"
#include "../PhraseBased/Hypothesis.h"
#include "../PhraseBased/Manager.h"
#include "../PhraseBased/TargetPhraseImpl.h"
#include "lm/state.hh"
#include "lm/left.hh"
#include "util/exception.hh"
#include "util/tokenize_piece.hh"
#include "util/string_stream.hh"
#include "../legacy/FactorCollection.h"
#include "../SCFG/TargetPhraseImpl.h"
#include "../SCFG/Hypothesis.h"
#include "../SCFG/Manager.h"
using namespace std;
namespace Moses2
{
struct KenLMState: public FFState
{
lm::ngram::State state;
virtual size_t hash() const
{
size_t ret = hash_value(state);
return ret;
}
virtual bool operator==(const FFState& o) const
{
const KenLMState &other = static_cast<const KenLMState &>(o);
bool ret = state == other.state;
return ret;
}
virtual std::string ToString() const
{
stringstream ss;
for (size_t i = 0; i < state.Length(); ++i) {
ss << state.words[i] << " ";
}
return ss.str();
}
};
/////////////////////////////////////////////////////////////////
class LanguageModelChartStateKenLM : public FFState
{
public:
LanguageModelChartStateKenLM() {}
const lm::ngram::ChartState &GetChartState() const {
return m_state;
}
lm::ngram::ChartState &GetChartState() {
return m_state;
}
size_t hash() const {
size_t ret = hash_value(m_state);
return ret;
}
virtual bool operator==(const FFState& o) const {
const LanguageModelChartStateKenLM &other = static_cast<const LanguageModelChartStateKenLM &>(o);
bool ret = m_state == other.m_state;
return ret;
}
virtual std::string ToString() const
{
return "LanguageModelChartStateKenLM";
}
private:
lm::ngram::ChartState m_state;
};
/////////////////////////////////////////////////////////////////
class MappingBuilder: public lm::EnumerateVocab
{
public:
MappingBuilder(FactorCollection &factorCollection, System &system,
std::vector<lm::WordIndex> &mapping) :
m_factorCollection(factorCollection), m_system(system), m_mapping(mapping)
{
}
void Add(lm::WordIndex index, const StringPiece &str)
{
std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId();
if (m_mapping.size() <= factorId) {
// 0 is <unk> :-)
m_mapping.resize(factorId + 1);
}
m_mapping[factorId] = index;
}
private:
FactorCollection &m_factorCollection;
std::vector<lm::WordIndex> &m_mapping;
System &m_system;
};
/////////////////////////////////////////////////////////////////
template<class Model>
KENLM<Model>::KENLM(size_t startInd, const std::string &line,
const std::string &file, FactorType factorType,
util::LoadMethod load_method) :
StatefulFeatureFunction(startInd, line), m_path(file), m_factorType(
factorType), m_load_method(load_method)
{
ReadParameters();
}
template<class Model>
KENLM<Model>::~KENLM()
{
// TODO Auto-generated destructor stub
}
template<class Model>
void KENLM<Model>::Load(System &system)
{
FactorCollection &fc = system.GetVocab();
m_bos = fc.AddFactor(BOS_, system, false);
m_eos = fc.AddFactor(EOS_, system, false);
lm::ngram::Config config;
config.messages = NULL;
FactorCollection &collection = system.GetVocab();
MappingBuilder builder(collection, system, m_lmIdLookup);
config.enumerate_vocab = &builder;
config.load_method = m_load_method;
m_ngram.reset(new Model(m_path.c_str(), config));
}
template<class Model>
FFState* KENLM<Model>::BlankState(MemPool &pool, const System &sys) const
{
FFState *ret;
if (sys.isPb) {
ret = new (pool.Allocate<KenLMState>()) KenLMState();
}
else {
ret = new (pool.Allocate<LanguageModelChartStateKenLM>()) LanguageModelChartStateKenLM();
}
return ret;
}
//! return the state associated with the empty hypothesis for a given sentence
template<class Model>
void KENLM<Model>::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const
{
KenLMState &stateCast = static_cast<KenLMState&>(state);
stateCast.state = m_ngram->BeginSentenceState();
}
template<class Model>
void KENLM<Model>::EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
// contains factors used by this LM
float fullScore, nGramScore;
size_t oovCount;
CalcScore(targetPhrase, fullScore, nGramScore, oovCount);
float estimateScore = fullScore - nGramScore;
bool GetLMEnableOOVFeature = false;
if (GetLMEnableOOVFeature) {
float scoresVec[2], estimateScoresVec[2];
scoresVec[0] = nGramScore;
scoresVec[1] = oovCount;
scores.PlusEquals(system, *this, scoresVec);
estimateScoresVec[0] = estimateScore;
estimateScoresVec[1] = 0;
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
estimateScoresVec);
estimatedScore += weightedScore;
}
else {
scores.PlusEquals(system, *this, nGramScore);
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
estimateScore);
estimatedScore += weightedScore;
}
}
template<class Model>
void KENLM<Model>::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
// contains factors used by this LM
float fullScore, nGramScore;
size_t oovCount;
CalcScore(targetPhrase, fullScore, nGramScore, oovCount);
//float estimateScore = fullScore - nGramScore;
// all LM scores are estimated
float estimateScore = fullScore;
nGramScore = 0;
bool GetLMEnableOOVFeature = false;
if (GetLMEnableOOVFeature) {
float scoresVec[2], estimateScoresVec[2];
scoresVec[0] = nGramScore;
scoresVec[1] = oovCount;
scores.PlusEquals(system, *this, scoresVec);
estimateScoresVec[0] = estimateScore;
estimateScoresVec[1] = 0;
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
estimateScoresVec);
estimatedScore += weightedScore;
}
else {
scores.PlusEquals(system, *this, nGramScore);
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
estimateScore);
estimatedScore += weightedScore;
}
}
template<class Model>
void KENLM<Model>::EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const
{
KenLMState &stateCast = static_cast<KenLMState&>(state);
const System &system = mgr.system;
const lm::ngram::State &in_state =
static_cast<const KenLMState&>(prevState).state;
if (!hypo.GetTargetPhrase().GetSize()) {
stateCast.state = in_state;
return;
}
const std::size_t begin = hypo.GetCurrTargetWordsRange().GetStartPos();
//[begin, end) in STL-like fashion.
const std::size_t end = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
const std::size_t adjust_end = std::min(end, begin + m_ngram->Order() - 1);
std::size_t position = begin;
typename Model::State aux_state;
typename Model::State *state0 = &stateCast.state, *state1 = &aux_state;
float score = m_ngram->Score(in_state, TranslateID(hypo.GetWord(position)),
*state0);
++position;
for (; position < adjust_end; ++position) {
score += m_ngram->Score(*state0, TranslateID(hypo.GetWord(position)),
*state1);
std::swap(state0, state1);
}
if (hypo.GetBitmap().IsComplete()) {
// Score end of sentence.
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
score += m_ngram->FullScoreForgotState(&indices.front(), last,
m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob;
}
else if (adjust_end < end) {
// Get state after adding a long phrase.
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
m_ngram->GetState(&indices.front(), last, stateCast.state);
}
else if (state0 != &stateCast.state) {
// Short enough phrase that we can just reuse the state.
stateCast.state = *state0;
}
score = TransformLMScore(score);
bool OOVFeatureEnabled = false;
if (OOVFeatureEnabled) {
std::vector<float> scoresVec(2);
scoresVec[0] = score;
scoresVec[1] = 0.0;
scores.PlusEquals(system, *this, scoresVec);
}
else {
scores.PlusEquals(system, *this, score);
}
}
template<class Model>
void KENLM<Model>::CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore,
float &ngramScore, std::size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;
oovCount = 0;
if (!phrase.GetSize()) return;
lm::ngram::ChartState discarded_sadly;
lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);
size_t position;
if (m_bos == phrase[0][m_factorType]) {
scorer.BeginSentence();
position = 1;
}
else {
position = 0;
}
size_t ngramBoundary = m_ngram->Order() - 1;
size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
for (; position < end_loop; ++position) {
const Word &word = phrase[position];
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
if (!index) ++oovCount;
}
float before_boundary = fullScore + scorer.Finish();
for (; position < phrase.GetSize(); ++position) {
const Word &word = phrase[position];
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
if (!index) ++oovCount;
}
fullScore += scorer.Finish();
ngramScore = TransformLMScore(fullScore - before_boundary);
fullScore = TransformLMScore(fullScore);
}
template<class Model>
void KENLM<Model>::CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore,
float &ngramScore, std::size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;
oovCount = 0;
if (!phrase.GetSize()) return;
lm::ngram::ChartState discarded_sadly;
lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);
size_t position;
if (m_bos == phrase[0][m_factorType]) {
scorer.BeginSentence();
position = 1;
} else {
position = 0;
}
size_t ngramBoundary = m_ngram->Order() - 1;
size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
for (; position < end_loop; ++position) {
const SCFG::Word &word = phrase[position];
if (word.isNonTerminal) {
fullScore += scorer.Finish();
scorer.Reset();
} else {
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
if (!index) ++oovCount;
}
}
float before_boundary = fullScore + scorer.Finish();
for (; position < phrase.GetSize(); ++position) {
const SCFG::Word &word = phrase[position];
if (word.isNonTerminal) {
fullScore += scorer.Finish();
scorer.Reset();
} else {
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
if (!index) ++oovCount;
}
}
fullScore += scorer.Finish();
ngramScore = TransformLMScore(fullScore - before_boundary);
fullScore = TransformLMScore(fullScore);
}
// Convert last words of hypothesis into vocab ids, returning an end pointer.
template<class Model>
lm::WordIndex *KENLM<Model>::LastIDs(const Hypothesis &hypo,
lm::WordIndex *indices) const
{
lm::WordIndex *index = indices;
lm::WordIndex *end = indices + m_ngram->Order() - 1;
int position = hypo.GetCurrTargetWordsRange().GetEndPos();
for (;; ++index, --position) {
if (index == end) return index;
if (position == -1) {
*index = m_ngram->GetVocabulary().BeginSentence();
return index + 1;
}
*index = TranslateID(hypo.GetWord(position));
}
}
template<class Model>
void KENLM<Model>::EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const
{
LanguageModelChartStateKenLM &newState = static_cast<LanguageModelChartStateKenLM&>(state);
lm::ngram::RuleScore<Model> ruleScore(*m_ngram, newState.GetChartState());
const SCFG::TargetPhraseImpl &target = hypo.GetTargetPhrase();
const AlignmentInfo::NonTermIndexMap &nonTermIndexMap =
target.GetAlignNonTerm().GetNonTermIndexMap();
const size_t size = target.GetSize();
size_t phrasePos = 0;
// Special cases for first word.
if (size) {
const SCFG::Word &word = target[0];
if (word[m_factorType] == m_bos) {
// Begin of sentence
ruleScore.BeginSentence();
phrasePos++;
} else if (word.isNonTerminal) {
// Non-terminal is first so we can copy instead of rescoring.
const SCFG::Hypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetState(featureID))->GetChartState();
ruleScore.BeginNonTerminal(prevState);
phrasePos++;
}
}
for (; phrasePos < size; phrasePos++) {
const SCFG::Word &word = target[phrasePos];
if (word.isNonTerminal) {
const SCFG::Hypothesis *prevHypo = hypo.GetPrevHypo(nonTermIndexMap[phrasePos]);
const lm::ngram::ChartState &prevState = static_cast<const LanguageModelChartStateKenLM*>(prevHypo->GetState(featureID))->GetChartState();
ruleScore.NonTerminal(prevState);
} else {
ruleScore.Terminal(TranslateID(word));
}
}
float score = ruleScore.Finish();
score = TransformLMScore(score);
// take out score from loading. This needs reworking
//score -= target.GetScores().GetScores(*this)[0];
bool OOVFeatureEnabled = false;
if (OOVFeatureEnabled) {
std::vector<float> scoresVec(2);
scoresVec[0] = score;
scoresVec[1] = 0.0;
scores.PlusEquals(mgr.system, *this, scoresVec);
} else {
scores.PlusEquals(mgr.system, *this, score);
}
}
///////////////////////////////////////////////////////////////////////////
/* Instantiate LanguageModelKen here. Tells the compiler to generate code
* for the instantiations' non-inline member functions in this file.
* Otherwise, depending on the compiler, those functions may not be present
* at link time.
*/
template class KENLM<lm::ngram::ProbingModel> ;
template class KENLM<lm::ngram::RestProbingModel> ;
template class KENLM<lm::ngram::TrieModel> ;
template class KENLM<lm::ngram::ArrayTrieModel> ;
template class KENLM<lm::ngram::QuantTrieModel> ;
template class KENLM<lm::ngram::QuantArrayTrieModel> ;
FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig)
{
FactorType factorType = 0;
string filePath;
util::LoadMethod load_method = util::POPULATE_OR_READ;
util::TokenIter<util::SingleCharacter, true> argument(lineOrig, ' ');
++argument; // KENLM
util::StringStream line;
line << "KENLM";
for (; argument; ++argument) {
const char *equals = std::find(argument->data(),
argument->data() + argument->size(), '=');
UTIL_THROW_IF2(equals == argument->data() + argument->size(),
"Expected = in KenLM argument " << *argument);
StringPiece name(argument->data(), equals - argument->data());
StringPiece value(equals + 1,
argument->data() + argument->size() - equals - 1);
if (name == "factor") {
factorType = boost::lexical_cast<FactorType>(value);
}
else if (name == "order") {
// Ignored
}
else if (name == "path") {
filePath.assign(value.data(), value.size());
}
else if (name == "lazyken") {
// deprecated: use load instead.
load_method =
boost::lexical_cast<bool>(value) ?
util::LAZY : util::POPULATE_OR_READ;
}
else if (name == "load") {
if (value == "lazy") {
load_method = util::LAZY;
}
else if (value == "populate_or_lazy") {
load_method = util::POPULATE_OR_LAZY;
}
else if (value == "populate_or_read" || value == "populate") {
load_method = util::POPULATE_OR_READ;
}
else if (value == "read") {
load_method = util::READ;
}
else if (value == "parallel_read") {
load_method = util::PARALLEL_READ;
}
else {
UTIL_THROW2("Unknown KenLM load method " << value);
}
}
else {
// pass to base class to interpret
line << " " << name << "=" << value;
}
}
return ConstructKenLM(startInd, line.str(), filePath, factorType, load_method);
}
FeatureFunction *ConstructKenLM(size_t startInd, const std::string &line,
const std::string &file, FactorType factorType,
util::LoadMethod load_method)
{
lm::ngram::ModelType model_type;
if (lm::ngram::RecognizeBinary(file.c_str(), model_type)) {
switch (model_type) {
case lm::ngram::PROBING:
return new KENLM<lm::ngram::ProbingModel>(startInd, line, file,
factorType, load_method);
case lm::ngram::REST_PROBING:
return new KENLM<lm::ngram::RestProbingModel>(startInd, line, file,
factorType, load_method);
case lm::ngram::TRIE:
return new KENLM<lm::ngram::TrieModel>(startInd, line, file, factorType,
load_method);
case lm::ngram::QUANT_TRIE:
return new KENLM<lm::ngram::QuantTrieModel>(startInd, line, file,
factorType, load_method);
case lm::ngram::ARRAY_TRIE:
return new KENLM<lm::ngram::ArrayTrieModel>(startInd, line, file,
factorType, load_method);
case lm::ngram::QUANT_ARRAY_TRIE:
return new KENLM<lm::ngram::QuantArrayTrieModel>(startInd, line, file,
factorType, load_method);
default:
      UTIL_THROW2("Unrecognized kenlm model type " << model_type);
}
}
else {
return new KENLM<lm::ngram::ProbingModel>(startInd, line, file, factorType,
load_method);
}
}
}
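A hedged sketch of driving the factory above from a feature line (the path is illustrative); keys other than factor, order, path, lazyken and load are passed through to the base class via the rebuilt line:
// Sketch only -- not part of the commit.
Moses2::FeatureFunction *lm = Moses2::ConstructKenLM(
    /*startInd=*/0,
    "KENLM factor=0 order=5 path=/path/to/lm.binlm load=populate_or_read");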

contrib/moses2/LM/KENLM.h
View File

@ -0,0 +1,88 @@
/*
* KENLM.h
*
* Created on: 4 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/shared_ptr.hpp>
#include "../FF/StatefulFeatureFunction.h"
#include "lm/model.hh"
#include "../legacy/Factor.h"
#include "../legacy/Util2.h"
#include "../Word.h"
namespace Moses2
{
class Word;
FeatureFunction *ConstructKenLM(size_t startInd, const std::string &lineOrig);
FeatureFunction *ConstructKenLM(size_t startInd, const std::string &line,
const std::string &file, FactorType factorType,
util::LoadMethod load_method);
template<class Model>
class KENLM: public StatefulFeatureFunction
{
public:
KENLM(size_t startInd, const std::string &line, const std::string &file,
FactorType factorType, util::LoadMethod load_method);
virtual ~KENLM();
virtual void Load(System &system);
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
//! return the state associated with the empty hypothesis for a given sentence
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const;
protected:
std::string m_path;
FactorType m_factorType;
util::LoadMethod m_load_method;
const Factor *m_bos;
const Factor *m_eos;
boost::shared_ptr<Model> m_ngram;
void CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore, float &ngramScore,
std::size_t &oovCount) const;
void CalcScore(const Phrase<SCFG::Word> &phrase, float &fullScore, float &ngramScore,
std::size_t &oovCount) const;
inline lm::WordIndex TranslateID(const Word &word) const
{
std::size_t factor = word[m_factorType]->GetId();
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
}
// Convert last words of hypothesis into vocab ids, returning an end pointer.
lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const;
std::vector<lm::WordIndex> m_lmIdLookup;
};
}

View File

@ -0,0 +1,390 @@
/*
* KENLMBatch.cpp
*
* Created on: 4 Nov 2015
* Author: hieu
*/
#include <boost/foreach.hpp>
#include <sstream>
#include <vector>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <unistd.h>
#include "KENLMBatch.h"
#include "../Phrase.h"
#include "../Scores.h"
#include "../System.h"
#include "../PhraseBased/Hypothesis.h"
#include "../PhraseBased/Manager.h"
#include "../PhraseBased/TargetPhraseImpl.h"
#include "lm/state.hh"
#include "lm/left.hh"
#include "util/exception.hh"
#include "util/tokenize_piece.hh"
#include "util/string_stream.hh"
#include "../legacy/FactorCollection.h"
using namespace std;
namespace Moses2
{
struct KenLMState: public FFState
{
lm::ngram::State state;
virtual size_t hash() const
{
size_t ret = hash_value(state);
return ret;
}
virtual bool operator==(const FFState& o) const
{
const KenLMState &other = static_cast<const KenLMState &>(o);
bool ret = state == other.state;
return ret;
}
virtual std::string ToString() const
{
stringstream ss;
for (size_t i = 0; i < state.Length(); ++i) {
ss << state.words[i] << " ";
}
return ss.str();
}
};
/////////////////////////////////////////////////////////////////
class MappingBuilder: public lm::EnumerateVocab
{
public:
MappingBuilder(FactorCollection &factorCollection, System &system,
std::vector<lm::WordIndex> &mapping) :
m_factorCollection(factorCollection), m_system(system), m_mapping(mapping)
{
}
void Add(lm::WordIndex index, const StringPiece &str)
{
std::size_t factorId = m_factorCollection.AddFactor(str, m_system, false)->GetId();
if (m_mapping.size() <= factorId) {
// 0 is <unk> :-)
m_mapping.resize(factorId + 1);
}
m_mapping[factorId] = index;
}
private:
FactorCollection &m_factorCollection;
std::vector<lm::WordIndex> &m_mapping;
System &m_system;
};
/////////////////////////////////////////////////////////////////
KENLMBatch::KENLMBatch(size_t startInd, const std::string &line)
:StatefulFeatureFunction(startInd, line)
,m_numHypos(0)
{
cerr << "KENLMBatch::KENLMBatch" << endl;
ReadParameters();
}
KENLMBatch::~KENLMBatch()
{
// TODO Auto-generated destructor stub
}
void KENLMBatch::Load(System &system)
{
cerr << "KENLMBatch::Load" << endl;
FactorCollection &fc = system.GetVocab();
m_bos = fc.AddFactor(BOS_, system, false);
m_eos = fc.AddFactor(EOS_, system, false);
lm::ngram::Config config;
config.messages = NULL;
FactorCollection &collection = system.GetVocab();
MappingBuilder builder(collection, system, m_lmIdLookup);
config.enumerate_vocab = &builder;
config.load_method = m_load_method;
m_ngram.reset(new Model(m_path.c_str(), config));
}
FFState* KENLMBatch::BlankState(MemPool &pool, const System &sys) const
{
KenLMState *ret = new (pool.Allocate<KenLMState>()) KenLMState();
return ret;
}
//! return the state associated with the empty hypothesis for a given sentence
void KENLMBatch::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const
{
KenLMState &stateCast = static_cast<KenLMState&>(state);
stateCast.state = m_ngram->BeginSentenceState();
}
void KENLMBatch::EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
// contains factors used by this LM
float fullScore, nGramScore;
size_t oovCount;
CalcScore(targetPhrase, fullScore, nGramScore, oovCount);
float estimateScore = fullScore - nGramScore;
bool GetLMEnableOOVFeature = false;
if (GetLMEnableOOVFeature) {
float scoresVec[2], estimateScoresVec[2];
scoresVec[0] = nGramScore;
scoresVec[1] = oovCount;
scores.PlusEquals(system, *this, scoresVec);
estimateScoresVec[0] = estimateScore;
estimateScoresVec[1] = 0;
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
estimateScoresVec);
estimatedScore += weightedScore;
}
else {
scores.PlusEquals(system, *this, nGramScore);
SCORE weightedScore = Scores::CalcWeightedScore(system, *this,
estimateScore);
estimatedScore += weightedScore;
}
}
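// Design note: nGramScore covers only the n-grams whose full context lies inside
// the phrase and is added to the real feature score; fullScore - nGramScore covers
// the prefix words whose history is still unknown, so it only feeds the future-cost
// estimate and is recomputed with the true history in EvaluateWhenApplied.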
void KENLMBatch::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
void KENLMBatch::EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const
{
KenLMState &stateCast = static_cast<KenLMState&>(state);
const System &system = mgr.system;
const lm::ngram::State &in_state =
static_cast<const KenLMState&>(prevState).state;
if (!hypo.GetTargetPhrase().GetSize()) {
stateCast.state = in_state;
return;
}
const std::size_t begin = hypo.GetCurrTargetWordsRange().GetStartPos();
//[begin, end) in STL-like fashion.
const std::size_t end = hypo.GetCurrTargetWordsRange().GetEndPos() + 1;
const std::size_t adjust_end = std::min(end, begin + m_ngram->Order() - 1);
std::size_t position = begin;
typename Model::State aux_state;
typename Model::State *state0 = &stateCast.state, *state1 = &aux_state;
float score = m_ngram->Score(in_state, TranslateID(hypo.GetWord(position)),
*state0);
++position;
for (; position < adjust_end; ++position) {
score += m_ngram->Score(*state0, TranslateID(hypo.GetWord(position)),
*state1);
std::swap(state0, state1);
}
if (hypo.GetBitmap().IsComplete()) {
// Score end of sentence.
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
score += m_ngram->FullScoreForgotState(&indices.front(), last,
m_ngram->GetVocabulary().EndSentence(), stateCast.state).prob;
}
else if (adjust_end < end) {
// Get state after adding a long phrase.
std::vector<lm::WordIndex> indices(m_ngram->Order() - 1);
const lm::WordIndex *last = LastIDs(hypo, &indices.front());
m_ngram->GetState(&indices.front(), last, stateCast.state);
}
else if (state0 != &stateCast.state) {
// Short enough phrase that we can just reuse the state.
stateCast.state = *state0;
}
score = TransformLMScore(score);
bool OOVFeatureEnabled = false;
if (OOVFeatureEnabled) {
std::vector<float> scoresVec(2);
scoresVec[0] = score;
scoresVec[1] = 0.0;
scores.PlusEquals(system, *this, scoresVec);
}
else {
scores.PlusEquals(system, *this, score);
}
}
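// Design note: only the first order-1 words of the phrase are rescored above,
// because only their context reaches back into the previous hypothesis; later
// words already received their full-order score in EvaluateInIsolation, and
// rescoring them here would double-count.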
void KENLMBatch::CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore,
float &ngramScore, std::size_t &oovCount) const
{
fullScore = 0;
ngramScore = 0;
oovCount = 0;
if (!phrase.GetSize()) return;
lm::ngram::ChartState discarded_sadly;
lm::ngram::RuleScore<Model> scorer(*m_ngram, discarded_sadly);
size_t position;
if (m_bos == phrase[0][m_factorType]) {
scorer.BeginSentence();
position = 1;
}
else {
position = 0;
}
size_t ngramBoundary = m_ngram->Order() - 1;
size_t end_loop = std::min(ngramBoundary, phrase.GetSize());
for (; position < end_loop; ++position) {
const Word &word = phrase[position];
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
if (!index) ++oovCount;
}
float before_boundary = fullScore + scorer.Finish();
for (; position < phrase.GetSize(); ++position) {
const Word &word = phrase[position];
lm::WordIndex index = TranslateID(word);
scorer.Terminal(index);
if (!index) ++oovCount;
}
fullScore += scorer.Finish();
ngramScore = TransformLMScore(fullScore - before_boundary);
fullScore = TransformLMScore(fullScore);
}
// Convert last words of hypothesis into vocab ids, returning an end pointer.
lm::WordIndex *KENLMBatch::LastIDs(const Hypothesis &hypo,
lm::WordIndex *indices) const
{
lm::WordIndex *index = indices;
lm::WordIndex *end = indices + m_ngram->Order() - 1;
int position = hypo.GetCurrTargetWordsRange().GetEndPos();
for (;; ++index, --position) {
if (index == end) return index;
if (position == -1) {
*index = m_ngram->GetVocabulary().BeginSentence();
return index + 1;
}
*index = TranslateID(hypo.GetWord(position));
}
}
void KENLMBatch::SetParameter(const std::string& key,
const std::string& value)
{
//cerr << "key=" << key << " " << value << endl;
if (key == "path") {
m_path = value;
}
else if (key == "order") {
// ignore
}
else if (key == "factor") {
m_factorType = Scan<FactorType>(value);
}
else if (key == "lazyken") {
m_load_method =
boost::lexical_cast<bool>(value) ?
util::LAZY : util::POPULATE_OR_READ;
}
else if (key == "load") {
if (value == "lazy") {
m_load_method = util::LAZY;
}
else if (value == "populate_or_lazy") {
m_load_method = util::POPULATE_OR_LAZY;
}
else if (value == "populate_or_read" || value == "populate") {
m_load_method = util::POPULATE_OR_READ;
}
else if (value == "read") {
m_load_method = util::READ;
}
else if (value == "parallel_read") {
m_load_method = util::PARALLEL_READ;
}
else {
UTIL_THROW2("Unknown KenLM load method " << value);
}
}
else {
StatefulFeatureFunction::SetParameter(key, value);
}
//cerr << "SetParameter done" << endl;
}
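// Illustrative only (not part of this commit): a feature line exercising these
// parameters in a moses.ini-style configuration might look like
//
//   KENLMBatch name=LM0 factor=0 order=5 path=/path/to/lm.probing load=populate
//
// "lazyken" is accepted as a boolean alternative to "load" for compatibility
// with older configurations.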
void KENLMBatch::EvaluateWhenAppliedBatch(
const Batch &batch) const
{
{
// write lock
boost::unique_lock<boost::shared_mutex> lock(m_accessLock);
m_batches.push_back(&batch);
m_numHypos += batch.size();
}
//cerr << "m_numHypos=" << m_numHypos << endl;
if (m_numHypos > 0) {
// process batch
EvaluateWhenAppliedBatch();
m_batches.clear();
m_numHypos = 0;
m_threadNeeded.notify_all();
}
else {
boost::mutex::scoped_lock lock(m_mutex);
m_threadNeeded.wait(lock);
}
}
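// Note: with the threshold at "> 0", every non-empty batch is evaluated as soon
// as it arrives; raising the threshold would make callers block on m_threadNeeded
// until enough hypotheses have accumulated to amortise the LM lookups.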
void KENLMBatch::EvaluateWhenAppliedBatch() const
{
BOOST_FOREACH(const Batch *batch, m_batches) {
//cerr << "batch=" << batch->size() << endl;
BOOST_FOREACH(Hypothesis *hypo, *batch) {
hypo->EvaluateWhenApplied(*this);
}
}
}
void KENLMBatch::EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const
{
UTIL_THROW2("Not implemented");
}
}

View File

@ -0,0 +1,101 @@
/*
* KENLMBatch.h
*
* Created on: 4 Nov 2015
* Author: hieu
*/
#pragma once
#include <boost/shared_ptr.hpp>
#include <boost/bind.hpp>
#include <boost/thread.hpp>
#include <pthread.h>
#include "../FF/StatefulFeatureFunction.h"
#include "lm/model.hh"
#include "../legacy/Factor.h"
#include "../legacy/Util2.h"
#include "../Word.h"
#include "../TypeDef.h"
namespace Moses2
{
class Word;
class KENLMBatch: public StatefulFeatureFunction
{
public:
KENLMBatch(size_t startInd, const std::string &line);
virtual ~KENLMBatch();
virtual void Load(System &system);
void SetParameter(const std::string& key,
const std::string& value);
virtual FFState* BlankState(MemPool &pool, const System &sys) const;
//! return the state associated with the empty hypothesis for a given sentence
virtual void EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<Moses2::Word> &source,
const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void
EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const;
virtual void EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const;
virtual void EvaluateWhenAppliedBatch(
const Batch &batch) const;
protected:
std::string m_path;
FactorType m_factorType;
util::LoadMethod m_load_method;
const Factor *m_bos;
const Factor *m_eos;
typedef lm::ngram::ProbingModel Model;
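// Note: the model type is fixed to KenLM's probing hash format here, so binary
// LMs built in other formats (e.g. trie) are presumably not usable with this
// feature without changing the typedef.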
boost::shared_ptr<Model> m_ngram;
void CalcScore(const Phrase<Moses2::Word> &phrase, float &fullScore, float &ngramScore,
std::size_t &oovCount) const;
inline lm::WordIndex TranslateID(const Word &word) const
{
std::size_t factor = word[m_factorType]->GetId();
return (factor >= m_lmIdLookup.size() ? 0 : m_lmIdLookup[factor]);
}
// Convert last words of hypothesis into vocab ids, returning an end pointer.
lm::WordIndex *LastIDs(const Hypothesis &hypo, lm::WordIndex *indices) const;
std::vector<lm::WordIndex> m_lmIdLookup;
// batch
mutable std::vector<const Batch*> m_batches;
mutable size_t m_numHypos;
mutable boost::shared_mutex m_accessLock;
mutable boost::mutex m_mutex;
mutable boost::condition_variable m_threadNeeded;
void EvaluateWhenAppliedBatch() const;
};
}

View File

@ -0,0 +1,335 @@
/*
* LanguageModel.cpp
*
* Created on: 29 Oct 2015
* Author: hieu
*/
#include <vector>
#include "LanguageModel.h"
#include "../Phrase.h"
#include "../System.h"
#include "../PhraseBased/Manager.h"
#include "../PhraseBased/Hypothesis.h"
#include "../PhraseBased/TargetPhraseImpl.h"
#include "../FF/PointerState.h"
#include "../legacy/Util2.h"
#include "../legacy/InputFileStream.h"
#include "../legacy/Bitmap.h"
#include "../legacy/Util2.h"
using namespace std;
namespace Moses2
{
struct LMState: public PointerState
{
LMState() :
PointerState()
{
// uninitialised
}
void Set(MemPool &pool, void *lms, const std::vector<const Factor*> &context)
{
lmstate = lms;
numWords = context.size();
lastWords = (const Factor**) pool.Allocate(
sizeof(const Factor*) * numWords);
for (size_t i = 0; i < numWords; ++i) {
lastWords[i] = context[i];
}
}
void Init(MemPool &pool, const Factor *factor)
{
lmstate = NULL;
numWords = 1;
lastWords = (const Factor**) pool.Allocate(sizeof(const Factor*));
lastWords[0] = factor;
}
size_t numWords;
const Factor** lastWords;
};
////////////////////////////////////////////////////////////////////////////////////////
LanguageModel::LanguageModel(size_t startInd, const std::string &line) :
StatefulFeatureFunction(startInd, line), m_oov(-100)
{
ReadParameters();
}
LanguageModel::~LanguageModel()
{
// TODO Auto-generated destructor stub
}
void LanguageModel::Load(System &system)
{
FactorCollection &fc = system.GetVocab();
m_bos = fc.AddFactor(BOS_, system, false);
m_eos = fc.AddFactor(EOS_, system, false);
InputFileStream infile(m_path);
size_t lineNum = 0;
string line;
while (getline(infile, line)) {
if (++lineNum % 100000 == 0) {
cerr << lineNum << " ";
}
vector<string> substrings = Tokenize(line, "\t");
if (substrings.size() < 2) continue;
assert(substrings.size() == 2 || substrings.size() == 3);
SCORE prob = TransformLMScore(Scan<SCORE>(substrings[0]));
if (substrings[1] == "<unk>") {
m_oov = prob;
continue;
}
SCORE backoff = 0.f;
if (substrings.size() == 3) {
backoff = TransformLMScore(Scan<SCORE>(substrings[2]));
}
// ngram
vector<string> key = Tokenize(substrings[1], " ");
vector<const Factor*> factorKey(key.size());
for (size_t i = 0; i < key.size(); ++i) {
factorKey[factorKey.size() - i - 1] = fc.AddFactor(key[i], system, false);
}
m_root.insert(factorKey, LMScores(prob, backoff));
}
}
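// Note on the expected file format: this loader appears to accept the body of a
// plain ARPA file. Header and section markers ("\data\", "ngram 1=...",
// "\1-grams:", "\end\") contain no tab and are skipped by the
// substrings.size() < 2 test; entry lines are tab-separated, e.g.
//
//   -1.234567<TAB>the quick fox<TAB>-0.345678
//
// with the back-off column optional (typically absent for the highest order).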
void LanguageModel::SetParameter(const std::string& key,
const std::string& value)
{
if (key == "path") {
m_path = value;
}
else if (key == "factor") {
m_factorType = Scan<FactorType>(value);
}
else if (key == "order") {
m_order = Scan<size_t>(value);
}
else {
StatefulFeatureFunction::SetParameter(key, value);
}
}
FFState* LanguageModel::BlankState(MemPool &pool, const System &sys) const
{
return new (pool.Allocate<LMState>()) LMState();
}
void LanguageModel::EmptyHypothesisState(FFState &state, const ManagerBase &mgr,
const InputType &input, const Hypothesis &hypo) const
{
LMState &stateCast = static_cast<LMState&>(state);
MemPool &pool = mgr.GetPool();
stateCast.Init(pool, m_bos);
}
void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system,
const Phrase<Moses2::Word> &source, const TargetPhraseImpl &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
if (targetPhrase.GetSize() == 0) {
return;
}
SCORE score = 0;
SCORE nonFullScore = 0;
vector<const Factor*> context;
// context.push_back(m_bos);
context.reserve(m_order);
for (size_t i = 0; i < targetPhrase.GetSize(); ++i) {
const Factor *factor = targetPhrase[i][m_factorType];
ShiftOrPush(context, factor);
if (context.size() == m_order) {
std::pair<SCORE, void*> fromScoring = Score(context);
score += fromScoring.first;
}
else {
std::pair<SCORE, void*> fromScoring = Score(context);
nonFullScore += fromScoring.first;
}
}
scores.PlusEquals(system, *this, score);
SCORE weightedScore = Scores::CalcWeightedScore(system, *this, nonFullScore);
estimatedScore += weightedScore;
}
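// Words whose context window is not yet full can only be scored approximately at
// this point, so their log-probabilities go into the future-cost estimate rather
// than the real feature score.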
void LanguageModel::EvaluateInIsolation(MemPool &pool, const System &system, const Phrase<SCFG::Word> &source,
const TargetPhrase<SCFG::Word> &targetPhrase, Scores &scores,
SCORE &estimatedScore) const
{
}
void LanguageModel::EvaluateWhenApplied(const ManagerBase &mgr,
const Hypothesis &hypo, const FFState &prevState, Scores &scores,
FFState &state) const
{
const LMState &prevLMState = static_cast<const LMState &>(prevState);
size_t numWords = prevLMState.numWords;
// context is held backwards
vector<const Factor*> context(numWords);
for (size_t i = 0; i < numWords; ++i) {
context[i] = prevLMState.lastWords[i];
}
//DebugContext(context);
SCORE score = 0;
std::pair<SCORE, void*> fromScoring;
const TargetPhrase<Moses2::Word> &tp = hypo.GetTargetPhrase();
for (size_t i = 0; i < tp.GetSize(); ++i) {
const Word &word = tp[i];
const Factor *factor = word[m_factorType];
ShiftOrPush(context, factor);
fromScoring = Score(context);
score += fromScoring.first;
}
const Bitmap &bm = hypo.GetBitmap();
if (bm.IsComplete()) {
// everything translated
ShiftOrPush(context, m_eos);
fromScoring = Score(context);
score += fromScoring.first;
fromScoring.second = NULL;
context.clear();
}
else {
assert(context.size());
if (context.size() == m_order) {
context.resize(context.size() - 1);
}
}
scores.PlusEquals(mgr.system, *this, score);
// return state
//DebugContext(context);
LMState &stateCast = static_cast<LMState&>(state);
MemPool &pool = mgr.GetPool();
stateCast.Set(pool, fromScoring.second, context);
}
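// The outgoing state keeps at most order-1 trailing words of context, which is
// all a subsequent hypothesis can need to extend its own n-grams; once the
// sentence is complete the context is cleared entirely.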
void LanguageModel::ShiftOrPush(std::vector<const Factor*> &context,
const Factor *factor) const
{
if (context.size() < m_order) {
context.resize(context.size() + 1);
}
assert(context.size());
for (size_t i = context.size() - 1; i > 0; --i) {
context[i] = context[i - 1];
}
context[0] = factor;
}
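// Example: contexts are held newest-first, so with m_order = 3 and context = {b, a},
// ShiftOrPush(context, c) yields {c, b, a}; once the window is full, the oldest
// word simply falls off the end.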
std::pair<SCORE, void*> LanguageModel::Score(
const std::vector<const Factor*> &context) const
{
//cerr << "context=";
//DebugContext(context);
std::pair<SCORE, void*> ret;
typedef Node<const Factor*, LMScores> LMNode;
const LMNode *node = m_root.getNode(context);
if (node) {
ret.first = node->getValue().prob;
ret.second = (void*) node;
}
else {
SCORE backoff = 0;
std::vector<const Factor*> backOffContext(context.begin() + 1,
context.end());
node = m_root.getNode(backOffContext);
if (node) {
backoff = node->getValue().backoff;
}
std::vector<const Factor*> newContext(context.begin(), context.end() - 1);
std::pair<SCORE, void*> newRet = Score(newContext);
ret.first = backoff + newRet.first;
ret.second = newRet.second;
}
//cerr << "score=" << ret.first << endl;
return ret;
}
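// Standard back-off in log space: context[0] is the predicted word, the remaining
// entries are its history (newest-first). If the full n-gram is absent, the score
// is the history's back-off weight plus the score of the same word with the
// oldest history word dropped.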
SCORE LanguageModel::BackoffScore(
const std::vector<const Factor*> &context) const
{
//cerr << "backoff=";
//DebugContext(context);
SCORE ret;
size_t stoppedAtInd;
const Node<const Factor*, LMScores> &node = m_root.getNode(context,
stoppedAtInd);
if (stoppedAtInd == context.size()) {
// found entire ngram
ret = node.getValue().backoff;
}
else {
if (stoppedAtInd == 0) {
ret = m_oov;
stoppedAtInd = 1;
}
else {
ret = node.getValue().backoff;
}
// recursive
std::vector<const Factor*> backoff(context.begin() + stoppedAtInd,
context.end());
ret += BackoffScore(backoff);
}
return ret;
}
void LanguageModel::DebugContext(
const std::vector<const Factor*> &context) const
{
for (size_t i = 0; i < context.size(); ++i) {
cerr << context[i]->GetString() << " ";
}
cerr << endl;
}
void LanguageModel::EvaluateWhenApplied(const SCFG::Manager &mgr,
const SCFG::Hypothesis &hypo, int featureID, Scores &scores,
FFState &state) const
{
UTIL_THROW2("Not implemented");
}
}

Some files were not shown because too many files have changed in this diff.