2015-10-23 22:53:36 +03:00
|
|
|
/*
|
2015-11-03 17:20:10 +03:00
|
|
|
* PhraseImpl.h
|
2015-10-23 22:53:36 +03:00
|
|
|
*
|
|
|
|
* Created on: 23 Oct 2015
|
|
|
|
* Author: hieu
|
|
|
|
*/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <algorithm>
#include <cstddef>
#include <iostream>
#include <sstream>
#include <string>
#include "Word.h"
#include "MemPool.h"
#include "TypeDef.h"
#include "legacy/FactorCollection.h"
#include "SCFG/Word.h"
|
2015-10-23 22:53:36 +03:00
|
|
|
|
2015-12-10 23:49:30 +03:00
|
|
|
namespace Moses2
|
|
|
|
{
|
|
|
|
|
2016-04-27 00:18:19 +03:00
|
|
|
template<typename WORD>
|
2015-11-03 17:20:10 +03:00
|
|
|
class SubPhrase;
|
2016-04-27 00:18:19 +03:00
|
|
|
|
2016-02-23 15:41:48 +03:00
|
|
|
class Scores;
|
2016-02-23 19:52:17 +03:00
|
|
|
class PhraseTable;
|
|
|
|
class MemPool;
|
|
|
|
class System;
|
2015-11-03 17:20:10 +03:00
|
|
|
|
2016-04-26 11:23:18 +03:00
|
|
|
template<typename WORD>
|
2015-11-03 17:20:10 +03:00
|
|
|
class Phrase
|
2015-10-24 01:19:31 +03:00
|
|
|
{
|
2016-03-31 23:00:16 +03:00
|
|
|
friend std::ostream& operator<<(std::ostream &, const Phrase &);
|
2015-10-24 01:19:31 +03:00
|
|
|
public:
|
2016-03-31 23:00:16 +03:00
|
|
|
virtual ~Phrase()
|
|
|
|
{
|
|
|
|
}
|
2016-04-26 11:54:23 +03:00
|
|
|
virtual const WORD& operator[](size_t pos) const = 0;
|
2015-10-24 01:19:31 +03:00
|
|
|
virtual size_t GetSize() const = 0;
|
2016-04-25 15:36:29 +03:00
|
|
|
|
2016-04-26 11:54:23 +03:00
|
|
|
virtual const WORD& Back() const
|
2016-04-15 15:38:01 +03:00
|
|
|
{ return (*this)[GetSize() - 1]; }
|
|
|
|
|
2016-04-25 16:06:38 +03:00
|
|
|
virtual size_t hash() const
|
|
|
|
{
|
|
|
|
size_t seed = 0;
|
|
|
|
|
|
|
|
for (size_t i = 0; i < GetSize(); ++i) {
|
2016-04-26 11:54:23 +03:00
|
|
|
const WORD &word = (*this)[i];
|
2016-04-25 16:06:38 +03:00
|
|
|
size_t wordHash = word.hash();
|
|
|
|
boost::hash_combine(seed, wordHash);
|
|
|
|
}
|
|
|
|
|
|
|
|
return seed;
|
|
|
|
}
|
|
|
|
|
|
|
|
virtual bool operator==(const Phrase &compare) const
|
|
|
|
{
|
|
|
|
if (GetSize() != compare.GetSize()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (size_t i = 0; i < GetSize(); ++i) {
|
2016-04-26 11:54:23 +03:00
|
|
|
const WORD &word = (*this)[i];
|
|
|
|
const WORD &otherWord = compare[i];
|
2016-04-25 16:06:38 +03:00
|
|
|
if (word != otherWord) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-12-15 16:36:44 +03:00
|
|
|
virtual bool operator!=(const Phrase &compare) const
|
|
|
|
{
|
2016-03-31 23:00:16 +03:00
|
|
|
return !((*this) == compare);
|
2015-12-15 16:36:44 +03:00
|
|
|
}
|
2016-04-25 16:06:38 +03:00
|
|
|
|
|
|
|
virtual std::string GetString(const FactorList &factorTypes) const
|
|
|
|
{
|
|
|
|
if (GetSize() == 0) {
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
|
|
|
|
std::stringstream ret;
|
|
|
|
|
2016-04-26 11:54:23 +03:00
|
|
|
const WORD &word = (*this)[0];
|
2016-04-25 16:06:38 +03:00
|
|
|
ret << word.GetString(factorTypes);
|
|
|
|
for (size_t i = 1; i < GetSize(); ++i) {
|
2016-04-26 11:54:23 +03:00
|
|
|
const WORD &word = (*this)[i];
|
2016-04-25 16:06:38 +03:00
|
|
|
ret << " " << word.GetString(factorTypes);
|
|
|
|
}
|
|
|
|
return ret.str();
|
|
|
|
}
|
|
|
|
|
2016-04-27 00:18:19 +03:00
|
|
|
virtual SubPhrase<WORD> GetSubPhrase(size_t start, size_t size) const = 0;
|
2015-12-15 16:36:44 +03:00
|
|
|
|
2016-04-25 16:06:38 +03:00
|
|
|
virtual void OutputToStream(std::ostream &out) const
|
|
|
|
{
|
|
|
|
size_t size = GetSize();
|
|
|
|
if (size) {
|
|
|
|
out << (*this)[0];
|
|
|
|
for (size_t i = 1; i < size; ++i) {
|
2016-04-26 11:54:23 +03:00
|
|
|
const WORD &word = (*this)[i];
|
2016-04-25 16:06:38 +03:00
|
|
|
out << " " << word;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-03-17 21:11:05 +03:00
|
|
|
|
2015-10-24 01:19:31 +03:00
|
|
|
};
|
2016-04-25 16:06:38 +03:00
|
|
|
|
|
|
|
////////////////////////////////////////////////////////////////////////
|
2016-04-26 11:23:18 +03:00
|
|
|
inline std::ostream& operator<<(std::ostream &out, const Phrase<Moses2::Word> &obj)
|
2016-04-25 16:06:38 +03:00
|
|
|
{
|
|
|
|
if (obj.GetSize()) {
|
|
|
|
out << obj[0];
|
|
|
|
for (size_t i = 1; i < obj.GetSize(); ++i) {
|
2016-04-26 11:23:18 +03:00
|
|
|
const Moses2::Word &word = obj[i];
|
2016-04-25 16:06:38 +03:00
|
|
|
out << " " << word;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return out;
|
|
|
|
}
|
2016-04-28 01:30:24 +03:00
|
|
|
|
|
|
|
inline std::ostream& operator<<(std::ostream &out, const Phrase<SCFG::Word> &obj)
|
2016-04-26 11:23:18 +03:00
|
|
|
{
|
|
|
|
if (obj.GetSize()) {
|
|
|
|
out << obj[0];
|
|
|
|
for (size_t i = 1; i < obj.GetSize(); ++i) {
|
2016-04-28 01:30:24 +03:00
|
|
|
const SCFG::Word &word = obj[i];
|
2016-04-26 11:23:18 +03:00
|
|
|
out << " " << word;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return out;
|
|
|
|
}
|
2016-02-25 20:50:44 +03:00
|
|
|
|
2016-02-23 15:41:48 +03:00
|
|
|
////////////////////////////////////////////////////////////////////////
|
2016-04-26 11:23:18 +03:00
|
|
|
template<typename WORD>
|
2015-11-25 20:35:22 +03:00
|
|
|
class PhraseOrdererLexical
|
|
|
|
{
|
|
|
|
public:
|
2016-04-26 11:23:18 +03:00
|
|
|
bool operator()(const Phrase<WORD> &a, const Phrase<WORD> &b) const
|
2016-03-31 23:00:16 +03:00
|
|
|
{
|
|
|
|
size_t minSize = std::min(a.GetSize(), b.GetSize());
|
|
|
|
for (size_t i = 0; i < minSize; ++i) {
|
|
|
|
const Word &aWord = a[i];
|
|
|
|
const Word &bWord = b[i];
|
|
|
|
int cmp = aWord.Compare(bWord);
|
|
|
|
//std::cerr << "WORD: " << aWord << " ||| " << bWord << " ||| " << lessThan << std::endl;
|
|
|
|
if (cmp) {
|
|
|
|
return (cmp < 0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return a.GetSize() < b.GetSize();
|
2015-11-25 20:35:22 +03:00
|
|
|
}
|
|
|
|
};
|
2015-12-10 23:49:30 +03:00
|
|
|
|
|
|
|
}
|
|
|
|
|