2010-04-08 21:16:10 +04:00
|
|
|
#pragma once
|
2010-10-12 20:17:41 +04:00
|
|
|
// $Id$
|
|
|
|
/***********************************************************************
|
|
|
|
Moses - factored phrase-based, hierarchical and syntactic language decoder
|
|
|
|
Copyright (C) 2009 Hieu Hoang
|
2011-02-24 16:57:11 +03:00
|
|
|
|
2010-10-12 20:17:41 +04:00
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
2011-02-24 16:57:11 +03:00
|
|
|
|
2010-10-12 20:17:41 +04:00
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
2011-02-24 16:57:11 +03:00
|
|
|
|
2010-10-12 20:17:41 +04:00
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
2010-04-08 21:16:10 +04:00
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
|
|
|
#include <iostream>
|
|
|
|
#include <fstream>
|
2012-09-20 00:48:10 +04:00
|
|
|
#include <boost/shared_ptr.hpp>
|
2010-04-08 21:16:10 +04:00
|
|
|
#include "Vocab.h"
|
|
|
|
|
|
|
|
// Forward declaration of the decoder-side word type, so that
// OnDiskPt::Word::ConvertToMoses can name Moses::Word in its signature.
namespace Moses
{
class Word;
}
|
|
|
|
|
|
|
|
namespace OnDiskPt
|
|
|
|
{
|
2012-05-15 19:16:16 +04:00
|
|
|
// Forward declaration of the vocabulary type taken by Word's member functions.
class Vocab;
|
2010-04-08 21:16:10 +04:00
|
|
|
|
2012-06-29 04:12:31 +04:00
|
|
|
/* A wrapper around a vocab id, and a boolean indicating whether it is a terminal or non-terminal.
|
|
|
|
* Factors can be represented by using a vocab string with | character, eg go|VB
|
|
|
|
*/
|
2010-04-08 21:16:10 +04:00
|
|
|
class Word
|
|
|
|
{
|
2011-02-24 16:57:11 +03:00
|
|
|
friend std::ostream& operator<<(std::ostream&, const Word&);
|
2010-04-08 21:16:10 +04:00
|
|
|
|
2012-10-18 19:35:42 +04:00
|
|
|
private:
|
2011-02-24 16:57:11 +03:00
|
|
|
bool m_isNonTerminal;
|
2012-06-16 00:44:45 +04:00
|
|
|
UINT64 m_vocabId;
|
2011-02-24 16:57:11 +03:00
|
|
|
|
2010-04-08 21:16:10 +04:00
|
|
|
public:
|
2013-06-10 21:11:55 +04:00
|
|
|
explicit Word() {
|
|
|
|
}
|
2011-02-24 16:57:11 +03:00
|
|
|
|
2012-06-16 00:44:45 +04:00
|
|
|
explicit Word(bool isNonTerminal)
|
2013-05-29 21:16:15 +04:00
|
|
|
:m_isNonTerminal(isNonTerminal)
|
2013-06-10 21:11:55 +04:00
|
|
|
,m_vocabId(0) {
|
|
|
|
}
|
2011-02-24 16:57:11 +03:00
|
|
|
|
|
|
|
Word(const Word ©);
|
|
|
|
~Word();
|
|
|
|
|
|
|
|
|
|
|
|
void CreateFromString(const std::string &inString, Vocab &vocab);
|
|
|
|
bool IsNonTerminal() const {
|
|
|
|
return m_isNonTerminal;
|
|
|
|
}
|
|
|
|
|
|
|
|
size_t WriteToMemory(char *mem) const;
|
2012-06-16 00:44:45 +04:00
|
|
|
size_t ReadFromMemory(const char *mem);
|
|
|
|
size_t ReadFromFile(std::fstream &file);
|
2011-02-24 16:57:11 +03:00
|
|
|
|
2012-06-16 00:44:45 +04:00
|
|
|
void SetVocabId(UINT32 vocabId) {
|
|
|
|
m_vocabId = vocabId;
|
2011-02-24 16:57:11 +03:00
|
|
|
}
|
|
|
|
|
2012-09-26 15:52:11 +04:00
|
|
|
void ConvertToMoses(
|
|
|
|
const std::vector<Moses::FactorType> &outputFactorsVec,
|
|
|
|
const Vocab &vocab,
|
|
|
|
Moses::Word &overwrite) const;
|
2011-02-24 16:57:11 +03:00
|
|
|
|
2012-11-14 21:14:46 +04:00
|
|
|
void DebugPrint(std::ostream &out, const Vocab &vocab) const;
|
2013-05-29 21:16:15 +04:00
|
|
|
inline const std::string &GetString(const Vocab &vocab) const {
|
2012-11-14 21:14:46 +04:00
|
|
|
return vocab.GetString(m_vocabId);
|
|
|
|
}
|
2012-05-15 19:16:16 +04:00
|
|
|
|
2011-02-24 16:57:11 +03:00
|
|
|
int Compare(const Word &compare) const;
|
|
|
|
bool operator<(const Word &compare) const;
|
|
|
|
bool operator==(const Word &compare) const;
|
2010-04-08 21:16:10 +04:00
|
|
|
|
|
|
|
};
|
2012-09-20 00:48:10 +04:00
|
|
|
|
|
|
|
// Shared-ownership smart pointer to a Word.
typedef boost::shared_ptr<Word> WordPtr;
|
2010-04-08 21:16:10 +04:00
|
|
|
}
|
|
|
|
|