mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-10 00:47:31 +03:00
commit
4cbc47f2ae
@ -21,10 +21,11 @@
|
||||
#include <direct.h>
|
||||
#endif
|
||||
#include <sys/stat.h>
|
||||
#include "util/check.hh"
|
||||
#include <string>
|
||||
#include "OnDiskWrapper.h"
|
||||
#include "moses/Factor.h"
|
||||
#include "util/check.hh"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -59,19 +60,29 @@ bool OnDiskWrapper::BeginLoad(const std::string &filePath)
|
||||
bool OnDiskWrapper::OpenForLoad(const std::string &filePath)
|
||||
{
|
||||
m_fileSource.open((filePath + "/Source.dat").c_str(), ios::in | ios::binary);
|
||||
CHECK(m_fileSource.is_open());
|
||||
UTIL_THROW_IF(!m_fileSource.is_open(),
|
||||
util::FileOpenException,
|
||||
"Couldn't open file " << filePath << "/Source.dat");
|
||||
|
||||
m_fileTargetInd.open((filePath + "/TargetInd.dat").c_str(), ios::in | ios::binary);
|
||||
CHECK(m_fileTargetInd.is_open());
|
||||
UTIL_THROW_IF(!m_fileTargetInd.is_open(),
|
||||
util::FileOpenException,
|
||||
"Couldn't open file " << filePath << "/TargetInd.dat");
|
||||
|
||||
m_fileTargetColl.open((filePath + "/TargetColl.dat").c_str(), ios::in | ios::binary);
|
||||
CHECK(m_fileTargetColl.is_open());
|
||||
UTIL_THROW_IF(!m_fileTargetColl.is_open(),
|
||||
util::FileOpenException,
|
||||
"Couldn't open file " << filePath << "/TargetColl.dat");
|
||||
|
||||
m_fileVocab.open((filePath + "/Vocab.dat").c_str(), ios::in);
|
||||
CHECK(m_fileVocab.is_open());
|
||||
UTIL_THROW_IF(!m_fileVocab.is_open(),
|
||||
util::FileOpenException,
|
||||
"Couldn't open file " << filePath << "/Vocab.dat");
|
||||
|
||||
m_fileMisc.open((filePath + "/Misc.dat").c_str(), ios::in);
|
||||
CHECK(m_fileMisc.is_open());
|
||||
UTIL_THROW_IF(!m_fileMisc.is_open(),
|
||||
util::FileOpenException,
|
||||
"Couldn't open file " << filePath << "/Misc.dat");
|
||||
|
||||
// set up root node
|
||||
LoadMisc();
|
||||
@ -89,7 +100,9 @@ bool OnDiskWrapper::LoadMisc()
|
||||
while(m_fileMisc.getline(line, 100000)) {
|
||||
vector<string> tokens;
|
||||
Moses::Tokenize(tokens, line);
|
||||
CHECK(tokens.size() == 2);
|
||||
UTIL_THROW_IF2(tokens.size() != 2, "Except key value. Found " << line);
|
||||
|
||||
|
||||
const string &key = tokens[0];
|
||||
m_miscInfo[key] = Moses::Scan<UINT64>(tokens[1]);
|
||||
}
|
||||
@ -112,33 +125,52 @@ bool OnDiskWrapper::BeginSave(const std::string &filePath
|
||||
#endif
|
||||
|
||||
m_fileSource.open((filePath + "/Source.dat").c_str(), ios::out | ios::in | ios::binary | ios::ate | ios::trunc);
|
||||
CHECK(m_fileSource.is_open());
|
||||
UTIL_THROW_IF(!m_fileSource.is_open(),
|
||||
util::FileOpenException,
|
||||
"Couldn't open file " << filePath << "/Source.dat");
|
||||
|
||||
m_fileTargetInd.open((filePath + "/TargetInd.dat").c_str(), ios::out | ios::binary | ios::ate | ios::trunc);
|
||||
CHECK(m_fileTargetInd.is_open());
|
||||
UTIL_THROW_IF(!m_fileTargetInd.is_open(),
|
||||
util::FileOpenException,
|
||||
"Couldn't open file " << filePath << "/TargetInd.dat");
|
||||
|
||||
m_fileTargetColl.open((filePath + "/TargetColl.dat").c_str(), ios::out | ios::binary | ios::ate | ios::trunc);
|
||||
CHECK(m_fileTargetColl.is_open());
|
||||
UTIL_THROW_IF(!m_fileTargetColl.is_open(),
|
||||
util::FileOpenException,
|
||||
"Couldn't open file " << filePath << "/TargetColl.dat");
|
||||
|
||||
m_fileVocab.open((filePath + "/Vocab.dat").c_str(), ios::out | ios::ate | ios::trunc);
|
||||
CHECK(m_fileVocab.is_open());
|
||||
UTIL_THROW_IF(!m_fileVocab.is_open(),
|
||||
util::FileOpenException,
|
||||
"Couldn't open file " << filePath << "/Vocab.dat");
|
||||
|
||||
m_fileMisc.open((filePath + "/Misc.dat").c_str(), ios::out | ios::ate | ios::trunc);
|
||||
CHECK(m_fileMisc.is_open());
|
||||
UTIL_THROW_IF(!m_fileMisc.is_open(),
|
||||
util::FileOpenException,
|
||||
"Couldn't open file " << filePath << "/Misc.dat");
|
||||
|
||||
// offset by 1. 0 offset is reserved
|
||||
char c = 0xff;
|
||||
m_fileSource.write(&c, 1);
|
||||
CHECK(1 == m_fileSource.tellp());
|
||||
UTIL_THROW_IF(1 != m_fileSource.tellp(),
|
||||
util::Exception,
|
||||
"Couldn't write to stream m_fileSource");
|
||||
|
||||
m_fileTargetInd.write(&c, 1);
|
||||
CHECK(1 == m_fileTargetInd.tellp());
|
||||
UTIL_THROW_IF(1 != m_fileTargetInd.tellp(),
|
||||
util::Exception,
|
||||
"Couldn't write to stream m_fileTargetInd");
|
||||
|
||||
m_fileTargetColl.write(&c, 1);
|
||||
CHECK(1 == m_fileTargetColl.tellp());
|
||||
UTIL_THROW_IF(1 != m_fileTargetColl.tellp(),
|
||||
util::Exception,
|
||||
"Couldn't write to stream m_fileTargetColl");
|
||||
|
||||
// set up root node
|
||||
CHECK(GetNumCounts() == 1);
|
||||
UTIL_THROW_IF(GetNumCounts() != 1,
|
||||
util::Exception,
|
||||
"Not sure what this is...");
|
||||
|
||||
vector<float> counts(GetNumCounts());
|
||||
counts[0] = DEFAULT_COUNT;
|
||||
m_rootSourceNode = new PhraseNode();
|
||||
@ -150,7 +182,7 @@ bool OnDiskWrapper::BeginSave(const std::string &filePath
|
||||
void OnDiskWrapper::EndSave()
|
||||
{
|
||||
bool ret = m_rootSourceNode->Saved();
|
||||
CHECK(ret);
|
||||
UTIL_THROW_IF(!ret, util::Exception, "Root node not saved");
|
||||
|
||||
GetVocab().Save(*this);
|
||||
|
||||
@ -187,7 +219,10 @@ UINT64 OnDiskWrapper::GetMisc(const std::string &key) const
|
||||
{
|
||||
std::map<std::string, UINT64>::const_iterator iter;
|
||||
iter = m_miscInfo.find(key);
|
||||
CHECK(iter != m_miscInfo.end());
|
||||
UTIL_THROW_IF(iter == m_miscInfo.end()
|
||||
, util::Exception
|
||||
, "Couldn't find value for key " << key
|
||||
);
|
||||
|
||||
return iter->second;
|
||||
}
|
||||
@ -201,7 +236,7 @@ Word *OnDiskWrapper::ConvertFromMoses(const std::vector<Moses::FactorType> &fact
|
||||
|
||||
size_t factorType = factorsVec[0];
|
||||
const Moses::Factor *factor = origWord.GetFactor(factorType);
|
||||
CHECK(factor);
|
||||
UTIL_THROW_IF(factor == NULL, util::Exception, "Expecting factor " << factorType);
|
||||
strme << factor->GetString();
|
||||
|
||||
for (size_t ind = 1 ; ind < factorsVec.size() ; ++ind) {
|
||||
@ -211,7 +246,9 @@ Word *OnDiskWrapper::ConvertFromMoses(const std::vector<Moses::FactorType> &fact
|
||||
// can have less factors than factorType.size()
|
||||
break;
|
||||
}
|
||||
CHECK(factor);
|
||||
UTIL_THROW_IF(factor == NULL,
|
||||
util::Exception,
|
||||
"Expecting factor " << factorType << " at position " << ind);
|
||||
strme << "|" << factor->GetString();
|
||||
} // for (size_t factorType
|
||||
|
||||
|
@ -18,9 +18,10 @@
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
#include <iostream>
|
||||
#include "util/check.hh"
|
||||
#include "moses/Util.h"
|
||||
#include "Phrase.h"
|
||||
#include "util/check.hh"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -35,7 +36,9 @@ void Phrase::AddWord(WordPtr word)
|
||||
|
||||
/**
 * Insert a word into the phrase directly after an existing position.
 *
 * \param word word to insert
 * \param pos  index of an existing word; the new word is stored at pos + 1
 * \throws util::Exception when \p pos is not a valid index into the phrase
 */
void Phrase::AddWord(WordPtr word, size_t pos)
{
  // The position is validated exactly once, through the exception macro, so
  // that an invalid index is reported to the caller with a readable message.
  UTIL_THROW_IF(!(pos < m_words.size()),
                util::Exception,
                "Trying to get word " << pos << " when phrase size is " << m_words.size());

  m_words.insert(m_words.begin() + pos + 1, word);
}
|
||||
|
||||
@ -59,7 +62,7 @@ int Phrase::Compare(const Phrase &compare) const
|
||||
}
|
||||
|
||||
if (ret == 0) {
|
||||
CHECK(compare.GetSize() >= GetSize());
|
||||
assert(compare.GetSize() >= GetSize());
|
||||
ret = (compare.GetSize() > GetSize()) ? 1 : 0;
|
||||
}
|
||||
return ret;
|
||||
|
@ -17,12 +17,13 @@
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
#include "util/check.hh"
|
||||
#include "PhraseNode.h"
|
||||
#include "OnDiskWrapper.h"
|
||||
#include "TargetPhraseCollection.h"
|
||||
#include "SourcePhrase.h"
|
||||
#include "moses/Util.h"
|
||||
#include "util/check.hh"
|
||||
#include "util/exception.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
@ -1,149 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1589516826">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1589516826" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1589516826" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1589516826." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1745490513" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.494216366" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/basic-decoder}/Debug" id="cdt.managedbuild.builder.gnu.cross.1009688321" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1601060976" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1429299440" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.1531860010" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.include.paths.740132981" name="Include paths (-I)" superClass="gnu.c.compiler.option.include.paths" valueType="includePath"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.118904161" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.352779274" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.1941159898" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.1375886139" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1030129128" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:/basic-decoder}""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:/basic-decoder}/../3rdparty/boost/include""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:/basic-decoder}/../3rdparty/srilm/include""/>
|
||||
</option>
|
||||
<option id="gnu.cpp.compiler.option.preprocessor.def.1451958570" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
|
||||
<listOptionValue builtIn="false" value="SCORE_BREAKDOWN"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2109487710" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.212627609" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1748901445" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<option id="gnu.cpp.link.option.paths.1618445450" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:/basic-decoder}/../3rdparty/srilm/lib/i686-m64""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:/basic-decoder}/../3rdparty/boost/lib64""/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.libs.1167343204" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="oolm"/>
|
||||
<listOptionValue builtIn="false" value="dstruct"/>
|
||||
<listOptionValue builtIn="false" value="misc"/>
|
||||
<listOptionValue builtIn="false" value="flm"/>
|
||||
<listOptionValue builtIn="false" value="lattice"/>
|
||||
<listOptionValue builtIn="false" value="pthread"/>
|
||||
<listOptionValue builtIn="false" value="boost_system-mt"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1945581555" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1222087129" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1333726557" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1297018052" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<sourceEntries>
|
||||
<entry excluding="3rdparty" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.306747385">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.306747385" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.306747385" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.306747385." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.616022040" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.606767516" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/basic-decoder}/Release" id="cdt.managedbuild.builder.gnu.cross.1139179673" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1157813077" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1587781623" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.1231331792" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1324768096" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.383499349" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.22204341" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.272048981" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2004590487" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1869412997" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.614956418" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2117775172" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.994738670" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1885556143" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.525605520" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="basic-decoder.cdt.managedbuild.target.gnu.cross.exe.722685101" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.306747385;cdt.managedbuild.config.gnu.cross.exe.release.306747385.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.383499349;cdt.managedbuild.tool.gnu.cpp.compiler.input.2004590487">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1589516826;cdt.managedbuild.config.gnu.cross.exe.debug.1589516826.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.352779274;cdt.managedbuild.tool.gnu.cpp.compiler.input.2109487710">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1589516826;cdt.managedbuild.config.gnu.cross.exe.debug.1589516826.;cdt.managedbuild.tool.gnu.cross.c.compiler.1601060976;cdt.managedbuild.tool.gnu.c.compiler.input.118904161">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.306747385;cdt.managedbuild.config.gnu.cross.exe.release.306747385.;cdt.managedbuild.tool.gnu.cross.c.compiler.1157813077;cdt.managedbuild.tool.gnu.c.compiler.input.1324768096">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/basic-decoder"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/basic-decoder"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
</cproject>
|
@ -1,5 +0,0 @@
|
||||
# This code depends on make tool being used
|
||||
DEPFILES=$(wildcard $(addsuffix .d, ${OBJECTFILES}))
|
||||
ifneq (${DEPFILES},)
|
||||
include ${DEPFILES}
|
||||
endif
|
3
contrib/basic-decoder/.gitignore
vendored
3
contrib/basic-decoder/.gitignore
vendored
@ -1,3 +0,0 @@
|
||||
/Debug
|
||||
/Debug
|
||||
/Debug
|
@ -1,27 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>basic-decoder</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
@ -1,82 +0,0 @@
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include "DistortionScoreProducer.h"
|
||||
#include "TypeDef.h"
|
||||
#include "Sentence.h"
|
||||
#include "WordsBitmap.h"
|
||||
#include "Search/Hypothesis.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
/**
 * Construct the distortion feature from its configuration line.
 *
 * \param line configuration line, forwarded unchanged to the
 *             StatefulFeatureFunction base class
 *
 * After the base class has been built, ReadParameters() is invoked.
 */
DistortionScoreProducer::DistortionScoreProducer(const std::string &line)
  : StatefulFeatureFunction(line)
{
  this->ReadParameters();
}
|
||||
|
||||
size_t DistortionScoreProducer::Evaluate(const Hypothesis& hypo,
|
||||
size_t prevState,
|
||||
Scores &scores) const
|
||||
{
|
||||
const WordsRange &range = hypo.GetRange();
|
||||
const WordsBitmap &coverage = hypo.GetCoverage();
|
||||
|
||||
const Hypothesis *prevHypo = hypo.GetPrevHypo();
|
||||
assert(prevHypo);
|
||||
const WordsRange &prevRange = prevHypo->GetRange();
|
||||
|
||||
SCORE score = ComputeDistortionScore(prevRange, range);
|
||||
scores.Add(*this, score);
|
||||
|
||||
size_t firstGap = coverage.GetFirstGapPos();
|
||||
|
||||
size_t ret = range.GetHash();
|
||||
boost::hash_combine(ret, firstGap);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * Distortion score between two source ranges.
 *
 * \param prev range of the previously translated phrase
 * \param curr range of the phrase being translated now
 * \return the value of prev.ComputeDistortionScore(curr), converted to SCORE
 */
SCORE DistortionScoreProducer::ComputeDistortionScore(const WordsRange &prev, const WordsRange &curr) const
{
  return (SCORE) prev.ComputeDistortionScore(curr);
}
|
||||
|
||||
/**
 * "Pay distortion as soon as possible" scoring, after Moore and Quirk
 * (MT Summit 2007).
 *
 * Definitions:
 *   S   : current source range (\p curr)
 *   S'  : last translated source phrase range (\p prev)
 *   S'' : longest fully-translated initial segment, ending at firstGap - 1
 *
 * \param hypo     not used by this implementation
 * \param prev     S'
 * \param curr     S
 * \param firstGap position of the first untranslated word, or -1
 * \return 0 when S directly follows S''; otherwise
 *         -2 * (extra words + number of words covered by S)
 */
SCORE DistortionScoreProducer::CalculateDistortionScore_MooreAndQuick(const Hypothesis& hypo,
    const WordsRange &prev, const WordsRange &curr, int firstGap)
{
  // End of S''. A firstGap of -1 maps to -1 rather than -2.
  const int prefixEndPos = (firstGap == -1) ? -1 : firstGap - 1;

  // case1: S is adjacent to S'' => nothing to pay
  if ((int) curr.startPos == prefixEndPos + 1) {
    return 0;
  }

  // Number of words charged in addition to the length of S.
  int extraWords;
  if ((int) curr.endPos < (int) prev.endPos) {
    // case2: S is to the left of S' => only length(S)
    extraWords = 0;
  } else if ((int) prev.endPos <= prefixEndPos) {
    // case3: S' is a subsequence of S'' => nbWordBetween(S,S'')
    extraWords = (int) curr.startPos - prefixEndPos - 1;
  } else {
    // case4: otherwise => nbWordBetween(S,S')
    extraWords = (int) curr.GetNumWordsBetween(prev);
  }

  return (SCORE) -2 * (extraWords + (int) curr.GetNumWordsCovered());
}
|
@ -1,35 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
#include "StatefulFeatureFunction.h"
|
||||
#include "WordsRange.h"
|
||||
|
||||
/** Calculates Distortion scores
|
||||
*/
|
||||
class DistortionScoreProducer : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
DistortionScoreProducer(const std::string &line);
|
||||
|
||||
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, Scores &scores
|
||||
, Scores &estimatedFutureScore) const {
|
||||
}
|
||||
|
||||
size_t Evaluate(
|
||||
const Hypothesis& hypo,
|
||||
size_t prevState,
|
||||
Scores &scores) const;
|
||||
|
||||
protected:
|
||||
SCORE ComputeDistortionScore(const WordsRange &prev, const WordsRange &curr) const;
|
||||
|
||||
SCORE CalculateDistortionScore_MooreAndQuick(const Hypothesis& hypo,
|
||||
const WordsRange &prevRange,
|
||||
const WordsRange &currRange,
|
||||
int firstGap);
|
||||
|
||||
};
|
||||
|
@ -1,131 +0,0 @@
|
||||
|
||||
#include <set>
|
||||
#include "FeatureFunction.h"
|
||||
#include "Util.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "check.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Every FeatureFunction instance; the constructor appends `this` here.
std::vector<FeatureFunction*> FeatureFunction::s_staticColl;

// Next free score index; advanced by Register().
size_t FeatureFunction::s_nextInd = 0;

// Per-feature-type bookkeeping used by CreateName() when generating names.
std::map<std::string, size_t> FeatureFunction::m_nameInd;
|
||||
|
||||
/**
 * Create a feature function from its configuration line.
 *
 * The instance is added to the static collection, the line is parsed, a name
 * is chosen if the line did not supply one, and a block of score indices is
 * reserved. The resulting "name=first-last" index range is written to stderr.
 *
 * \param line configuration line: feature type followed by key=value tokens
 */
FeatureFunction::FeatureFunction(const std::string line)
  : m_numScores(1)
{
  s_staticColl.push_back(this);

  std::string typeName;
  ParseLine(line, typeName);
  CreateName(typeName);
  Register();

  cerr << m_name
       << "=" << m_startInd
       << "-" << (m_startInd+m_numScores-1)
       << endl;
}
|
||||
|
||||
/**
 * Destructor; there is nothing to release.
 *
 * NOTE(review): the constructor stores `this` in s_staticColl and nothing
 * here removes it again. Confirm that instances outlive every use of the
 * static collection.
 */
FeatureFunction::~FeatureFunction()
{
}
|
||||
|
||||
void FeatureFunction::ReadParameters()
|
||||
{
|
||||
while (!m_args.empty()) {
|
||||
const vector<string> &args = m_args[0];
|
||||
SetParameter(args[0], args[1]);
|
||||
|
||||
m_args.erase(m_args.begin());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Base-class handler for one key=value argument: the argument is ignored.
 *
 * \param key   argument name
 * \param value argument value
 */
void FeatureFunction::SetParameter(const std::string& key, const std::string& value)
{
  // intentionally empty
}
|
||||
|
||||
void FeatureFunction::ParseLine(const std::string &line, std::string &featureName)
|
||||
{
|
||||
vector<string> toks;
|
||||
Tokenize(toks, line);
|
||||
|
||||
featureName = toks[0];
|
||||
|
||||
for (size_t i = 1; i < toks.size(); ++i) {
|
||||
vector<string> args = TokenizeFirstOnly(toks[i], "=");
|
||||
CHECK(args.size() == 2);
|
||||
|
||||
if (args[0] == "num-features") {
|
||||
m_numScores = Scan<size_t>(args[1]);
|
||||
} else if (args[0] == "name") {
|
||||
m_name = args[1];
|
||||
} else {
|
||||
m_args.push_back(args);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FeatureFunction::CreateName(const std::string &featureName)
|
||||
{
|
||||
if (m_name.empty()) {
|
||||
std::map<std::string, size_t>::const_iterator iter;
|
||||
iter = m_nameInd.find(featureName);
|
||||
if (iter == m_nameInd.end()) {
|
||||
m_nameInd[featureName] = 0;
|
||||
m_name = featureName + SPrint(0);
|
||||
} else {
|
||||
size_t num = iter->second;
|
||||
m_name = featureName + SPrint(num);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FeatureFunction::Register()
|
||||
{
|
||||
m_startInd = s_nextInd;
|
||||
s_nextInd += m_numScores;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/**
 * Look up a registered feature function by name.
 *
 * \param name name to search for, compared against GetName()
 * \return the first registered feature whose name matches
 * \throws std::string ("Unknown feature " + name) when nothing matches
 */
FeatureFunction &FeatureFunction::FindFeatureFunction(const std::string& name)
{
  typedef std::vector<FeatureFunction*>::const_iterator CollIter;

  for (CollIter it = s_staticColl.begin(); it != s_staticColl.end(); ++it) {
    FeatureFunction *candidate = *it;
    if (candidate->GetName() == name) {
      return *candidate;
    }
  }

  throw "Unknown feature " + name;
}
|
||||
|
||||
void FeatureFunction::Evaluate(const Phrase &source
|
||||
, TargetPhrase &targetPhrase
|
||||
, Scores &estimatedFutureScore)
|
||||
{
|
||||
Scores &scores = targetPhrase.GetScores();
|
||||
for (size_t i = 0; i < s_staticColl.size(); ++i) {
|
||||
FeatureFunction &ff = *s_staticColl[i];
|
||||
ff.Evaluate(source, targetPhrase, scores, estimatedFutureScore);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void FeatureFunction::Initialize(const Sentence &source)
|
||||
{
|
||||
for (size_t i = 0; i < s_staticColl.size(); ++i) {
|
||||
FeatureFunction &ff = *s_staticColl[i];
|
||||
ff.InitializeForInput(source);
|
||||
}
|
||||
}
|
||||
|
||||
void FeatureFunction::CleanUp(const Sentence &source)
|
||||
{
|
||||
for (size_t i = 0; i < s_staticColl.size(); ++i) {
|
||||
FeatureFunction &ff = *s_staticColl[i];
|
||||
ff.CleanUpAfterSentenceProcessing(source);
|
||||
}
|
||||
}
|
@ -1,74 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
class Phrase;
|
||||
class TargetPhrase;
|
||||
class Scores;
|
||||
class Sentence;
|
||||
|
||||
class FeatureFunction
|
||||
{
|
||||
public:
|
||||
static const std::vector<FeatureFunction*>& GetColl() {
|
||||
return s_staticColl;
|
||||
}
|
||||
static FeatureFunction &FindFeatureFunction(const std::string& name);
|
||||
static void Evaluate(const Phrase &source
|
||||
, TargetPhrase &targetPhrase
|
||||
, Scores &estimatedFutureScore);
|
||||
static void Initialize(const Sentence &source);
|
||||
static void CleanUp(const Sentence &source);
|
||||
|
||||
static size_t GetTotalNumScores() {
|
||||
return s_nextInd;
|
||||
}
|
||||
|
||||
FeatureFunction(const std::string line);
|
||||
virtual ~FeatureFunction();
|
||||
|
||||
virtual void Load()
|
||||
{}
|
||||
|
||||
virtual void InitializeForInput(const Sentence &source)
|
||||
{}
|
||||
|
||||
virtual void CleanUpAfterSentenceProcessing(const Sentence &source)
|
||||
{}
|
||||
|
||||
virtual void ReadParameters();
|
||||
virtual void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, Scores &scores
|
||||
, Scores &estimatedFutureScore) const = 0;
|
||||
|
||||
size_t GetStartInd() const {
|
||||
return m_startInd;
|
||||
}
|
||||
size_t GetNumScores() const {
|
||||
return m_numScores;
|
||||
}
|
||||
const std::string &GetName() const {
|
||||
return m_name;
|
||||
}
|
||||
|
||||
protected:
|
||||
static std::vector<FeatureFunction*> s_staticColl;
|
||||
static size_t s_nextInd;
|
||||
static std::map<std::string, size_t> m_nameInd;
|
||||
|
||||
std::vector<std::vector<std::string> > m_args;
|
||||
size_t m_numScores, m_startInd;
|
||||
std::string m_name;
|
||||
|
||||
void ParseLine(const std::string &line, std::string &featureName);
|
||||
void CreateName(const std::string &featureName);
|
||||
void Register();
|
||||
|
||||
};
|
||||
|
@ -1,144 +0,0 @@
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "InternalLM.h"
|
||||
#include "InputFileStream.h"
|
||||
#include "Util.h"
|
||||
#include "MyVocab.h"
|
||||
#include "Phrase.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace FastMoses
|
||||
{
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Returns the child reached through vocabId, inserting a default-constructed
// child first when there is none yet. Never returns NULL.
InternalLMNode *InternalLMNode::GetOrCreateNode(VOCABID vocabId)
{
  // operator[] does the find-or-insert in a single step
  return &m_children[vocabId];
}
|
||||
|
||||
// Read-only lookup of the child reached through vocabId.
// Returns NULL when this node has no such child.
const InternalLMNode *InternalLMNode::Get(VOCABID vocabId) const
{
  const Children::const_iterator found = m_children.find(vocabId);
  return (found != m_children.end()) ? &found->second : NULL;
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// Builds the LM from its configuration line, starts with no "last node"
// recorded, then reads the parameters.
InternalLM::InternalLM(const std::string &line)
  : LM(line)
  , m_lastNode(NULL)
{
  ReadParameters();
}
|
||||
|
||||
void InternalLM::Load()
|
||||
{
|
||||
// 1st, set prob for root
|
||||
m_node.score = 0;
|
||||
m_node.logBackOff = 0;
|
||||
|
||||
Moses::InputFileStream iniStrme(m_path);
|
||||
|
||||
vector<string> toks;
|
||||
size_t lineNum = 0;
|
||||
string line;
|
||||
while (getline(iniStrme, line)) {
|
||||
lineNum++;
|
||||
if (lineNum % 1000000 == 0) {
|
||||
cerr << lineNum << " " << flush;
|
||||
}
|
||||
|
||||
if (line.size() != 0 && line.substr(0,1) != "\\") {
|
||||
toks.clear();
|
||||
Tokenize(toks, line, "\t");
|
||||
|
||||
if (toks.size() >= 2) {
|
||||
// split unigram/bigram trigrams
|
||||
vector<string> wordVec;
|
||||
Tokenize(wordVec, toks[1], " ");
|
||||
|
||||
// create / traverse down tree
|
||||
InternalLMNode *node = &m_node;
|
||||
for (int pos = (int) wordVec.size() - 1 ; pos >= 0 ; pos--) {
|
||||
const string &wordStr = wordVec[pos];
|
||||
VOCABID vocabId = MyVocab::Instance().GetOrCreateId(wordStr);
|
||||
node = node->GetOrCreateNode(vocabId);
|
||||
assert(node);
|
||||
}
|
||||
assert(node);
|
||||
|
||||
SCORE score = TransformSRIScore(Scan<SCORE>(toks[0]));
|
||||
node->score = score;
|
||||
if (toks.size() == 3) {
|
||||
SCORE logBackOff = TransformSRIScore(Scan<SCORE>(toks[2]));
|
||||
node->logBackOff = logBackOff;
|
||||
} else {
|
||||
node->logBackOff = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t InternalLM::GetLastState() const
|
||||
{
|
||||
assert(m_lastNode);
|
||||
size_t ret = (size_t) m_lastNode;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Returns the score stored on the node GetNode() finds for phraseVec, and
// records that node in m_lastNode for a later GetLastState().
SCORE InternalLM::GetValue(const PhraseVec &phraseVec) const
{
  const InternalLMNode &matched = GetNode(phraseVec);
  m_lastNode = &matched;
  assert(m_lastNode);
  return matched.score;
}
|
||||
|
||||
// Walks the trie from the root using the words of phraseVec from last to
// first and returns the deepest node reached. The walk stops at the first
// word that has no child; an empty phraseVec returns the root.
const InternalLMNode &InternalLM::GetNode(const PhraseVec &phraseVec) const
{
  const InternalLMNode *deepest = &m_node;

  for (size_t remaining = phraseVec.size(); remaining > 0; --remaining) {
    const Word &word = *phraseVec[remaining - 1];
    const InternalLMNode *child = deepest->Get(word.GetVocab());
    if (child == NULL) {
      break; // no longer match available, keep what we have
    }
    deepest = child;
  }

  return *deepest;
}
|
||||
|
||||
void InternalLM::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "path") {
|
||||
m_path = value;
|
||||
} else {
|
||||
LM::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -1,41 +0,0 @@
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include "LM.h"
|
||||
|
||||
namespace FastMoses
|
||||
{
|
||||
|
||||
class InternalLMNode
|
||||
{
|
||||
public:
|
||||
typedef boost::unordered_map<VOCABID, InternalLMNode> Children;
|
||||
|
||||
InternalLMNode *GetOrCreateNode(VOCABID vocabId);
|
||||
const InternalLMNode *Get(VOCABID vocabId) const;
|
||||
|
||||
SCORE score, logBackOff;
|
||||
protected:
|
||||
Children m_children;
|
||||
};
|
||||
|
||||
class InternalLM : public LM
|
||||
{
|
||||
public:
|
||||
InternalLM(const std::string &line);
|
||||
void Load();
|
||||
virtual size_t GetLastState() const;
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
protected:
|
||||
InternalLMNode m_node;
|
||||
std::string m_path;
|
||||
const InternalLMNode &GetNode(const PhraseVec &phraseVec) const;
|
||||
|
||||
mutable const InternalLMNode *m_lastNode;
|
||||
|
||||
virtual SCORE GetValue(const PhraseVec &phraseVec) const;
|
||||
};
|
||||
|
||||
}
|
@ -1,164 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include "LM.h"
|
||||
#include "Util.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "MyVocab.h"
|
||||
#include "Search/Hypothesis.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace FastMoses
|
||||
{
|
||||
|
||||
// Forwards the configuration line to the stateful-feature base and prepares
// the sentence boundary words used when scoring hypotheses.
// m_order is not set here; it arrives through SetParameter("order", ...).
LM::LM(const std::string &line)
  : StatefulFeatureFunction(line)
{
  m_bos.CreateFromString("<s>");
  m_eos.CreateFromString("</s>");
}
|
||||
|
||||
void LM::Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, Scores &scores
|
||||
, Scores &estimatedFutureScore) const
|
||||
{
|
||||
SCORE all = 0, ngram = 0;
|
||||
|
||||
PhraseVec phraseVec;
|
||||
phraseVec.reserve(m_order);
|
||||
for (size_t pos = 0; pos < targetPhrase.GetSize(); ++pos) {
|
||||
const Word &word = targetPhrase.GetWord(pos);
|
||||
ShiftOrPush(phraseVec, word);
|
||||
SCORE score = GetValueCache(phraseVec);
|
||||
|
||||
all += score;
|
||||
if (phraseVec.size() == m_order) {
|
||||
ngram += score;
|
||||
}
|
||||
}
|
||||
|
||||
SCORE estimated = all - ngram;
|
||||
scores.Add(*this, ngram);
|
||||
estimatedFutureScore.Add(*this, estimated);
|
||||
}
|
||||
|
||||
size_t LM::Evaluate(
|
||||
const Hypothesis& hypo,
|
||||
size_t prevState,
|
||||
Scores &scores) const
|
||||
{
|
||||
if (m_order <= 1) {
|
||||
return 0; // not sure if returning NULL is correct
|
||||
}
|
||||
|
||||
if (hypo.targetPhrase.GetSize() == 0) {
|
||||
return 0; // not sure if returning NULL is correct
|
||||
}
|
||||
|
||||
PhraseVec m_phraseVec(m_order);
|
||||
|
||||
const size_t currEndPos = hypo.targetRange.endPos;
|
||||
const size_t startPos = hypo.targetRange.startPos;
|
||||
|
||||
size_t index = 0;
|
||||
for (int currPos = (int) startPos - (int) m_order + 1 ; currPos <= (int) startPos ; currPos++) {
|
||||
if (currPos >= 0)
|
||||
m_phraseVec[index++] = &hypo.GetWord(currPos);
|
||||
else {
|
||||
m_phraseVec[index++] = &m_bos;
|
||||
}
|
||||
}
|
||||
|
||||
SCORE lmScore = GetValueCache(m_phraseVec);
|
||||
|
||||
// main loop
|
||||
size_t endPos = std::min(startPos + m_order - 2
|
||||
, currEndPos);
|
||||
for (size_t currPos = startPos + 1 ; currPos <= endPos ; currPos++) {
|
||||
// shift all args down 1 place
|
||||
for (size_t i = 0 ; i < m_order - 1 ; i++)
|
||||
m_phraseVec[i] = m_phraseVec[i + 1];
|
||||
|
||||
// add last factor
|
||||
m_phraseVec.back() = &hypo.GetWord(currPos);
|
||||
|
||||
lmScore += GetValueCache(m_phraseVec);
|
||||
}
|
||||
|
||||
// end of sentence
|
||||
if (hypo.GetCoverage().IsComplete()) {
|
||||
const size_t size = hypo.GetSize();
|
||||
m_phraseVec.back() = &m_eos;
|
||||
|
||||
for (size_t i = 0 ; i < m_order - 1 ; i ++) {
|
||||
int currPos = (int)(size - m_order + i + 1);
|
||||
if (currPos < 0)
|
||||
m_phraseVec[i] = &m_bos;
|
||||
else
|
||||
m_phraseVec[i] = &hypo.GetWord((size_t)currPos);
|
||||
}
|
||||
lmScore += GetValueCache(m_phraseVec);
|
||||
} else {
|
||||
if (endPos < currEndPos) {
|
||||
//need to get the LM state (otherwise the last LM state is fine)
|
||||
for (size_t currPos = endPos+1; currPos <= currEndPos; currPos++) {
|
||||
for (size_t i = 0 ; i < m_order - 1 ; i++)
|
||||
m_phraseVec[i] = m_phraseVec[i + 1];
|
||||
m_phraseVec.back() = &hypo.GetWord(currPos);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t state = GetLastState();
|
||||
return state;
|
||||
}
|
||||
|
||||
// Returns the LM score of phraseVec by asking GetValue() directly.
//
// The previous body returned at this point and was followed by an
// unreachable lookup in m_cache keyed on a hash of the vocab ids; that dead
// code has been removed. Before re-enabling any caching note that
// GetLastState() relies on GetValue() having run for the latest query, which
// a cache hit would skip.
SCORE LM::GetValueCache(const PhraseVec &phraseVec) const
{
  return GetValue(phraseVec);
}
|
||||
|
||||
// Appends `word` to the n-gram window. While the window holds fewer than
// m_order entries it simply grows; otherwise the first m_order entries are
// moved down one place (dropping the oldest) and `word` goes in the last of
// them.
void LM::ShiftOrPush(PhraseVec &phraseVec, const Word &word) const
{
  if (phraseVec.size() < m_order) {
    phraseVec.push_back(&word);
    return;
  }

  // shift
  const size_t lastSlot = m_order - 1;
  for (size_t slot = 0; slot < lastSlot; ++slot) {
    phraseVec[slot] = phraseVec[slot + 1];
  }
  phraseVec[lastSlot] = &word;
}
|
||||
|
||||
void LM::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "order") {
|
||||
m_order = Scan<size_t>(value);
|
||||
} else {
|
||||
StatefulFeatureFunction::SetParameter(key, value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,43 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include "FF/StatefulFeatureFunction.h"
|
||||
#include "TypeDef.h"
|
||||
#include "Phrase.h"
|
||||
|
||||
|
||||
namespace FastMoses
|
||||
{
|
||||
|
||||
class LM : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
LM(const std::string &line);
|
||||
|
||||
virtual size_t GetLastState() const = 0;
|
||||
|
||||
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, Scores &scores
|
||||
, Scores &estimatedFutureScore) const;
|
||||
|
||||
virtual size_t Evaluate(
|
||||
const Hypothesis& hypo,
|
||||
size_t prevState,
|
||||
Scores &scores) const;
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
protected:
|
||||
size_t m_order;
|
||||
Word m_bos, m_eos;
|
||||
|
||||
typedef boost::unordered_map<size_t, SCORE> Cache;
|
||||
mutable Cache m_cache;
|
||||
|
||||
virtual SCORE GetValue(const PhraseVec &phraseVec) const = 0;
|
||||
SCORE GetValueCache(const PhraseVec &phraseVec) const;
|
||||
void ShiftOrPush(PhraseVec &phraseVec, const Word &word) const;
|
||||
};
|
||||
|
||||
}
|
@ -1,139 +0,0 @@
|
||||
|
||||
#include <Ngram.h>
|
||||
#include <Vocab.h>
|
||||
#include "SRILM.h"
|
||||
#include "MyVocab.h"
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define MAX_NGRAM_SIZE 10
|
||||
|
||||
namespace FastMoses
|
||||
{
|
||||
|
||||
// Forwards the configuration line to LM and reads the parameters.
// The SRILM objects themselves are only created in Load().
SRILM::SRILM(const string &line)
  : LM(line)
{
  ReadParameters();
}
|
||||
|
||||
void SRILM::Load()
|
||||
{
|
||||
m_srilmVocab = new Vocab();
|
||||
m_srilmModel = new Ngram(*m_srilmVocab, m_order);
|
||||
|
||||
m_srilmModel->skipOOVs() = false;
|
||||
|
||||
File file(m_path.c_str(), "r" );
|
||||
m_srilmModel->read(file);
|
||||
|
||||
CreateVocab();
|
||||
m_unknownId = m_srilmVocab->unkIndex();
|
||||
|
||||
}
|
||||
|
||||
void SRILM::CreateVocab()
|
||||
{
|
||||
MyVocab &factorCollection = MyVocab::Instance();
|
||||
|
||||
std::map<size_t, VocabIndex> lmIdMap;
|
||||
size_t maxFactorId = 0; // to create lookup vector later on
|
||||
|
||||
VocabString str;
|
||||
VocabIter iter(*m_srilmVocab);
|
||||
while ( (str = iter.next()) != NULL) {
|
||||
VocabIndex lmId = GetLmID(str);
|
||||
VOCABID factorId = factorCollection.GetOrCreateId(str);
|
||||
lmIdMap[factorId] = lmId;
|
||||
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
|
||||
}
|
||||
|
||||
VOCABID factorId;
|
||||
factorId = factorCollection.GetOrCreateId("<s>");
|
||||
lmIdMap[factorId] = GetLmID("<s>");
|
||||
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
|
||||
|
||||
factorId = factorCollection.GetOrCreateId("</s>");
|
||||
lmIdMap[factorId] = GetLmID("</s>");
|
||||
maxFactorId = (factorId > maxFactorId) ? factorId : maxFactorId;
|
||||
|
||||
// add to lookup vector in object
|
||||
m_lmIdLookup.resize(maxFactorId+1);
|
||||
|
||||
fill(m_lmIdLookup.begin(), m_lmIdLookup.end(), m_unknownId);
|
||||
|
||||
map<size_t, VocabIndex>::iterator iterMap;
|
||||
for (iterMap = lmIdMap.begin() ; iterMap != lmIdMap.end() ; ++iterMap) {
|
||||
m_lmIdLookup[iterMap->first] = iterMap->second;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// SRILM id of the word `str`; m_unknownId is passed to the SRILM vocabulary
// as the value to return for unknown words.
VocabIndex SRILM::GetLmID( const std::string &str ) const
{
  const char *word = str.c_str();
  return m_srilmVocab->getIndex(word, m_unknownId);
}
|
||||
|
||||
// SRILM id for a MyVocab id, read from m_lmIdLookup. Ids beyond the end of
// the table map to m_unknownId.
VocabIndex SRILM::GetLmID(VOCABID vocabId) const
{
  if (vocabId >= m_lmIdLookup.size()) {
    return m_unknownId;
  }
  return m_lmIdLookup[vocabId];
}
|
||||
|
||||
|
||||
// Scores the last word of phraseVec given the words before it and records
// the SRILM context id of the whole n-gram in m_lastState.
// Returns 0 (leaving m_lastState untouched) for an empty phraseVec.
// phraseVec must not hold more than MAX_NGRAM_SIZE words.
//
// Buffer layout (SRILM wants the most recent word first, ended by
// Vocab_None):
//   ngram[0]          the predicted word (filled in after scoring)
//   ngram[1..count-1] the history, most recent first
//   ngram[count]      Vocab_None
// so `ngram + 1` is the context for wordProb() and `ngram` is the context
// for contextID(). The previous version shifted the history up in place,
// which overwrote the Vocab_None terminator and let contextID() read past
// the initialised part of the array.
SCORE SRILM::GetValue(const PhraseVec &phraseVec) const
{
  const size_t count = phraseVec.size();
  if (count == 0) {
    return 0;
  }
  assert(count <= MAX_NGRAM_SIZE);

  VocabIndex ngram[MAX_NGRAM_SIZE + 1];
  for (size_t i = 0 ; i + 1 < count ; i++) {
    const Word &word = *phraseVec[count-2-i];
    ngram[i + 1] = GetLmID(word.GetVocab());
  }
  ngram[count] = Vocab_None;

  assert(phraseVec[count-1] != NULL);
  // call sri lm fn
  const VocabIndex lmId = GetLmID(phraseVec[count-1]->GetVocab());
  const float ret = GetValue(lmId, ngram + 1);

  // state = context id of the full n-gram including the predicted word
  ngram[0] = lmId;
  unsigned len;
  m_lastState = m_srilmModel->contextID(ngram, len);

  return ret;
}
|
||||
|
||||
// Asks SRILM for the probability of wordId in the given context and passes
// it through TransformSRIScore and then FloorScore.
float SRILM::GetValue(VocabIndex wordId, VocabIndex *context) const
{
  const float sriProb = m_srilmModel->wordProb( wordId, context );
  const float transformed = TransformSRIScore(sriProb); // log10->log
  return FloorScore(transformed);
}
|
||||
|
||||
|
||||
size_t SRILM::GetLastState() const
|
||||
{
|
||||
return (size_t)m_lastState;
|
||||
}
|
||||
|
||||
void SRILM::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "path") {
|
||||
m_path = value;
|
||||
} else {
|
||||
LM::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,38 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "LM.h"
|
||||
#include <Vocab.h>
|
||||
|
||||
class Ngram;
|
||||
|
||||
namespace FastMoses
|
||||
{
|
||||
|
||||
class SRILM : public LM
|
||||
{
|
||||
public:
|
||||
SRILM(const std::string &line);
|
||||
void Load();
|
||||
|
||||
virtual size_t GetLastState() const;
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
protected:
|
||||
Vocab *m_srilmVocab;
|
||||
Ngram *m_srilmModel;
|
||||
std::string m_path;
|
||||
VocabIndex m_unknownId;
|
||||
std::vector<VocabIndex> m_lmIdLookup;
|
||||
mutable void *m_lastState;
|
||||
|
||||
void CreateVocab();
|
||||
VocabIndex GetLmID( const std::string &str ) const;
|
||||
VocabIndex GetLmID(VOCABID vocabId) const;
|
||||
float GetValue(VocabIndex wordId, VocabIndex *context) const;
|
||||
|
||||
virtual SCORE GetValue(const PhraseVec &phraseVec) const;
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -1,23 +0,0 @@
|
||||
|
||||
#include "PhrasePenalty.h"
|
||||
#include "Scores.h"
|
||||
|
||||
// Only forwards the configuration line to the stateless-feature base.
// NOTE(review): unlike the other features in this code base this constructor
// does not call ReadParameters() - confirm that is intended.
PhrasePenalty::PhrasePenalty(const std::string &line)
  : StatelessFeatureFunction(line)
{
}
|
||||
|
||||
// Empty: this destructor performs no work of its own.
PhrasePenalty::~PhrasePenalty()
{
}
|
||||
|
||||
void PhrasePenalty::Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, Scores &scores
|
||||
, Scores &estimatedFutureScore) const
|
||||
{
|
||||
scores.Add(*this, 1);
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "StatelessFeatureFunction.h"
|
||||
|
||||
class PhrasePenalty : public StatelessFeatureFunction
|
||||
{
|
||||
public:
|
||||
PhrasePenalty(const std::string &line);
|
||||
virtual ~PhrasePenalty();
|
||||
|
||||
void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, Scores &scores
|
||||
, Scores &estimatedFutureScore) const;
|
||||
|
||||
};
|
||||
|
@ -1,42 +0,0 @@
|
||||
#include <cassert>
|
||||
#include "StatefulFeatureFunction.h"
|
||||
#include "Search/Hypothesis.h"
|
||||
|
||||
std::vector<StatefulFeatureFunction*> StatefulFeatureFunction::s_staticColl;
|
||||
|
||||
StatefulFeatureFunction::StatefulFeatureFunction(const std::string line)
|
||||
:FeatureFunction(line)
|
||||
{
|
||||
s_staticColl.push_back(this);
|
||||
}
|
||||
|
||||
// Empty. Note that the object is not removed from s_staticColl here.
StatefulFeatureFunction::~StatefulFeatureFunction()
{
}
|
||||
|
||||
///////////////////////////////////////////////////
|
||||
|
||||
// Runs every stateful feature on `hypo`. Feature number i receives the
// state number i of the previous hypothesis, and the state it returns is
// stored as state number i of `hypo`. Scores go into hypo's own Scores.
// The returned state is asserted to be non-zero.
void StatefulFeatureFunction::Evaluate(Hypothesis& hypo)
{
  const Hypothesis &previous = *hypo.GetPrevHypo();
  Scores &hypoScores = hypo.GetScores();

  for (size_t ffInd = 0; ffInd != s_staticColl.size(); ++ffInd) {
    const StatefulFeatureFunction *feature = s_staticColl[ffInd];

    const size_t stateBefore = previous.GetState(ffInd);
    const size_t stateAfter = feature->Evaluate(hypo, stateBefore, hypoScores);
    assert(stateAfter);

    hypo.SetState(ffInd, stateAfter);
  }
}
|
||||
|
||||
void StatefulFeatureFunction::EvaluateEmptyHypo(const Sentence &input, Hypothesis& hypo)
|
||||
{
|
||||
for (size_t i = 0; i < s_staticColl.size(); ++i) {
|
||||
const StatefulFeatureFunction &ff = *s_staticColl[i];
|
||||
size_t ffState = ff.EmptyHypo(input, hypo);
|
||||
hypo.SetState(i, ffState);
|
||||
}
|
||||
}
|
||||
|
@ -1,36 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "FeatureFunction.h"
|
||||
|
||||
class FFState;
|
||||
class Hypothesis;
|
||||
class Scores;
|
||||
|
||||
class StatefulFeatureFunction : public FeatureFunction
|
||||
{
|
||||
public:
|
||||
static const std::vector<StatefulFeatureFunction*>& GetColl() {
|
||||
return s_staticColl;
|
||||
}
|
||||
static void Evaluate(Hypothesis& hypo);
|
||||
static void EvaluateEmptyHypo(const Sentence &input, Hypothesis& hypo);
|
||||
|
||||
StatefulFeatureFunction(const std::string line);
|
||||
virtual ~StatefulFeatureFunction();
|
||||
|
||||
virtual size_t Evaluate(
|
||||
const Hypothesis& hypo,
|
||||
size_t prevState,
|
||||
Scores &scores) const = 0;
|
||||
virtual size_t EmptyHypo(
|
||||
const Sentence &input,
|
||||
Hypothesis& hypo) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
protected:
|
||||
static std::vector<StatefulFeatureFunction*> s_staticColl;
|
||||
|
||||
};
|
||||
|
@ -1,16 +0,0 @@
|
||||
|
||||
#include "StatelessFeatureFunction.h"
|
||||
|
||||
std::vector<StatelessFeatureFunction*> StatelessFeatureFunction::s_staticColl;
|
||||
|
||||
StatelessFeatureFunction::StatelessFeatureFunction(const std::string line)
|
||||
:FeatureFunction(line)
|
||||
{
|
||||
s_staticColl.push_back(this);
|
||||
}
|
||||
|
||||
// Empty. Note that the object is not removed from s_staticColl here.
StatelessFeatureFunction::~StatelessFeatureFunction()
{
}
|
||||
|
@ -1,20 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "FeatureFunction.h"
|
||||
|
||||
class StatelessFeatureFunction : public FeatureFunction
|
||||
{
|
||||
public:
|
||||
static const std::vector<StatelessFeatureFunction*>& GetColl() {
|
||||
return s_staticColl;
|
||||
}
|
||||
|
||||
StatelessFeatureFunction(const std::string line);
|
||||
virtual ~StatelessFeatureFunction();
|
||||
|
||||
protected:
|
||||
static std::vector<StatelessFeatureFunction*> s_staticColl;
|
||||
|
||||
};
|
||||
|
@ -1,43 +0,0 @@
|
||||
|
||||
#include "Node.h"
|
||||
#include "Phrase.h"
|
||||
|
||||
// Empty: the members are default-constructed.
Node::Node()
{
}
|
||||
|
||||
// Empty: this destructor performs no work of its own.
Node::~Node()
{
}
|
||||
|
||||
// Follows (creating where missing) the chain of children spelled by the
// words of `source` from position `pos` to its end, starting at this node,
// and returns the node reached. With pos == source.GetSize() that is this
// node itself.
Node &Node::GetOrCreate(const Phrase &source, size_t pos)
{
  Node *cursor = this;
  for (size_t wordPos = pos; wordPos != source.GetSize(); ++wordPos) {
    const Word &word = source.GetWord(wordPos);
    cursor = &cursor->m_children[word]; // find-or-insert
  }
  return *cursor;
}
|
||||
|
||||
// Read-only lookup of the child reached through `word`.
// Returns NULL when there is no such child.
const Node *Node::Get(const Word &word) const
{
  const Children::const_iterator found = m_children.find(word);
  return (found == m_children.end()) ? NULL : &found->second;
}
|
||||
|
||||
// Adds `target` to the target phrase collection kept at this node.
void Node::AddTarget(TargetPhrase *target)
{
  TargetPhrases &collection = m_tpColl;
  collection.Add(target);
}
|
@ -1,32 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include "Word.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "TargetPhrases.h"
|
||||
|
||||
class Phrase;
|
||||
|
||||
class Node
|
||||
{
|
||||
public:
|
||||
typedef boost::unordered_map<Word, Node, WordHasher> Children;
|
||||
|
||||
Node();
|
||||
virtual ~Node();
|
||||
|
||||
Node &GetOrCreate(const Phrase &source, size_t pos);
|
||||
const Node *Get(const Word &word) const;
|
||||
|
||||
void AddTarget(TargetPhrase *target);
|
||||
const TargetPhrases &GetTargetPhrases() const {
|
||||
return m_tpColl;
|
||||
}
|
||||
|
||||
protected:
|
||||
Children m_children;
|
||||
TargetPhrases m_tpColl;
|
||||
};
|
||||
|
||||
|
@ -1,26 +0,0 @@
|
||||
/*
|
||||
* PhraseTable.cpp
|
||||
*
|
||||
* Created on: 5 Oct 2013
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "PhraseTable.h"
|
||||
#include "InputPath.h"
|
||||
|
||||
std::vector<PhraseTable*> PhraseTable::s_staticColl;
|
||||
size_t PhraseTable::s_ptId = 0;
|
||||
|
||||
PhraseTable::PhraseTable(const std::string line)
|
||||
:StatelessFeatureFunction(line)
|
||||
,m_ptId(s_ptId++)
|
||||
{
|
||||
s_staticColl.push_back(this);
|
||||
|
||||
}
|
||||
|
||||
// Empty. Note that the object is not removed from s_staticColl here.
PhraseTable::~PhraseTable()
{
}
|
||||
|
@ -1,32 +0,0 @@
|
||||
/*
|
||||
* PhraseTable.h
|
||||
*
|
||||
* Created on: 5 Oct 2013
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "FF/StatelessFeatureFunction.h"
|
||||
|
||||
class InputPath;
|
||||
|
||||
class PhraseTable :public StatelessFeatureFunction
|
||||
{
|
||||
public:
|
||||
static const std::vector<PhraseTable*>& GetColl() {
|
||||
return s_staticColl;
|
||||
}
|
||||
|
||||
PhraseTable(const std::string line);
|
||||
virtual ~PhraseTable();
|
||||
|
||||
virtual void Lookup(const std::vector<InputPath*> &inputPathQueue) = 0;
|
||||
protected:
|
||||
static std::vector<PhraseTable*> s_staticColl;
|
||||
static size_t s_ptId;
|
||||
|
||||
size_t m_ptId;
|
||||
|
||||
};
|
||||
|
@ -1,110 +0,0 @@
|
||||
/*
|
||||
* PhraseTableMemory.cpp
|
||||
*
|
||||
* Created on: 5 Oct 2013
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "PhraseTableMemory.h"
|
||||
#include "InputFileStream.h"
|
||||
#include "Util.h"
|
||||
#include "Phrase.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "InputPath.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
// Forwards the configuration line to PhraseTable, defaults the table limit
// to 20 and then reads the parameters (which may override it).
PhraseTableMemory::PhraseTableMemory(const std::string &line)
  : PhraseTable(line)
  , m_tableLimit(20)
{
  ReadParameters();
}
|
||||
|
||||
// Empty: this destructor performs no work of its own.
PhraseTableMemory::~PhraseTableMemory()
{
}
|
||||
|
||||
void PhraseTableMemory::Load()
|
||||
{
|
||||
Scores *estimatedFutureScore = new Scores();
|
||||
|
||||
Moses::InputFileStream iniStrme(m_path);
|
||||
|
||||
vector<string> toks;
|
||||
size_t lineNum = 0;
|
||||
string line;
|
||||
while (getline(iniStrme, line)) {
|
||||
if (lineNum % 10000 == 0) {
|
||||
cerr << lineNum << " " << flush;
|
||||
}
|
||||
toks.clear();
|
||||
TokenizeMultiCharSeparator(toks, line, "|||");
|
||||
|
||||
Phrase *source = Phrase::CreateFromString(toks[0]);
|
||||
TargetPhrase *target = TargetPhrase::CreateFromString(*this, toks[1], toks[2], true);
|
||||
FeatureFunction::Evaluate(*source, *target, *estimatedFutureScore);
|
||||
|
||||
//cerr << target->Debug() << endl;
|
||||
|
||||
Node &node = m_root.GetOrCreate(*source, 0);
|
||||
node.AddTarget(target);
|
||||
|
||||
++lineNum;
|
||||
}
|
||||
}
|
||||
|
||||
void PhraseTableMemory::SetParameter(const std::string& key, const std::string& value)
|
||||
{
|
||||
if (key == "path") {
|
||||
m_path = value;
|
||||
} else if (key == "table-limit") {
|
||||
m_tableLimit = Scan<size_t>(value);
|
||||
} else {
|
||||
PhraseTable::SetParameter(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
// Looks up every input path in the trie, one word at a time: a path starts
// from the node stored by its previous path (or from the root when it has
// none) and follows only its own LAST word. The node reached and its target
// phrases are stored in the path's lookup slot for this table; (NULL, NULL)
// is stored when the previous lookup already failed or the word is missing.
void PhraseTableMemory::Lookup(const std::vector<InputPath*> &inputPathQueue)
{
  for (size_t pathInd = 0; pathInd < inputPathQueue.size(); ++pathInd) {
    InputPath &path = *inputPathQueue[pathInd];
    const InputPath *prevPath = path.GetPrevPath();

    //cerr << path.GetPhrase().Debug() << endl;

    // which node to start the lookup:
    // the node of the previous lookup (may be NULL --> don't lookup any
    // further), or the root for a 1st lookup
    const Node *node = prevPath
                       ? (const Node *) prevPath->GetPtLookup(m_ptId).ptNode
                       : &m_root;

    // where to store the info for this lookup
    PhraseTableLookup &ptLookup = path.GetPtLookup(m_ptId);

    if (node) {
      // lookup the LAST word only
      const Word &lastWord = path.GetPhrase().Back();
      node = node->Get(lastWord);
    }

    if (node == NULL) {
      ptLookup.Set(NULL, NULL);
      continue;
    }

    // found something
    const TargetPhrases &tpColl = node->GetTargetPhrases();
    ptLookup.Set(&tpColl, node);
  }
}
|
@ -1,37 +0,0 @@
|
||||
/*
|
||||
* PhraseTableMemory.h
|
||||
*
|
||||
* Created on: 5 Oct 2013
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "PhraseTable.h"
|
||||
#include "Memory/Node.h"
|
||||
|
||||
class PhraseTableMemory: public PhraseTable
|
||||
{
|
||||
public:
|
||||
PhraseTableMemory(const std::string &line);
|
||||
virtual ~PhraseTableMemory();
|
||||
|
||||
void Load();
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, Scores &scores
|
||||
, Scores &estimatedFutureScore) const {
|
||||
}
|
||||
|
||||
void Lookup(const std::vector<InputPath*> &inputPathQueue);
|
||||
protected:
|
||||
std::string m_path;
|
||||
size_t m_tableLimit;
|
||||
|
||||
Node m_root;
|
||||
|
||||
};
|
||||
|
@ -1,70 +0,0 @@
|
||||
/*
|
||||
* UnknownWordPenalty.cpp
|
||||
*
|
||||
* Created on: 5 Oct 2013
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "UnknownWordPenalty.h"
|
||||
#include "InputPath.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "TargetPhrases.h"
|
||||
#include "WordsRange.h"
|
||||
#include "Util.h"
|
||||
#include "TypeDef.h"
|
||||
#include "FF/FeatureFunction.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Forwards the configuration line to PhraseTable and reads the parameters.
UnknownWordPenalty::UnknownWordPenalty(const std::string line)
  : PhraseTable(line)
{
  ReadParameters();
}
|
||||
|
||||
// Empty: this destructor performs no work of its own.
UnknownWordPenalty::~UnknownWordPenalty()
{
}
|
||||
|
||||
// Provides a fallback translation for every single-word input path: the
// source word wrapped as "UNK:<word>:UNK", carrying LOWEST_SCORE for this
// feature plus the scores of all other feature functions. Paths of any other
// length get an empty lookup result (NULL, NULL).
// Each collection created here is remembered in m_targetPhrases.
//
// The scratch Scores object is now released before returning; it used to be
// leaked on every call.
void UnknownWordPenalty::Lookup(const std::vector<InputPath*> &inputPathQueue)
{
  // receives the estimated future scores; the values are not used afterwards
  Scores *estimatedFutureScore = new Scores();

  for (size_t i = 0; i < inputPathQueue.size(); ++i) {
    InputPath &path = *inputPathQueue[i];
    PhraseTableLookup &ptLookup = path.GetPtLookup(m_ptId);

    const Phrase &source = path.GetPhrase();
    if (source.GetSize() != 1) {
      ptLookup.Set(NULL, NULL);
      continue;
    }

    const Word &sourceWord = source.GetWord(0);
    string str = sourceWord.ToString();
    str = "UNK:" + str + ":UNK";

    Word targetWord;
    targetWord.CreateFromString(str);

    TargetPhrase *tp = new TargetPhrase(1);
    tp->Set(0, targetWord);
    tp->GetScores().Add(*this, LOWEST_SCORE);

    FeatureFunction::Evaluate(source, *tp, *estimatedFutureScore);

    TargetPhrases *tpColl = new TargetPhrases();
    m_targetPhrases.push_back(tpColl);
    tpColl->Add(tp);

    ptLookup.Set(tpColl, NULL);
  }

  delete estimatedFutureScore;
}
|
||||
|
||||
void UnknownWordPenalty::CleanUpAfterSentenceProcessing(const Sentence &source)
|
||||
{
|
||||
m_targetPhrases.clear();
|
||||
}
|
||||
|
@ -1,32 +0,0 @@
|
||||
/*
|
||||
* UnknownWordPenalty.h
|
||||
*
|
||||
* Created on: 5 Oct 2013
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "PhraseTable.h"
|
||||
|
||||
class TargetPhrases;
|
||||
|
||||
class UnknownWordPenalty: public PhraseTable
|
||||
{
|
||||
public:
|
||||
UnknownWordPenalty(const std::string line);
|
||||
virtual ~UnknownWordPenalty();
|
||||
|
||||
void CleanUpAfterSentenceProcessing(const Sentence &source);
|
||||
|
||||
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, Scores &scores
|
||||
, Scores &estimatedFutureScore) const {
|
||||
}
|
||||
|
||||
void Lookup(const std::vector<InputPath*> &inputPathQueue);
|
||||
|
||||
protected:
|
||||
std::vector<TargetPhrases*> m_targetPhrases;
|
||||
};
|
||||
|
@ -1,17 +0,0 @@
|
||||
#include "WordPenaltyProducer.h"
|
||||
#include "TargetPhrase.h"
|
||||
|
||||
// Passes the configuration line to StatelessFeatureFunction, then calls
// ReadParameters().
WordPenaltyProducer::WordPenaltyProducer(const std::string &line)
  : StatelessFeatureFunction(line)
{
  ReadParameters();
}
|
||||
|
||||
void WordPenaltyProducer::Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, Scores &scores
|
||||
, Scores &estimatedFutureScore) const
|
||||
{
|
||||
SCORE numWords = - (SCORE) targetPhrase.GetSize();
|
||||
scores.Add(*this, numWords);
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "StatelessFeatureFunction.h"
|
||||
|
||||
|
||||
class WordPenaltyProducer : public StatelessFeatureFunction
|
||||
{
|
||||
public:
|
||||
WordPenaltyProducer(const std::string &line);
|
||||
|
||||
virtual void Evaluate(const Phrase &source
|
||||
, const TargetPhrase &targetPhrase
|
||||
, Scores &scores
|
||||
, Scores &estimatedFutureScore) const;
|
||||
|
||||
|
||||
};
|
||||
|
||||
|
@ -1,172 +0,0 @@
|
||||
|
||||
#include <iostream>
|
||||
#include "Global.h"
|
||||
#include "InputFileStream.h"
|
||||
#include "Util.h"
|
||||
#include "check.h"
|
||||
|
||||
#include "FF/FeatureFunction.h"
|
||||
#include "FF/DistortionScoreProducer.h"
|
||||
#include "FF/WordPenaltyProducer.h"
|
||||
#include "FF/PhrasePenalty.h"
|
||||
#include "FF/TranslationModel/PhraseTableMemory.h"
|
||||
#include "FF/TranslationModel/UnknownWordPenalty.h"
|
||||
#include "FF/LM/InternalLM.h"
|
||||
#include "FF/LM/SRILM.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Definition of the static Global object declared in the class.
Global Global::s_instance;
|
||||
|
||||
// Gives every plain member a defined value. The numeric defaults are the same
// fallbacks InitParams() applies when the ini file has no matching section.
Global::Global()
  : stackSize(200)
  , maxDistortion(6)
  , m_inputStrme(NULL)
{
}

// Releases the input stream when Init() allocated one. When input comes from
// standard input, m_inputStrme points at std::cin and must not be deleted.
Global::~Global()
{
  if (m_inputStrme != &cin) {
    delete m_inputStrme;
  }
  m_inputStrme = NULL;
}
|
||||
|
||||
void Global::Init(int argc, char** argv)
|
||||
{
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
string arg = argv[i];
|
||||
if (arg == "-f") {
|
||||
m_iniPath = argv[++i];
|
||||
} else if (arg == "-i") {
|
||||
m_inputPath = argv[++i];
|
||||
}
|
||||
}
|
||||
|
||||
// input file
|
||||
if (m_inputPath.empty()) {
|
||||
m_inputStrme = &cin;
|
||||
} else {
|
||||
m_inputStrme = new Moses::InputFileStream(m_inputPath);
|
||||
}
|
||||
|
||||
// read ini file
|
||||
Moses::InputFileStream iniStrme(m_iniPath);
|
||||
|
||||
ParamList *paramList = NULL;
|
||||
string line;
|
||||
while (getline(iniStrme, line)) {
|
||||
line = Trim(line);
|
||||
if (line.find("[") == 0) {
|
||||
paramList = &m_params[line];
|
||||
} else if (line.find("#") == 0 || line.empty()) {
|
||||
// do nothing
|
||||
} else {
|
||||
paramList->push_back(line);
|
||||
}
|
||||
}
|
||||
|
||||
timer.check("InitParams");
|
||||
InitParams();
|
||||
timer.check("InitFF");
|
||||
InitFF();
|
||||
timer.check("InitWeight");
|
||||
InitWeight();
|
||||
timer.check("Start Load");
|
||||
Load();
|
||||
timer.check("Finished Load");
|
||||
|
||||
}
|
||||
|
||||
bool Global::ParamExist(const std::string &key) const
|
||||
{
|
||||
Params::const_iterator iter;
|
||||
iter = m_params.find(key);
|
||||
bool ret = (iter != m_params.end());
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Global::InitParams()
|
||||
{
|
||||
if (ParamExist("[stack]")) {
|
||||
stackSize = Scan<size_t>(m_params["[stack]"][0]);
|
||||
} else {
|
||||
stackSize = 200;
|
||||
}
|
||||
|
||||
if (ParamExist("[distortion-limit]")) {
|
||||
maxDistortion = Scan<int>(m_params["[distortion-limit]"][0]);
|
||||
} else {
|
||||
maxDistortion = 6;
|
||||
}
|
||||
}
|
||||
|
||||
void Global::InitFF()
|
||||
{
|
||||
ParamList &list = m_params["[feature]"];
|
||||
|
||||
for (size_t i = 0; i < list.size(); ++i) {
|
||||
string &line = list[i];
|
||||
cerr << "line=" << line << endl;
|
||||
|
||||
FeatureFunction *ff = NULL;
|
||||
if (line.find("Distortion") == 0) {
|
||||
ff = new DistortionScoreProducer(line);
|
||||
} else if (line.find("WordPenalty") == 0) {
|
||||
ff = new WordPenaltyProducer(line);
|
||||
} else if (line.find("PhraseDictionaryMemory") == 0) {
|
||||
ff = new PhraseTableMemory(line);
|
||||
} else if (line.find("UnknownWordPenalty") == 0) {
|
||||
ff = new UnknownWordPenalty(line);
|
||||
} else if (line.find("PhrasePenalty") == 0) {
|
||||
ff = new PhrasePenalty(line);
|
||||
} else if (line.find("InternalLM") == 0) {
|
||||
ff = new FastMoses::InternalLM(line);
|
||||
} else if (line.find("SRILM") == 0) {
|
||||
ff = new FastMoses::SRILM(line);
|
||||
} else {
|
||||
cerr << "Unknown FF " << line << endl;
|
||||
abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Global::InitWeight()
|
||||
{
|
||||
weights.SetNumScores(FeatureFunction::GetTotalNumScores());
|
||||
ParamList &list = m_params["[weight]"];
|
||||
|
||||
for (size_t i = 0; i < list.size(); ++i) {
|
||||
string &line = list[i];
|
||||
cerr << "line=" << line << endl;
|
||||
|
||||
vector<string> toks = TokenizeFirstOnly(line, "=");
|
||||
CHECK(toks.size() == 2);
|
||||
|
||||
FeatureFunction &ff = FeatureFunction::FindFeatureFunction(toks[0]);
|
||||
|
||||
vector<SCORE> featureWeights;
|
||||
Tokenize<SCORE>(featureWeights, toks[1]);
|
||||
CHECK(ff.GetNumScores() == featureWeights.size());
|
||||
weights.SetWeights(ff, featureWeights);
|
||||
}
|
||||
}
|
||||
|
||||
void Global::Load()
|
||||
{
|
||||
std::vector<PhraseTable*> pts;
|
||||
|
||||
cerr << "Loading" << endl;
|
||||
const std::vector<FeatureFunction*> &ffs = FeatureFunction::GetColl();
|
||||
for (size_t i = 0; i < ffs.size(); ++i) {
|
||||
FeatureFunction *ff = ffs[i];
|
||||
PhraseTable *pt = dynamic_cast<PhraseTable*>(ff);
|
||||
if (pt) {
|
||||
// load pt after other ff
|
||||
pts.push_back(pt);
|
||||
} else {
|
||||
cerr << ff->GetName() << endl;
|
||||
ff->Load();
|
||||
}
|
||||
}
|
||||
|
||||
// load pt
|
||||
for (size_t i = 0; i < pts.size(); ++i) {
|
||||
cerr << pts[i]->GetName() << endl;
|
||||
pts[i]->Load();
|
||||
}
|
||||
}
|
@ -1,59 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <fstream>
|
||||
#include "Weights.h"
|
||||
#include "Timer.h"
|
||||
|
||||
// Forward declaration only; the full class is defined in its own header.
namespace Moses
{
class InputFileStream;
} // namespace Moses
|
||||
|
||||
class FeatureFunction;
|
||||
|
||||
class Global
|
||||
{
|
||||
public:
|
||||
static const Global &Instance() {
|
||||
return s_instance;
|
||||
}
|
||||
static Global &InstanceNonConst() {
|
||||
return s_instance;
|
||||
}
|
||||
|
||||
Global();
|
||||
virtual ~Global();
|
||||
void Init(int argc, char** argv);
|
||||
|
||||
std::istream &GetInputStream() const {
|
||||
return *m_inputStrme;
|
||||
}
|
||||
|
||||
size_t stackSize;
|
||||
int maxDistortion;
|
||||
|
||||
Weights weights;
|
||||
mutable Moses::Timer timer;
|
||||
|
||||
protected:
|
||||
static Global s_instance;
|
||||
std::string m_iniPath, m_inputPath;
|
||||
|
||||
mutable std::istream *m_inputStrme;
|
||||
|
||||
typedef std::vector<std::string> ParamList;
|
||||
typedef std::map<std::string, ParamList> Params;
|
||||
Params m_params;
|
||||
|
||||
void InitParams();
|
||||
void InitFF();
|
||||
void InitWeight();
|
||||
void Load();
|
||||
|
||||
bool ParamExist(const std::string &key) const;
|
||||
};
|
||||
|
@ -1,61 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include "InputFileStream.h"
|
||||
#include "gzfilebuf.h"
|
||||
#include <iostream>
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
InputFileStream::InputFileStream(const std::string &filePath)
|
||||
: std::istream(NULL)
|
||||
, m_streambuf(NULL)
|
||||
{
|
||||
if (filePath.size() > 3 &&
|
||||
filePath.substr(filePath.size() - 3, 3) == ".gz") {
|
||||
m_streambuf = new gzfilebuf(filePath.c_str());
|
||||
} else {
|
||||
std::filebuf* fb = new std::filebuf();
|
||||
fb = fb->open(filePath.c_str(), std::ios::in);
|
||||
if (! fb) {
|
||||
cerr << "Can't read " << filePath.c_str() << endl;
|
||||
exit(1);
|
||||
}
|
||||
m_streambuf = fb;
|
||||
}
|
||||
this->init(m_streambuf);
|
||||
}
|
||||
|
||||
InputFileStream::~InputFileStream()
|
||||
{
|
||||
delete m_streambuf;
|
||||
m_streambuf = NULL;
|
||||
}
|
||||
|
||||
void InputFileStream::Close()
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
@ -1,46 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
namespace Moses
{

/** Used in place of std::istream, can read zipped files if it ends in .gz
 */
class InputFileStream : public std::istream
{
protected:
  // buffer the stream reads from; stored as a raw pointer
  std::streambuf *m_streambuf;

public:
  InputFileStream(const std::string &filePath);
  ~InputFileStream();

  void Close();
};

}
|
||||
|
@ -1,19 +0,0 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "InputPath.h"
|
||||
#include "WordsRange.h"
|
||||
#include "FF/TranslationModel/PhraseTable.h"
|
||||
|
||||
// Creates the path ending at endPos.
//  prevPath  path this one extends, or NULL for a path that starts at endPos
//  phrase    source words covered by the path (stored, not copied)
//  endPos    position of the last covered word
// The range starts where prevPath's range starts, or at endPos when there is
// no previous path. One lookup slot is reserved per registered phrase table.
//
// Initialisers are listed in member declaration order, which is the order
// the compiler uses regardless; the old list was in a different order and
// left m_range to be assigned in the body.
InputPath::InputPath(const InputPath *prevPath, const Phrase *phrase, size_t endPos)
  : m_prevPath(prevPath)
  , m_phrase(phrase)
  , m_range(new WordsRange(prevPath ? prevPath->GetRange().startPos : endPos, endPos))
  , m_lookupColl(PhraseTable::GetColl().size())
{
}
|
||||
|
||||
// Frees the WordsRange allocated by the constructor, which was previously
// never released. m_prevPath and m_phrase are supplied by the caller and are
// left untouched.
// NOTE(review): InputPath objects must not be copied, since a copy would
// share m_range and delete it twice - confirm no caller copies them.
InputPath::~InputPath()
{
  delete m_range;
}
|
||||
|
@ -1,51 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "Phrase.h"
|
||||
|
||||
class WordsRange;
|
||||
class TargetPhrases;
|
||||
|
||||
struct PhraseTableLookup {
|
||||
const TargetPhrases *tpColl;
|
||||
const void *ptNode;
|
||||
|
||||
void Set(const TargetPhrases *tpColl, const void *ptNode) {
|
||||
this->tpColl = tpColl;
|
||||
this->ptNode = ptNode;
|
||||
}
|
||||
};
|
||||
|
||||
class InputPath
|
||||
{
|
||||
public:
|
||||
InputPath(const InputPath *prevPath, const Phrase *phrase, size_t endPos);
|
||||
virtual ~InputPath();
|
||||
|
||||
const Phrase &GetPhrase() const {
|
||||
return *m_phrase;
|
||||
}
|
||||
|
||||
const PhraseTableLookup &GetPtLookup(size_t ptId) const {
|
||||
return m_lookupColl[ptId];
|
||||
}
|
||||
PhraseTableLookup &GetPtLookup(size_t ptId) {
|
||||
return m_lookupColl[ptId];
|
||||
}
|
||||
|
||||
const InputPath *GetPrevPath() const {
|
||||
return m_prevPath;
|
||||
}
|
||||
|
||||
const WordsRange &GetRange() const {
|
||||
return *m_range;
|
||||
}
|
||||
|
||||
protected:
|
||||
const InputPath *m_prevPath;
|
||||
const Phrase *m_phrase;
|
||||
const WordsRange *m_range;
|
||||
std::vector<PhraseTableLookup> m_lookupColl; // arranged by pt
|
||||
};
|
||||
|
@ -1,57 +0,0 @@
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include "Sentence.h"
|
||||
#include "Global.h"
|
||||
#include "Util.h"
|
||||
#include "Search/Manager.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void temp();
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
//temp();
|
||||
|
||||
Fix(cerr, 3);
|
||||
|
||||
Global &global = Global::InstanceNonConst();
|
||||
global.timer.start("Starting...");
|
||||
|
||||
global.Init(argc, argv);
|
||||
|
||||
global.timer.check("Ready for input:");
|
||||
|
||||
string line;
|
||||
while (getline(global.GetInputStream(), line)) {
|
||||
if (line == "EXIT") {
|
||||
break;
|
||||
}
|
||||
|
||||
Sentence *input = Sentence::CreateFromString(line);
|
||||
cerr << "input=" << input->Debug() << endl;
|
||||
|
||||
Manager manager(*input);
|
||||
|
||||
const Hypothesis *hypo = manager.GetHypothesis();
|
||||
if (hypo) {
|
||||
cerr << "TRANSLATION FOUND" << hypo->Debug() << endl;
|
||||
hypo->Output(cout);
|
||||
} else {
|
||||
cerr << "NO BEST TRANSLATION" << endl;
|
||||
}
|
||||
cout << endl;
|
||||
|
||||
cerr << "Ready for input:" << endl;
|
||||
}
|
||||
|
||||
cerr << "Shutting down" << endl;
|
||||
|
||||
cerr << "hypotheses created=" << Hypothesis::GetNumHypothesesCreated() << endl;
|
||||
|
||||
global.timer.check("Finished");
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,32 +0,0 @@
|
||||
# Build file for the basic-decoder binary.
# Boost and SRILM are expected under ../3rdparty relative to the directory
# make is started from ($(PWD)).
UNAME_S := $(shell uname -s)

BOOST_DIR = $(PWD)/../3rdparty/boost
BOOST_LIB = $(BOOST_DIR)/lib64
SRI_DIR = $(PWD)/../3rdparty/srilm
SRI_MACHINE_TYPE=$(shell $(SRI_DIR)/sbin/machine-type)
SRI_LIB = $(SRI_DIR)/lib/$(SRI_MACHINE_TYPE)

CXX = g++
CPPFLAGS = -Wall -O3 -I. -I$(SRI_DIR)/include -I$(BOOST_DIR)/include -DSCORE_BREAKDOWN
LIBS = -L$(BOOST_LIB) -L$(SRI_LIB) -lz -loolm -ldstruct -lflm -llattice -lmisc -lboost_system-mt -lpthread #-liconv

OBJECTS = Global.o Phrase.o Timer.o Word.o \
	InputFileStream.o Scores.o TypeDef.o WordsBitmap.o \
	InputPath.o Sentence.o Util.o WordsRange.o \
	Main.o TargetPhrase.o MyVocab.o \
	TargetPhrases.o Weights.o \
	FF/DistortionScoreProducer.o FF/PhrasePenalty.o FF/WordPenaltyProducer.o \
	FF/StatefulFeatureFunction.o \
	FF/FeatureFunction.o FF/StatelessFeatureFunction.o \
	FF/TranslationModel/PhraseTable.o FF/TranslationModel/UnknownWordPenalty.o \
	FF/TranslationModel/PhraseTableMemory.o \
	FF/TranslationModel/Memory/Node.o \
	FF/LM/LM.o FF/LM/InternalLM.o FF/LM/SRILM.o \
	Search/Hypothesis.o Search/Manager.o Search/Stack.o Search/Stacks.o

# "clean" names an action, not a file; declaring it phony keeps it working
# even if a file called "clean" exists.
.PHONY: clean

basic-decoder: $(OBJECTS)
	$(CXX) $(CPPFLAGS) -o basic-decoder $(OBJECTS) $(LIBS)
	#rm -f *.o */*.o */*/*.o */*/*/*.o

clean:
	rm -f basic-decoder *.o */*.o */*/*.o */*/*/*.o
|
@ -1,47 +0,0 @@
|
||||
|
||||
#include "MyVocab.h"
|
||||
#include "Util.h"
|
||||
#include "check.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace FastMoses
|
||||
{
|
||||
|
||||
MyVocab MyVocab::s_instance;
|
||||
VOCABID MyVocab::s_currId = 0;
|
||||
|
||||
MyVocab::MyVocab()
|
||||
{
|
||||
// TODO Auto-generated constructor stub
|
||||
}
|
||||
|
||||
MyVocab::~MyVocab()
|
||||
{
|
||||
cerr << "delete Vocab" << endl;
|
||||
}
|
||||
|
||||
VOCABID MyVocab::GetOrCreateId(const std::string &str)
|
||||
{
|
||||
Coll::left_map::const_iterator iter;
|
||||
iter = m_coll.left.find(str);
|
||||
if (iter != m_coll.left.end()) {
|
||||
return iter->second;
|
||||
} else {
|
||||
++s_currId;
|
||||
m_coll.insert(Coll::value_type(str, s_currId));
|
||||
return s_currId;
|
||||
}
|
||||
}
|
||||
|
||||
const std::string &MyVocab::GetString(VOCABID id) const
|
||||
{
|
||||
Coll::right_map::const_iterator iter;
|
||||
iter = m_coll.right.find(id);
|
||||
assert(iter != m_coll.right.end());
|
||||
|
||||
const string &str = iter->second;
|
||||
return str;
|
||||
}
|
||||
|
||||
}
|
@ -1,31 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <boost/bimap.hpp>
|
||||
#include "TypeDef.h"
|
||||
|
||||
namespace FastMoses
|
||||
{
|
||||
|
||||
class MyVocab
|
||||
{
|
||||
public:
|
||||
static MyVocab &Instance() {
|
||||
return s_instance;
|
||||
}
|
||||
|
||||
MyVocab();
|
||||
virtual ~MyVocab();
|
||||
|
||||
VOCABID GetOrCreateId(const std::string &str);
|
||||
const std::string &GetString(VOCABID id) const;
|
||||
protected:
|
||||
static MyVocab s_instance;
|
||||
static VOCABID s_currId;
|
||||
|
||||
typedef boost::bimap<std::string, VOCABID > Coll;
|
||||
Coll m_coll;
|
||||
|
||||
};
|
||||
|
||||
}
|
@ -1,68 +0,0 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "Phrase.h"
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Creates a phrase with `size` default-constructed words.
Phrase::Phrase(size_t size)
  : m_size(size)
  , m_words(size)
{
}
|
||||
|
||||
// Creates a phrase that starts with the words of `copy` and has room for
// `extra` more default-constructed words at the end.
// (The parameter declaration had been corrupted into a copyright sign by an
// HTML-entity decoder; it is restored to "&copy".)
Phrase::Phrase(const Phrase &copy, size_t extra)
  : m_size(copy.GetSize() + extra)
  , m_words(copy.GetSize() + extra)
{
  for (size_t i = 0; i < copy.GetSize(); ++i) {
    const Word &word = copy.GetWord(i);
    Set(i, word);
  }
}
|
||||
|
||||
// Empty body; the word vector is destroyed with the object.
Phrase::~Phrase()
{
}
|
||||
|
||||
void Phrase::Set(size_t pos, const Word &word)
|
||||
{
|
||||
m_words[pos].Set(word);
|
||||
}
|
||||
|
||||
|
||||
void Phrase::Output(std::ostream &out) const
|
||||
{
|
||||
for (size_t i = 0; i < m_size; ++i) {
|
||||
const Word &word = m_words[i];
|
||||
word.Output(out);
|
||||
out << " ";
|
||||
}
|
||||
}
|
||||
|
||||
std::string Phrase::Debug() const
|
||||
{
|
||||
stringstream strme;
|
||||
for (size_t i = 0; i < m_size; ++i) {
|
||||
const Word &word = m_words[i];
|
||||
strme << word.Debug() << " ";
|
||||
}
|
||||
|
||||
return strme.str();
|
||||
}
|
||||
|
||||
// Tokenizes `line` and returns a new heap-allocated Phrase with one word per
// token. The returned object is created with new; the caller receives the
// raw pointer.
Phrase *Phrase::CreateFromString(const std::string &line)
{
  vector<string> tokens;
  Tokenize(tokens, line);

  const size_t tokenCount = tokens.size();
  Phrase *result = new Phrase(tokenCount);
  for (size_t idx = 0; idx < tokenCount; ++idx) {
    result->GetWord(idx).CreateFromString(tokens[idx]);
  }

  return result;
}
|
@ -1,50 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <cassert>
|
||||
#include "Word.h"
|
||||
|
||||
// Vector of non-owning pointers to const words.
typedef std::vector<const Word*> PhraseVec;
|
||||
|
||||
class Phrase
|
||||
{
|
||||
public:
|
||||
static Phrase *CreateFromString(const std::string &line);
|
||||
|
||||
Phrase(const Phrase ©); // do not implement
|
||||
Phrase(size_t size);
|
||||
Phrase(const Phrase ©, size_t extra);
|
||||
|
||||
virtual ~Phrase();
|
||||
|
||||
const Word &GetWord(size_t pos) const {
|
||||
return m_words[pos];
|
||||
}
|
||||
Word &GetWord(size_t pos) {
|
||||
assert(pos < m_size);
|
||||
return m_words[pos];
|
||||
}
|
||||
const Word &Back() const {
|
||||
assert(m_size);
|
||||
return m_words[m_size - 1];
|
||||
}
|
||||
|
||||
size_t GetSize() const {
|
||||
return m_size;
|
||||
}
|
||||
|
||||
void Set(size_t pos, const Word &word);
|
||||
void SetLastWord(const Word &word) {
|
||||
Set(m_size - 1, word);
|
||||
}
|
||||
|
||||
void Output(std::ostream &out) const;
|
||||
virtual std::string Debug() const;
|
||||
|
||||
protected:
|
||||
size_t m_size;
|
||||
std::vector<Word> m_words;
|
||||
};
|
||||
|
@ -1,119 +0,0 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <algorithm>
|
||||
#include "Scores.h"
|
||||
#include "Global.h"
|
||||
#include "Util.h"
|
||||
#include "check.h"
|
||||
#include "FF/FeatureFunction.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Starts with a weighted total of zero and, when SCORE_BREAKDOWN is defined,
// one zero entry per score of every registered feature function.
// Initialisers follow the member declaration order (m_scores before
// m_weightedScore), which is the order the compiler uses regardless; the old
// list was reversed.
Scores::Scores()
  :
#ifdef SCORE_BREAKDOWN
  m_scores(FeatureFunction::GetTotalNumScores(), 0),
#endif
  m_weightedScore(0)
{
}
|
||||
|
||||
// Copy constructor: duplicates the weighted total and, when SCORE_BREAKDOWN
// is defined, the per-feature score vector.
// The parameter declaration had been corrupted into a copyright sign by an
// HTML-entity decoder and is restored to "&copy". Initialisers are listed in
// member declaration order.
Scores::Scores(const Scores &copy)
  :
#ifdef SCORE_BREAKDOWN
  m_scores(copy.m_scores),
#endif
  m_weightedScore(copy.m_weightedScore)
{
}
|
||||
|
||||
// Empty body; members are destroyed with the object.
Scores::~Scores()
{
}
|
||||
|
||||
void Scores::CreateFromString(const FeatureFunction &ff, const std::string &line, bool logScores)
|
||||
{
|
||||
std::vector<SCORE> scores(ff.GetNumScores());
|
||||
|
||||
if (logScores) {
|
||||
std::vector<SCORE> probs(ff.GetNumScores());
|
||||
Tokenize<SCORE>(probs, line);
|
||||
std::transform(probs.begin(),probs.end(),scores.begin(),
|
||||
TransformScore);
|
||||
} else {
|
||||
Tokenize<SCORE>(scores, line);
|
||||
}
|
||||
|
||||
Add(ff, scores);
|
||||
}
|
||||
|
||||
void Scores::Add(const Scores &other)
|
||||
{
|
||||
m_weightedScore += other.m_weightedScore;
|
||||
|
||||
#ifdef SCORE_BREAKDOWN
|
||||
size_t numScores = FeatureFunction::GetTotalNumScores();
|
||||
for (size_t i = 0; i < numScores; ++i) {
|
||||
m_scores[i] += other.m_scores[i];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void Scores::Add(const FeatureFunction &ff, SCORE score)
|
||||
{
|
||||
size_t numScores = ff.GetNumScores();
|
||||
CHECK(numScores == 1);
|
||||
size_t startInd = ff.GetStartInd();
|
||||
|
||||
// weighted score
|
||||
const Global &Global = Global::Instance();
|
||||
const std::vector<SCORE> &weights = Global.weights.GetWeights();
|
||||
SCORE weight = weights[startInd];
|
||||
|
||||
m_weightedScore += weight * score;
|
||||
|
||||
// update vector
|
||||
#ifdef SCORE_BREAKDOWN
|
||||
m_scores[startInd] += score;
|
||||
#endif
|
||||
}
|
||||
|
||||
void Scores::Add(const FeatureFunction &ff, const std::vector<SCORE> &scores)
|
||||
{
|
||||
size_t numScores = ff.GetNumScores();
|
||||
CHECK(numScores == scores.size());
|
||||
size_t startInd = ff.GetStartInd();
|
||||
|
||||
const Global &Global = Global::Instance();
|
||||
const std::vector<SCORE> &weights = Global.weights.GetWeights();
|
||||
|
||||
for (size_t i = 0; i < numScores; ++i) {
|
||||
size_t ffInd = startInd + i;
|
||||
SCORE score = scores[i];
|
||||
SCORE weight = weights[ffInd];
|
||||
|
||||
// weighted score
|
||||
m_weightedScore += weight * score;
|
||||
|
||||
// update vector
|
||||
#ifdef SCORE_BREAKDOWN
|
||||
m_scores[ffInd] += score;
|
||||
#endif
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::string Scores::Debug() const
|
||||
{
|
||||
stringstream strme;
|
||||
strme << "TOTAL=" << m_weightedScore;
|
||||
#ifdef SCORE_BREAKDOWN
|
||||
strme << " [" << m_scores[0];
|
||||
|
||||
size_t numScores = FeatureFunction::GetTotalNumScores();
|
||||
for (size_t i = 1; i < numScores; ++i) {
|
||||
strme << "," << m_scores[i];
|
||||
}
|
||||
strme << "]";
|
||||
#endif
|
||||
return strme.str();
|
||||
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "TypeDef.h"
|
||||
|
||||
class FeatureFunction;
|
||||
|
||||
class Scores
|
||||
{
|
||||
public:
|
||||
Scores();
|
||||
Scores(const Scores ©);
|
||||
virtual ~Scores();
|
||||
void CreateFromString(const FeatureFunction &ff, const std::string &line, bool logScores);
|
||||
|
||||
SCORE GetWeightedScore() const {
|
||||
return m_weightedScore;
|
||||
}
|
||||
|
||||
void Add(const Scores &other);
|
||||
void Add(const FeatureFunction &ff, SCORE score);
|
||||
void Add(const FeatureFunction &ff, const std::vector<SCORE> &scores);
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
protected:
|
||||
#ifdef SCORE_BREAKDOWN
|
||||
std::vector<SCORE> m_scores; // maybe it doesn't need this
|
||||
#endif
|
||||
SCORE m_weightedScore;
|
||||
};
|
@ -1,121 +0,0 @@
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include "Hypothesis.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "Sentence.h"
|
||||
#include "WordsRange.h"
|
||||
#include "Util.h"
|
||||
#include "FF/StatefulFeatureFunction.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Counter incremented by every Hypothesis constructor; also the source of m_id.
size_t Hypothesis::s_id = 0;
|
||||
|
||||
// Creates the initial hypothesis: no predecessor, a target range of
// (NOT_FOUND, NOT_FOUND) and scores copied from the target phrase.
// `tp` and `range` are kept by reference and must outlive the hypothesis.
//
// Initialisers are listed in member declaration order (public members first),
// which is the order the compiler uses regardless; the old list was in a
// different order.
Hypothesis::Hypothesis(const TargetPhrase &tp, const WordsRange &range, const WordsBitmap &coverage)
  : targetPhrase(tp)
  , targetRange(NOT_FOUND, NOT_FOUND)
  , m_id(++s_id)
  , m_range(range)
  , m_prevHypo(NULL)
  , m_coverage(coverage)
  , m_scores(tp.GetScores())
  , m_hash(0)
{
  // one state slot per stateful feature function
  size_t numSFF = StatefulFeatureFunction::GetColl().size();
  m_ffStates.resize(numSFF);
}
|
||||
|
||||
// Creates a hypothesis that extends prevHypo with target phrase `tp`.
// Scores start as a copy of the predecessor's and the target phrase's scores
// are added on top. The target range is built from the predecessor's target
// range and the size of `tp`.
// `tp`, `prevHypo` and `range` are kept by reference/pointer and must outlive
// the hypothesis.
//
// Initialisers are listed in member declaration order (public members first),
// which is the order the compiler uses regardless; the old list was in a
// different order.
Hypothesis::Hypothesis(const TargetPhrase &tp, const Hypothesis &prevHypo, const WordsRange &range, const WordsBitmap &coverage)
  : targetPhrase(tp)
  , targetRange(prevHypo.targetRange, tp.GetSize())
  , m_id(++s_id)
  , m_range(range)
  , m_prevHypo(&prevHypo)
  , m_coverage(coverage)
  , m_scores(prevHypo.GetScores())
  , m_hash(0)
{
  m_scores.Add(targetPhrase.GetScores());

  // one state slot per stateful feature function
  size_t numSFF = StatefulFeatureFunction::GetColl().size();
  m_ffStates.resize(numSFF);
}
|
||||
|
||||
// Empty body; the referenced target phrase, range and previous hypothesis
// are not released here.
Hypothesis::~Hypothesis()
{
}
|
||||
|
||||
size_t Hypothesis::GetHash() const
|
||||
{
|
||||
if (m_hash == 0) {
|
||||
// do nothing, assume already hashed
|
||||
// m_hash can be 0, but very small prob, or no statefull ff
|
||||
size_t numStates = StatefulFeatureFunction::GetColl().size();
|
||||
for (size_t i = 0; i < numStates; ++i) {
|
||||
size_t state = m_ffStates[i];
|
||||
boost::hash_combine(m_hash, state);
|
||||
}
|
||||
}
|
||||
|
||||
return m_hash;
|
||||
}
|
||||
|
||||
bool Hypothesis::operator==(const Hypothesis &other) const
|
||||
{
|
||||
size_t numStates = StatefulFeatureFunction::GetColl().size();
|
||||
for (size_t i = 0; i < numStates; ++i) {
|
||||
size_t state = m_ffStates[i];
|
||||
size_t otherState = other.m_ffStates[i];
|
||||
|
||||
bool isEqual = (state == otherState);
|
||||
if (!isEqual) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Hypothesis::Output(std::ostream &out) const
|
||||
{
|
||||
if (m_prevHypo) {
|
||||
m_prevHypo->Output(out);
|
||||
}
|
||||
targetPhrase.Output(out);
|
||||
}
|
||||
|
||||
std::string Hypothesis::Debug() const
|
||||
{
|
||||
stringstream strme;
|
||||
Fix(strme, 3);
|
||||
strme << m_range.Debug() << " targetRange=" << targetRange.Debug() << " " << m_scores.Debug() << " ";
|
||||
Output(strme);
|
||||
|
||||
// states
|
||||
strme << "states=";
|
||||
size_t numSFF = StatefulFeatureFunction::GetColl().size();
|
||||
for (size_t i = 0; i < numSFF; ++i) {
|
||||
size_t state = m_ffStates[i];
|
||||
strme << state << ",";
|
||||
}
|
||||
strme << "=" << m_hash;
|
||||
|
||||
/*
|
||||
if (m_prevHypo) {
|
||||
strme << endl;
|
||||
strme << m_prevHypo->Debug();
|
||||
}
|
||||
*/
|
||||
return strme.str();
|
||||
}
|
||||
|
||||
// Returns the target word at position pos of the partial translation, walking
// back through previous hypotheses until one whose target range starts at or
// before pos is found. pos must not exceed this hypothesis' target end
// (checked by assert only).
const Word &Hypothesis::GetWord(size_t pos) const
{
  assert(pos <= targetRange.endPos);

  const Hypothesis *owner = this;
  for (; pos < owner->targetRange.startPos; ) {
    owner = owner->GetPrevHypo();
    assert(owner != NULL);
  }

  const size_t offset = pos - owner->targetRange.startPos;
  return owner->GetCurrWord(offset);
}
|
||||
|
@ -1,107 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ostream>
|
||||
#include "WordsBitmap.h"
|
||||
#include "WordsRange.h"
|
||||
#include "Scores.h"
|
||||
#include "Word.h"
|
||||
#include "TargetPhrase.h"
|
||||
|
||||
class FFState;
|
||||
class Sentence;
|
||||
class WordsRange;
|
||||
|
||||
class Hypothesis
|
||||
{
|
||||
public:
|
||||
const TargetPhrase &targetPhrase;
|
||||
const WordsRange targetRange;
|
||||
|
||||
Hypothesis(); // do no implement
|
||||
Hypothesis(const Hypothesis ©); // do not implement
|
||||
|
||||
// creating the inital hypo
|
||||
Hypothesis(const TargetPhrase &tp, const WordsRange &range, const WordsBitmap &coverage);
|
||||
|
||||
// for extending a previous hypo
|
||||
Hypothesis(const TargetPhrase &tp, const Hypothesis &prevHypo, const WordsRange &range, const WordsBitmap &coverage);
|
||||
virtual ~Hypothesis();
|
||||
|
||||
const Scores &GetScores() const {
|
||||
return m_scores;
|
||||
}
|
||||
Scores &GetScores() {
|
||||
return m_scores;
|
||||
}
|
||||
|
||||
const WordsBitmap &GetCoverage() const {
|
||||
return m_coverage;
|
||||
}
|
||||
const Hypothesis *GetPrevHypo() const {
|
||||
return m_prevHypo;
|
||||
}
|
||||
|
||||
const WordsRange &GetRange() const {
|
||||
return m_range;
|
||||
}
|
||||
|
||||
size_t GetState(size_t id) const {
|
||||
return m_ffStates[id];
|
||||
}
|
||||
void SetState(size_t id, size_t state) {
|
||||
m_ffStates[id] = state;
|
||||
}
|
||||
|
||||
const Word &GetWord(size_t pos) const;
|
||||
inline const Word &GetCurrWord(size_t pos) const {
|
||||
return targetPhrase.GetWord(pos);
|
||||
}
|
||||
|
||||
/** length of the partial translation (from the start of the sentence) */
|
||||
inline size_t GetSize() const {
|
||||
return targetRange.endPos + 1;
|
||||
}
|
||||
|
||||
void Output(std::ostream &out) const;
|
||||
|
||||
size_t GetHash() const;
|
||||
bool operator==(const Hypothesis &other) const;
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
static size_t GetNumHypothesesCreated() {
|
||||
return s_id;
|
||||
}
|
||||
protected:
|
||||
static size_t s_id;
|
||||
size_t m_id;
|
||||
|
||||
const WordsRange &m_range;
|
||||
const Hypothesis *m_prevHypo;
|
||||
const WordsBitmap m_coverage;
|
||||
Scores m_scores;
|
||||
|
||||
std::vector<size_t> m_ffStates;
|
||||
mutable size_t m_hash;
|
||||
};
|
||||
|
||||
|
||||
struct HypothesisHasher {
|
||||
size_t operator()(const Hypothesis *hypo) const {
|
||||
return hypo->GetHash();
|
||||
}
|
||||
};
|
||||
|
||||
struct HypothesisEqual {
|
||||
bool operator()(const Hypothesis *a, const Hypothesis *b) const {
|
||||
bool ret = *a == *b;
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
struct HypothesisScoreOrderer {
|
||||
bool operator()(const Hypothesis* a, const Hypothesis* b) const {
|
||||
return a->GetScores().GetWeightedScore() > b->GetScores().GetWeightedScore();
|
||||
}
|
||||
};
|
@ -1,106 +0,0 @@
|
||||
|
||||
#include <iostream>
|
||||
#include "Manager.h"
|
||||
#include "InputPath.h"
|
||||
#include "Hypothesis.h"
|
||||
#include "Global.h"
|
||||
#include "FF/StatefulFeatureFunction.h"
|
||||
#include "FF/TranslationModel/PhraseTable.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Runs the whole decoding pipeline for one sentence inside the constructor:
// feature initialisation, input-path creation, phrase-table lookup and
// search, with a timer checkpoint before each of the last three stages and
// one after the search.
Manager::Manager(Sentence &sentence)
  : m_sentence(sentence)
  , m_stacks(sentence.GetSize() + 1)
  , m_emptyPhrase(new TargetPhrase(0))
  , m_emptyRange(new WordsRange(NOT_FOUND, NOT_FOUND))
  , m_emptyCoverage(new WordsBitmap(sentence.GetSize()))
{
  FeatureFunction::Initialize(m_sentence);

  Moses::Timer &timer = Global::InstanceNonConst().timer;

  timer.check("Begin CreateInputPaths");
  CreateInputPaths();

  timer.check("Begin Lookup");
  Lookup();

  timer.check("Begin Search");
  Search();

  timer.check("Finished Search");
}
|
||||
|
||||
// Calls FeatureFunction::CleanUp() for the sentence this manager handled.
Manager::~Manager()
{
  FeatureFunction::CleanUp(m_sentence);
}
|
||||
|
||||
void Manager::CreateInputPaths()
|
||||
{
|
||||
for (size_t pos = 0; pos < m_sentence.GetSize(); ++pos) {
|
||||
Phrase *phrase = new Phrase(1);
|
||||
phrase->Set(0, m_sentence.GetWord(pos));
|
||||
|
||||
InputPath *path = new InputPath(NULL, phrase, pos);
|
||||
m_inputPathQueue.push_back(path);
|
||||
|
||||
CreateInputPaths(*path, pos + 1);
|
||||
}
|
||||
}
|
||||
|
||||
void Manager::CreateInputPaths(const InputPath &prevPath, size_t pos)
|
||||
{
|
||||
if (pos >= m_sentence.GetSize()) {
|
||||
return;
|
||||
}
|
||||
|
||||
Phrase *phrase = new Phrase(prevPath.GetPhrase(), 1);
|
||||
phrase->SetLastWord(m_sentence.GetWord(pos));
|
||||
|
||||
InputPath *path = new InputPath(&prevPath, phrase, pos);
|
||||
m_inputPathQueue.push_back(path);
|
||||
|
||||
CreateInputPaths(*path, pos + 1);
|
||||
}
|
||||
|
||||
void Manager::Lookup()
|
||||
{
|
||||
const std::vector<PhraseTable*> &pts = PhraseTable::GetColl();
|
||||
for (size_t i = 0; i < pts.size(); ++i) {
|
||||
PhraseTable &pt = *pts[i];
|
||||
pt.Lookup(m_inputPathQueue);
|
||||
}
|
||||
}
|
||||
|
||||
void Manager::Search()
|
||||
{
|
||||
Hypothesis *emptyHypo = new Hypothesis(*m_emptyPhrase, *m_emptyRange, *m_emptyCoverage);
|
||||
StatefulFeatureFunction::EvaluateEmptyHypo(m_sentence, *emptyHypo);
|
||||
|
||||
m_stacks.Add(emptyHypo, 0);
|
||||
|
||||
for (size_t i = 0; i < m_stacks.GetSize() - 1; ++i) {
|
||||
cerr << Debug() << endl;
|
||||
|
||||
Stack &stack = m_stacks.Get(i);
|
||||
stack.PruneToSize();
|
||||
stack.Search(m_inputPathQueue);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/** Returns whatever the last stack reports as its best hypothesis. */
const Hypothesis *Manager::GetHypothesis() const
{
  return m_stacks.Back().GetHypothesis();
}
|
||||
|
||||
std::string Manager::Debug() const
|
||||
{
|
||||
stringstream strme;
|
||||
for (size_t i = 0; i < m_stacks.GetSize(); ++i) {
|
||||
const Stack &stack = m_stacks.Get(i);
|
||||
strme << stack.Debug() << " ";
|
||||
}
|
||||
return strme.str();
|
||||
}
|
@ -1,36 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Sentence.h"
|
||||
#include "Stacks.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "WordsRange.h"
|
||||
|
||||
class InputPath;
|
||||
|
||||
class Manager
|
||||
{
|
||||
public:
|
||||
Manager(Sentence &sentence);
|
||||
virtual ~Manager();
|
||||
|
||||
const Hypothesis *GetHypothesis() const;
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
protected:
|
||||
Sentence &m_sentence;
|
||||
std::vector<InputPath*> m_inputPathQueue;
|
||||
Stacks m_stacks;
|
||||
|
||||
TargetPhrase *m_emptyPhrase;
|
||||
WordsRange *m_emptyRange;
|
||||
WordsBitmap *m_emptyCoverage;
|
||||
|
||||
void CreateInputPaths();
|
||||
void CreateInputPaths(const InputPath &prevPath, size_t pos);
|
||||
|
||||
void Lookup();
|
||||
void Search();
|
||||
};
|
||||
|
@ -1,217 +0,0 @@
|
||||
|
||||
#include <limits>
|
||||
#include "Stack.h"
|
||||
#include "Stacks.h"
|
||||
#include "check.h"
|
||||
#include "InputPath.h"
|
||||
#include "TargetPhrase.h"
|
||||
#include "TargetPhrases.h"
|
||||
#include "WordsRange.h"
|
||||
#include "Global.h"
|
||||
#include "FF/TranslationModel/PhraseTable.h"
|
||||
#include "FF/StatefulFeatureFunction.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** Reads the stack size limit from Global and pre-sizes the hash set for
 *  twice that many entries, the size at which Add() triggers pruning.
 *  m_stacks is set to NULL so the back-pointer has a defined value until
 *  SetContainer() is called; previously it was left uninitialised.
 */
Stack::Stack()
  : m_maxHypoStackSize(Global::Instance().stackSize)
  , m_stacks(NULL)
{
  m_coll.reserve(m_maxHypoStackSize*2);
}
|
||||
|
||||
/** Nothing to release explicitly: the stored hypothesis pointers are not
 *  deleted here.
 */
Stack::~Stack()
{
}
|
||||
|
||||
/** Adds a hypothesis, recombining with an equivalent one if present.
 *  @return true only when no equivalent hypothesis was stored before.
 *          false in both recombination cases - also when the incoming
 *          hypothesis replaced the stored one - so a false return does NOT
 *          mean that hypo is unused.
 */
bool Stack::AddPrune(Hypothesis *hypo)
{
  std::pair<iterator, bool> inserted = Add(hypo);
  if (inserted.second) {
    return true;   // no equivalent hypothesis existed
  }

  // an equivalent hypothesis is already stored: keep the better of the two
  iterator &storedIter = inserted.first;
  const Hypothesis *stored = *storedIter;

  const bool incomingIsBetter =
    hypo->GetScores().GetWeightedScore() > stored->GetScores().GetWeightedScore();

  if (incomingIsBetter) {
    Remove(storedIter);

    bool added = Add(hypo).second;
    assert(added);
  }
  // otherwise the stored hypothesis stays and the incoming one is dropped

  return false;
}
|
||||
|
||||
std::pair<Stack::iterator, bool> Stack::Add(const Hypothesis *hypo)
|
||||
{
|
||||
pair<iterator,bool> ret = m_coll.insert(hypo);
|
||||
if (ret.second) {
|
||||
// equiv hypo doesn't exists
|
||||
if (m_coll.size() > m_maxHypoStackSize * 2) {
|
||||
PruneToSize(m_maxHypoStackSize);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Stack::PruneToSize()
|
||||
{
|
||||
PruneToSize(m_maxHypoStackSize);
|
||||
}
|
||||
|
||||
/** If the stack holds more than newSize hypotheses, keeps only those
 *  selected by SortHypotheses() and rebuilds the set from them.
 */
void Stack::PruneToSize(size_t newSize)
{
  if (m_coll.size() > newSize) {
    vector<const Hypothesis*> survivors;
    SortHypotheses(newSize, survivors);

    m_coll.clear();
    for (size_t ind = 0; ind < survivors.size(); ++ind) {
      std::pair<Stack::iterator, bool> ret = Add(survivors[ind]);
      CHECK(ret.second);   // survivors came out of the set, so none can clash
    }
  }
}
|
||||
|
||||
void Stack::SortHypotheses(size_t newSize, vector<const Hypothesis*> &out)
|
||||
{
|
||||
// sort hypotheses
|
||||
out.reserve(m_coll.size());
|
||||
std::copy(m_coll.begin(), m_coll.end(), std::inserter(out, out.end()));
|
||||
std::sort(out.begin(), out.end(), HypothesisScoreOrderer());
|
||||
|
||||
// also keep those on boundary
|
||||
const Hypothesis &boundaryHypo = *out[newSize - 1];
|
||||
SCORE boundaryScore = boundaryHypo.GetScores().GetWeightedScore();
|
||||
|
||||
for (size_t i = newSize; i < out.size(); ++i) {
|
||||
const Hypothesis *hypo = out[i];
|
||||
SCORE score = hypo->GetScores().GetWeightedScore();
|
||||
if (score < boundaryScore) {
|
||||
// score for this hypothesis is less than boundary score.
|
||||
// Discard this and all following hypos
|
||||
out.resize(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Erases the entry at iter; asserts that exactly one entry disappeared. */
void Stack::Remove(Coll::iterator &iter)
{
  const size_t countBefore = m_coll.size();
  m_coll.erase(iter);
  const size_t numErased = countBefore - m_coll.size();
  assert(numErased == 1);
}
|
||||
|
||||
/** Tries to extend every hypothesis in this stack with every queued path. */
void Stack::Search(const std::vector<InputPath*> &queue)
{
  const_iterator it = begin();
  while (it != end()) {
    Extend(**it, queue);
    ++it;
  }
}
|
||||
|
||||
void Stack::Extend(const Hypothesis &hypo, const std::vector<InputPath*> &queue)
|
||||
{
|
||||
//cerr << "extending " << hypo.Debug() << endl;
|
||||
const WordsBitmap &hypoCoverage = hypo.GetCoverage();
|
||||
|
||||
for (size_t i = 0; i < queue.size(); ++i) {
|
||||
const InputPath &path = *queue[i];
|
||||
const WordsRange &range = path.GetRange();
|
||||
//cerr << range.Debug() << " " << hypoCoverage.Debug() << endl;
|
||||
if (!hypoCoverage.Overlap(range)) {
|
||||
Extend(hypo, path);
|
||||
//cerr << "EXTEND" << endl;
|
||||
} else {
|
||||
//cerr << "DONT EXTEND" << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Stack::Extend(const Hypothesis &hypo, const InputPath &path)
|
||||
{
|
||||
const WordsRange &range = path.GetRange();
|
||||
const WordsRange &prevRange = hypo.GetRange();
|
||||
const WordsBitmap &coverage = hypo.GetCoverage();
|
||||
if (!coverage.WithinReorderingConstraint(prevRange, range)) {
|
||||
return;
|
||||
}
|
||||
|
||||
size_t numPt = PhraseTable::GetColl().size();
|
||||
for (size_t i = 0; i < numPt; ++i) {
|
||||
const PhraseTableLookup &lookup = path.GetPtLookup(i);
|
||||
const TargetPhrases *tpColl = lookup.tpColl;
|
||||
if (tpColl) {
|
||||
Extend(hypo, *tpColl, range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Stack::Extend(const Hypothesis &hypo, const TargetPhrases &tpColl, const WordsRange &range)
|
||||
{
|
||||
//cerr << "range=" << range.Debug() << " tpColl=" << tpColl.GetSize() << endl;
|
||||
WordsBitmap newCoverage(hypo.GetCoverage(), range);
|
||||
size_t wordsCovered = newCoverage.GetNumWordsCovered();
|
||||
|
||||
TargetPhrases::const_iterator iter;
|
||||
for (iter = tpColl.begin(); iter != tpColl.end(); ++iter) {
|
||||
const TargetPhrase &tp = **iter;
|
||||
|
||||
Hypothesis *newHypo = new Hypothesis(tp, hypo, range, newCoverage);
|
||||
|
||||
StatefulFeatureFunction::Evaluate(*newHypo);
|
||||
|
||||
bool added = m_stacks->Add(newHypo, wordsCovered);
|
||||
if (added) {
|
||||
//cerr << "added" << newHypo->Debug() << endl;
|
||||
} else {
|
||||
// discarded
|
||||
//delete newHypo;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
std::string Stack::Debug() const
|
||||
{
|
||||
stringstream strme;
|
||||
strme << GetSize();
|
||||
return strme.str();
|
||||
}
|
||||
|
||||
/** Linear scan for the hypothesis with the highest weighted score.
 *  @return NULL when no stored hypothesis scores strictly above
 *          -numeric_limits<SCORE>::max() (in particular when empty).
 */
const Hypothesis *Stack::GetHypothesis() const
{
  const Hypothesis *best = NULL;
  SCORE highest = -std::numeric_limits<SCORE>::max();

  const_iterator it = begin();
  while (it != end()) {
    const Hypothesis *candidate = *it;
    const SCORE candidateScore = candidate->GetScores().GetWeightedScore();
    if (candidateScore > highest) {
      highest = candidateScore;
      best = candidate;
    }
    ++it;
  }

  return best;
}
|
@ -1,62 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <boost/unordered_set.hpp>
|
||||
#include "Search/Hypothesis.h"
|
||||
|
||||
class InputPath;
|
||||
class TargetPhrases;
|
||||
class Stacks;
|
||||
|
||||
class Stack
|
||||
{
|
||||
protected:
|
||||
typedef boost::unordered_set<const Hypothesis*,
|
||||
HypothesisHasher,
|
||||
HypothesisEqual> Coll;
|
||||
Coll m_coll;
|
||||
size_t m_maxHypoStackSize;
|
||||
Stacks *m_stacks;
|
||||
|
||||
std::pair<Coll::iterator, bool> Add(const Hypothesis *hypo);
|
||||
|
||||
void Remove(Coll::iterator &iter);
|
||||
void SortHypotheses(size_t newSize, std::vector<const Hypothesis*> &out);
|
||||
void Extend(const Hypothesis &hypo, const std::vector<InputPath*> &queue);
|
||||
void Extend(const Hypothesis &hypo, const InputPath &path);
|
||||
void Extend(const Hypothesis &hypo, const TargetPhrases &tpColl, const WordsRange &range);
|
||||
void PruneToSize(size_t newSize);
|
||||
|
||||
public:
|
||||
typedef Coll::iterator iterator;
|
||||
typedef Coll::const_iterator const_iterator;
|
||||
//! iterators
|
||||
const_iterator begin() const {
|
||||
return m_coll.begin();
|
||||
}
|
||||
const_iterator end() const {
|
||||
return m_coll.end();
|
||||
}
|
||||
|
||||
Stack();
|
||||
virtual ~Stack();
|
||||
|
||||
size_t GetSize() const {
|
||||
return m_coll.size();
|
||||
}
|
||||
|
||||
const Hypothesis *GetHypothesis() const;
|
||||
|
||||
bool AddPrune(Hypothesis *hypo);
|
||||
void PruneToSize();
|
||||
|
||||
void Search(const std::vector<InputPath*> &queue);
|
||||
|
||||
void SetContainer(Stacks &stacks) {
|
||||
m_stacks = &stacks;
|
||||
}
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
};
|
||||
|
@ -1,16 +0,0 @@
|
||||
#include "Stacks.h"
|
||||
|
||||
/** Creates 'size' stacks and gives each one a back-pointer to this container.
 *  Fix: the loop bound was 'i <= m_stacks.size()', which indexed one element
 *  past the end of the vector.
 */
Stacks::Stacks(size_t size)
  :m_stacks(size)
{
  for (size_t i = 0; i < m_stacks.size(); ++i) {
    m_stacks[i].SetContainer(*this);
  }
}
|
||||
/*
|
||||
bool Stacks::Add(Hypothesis *hypo, size_t wordsCovered)
|
||||
{
|
||||
bool added = m_stacks[wordsCovered].AddPrune(hypo);
|
||||
return added;
|
||||
}
|
||||
*/
|
@ -1,35 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "Stack.h"
|
||||
|
||||
class Hypothesis;
|
||||
|
||||
class Stacks
|
||||
{
|
||||
public:
|
||||
Stacks(size_t size);
|
||||
|
||||
size_t GetSize() const
|
||||
{ return m_stacks.size(); }
|
||||
|
||||
const Stack &Get(size_t i) const
|
||||
{ return m_stacks[i]; }
|
||||
Stack &Get(size_t i)
|
||||
{ return m_stacks[i]; }
|
||||
|
||||
const Stack &Back() const
|
||||
{ return m_stacks.back(); }
|
||||
|
||||
inline bool Add(Hypothesis *hypo, size_t wordsCovered)
|
||||
{
|
||||
bool added = m_stacks[wordsCovered].AddPrune(hypo);
|
||||
return added;
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<Stack> m_stacks;
|
||||
|
||||
};
|
||||
|
||||
|
@ -1,30 +0,0 @@
|
||||
|
||||
#include "Sentence.h"
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** Forwards the size to the Phrase base class. */
Sentence::Sentence(size_t size)
  : Phrase(size)
{
}
|
||||
|
||||
/** Sentence adds no state of its own; nothing to release. */
Sentence::~Sentence() {}
|
||||
|
||||
/** Tokenizes line with Tokenize()'s default delimiters and builds a
 *  heap-allocated Sentence with one Word per token.
 *  @return a new Sentence created with 'new'.
 */
Sentence *Sentence::CreateFromString(const std::string &line)
{
  vector<string> tokens;
  Tokenize(tokens, line);

  Sentence *sentence = new Sentence(tokens.size());

  size_t wordInd = 0;
  for (vector<string>::const_iterator tok = tokens.begin(); tok != tokens.end(); ++tok, ++wordInd) {
    sentence->GetWord(wordInd).CreateFromString(*tok);
  }

  return sentence;
}
|
@ -1,14 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Phrase.h"
|
||||
|
||||
class Sentence :public Phrase
|
||||
{
|
||||
public:
|
||||
static Sentence *CreateFromString(const std::string &line);
|
||||
|
||||
Sentence(size_t size);
|
||||
virtual ~Sentence();
|
||||
};
|
||||
|
@ -1,49 +0,0 @@
|
||||
|
||||
#include "TargetPhrase.h"
|
||||
#include "Util.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** Creates a phrase of the given size with default-constructed scores. */
TargetPhrase::TargetPhrase(size_t size)
  : Phrase(size)
  , m_scores()
{
}
|
||||
|
||||
/** Nothing to release explicitly. */
TargetPhrase::~TargetPhrase() {}
|
||||
|
||||
/** Builds a heap-allocated target phrase.
 *  @param ff        feature function passed on to Scores::CreateFromString
 *  @param targetStr target words, split with Tokenize()'s default delimiters
 *  @param scoreStr  score text, parsed by Scores::CreateFromString
 *  @param logScores passed through to Scores::CreateFromString
 *  @return a new TargetPhrase created with 'new'.
 */
TargetPhrase *TargetPhrase::CreateFromString(
  const FeatureFunction &ff,
  const std::string &targetStr,
  const std::string &scoreStr,
  bool logScores)
{
  // words
  vector<string> tokens;
  Tokenize(tokens, targetStr);

  TargetPhrase *result = new TargetPhrase(tokens.size());
  size_t wordInd = 0;
  for (vector<string>::const_iterator tok = tokens.begin(); tok != tokens.end(); ++tok, ++wordInd) {
    result->GetWord(wordInd).CreateFromString(*tok);
  }

  // scores
  result->GetScores().CreateFromString(ff, scoreStr, logScores);

  return result;
}
|
||||
|
||||
std::string TargetPhrase::Debug() const
|
||||
{
|
||||
stringstream strme;
|
||||
strme << Phrase::Debug() << " ";
|
||||
strme << m_scores.Debug();
|
||||
return strme.str();
|
||||
}
|
@ -1,33 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Phrase.h"
|
||||
#include "Scores.h"
|
||||
|
||||
class TargetPhrase: public Phrase
|
||||
{
|
||||
public:
|
||||
static TargetPhrase *CreateFromString(const FeatureFunction &ff,
|
||||
const std::string &targetStr,
|
||||
const std::string &scoreStr,
|
||||
bool logScores);
|
||||
|
||||
TargetPhrase(size_t size);
|
||||
|
||||
TargetPhrase(const TargetPhrase ©); // do not implement
|
||||
|
||||
virtual ~TargetPhrase();
|
||||
|
||||
Scores &GetScores() {
|
||||
return m_scores;
|
||||
}
|
||||
const Scores &GetScores() const {
|
||||
return m_scores;
|
||||
}
|
||||
|
||||
virtual std::string Debug() const;
|
||||
|
||||
protected:
|
||||
Scores m_scores;
|
||||
};
|
||||
|
@ -1,16 +0,0 @@
|
||||
|
||||
#include <iostream>
|
||||
#include "TargetPhrases.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** Starts with an empty collection. */
TargetPhrases::TargetPhrases() {}
|
||||
|
||||
/** The stored TargetPhrase pointers are not deleted here. */
TargetPhrases::~TargetPhrases()
{
}
|
@ -1,38 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
|
||||
class TargetPhrase;
|
||||
|
||||
class TargetPhrases
|
||||
{
|
||||
typedef std::vector<const TargetPhrase*> Coll;
|
||||
|
||||
public:
|
||||
typedef Coll::iterator iterator;
|
||||
typedef Coll::const_iterator const_iterator;
|
||||
|
||||
const_iterator begin() const {
|
||||
return m_coll.begin();
|
||||
}
|
||||
const_iterator end() const {
|
||||
return m_coll.end();
|
||||
}
|
||||
|
||||
TargetPhrases();
|
||||
virtual ~TargetPhrases();
|
||||
|
||||
void Add(const TargetPhrase *tp) {
|
||||
m_coll.push_back(tp);
|
||||
}
|
||||
|
||||
size_t GetSize() const {
|
||||
return m_coll.size();
|
||||
}
|
||||
|
||||
protected:
|
||||
Coll m_coll;
|
||||
|
||||
};
|
||||
|
@ -1,50 +0,0 @@
|
||||
#include <iostream>
|
||||
#include "Timer.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
void Timer::start(const char* msg)
|
||||
{
|
||||
// Print an optional message, something like "Starting timer t";
|
||||
if (msg) {
|
||||
cerr << msg << std::endl;
|
||||
}
|
||||
|
||||
// Return immediately if the timer is already running
|
||||
if (running) return;
|
||||
|
||||
// Change timer status to running
|
||||
running = true;
|
||||
|
||||
// Set the start time;
|
||||
time(&start_time);
|
||||
}
|
||||
|
||||
void Timer::check(const char* msg)
|
||||
{
|
||||
// Print an optional message, something like "Checking timer t";
|
||||
if (msg) {
|
||||
cerr << msg << " : ";
|
||||
}
|
||||
|
||||
// TRACE_ERR( "[" << std::setiosflags(std::ios::fixed) << std::setprecision(2) << (running ? elapsed_time() : 0) << "] seconds\n");
|
||||
cerr << "[" << (running ? elapsed_time() : 0) << "] seconds\n";
|
||||
}
|
||||
|
||||
double Timer::elapsed_time()
|
||||
{
|
||||
time_t now;
|
||||
time(&now);
|
||||
return difftime(now, start_time);
|
||||
}
|
||||
|
||||
double Timer::get_elapsed_time()
|
||||
{
|
||||
return elapsed_time();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,49 +0,0 @@
|
||||
#ifndef moses_Time_H
|
||||
#define moses_Time_H
|
||||
|
||||
#include <ctime>
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include "Util.h"
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/** Wall-clock timer built on time_t. CPU time is avoided because it is less
 *  reliable in a multi-threaded environment and can include clock cycles
 *  used by other threads of the same process.
 */
class Timer
{
  friend std::ostream& operator<<(std::ostream& os, Timer& t);

private:
  bool running;

  // time_t has a resolution of whole seconds only
  time_t start_time;

  //! seconds since start_time
  double elapsed_time();

public:
  /** The timer is created stopped ('running' is false) and has to be started
   *  explicitly with start().
   */
  Timer() : running(false) {
    start_time = 0;
  }

  void start(const char* msg = 0);
  // void restart(const char* msg = 0);
  // void stop(const char* msg = 0);
  void check(const char* msg = 0);
  double get_elapsed_time();
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
@ -1,4 +0,0 @@
|
||||
|
||||
#include "TypeDef.h"
|
||||
|
||||
|
@ -1,11 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <limits>
|
||||
|
||||
// numeric type of every model score
typedef float SCORE;

// integer identifier of a vocabulary entry
typedef int VOCABID;

// sentinel for "no position": the largest size_t value
#define NOT_FOUND std::numeric_limits<size_t>::max()

// smallest log score FloorScore() lets through
const SCORE LOWEST_SCORE = -100.0;
|
@ -1,9 +0,0 @@
|
||||
|
||||
#include "Util.h"
|
||||
|
||||
/** Returns a copy of str with every leading and trailing character that
 *  occurs in dropChars removed. A string consisting only of such characters
 *  yields the empty string.
 */
const std::string Trim(const std::string& str, const std::string dropChars)
{
  // npos + 1 wraps to 0, so an all-dropChars input gives an empty prefix
  const std::string::size_type keepLen = str.find_last_not_of(dropChars) + 1;
  std::string trimmed(str, 0, keepLen);

  const std::string::size_type firstKept = trimmed.find_first_not_of(dropChars);
  trimmed.erase(0, firstKept);
  return trimmed;
}
|
@ -1,132 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <cmath>
|
||||
#include <ostream>
|
||||
#include "TypeDef.h"
|
||||
|
||||
//! get string representation of any object/variable, as long as it can pipe to a stream
|
||||
/** Text form of any value that can be streamed with operator<<. */
template<typename T>
inline std::string SPrint(const T &input)
{
  std::stringstream formatter("");
  formatter << input;
  const std::string text = formatter.str();
  return text;
}
|
||||
|
||||
//! delete white spaces at beginning and end of string
|
||||
const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r");
|
||||
|
||||
//! convert string to variable of type T. Used to reading floats, int etc from files
|
||||
/** Parses a value of type T from the start of input using operator>>.
 *  The result is value-initialised first so that a failed extraction gives
 *  a defined value (0 for arithmetic types) rather than an indeterminate
 *  one, which is what happened under pre-C++11 stream rules.
 */
template<typename T>
inline T Scan(const std::string &input)
{
  std::stringstream stream(input);
  T ret = T();
  stream >> ret;
  return ret;
}
|
||||
|
||||
/** Converts every string of input with Scan<T>. output is resized to the
 *  size of input and its previous content is overwritten.
 */
template<typename T>
inline void Scan(std::vector<T> &output, const std::vector< std::string > &input)
{
  const size_t count = input.size();
  output.resize(count);
  for (size_t ind = 0; ind != count; ++ind) {
    output[ind] = Scan<T>(input[ind]);
  }
}
|
||||
|
||||
/** Splits str at any character of delimiters and APPENDS the non-empty
 *  tokens to output. Runs of delimiters count as one separator; leading and
 *  trailing delimiters produce no tokens.
 */
inline void Tokenize(std::vector<std::string> &output
                     , const std::string& str
                     , const std::string& delimiters = " \t")
{
  // start of the first token, or npos if there is none
  std::string::size_type tokBegin = str.find_first_not_of(delimiters, 0);

  while (tokBegin != std::string::npos) {
    // one past the end of the token (npos: token runs to the end of str)
    const std::string::size_type tokEnd = str.find_first_of(delimiters, tokBegin);
    output.push_back(str.substr(tokBegin, tokEnd - tokBegin));

    // skip the delimiters that follow the token
    tokBegin = str.find_first_not_of(delimiters, tokEnd);
  }
}
|
||||
|
||||
template<typename T>
|
||||
inline void Tokenize( std::vector<T> &output
|
||||
, const std::string &input
|
||||
, const std::string& delimiters = " \t")
|
||||
{
|
||||
std::vector<std::string> stringVector;
|
||||
Tokenize(stringVector, input, delimiters);
|
||||
return Scan<T>(output, stringVector );
|
||||
}
|
||||
|
||||
|
||||
/** only split of the first delimiter. Used by class FeatureFunction for parse key=value pair.
|
||||
* Value may have = character
|
||||
*/
|
||||
/** Splits str at the FIRST occurrence of any delimiter character only.
 *  @return two strings (before and after the delimiter; either may be
 *          empty) when a delimiter is found, otherwise just str.
 */
inline std::vector<std::string> TokenizeFirstOnly(const std::string& str,
    const std::string& delimiters = " \t")
{
  std::vector<std::string> parts;

  const std::string::size_type splitAt = str.find_first_of(delimiters);
  if (splitAt == std::string::npos) {
    parts.push_back(str);
    return parts;
  }

  parts.push_back(str.substr(0, splitAt));
  parts.push_back(str.substr(splitAt + 1));   // the rest, later delimiters included
  return parts;
}
|
||||
|
||||
// speeded up version of above
|
||||
inline void TokenizeMultiCharSeparator(std::vector<std::string> &output
|
||||
,const std::string& str
|
||||
,const std::string& separator)
|
||||
{
|
||||
size_t pos = 0;
|
||||
// Find first "non-delimiter".
|
||||
std::string::size_type nextPos = str.find(separator, pos);
|
||||
|
||||
while (nextPos != std::string::npos) {
|
||||
// Found a token, add it to the vector.
|
||||
output.push_back(Trim(str.substr(pos, nextPos - pos)));
|
||||
// Skip delimiters. Note the "not_of"
|
||||
pos = nextPos + separator.size();
|
||||
// Find next "non-delimiter"
|
||||
nextPos = str.find(separator, pos);
|
||||
}
|
||||
output.push_back(Trim(str.substr(pos, nextPos - pos)));
|
||||
}
|
||||
|
||||
/** Clamps a log score from below at LOWEST_SCORE. */
inline SCORE FloorScore(SCORE logScore)
{
  // parentheses keep a function-like max() macro from expanding here
  const SCORE floored = (std::max)(logScore , LOWEST_SCORE);
  return floored;
}
|
||||
|
||||
/** Natural log of prob, floored with FloorScore(). */
inline SCORE TransformScore(SCORE prob)
{
  const SCORE logProb = log(prob);
  return FloorScore(logProb);
}
|
||||
|
||||
/** Multiplies the score by 2.30258509299405f, i.e. by ln(10). */
inline float TransformSRIScore(float sriScore)
{
  const float factor = 2.30258509299405f;
  return sriScore * factor;
}
|
||||
|
||||
/** Enforce rounding */
|
||||
/** Sets the 'fixed' format flag on stream and its precision to 'size'. */
inline void Fix(std::ostream& stream, size_t size)
{
  stream.precision(size);
  stream.setf(std::ios::fixed);
}
|
@ -1,38 +0,0 @@
|
||||
|
||||
#include "Weights.h"
|
||||
#include "Util.h"
|
||||
#include "check.h"
|
||||
#include "FF/FeatureFunction.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** One weight, initially 0, per score slot reported by
 *  FeatureFunction::GetTotalNumScores() at construction time.
 */
Weights::Weights()
  : m_weights(FeatureFunction::GetTotalNumScores(), 0)
{
}
|
||||
|
||||
/** Nothing to release explicitly. */
Weights::~Weights() {}
|
||||
|
||||
void Weights::CreateFromString(const std::string &line)
|
||||
{
|
||||
Tokenize<SCORE>(m_weights, line);
|
||||
|
||||
}
|
||||
|
||||
void Weights::SetWeights(const FeatureFunction &ff, const std::vector<SCORE> &weights)
|
||||
{
|
||||
size_t numScores = ff.GetNumScores();
|
||||
CHECK(numScores == weights.size());
|
||||
size_t startInd = ff.GetStartInd();
|
||||
|
||||
size_t inInd = 0;
|
||||
for (size_t i = startInd; i < startInd + numScores; ++i, ++inInd) {
|
||||
m_weights[i] = weights[inInd];
|
||||
}
|
||||
}
|
||||
|
@ -1,29 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include "TypeDef.h"
|
||||
|
||||
class FeatureFunction;
|
||||
|
||||
class Weights
|
||||
{
|
||||
public:
|
||||
Weights();
|
||||
virtual ~Weights();
|
||||
void CreateFromString(const std::string &line);
|
||||
|
||||
const std::vector<SCORE> &GetWeights() const {
|
||||
return m_weights;
|
||||
}
|
||||
|
||||
void SetWeights(const FeatureFunction &ff, const std::vector<SCORE> &weights);
|
||||
void SetNumScores(size_t num) {
|
||||
m_weights.resize(num, 0);
|
||||
}
|
||||
|
||||
protected:
|
||||
std::vector<SCORE> m_weights;
|
||||
};
|
||||
|
@ -1,45 +0,0 @@
|
||||
|
||||
#include "Word.h"
|
||||
#include "MyVocab.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** Creates a word whose id is 0 until Set() or CreateFromString() assigns
 *  the real one. The id used to be left uninitialised, so Compare(),
 *  GetVocab() or hashing on a fresh Word read an indeterminate value.
 *  NOTE(review): 0 may also be a valid vocabulary id - confirm against
 *  MyVocab if a distinct "unset" value is needed.
 */
Word::Word()
  :m_vocabId(0)
{
}
|
||||
|
||||
/** Nothing to release. */
Word::~Word() {}
|
||||
|
||||
void Word::Set(const Word &word)
|
||||
{
|
||||
m_vocabId = word.m_vocabId;
|
||||
}
|
||||
|
||||
void Word::CreateFromString(const std::string &line)
|
||||
{
|
||||
FastMoses::MyVocab &vocab = FastMoses::MyVocab::Instance();
|
||||
m_vocabId = vocab.GetOrCreateId(line);
|
||||
}
|
||||
|
||||
void Word::Output(std::ostream &out) const
|
||||
{
|
||||
FastMoses::MyVocab &vocab = FastMoses::MyVocab::Instance();
|
||||
const string &ret = vocab.GetString(m_vocabId);
|
||||
out << ret;
|
||||
}
|
||||
|
||||
std::string Word::ToString() const
|
||||
{
|
||||
stringstream strme;
|
||||
Output(strme);
|
||||
return strme.str();
|
||||
}
|
||||
|
||||
std::string Word::Debug() const
|
||||
{
|
||||
return ToString();
|
||||
}
|
@ -1,52 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include "TypeDef.h"
|
||||
|
||||
class Word
|
||||
{
|
||||
public:
|
||||
Word();
|
||||
virtual ~Word();
|
||||
|
||||
void CreateFromString(const std::string &line);
|
||||
|
||||
VOCABID GetVocab() const {
|
||||
return m_vocabId;
|
||||
}
|
||||
|
||||
void Set(const Word &word);
|
||||
|
||||
void Output(std::ostream &out) const;
|
||||
std::string ToString() const;
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
int Compare(const Word &other) const {
|
||||
if (m_vocabId == other.m_vocabId) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return (m_vocabId < other.m_vocabId) ? -1 : +1;
|
||||
}
|
||||
|
||||
bool operator== (const Word &other) const {
|
||||
// needed to store word in GenerationDictionary map
|
||||
// uses comparison of FactorKey
|
||||
// 'proper' comparison, not address/id comparison
|
||||
return Compare(other) == 0;
|
||||
}
|
||||
|
||||
protected:
|
||||
VOCABID m_vocabId;
|
||||
};
|
||||
|
||||
class WordHasher
|
||||
{
|
||||
public:
|
||||
size_t operator()(const Word &word) const {
|
||||
return word.GetVocab();
|
||||
}
|
||||
};
|
||||
|
@ -1,109 +0,0 @@
|
||||
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <memory.h>
|
||||
#include "WordsBitmap.h"
|
||||
#include "WordsRange.h"
|
||||
#include "TypeDef.h"
|
||||
#include "Global.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** Bitmap of 'size' positions with no position covered. */
WordsBitmap::WordsBitmap(size_t size)
  : m_bitmap(size, false)
{
}
|
||||
|
||||
/** Copy constructor. Fix: the parameter had been mangled from '&copy' into
 *  the single character '©'; the initialiser's use of 'copy' shows the
 *  intended name.
 */
WordsBitmap::WordsBitmap(const WordsBitmap &copy)
  :m_bitmap(copy.m_bitmap)
{
}
|
||||
|
||||
/** Copy of another bitmap with every position of 'range' (both ends
 *  inclusive) additionally marked as covered. The range is not checked
 *  against the bitmap size.
 *  Fix: the first parameter had been mangled from '&copy' into '©'.
 */
WordsBitmap::WordsBitmap(const WordsBitmap &copy, const WordsRange &range)
  :m_bitmap(copy.m_bitmap)
{
  for (size_t pos = range.startPos; pos <= range.endPos; ++pos) {
    m_bitmap[pos] = true;
  }
}
|
||||
|
||||
/** Nothing to release explicitly. */
WordsBitmap::~WordsBitmap() {}
|
||||
|
||||
size_t WordsBitmap::GetNumWordsCovered() const
|
||||
{
|
||||
size_t count = 0;
|
||||
for (size_t pos = 0 ; pos < m_bitmap.size() ; pos++) {
|
||||
if (m_bitmap[pos])
|
||||
++count;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
size_t WordsBitmap::GetFirstGapPos() const
|
||||
{
|
||||
for (size_t pos = 0 ; pos < m_bitmap.size() ; pos++) {
|
||||
if (!m_bitmap[pos]) {
|
||||
return pos;
|
||||
}
|
||||
}
|
||||
// all words translated
|
||||
return NOT_FOUND;
|
||||
}
|
||||
|
||||
bool WordsBitmap::WithinReorderingConstraint(const WordsRange &prevRange, const WordsRange &nextRange) const
|
||||
{
|
||||
//return true;
|
||||
int maxDistortion = Global::Instance().maxDistortion;
|
||||
if (maxDistortion < 0) {
|
||||
// unlimited distortion
|
||||
return true;
|
||||
}
|
||||
|
||||
// actual distortion score if you do create this hypo
|
||||
int distScore = prevRange.ComputeDistortionScore(nextRange);
|
||||
if (distScore > maxDistortion) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// what distortion you need to expend to jump back to earler untranslated words
|
||||
size_t firstGap = GetFirstGapPos();
|
||||
if (nextRange.startPos == firstGap) {
|
||||
// no gaps
|
||||
return true;
|
||||
}
|
||||
|
||||
assert(firstGap < nextRange.endPos);
|
||||
if (nextRange.endPos - firstGap + 1 > maxDistortion) {
|
||||
// has to jump back too far
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool WordsBitmap::Overlap(const WordsRange &compare) const
|
||||
{
|
||||
for (size_t pos = compare.startPos ; pos <= compare.endPos ; pos++) {
|
||||
if (m_bitmap[pos]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string WordsBitmap::Debug() const
|
||||
{
|
||||
stringstream strme;
|
||||
strme << "[";
|
||||
for (size_t i = 0; i < m_bitmap.size(); ++i) {
|
||||
strme << m_bitmap[i];
|
||||
}
|
||||
strme << "]";
|
||||
return strme.str();
|
||||
|
||||
}
|
||||
|
@ -1,39 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unistd.h>
|
||||
#include <vector>
|
||||
|
||||
class WordsRange;
|
||||
|
||||
class WordsBitmap
|
||||
{
|
||||
protected:
|
||||
std::vector<bool> m_bitmap;
|
||||
public:
|
||||
WordsBitmap(); // do not implement
|
||||
|
||||
// creating the inital hypo. No words translated
|
||||
WordsBitmap(size_t size);
|
||||
WordsBitmap(const WordsBitmap ©);
|
||||
WordsBitmap(const WordsBitmap ©, const WordsRange &range);
|
||||
virtual ~WordsBitmap();
|
||||
|
||||
//! count of words translated
|
||||
size_t GetNumWordsCovered() const;
|
||||
|
||||
//! count of words translated
|
||||
size_t GetFirstGapPos() const;
|
||||
|
||||
bool IsComplete() const {
|
||||
return m_bitmap.size() == GetNumWordsCovered();
|
||||
}
|
||||
|
||||
bool WithinReorderingConstraint(const WordsRange &prevRange, const WordsRange &nextRange) const;
|
||||
|
||||
//! whether the wordrange overlaps with any translated word in this bitmap
|
||||
bool Overlap(const WordsRange &compare) const;
|
||||
|
||||
std::string Debug() const;
|
||||
};
|
||||
|
@ -1,46 +0,0 @@
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <sstream>
|
||||
#include "WordsRange.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Build the range of phraseSize positions that begins directly after the
// last position of prevRange.
WordsRange::WordsRange(const WordsRange &prevRange, size_t phraseSize)
  : startPos(prevRange.endPos + 1)
  , endPos(prevRange.endPos + phraseSize)
{
}
|
||||
|
||||
// Intentionally empty: the destructor performs no cleanup of its own.
WordsRange::~WordsRange() {}
|
||||
|
||||
size_t WordsRange::GetHash() const
|
||||
{
|
||||
size_t ret = startPos;
|
||||
boost::hash_combine(ret, endPos);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool WordsRange::operator==(const WordsRange &other) const
|
||||
{
|
||||
return (startPos == other.startPos) && (endPos == other.endPos);
|
||||
}
|
||||
|
||||
std::string WordsRange::Debug() const
|
||||
{
|
||||
stringstream strme;
|
||||
strme << "[" << startPos << "," << endPos << "]";
|
||||
return strme.str();
|
||||
}
|
||||
|
||||
int WordsRange::ComputeDistortionScore(const WordsRange &next) const
|
||||
{
|
||||
int dist = 0;
|
||||
if (GetNumWordsCovered() == 0) {
|
||||
dist = next.startPos;
|
||||
} else {
|
||||
dist = (int)endPos - (int)next.startPos + 1 ;
|
||||
}
|
||||
return -abs(dist);
|
||||
}
|
@ -1,46 +0,0 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unistd.h>
|
||||
#include "TypeDef.h"
|
||||
|
||||
class WordsRange
|
||||
{
|
||||
public:
|
||||
WordsRange(); // do not implement
|
||||
|
||||
WordsRange(const WordsRange &prevRange, size_t phraseSize);
|
||||
|
||||
WordsRange(size_t s, size_t e)
|
||||
:startPos(s)
|
||||
,endPos(e)
|
||||
{}
|
||||
|
||||
virtual ~WordsRange();
|
||||
|
||||
size_t startPos, endPos;
|
||||
|
||||
//! count of words translated
|
||||
inline size_t GetNumWordsCovered() const {
|
||||
return (startPos == NOT_FOUND) ? 0 : endPos - startPos + 1;
|
||||
}
|
||||
|
||||
inline size_t GetNumWordsBetween(const WordsRange &other) const {
|
||||
//CHECK(!Overlap(x));
|
||||
|
||||
if (other.endPos < startPos) {
|
||||
return startPos - other.endPos - 1;
|
||||
}
|
||||
|
||||
return other.startPos - endPos - 1;
|
||||
}
|
||||
|
||||
int ComputeDistortionScore(const WordsRange &next) const;
|
||||
|
||||
size_t GetHash() const;
|
||||
bool operator==(const WordsRange &other) const;
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
};
|
||||
|
@ -1,21 +0,0 @@
|
||||
/* People have been abusing assert by assuming it will always execute. To
|
||||
* remedy the situation, asserts were replaced with CHECK. These should then
|
||||
* be manually replaced with assert (when used correctly) or UTIL_THROW (for
|
||||
* runtime checks).
|
||||
*/
|
||||
#ifndef UTIL_CHECK__
|
||||
#define UTIL_CHECK__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <iostream>
|
||||
|
||||
#include <cassert>
|
||||
|
||||
// CHECK(Condition): if Condition is false, print the stringified condition
// together with the file and line of the call site to std::cerr and call
// abort(). Nothing in the macro depends on NDEBUG, so the condition is
// evaluated in every build. The do { ... } while (0) wrapper makes the
// expansion a single statement that takes the caller's trailing semicolon.
#define CHECK(Condition) do { \
|
||||
if (!(Condition)) { \
|
||||
std::cerr << "Check " << #Condition << " failed in " << __FILE__ << ":" << __LINE__ << std::endl; \
|
||||
abort(); \
|
||||
} \
|
||||
} while (0) // swallow ;
|
||||
|
||||
#endif // UTIL_CHECK__
|
@ -1,88 +0,0 @@
|
||||
#ifndef moses_gzfile_buf_h
|
||||
#define moses_gzfile_buf_h
|
||||
|
||||
#include <streambuf>
|
||||
#include <zlib.h>
|
||||
#include <cstring>
|
||||
|
||||
/** wrapper around gzip input stream. Unknown parentage
|
||||
* @todo replace with boost version - output stream already uses it
|
||||
*/
|
||||
class gzfilebuf : public std::streambuf
|
||||
{
|
||||
public:
|
||||
gzfilebuf(const char *filename) {
|
||||
_gzf = gzopen(filename, "rb");
|
||||
setg (_buff+sizeof(int), // beginning of putback area
|
||||
_buff+sizeof(int), // read position
|
||||
_buff+sizeof(int)); // end position
|
||||
}
|
||||
~gzfilebuf() {
|
||||
gzclose(_gzf);
|
||||
}
|
||||
protected:
|
||||
virtual int_type overflow (int_type /* c */) {
|
||||
throw;
|
||||
}
|
||||
|
||||
// write multiple characters
|
||||
virtual
|
||||
std::streamsize xsputn (const char* /* s */,
|
||||
std::streamsize /* num */) {
|
||||
throw;
|
||||
}
|
||||
|
||||
virtual std::streampos seekpos ( std::streampos /* sp */, std::ios_base::openmode /* which = std::ios_base::in | std::ios_base::out */ ) {
|
||||
throw;
|
||||
}
|
||||
|
||||
//read one character
|
||||
virtual int_type underflow () {
|
||||
// is read position before end of _buff?
|
||||
if (gptr() < egptr()) {
|
||||
return traits_type::to_int_type(*gptr());
|
||||
}
|
||||
|
||||
/* process size of putback area
|
||||
* - use number of characters read
|
||||
* - but at most four
|
||||
*/
|
||||
unsigned int numPutback = gptr() - eback();
|
||||
if (numPutback > sizeof(int)) {
|
||||
numPutback = sizeof(int);
|
||||
}
|
||||
|
||||
/* copy up to four characters previously read into
|
||||
* the putback _buff (area of first four characters)
|
||||
*/
|
||||
std::memmove (_buff+(sizeof(int)-numPutback), gptr()-numPutback,
|
||||
numPutback);
|
||||
|
||||
// read new characters
|
||||
int num = gzread(_gzf, _buff+sizeof(int), _buffsize-sizeof(int));
|
||||
if (num <= 0) {
|
||||
// ERROR or EOF
|
||||
return EOF;
|
||||
}
|
||||
|
||||
// reset _buff pointers
|
||||
setg (_buff+(sizeof(int)-numPutback), // beginning of putback area
|
||||
_buff+sizeof(int), // read position
|
||||
_buff+sizeof(int)+num); // end of buffer
|
||||
|
||||
// return next character
|
||||
return traits_type::to_int_type(*gptr());
|
||||
}
|
||||
|
||||
std::streamsize xsgetn (char* s,
|
||||
std::streamsize num) {
|
||||
return gzread(_gzf,s,num);
|
||||
}
|
||||
|
||||
private:
|
||||
gzFile _gzf;
|
||||
static const unsigned int _buffsize = 1024;
|
||||
char _buff[_buffsize];
|
||||
};
|
||||
|
||||
#endif
|
176
contrib/other-builds/mira/.cproject
Normal file
176
contrib/other-builds/mira/.cproject
Normal file
@ -0,0 +1,176 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.377583226" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.2071063316" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/mira/Debug}" id="cdt.managedbuild.builder.gnu.cross.881204887" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1218877049" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1094111510" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.2142370493" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1560615310" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool command="g++" id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.115638939" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.1315998281" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.778416356" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.preprocessor.def.317569168" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
|
||||
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
|
||||
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
|
||||
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
|
||||
<listOptionValue builtIn="false" value="WITH_THREADS"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1743631842" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1454738757" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1480777831" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.485611005" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<option id="gnu.cpp.link.option.libs.1007486529" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="moses"/>
|
||||
<listOptionValue builtIn="false" value="irstlm"/>
|
||||
<listOptionValue builtIn="false" value="dstruct"/>
|
||||
<listOptionValue builtIn="false" value="flm"/>
|
||||
<listOptionValue builtIn="false" value="oolm"/>
|
||||
<listOptionValue builtIn="false" value="lattice"/>
|
||||
<listOptionValue builtIn="false" value="misc"/>
|
||||
<listOptionValue builtIn="false" value="dalm"/>
|
||||
<listOptionValue builtIn="false" value="MurmurHash3"/>
|
||||
<listOptionValue builtIn="false" value="search"/>
|
||||
<listOptionValue builtIn="false" value="RandLM"/>
|
||||
<listOptionValue builtIn="false" value="OnDiskPt"/>
|
||||
<listOptionValue builtIn="false" value="lm"/>
|
||||
<listOptionValue builtIn="false" value="util"/>
|
||||
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_system-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_thread-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
|
||||
<listOptionValue builtIn="false" value="pthread"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="bz2"/>
|
||||
<listOptionValue builtIn="false" value="dl"/>
|
||||
<listOptionValue builtIn="false" value="rt"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.paths.132082917" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
|
||||
<listOptionValue builtIn="false" value="/opt/local/lib"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1827477602" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1554055737" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1335019965" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1106765201" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1722081106" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.36030994" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/mira/Release}" id="cdt.managedbuild.builder.gnu.cross.329863268" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.299271422" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1049770857" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.1354488968" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.674520633" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.568828285" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.1042930447" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.305563840" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1424960921" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.460791828" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.945282347" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.561813601" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1813861310" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.991451934" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1702585996" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="mira.cdt.managedbuild.target.gnu.cross.exe.1862989567" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092;cdt.managedbuild.config.gnu.cross.exe.debug.1385309092.;cdt.managedbuild.tool.gnu.cross.c.compiler.1218877049;cdt.managedbuild.tool.gnu.c.compiler.input.1560615310">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.2038764866;cdt.managedbuild.config.gnu.cross.exe.release.2038764866.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.568828285;cdt.managedbuild.tool.gnu.cpp.compiler.input.1424960921">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092;cdt.managedbuild.config.gnu.cross.exe.debug.1385309092.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.115638939;cdt.managedbuild.tool.gnu.cpp.compiler.input.1454738757">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.2038764866;cdt.managedbuild.config.gnu.cross.exe.release.2038764866.;cdt.managedbuild.tool.gnu.cross.c.compiler.299271422;cdt.managedbuild.tool.gnu.c.compiler.input.674520633">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/mira"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/mira"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
|
||||
</cproject>
|
81
contrib/other-builds/mira/.project
Normal file
81
contrib/other-builds/mira/.project
Normal file
@ -0,0 +1,81 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>mira</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
<project>mert_lib</project>
|
||||
<project>moses</project>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
<name>Decoder.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mira/Decoder.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Decoder.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mira/Decoder.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Hildreth.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mira/Hildreth.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Hildreth.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mira/Hildreth.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>HypothesisQueue.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mira/HypothesisQueue.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>HypothesisQueue.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mira/HypothesisQueue.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Main.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mira/Main.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Main.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mira/Main.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>MiraOptimiser.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mira/MiraOptimiser.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Perceptron.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/mira/Perceptron.cpp</locationURI>
|
||||
</link>
|
||||
</linkedResources>
|
||||
</projectDescription>
|
@ -5,13 +5,13 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.162355801" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
@ -88,6 +88,7 @@
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="bz2"/>
|
||||
<listOptionValue builtIn="false" value="dl"/>
|
||||
<listOptionValue builtIn="false" value="rt"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.128214028" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
@ -107,13 +108,13 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.516628324" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
|
@ -5,13 +5,13 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.461114338" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
@ -85,6 +85,7 @@
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="bz2"/>
|
||||
<listOptionValue builtIn="false" value="dl"/>
|
||||
<listOptionValue builtIn="false" value="rt"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.userobjs.1542590830" name="Other objects" superClass="gnu.cpp.link.option.userobjs"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.983725033" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
@ -108,13 +109,13 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.2121690436" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
|
@ -1,7 +1,12 @@
|
||||
/*
|
||||
* This class creates c++ like stream from file descriptor
|
||||
* It uses gcc-specific functions, therefore is not portable
|
||||
*
|
||||
* Jeroen Vermeulen reckons that it can be replaced with Boost's io::stream_buffer
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#ifndef _FDSTREAM_
|
||||
#define _FDSTREAM_
|
||||
|
||||
|
@ -31,6 +31,15 @@ namespace MosesTuning
|
||||
#define TRACE_ERR(str) { }
|
||||
#endif
|
||||
|
||||
// NTH_ELEMENT3 / NTH_ELEMENT4: drop-in wrappers for the three- and
// four-argument forms of std::nth_element. When compiling with GCC 4.8.1 or
// 4.8.2 they expand to std::sort over the whole range instead; everywhere
// else they forward to std::nth_element unchanged.
// NOTE(review): both expansions need <algorithm> at the point of use - confirm
// the including file provides it.
#if __GNUC__ == 4 && __GNUC_MINOR__ == 8 && (__GNUC_PATCHLEVEL__ == 1 || __GNUC_PATCHLEVEL__ == 2)
|
||||
// gcc nth_element() bug: fall back to a full sort, which ignores `middle`
|
||||
#define NTH_ELEMENT3(begin, middle, end) std::sort(begin, end)
|
||||
#define NTH_ELEMENT4(begin, middle, end, orderer) std::sort(begin, end, orderer)
|
||||
#else
|
||||
#define NTH_ELEMENT3(begin, middle, end) std::nth_element(begin, middle, end)
|
||||
#define NTH_ELEMENT4(begin, middle, end, orderer) std::nth_element(begin, middle, end, orderer)
|
||||
#endif
|
||||
|
||||
const char kDefaultDelimiterSymbol[] = " ";
|
||||
|
||||
int verboselevel();
|
||||
|
@ -42,6 +42,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
#include "FeatureDataIterator.h"
|
||||
#include "ScoreDataIterator.h"
|
||||
#include "BleuScorer.h"
|
||||
#include "Util.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace MosesTuning;
|
||||
@ -232,7 +233,7 @@ int main(int argc, char** argv)
|
||||
|
||||
float sample_threshold = -1.0;
|
||||
if (samples.size() > n_samples) {
|
||||
nth_element(scores.begin(), scores.begin() + (n_samples-1), scores.end());
|
||||
NTH_ELEMENT3(scores.begin(), scores.begin() + (n_samples-1), scores.end());
|
||||
sample_threshold = 0.99999-scores[n_samples-1];
|
||||
}
|
||||
|
||||
|
@ -47,7 +47,7 @@ static char* strToChar(const string& s)
|
||||
MosesDecoder::MosesDecoder(const string& inifile, int debuglevel, int argc, vector<string> decoder_params)
|
||||
: m_manager(NULL)
|
||||
{
|
||||
static int BASE_ARGC = 8;
|
||||
static int BASE_ARGC = 4;
|
||||
Parameter* params = new Parameter();
|
||||
char ** mosesargv = new char*[BASE_ARGC + argc];
|
||||
mosesargv[0] = strToChar("-f");
|
||||
@ -56,10 +56,13 @@ MosesDecoder::MosesDecoder(const string& inifile, int debuglevel, int argc, vect
|
||||
stringstream dbgin;
|
||||
dbgin << debuglevel;
|
||||
mosesargv[3] = strToChar(dbgin.str());
|
||||
|
||||
/*
|
||||
mosesargv[4] = strToChar("-use-persistent-cache");
|
||||
mosesargv[5] = strToChar("0");
|
||||
mosesargv[6] = strToChar("-persistent-cache-size");
|
||||
mosesargv[7] = strToChar("0");
|
||||
*/
|
||||
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
char *cstr = &(decoder_params[i])[0];
|
||||
@ -76,8 +79,9 @@ MosesDecoder::MosesDecoder(const string& inifile, int debuglevel, int argc, vect
|
||||
}
|
||||
delete[] mosesargv;
|
||||
|
||||
//m_bleuScoreFeature = staticData.GetBleuScoreFeature(); TODO
|
||||
assert(false);
|
||||
const std::vector<BleuScoreFeature*> &bleuFFs = BleuScoreFeature::GetColl();
|
||||
assert(bleuFFs.size() == 1);
|
||||
m_bleuScoreFeature = bleuFFs[0];
|
||||
}
|
||||
|
||||
void MosesDecoder::cleanup(bool chartDecoding)
|
||||
|
@ -530,11 +530,11 @@ void OutputFeatureWeightsForHypergraph(std::ostream &outputSearchGraphStream)
|
||||
featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, slf[i], outputSearchGraphStream);
|
||||
}
|
||||
}
|
||||
const vector<PhraseDictionary*>& pds = staticData.GetPhraseDictionaries();
|
||||
const vector<PhraseDictionary*>& pds = PhraseDictionary::GetColl();
|
||||
for( size_t i=0; i<pds.size(); i++ ) {
|
||||
featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, pds[i], outputSearchGraphStream);
|
||||
}
|
||||
const vector<const GenerationDictionary*>& gds = staticData.GetGenerationDictionaries();
|
||||
const vector<GenerationDictionary*>& gds = GenerationDictionary::GetColl();
|
||||
for( size_t i=0; i<gds.size(); i++ ) {
|
||||
featureIndex = OutputFeatureWeightsForHypergraph(featureIndex, gds[i], outputSearchGraphStream);
|
||||
}
|
||||
|
@ -245,7 +245,7 @@ void ChartHypothesis::CleanupArcList()
|
||||
|
||||
if (!distinctNBest && m_arcList->size() > nBestSize) {
|
||||
// prune arc list only if there too many arcs
|
||||
nth_element(m_arcList->begin()
|
||||
NTH_ELEMENT4(m_arcList->begin()
|
||||
, m_arcList->begin() + nBestSize - 1
|
||||
, m_arcList->end()
|
||||
, CompareChartChartHypothesisTotalScore());
|
||||
|
@ -141,7 +141,7 @@ ChartParser::ChartParser(InputType const &source, ChartCellCollectionBase &cells
|
||||
staticData.InitializeForInput(source);
|
||||
CreateInputPaths(m_source);
|
||||
|
||||
const std::vector<PhraseDictionary*> &dictionaries = staticData.GetPhraseDictionaries();
|
||||
const std::vector<PhraseDictionary*> &dictionaries = PhraseDictionary::GetColl();
|
||||
m_ruleLookupManagers.reserve(dictionaries.size());
|
||||
for (std::vector<PhraseDictionary*>::const_iterator p = dictionaries.begin();
|
||||
p != dictionaries.end(); ++p) {
|
||||
|
@ -102,7 +102,7 @@ void ChartTranslationOptionList::Add(const TargetPhraseCollection &tpc,
|
||||
|
||||
// Prune if bursting
|
||||
if (m_size == m_ruleLimit * 2) {
|
||||
std::nth_element(m_collection.begin(),
|
||||
NTH_ELEMENT4(m_collection.begin(),
|
||||
m_collection.begin() + m_ruleLimit - 1,
|
||||
m_collection.begin() + m_size,
|
||||
ChartTranslationOptionOrderer());
|
||||
@ -128,7 +128,7 @@ void ChartTranslationOptionList::ApplyThreshold()
|
||||
assert(m_size < m_ruleLimit * 2);
|
||||
// Reduce the list to the best m_ruleLimit options. The remaining
|
||||
// options can be overwritten on subsequent calls to Add().
|
||||
std::nth_element(m_collection.begin(),
|
||||
NTH_ELEMENT4(m_collection.begin(),
|
||||
m_collection.begin()+m_ruleLimit,
|
||||
m_collection.begin()+m_size,
|
||||
ChartTranslationOptionOrderer());
|
||||
|
@ -11,6 +11,7 @@ namespace Moses
|
||||
{
|
||||
|
||||
size_t BleuScoreState::bleu_order = 4;
|
||||
std::vector<BleuScoreFeature*> BleuScoreFeature::s_staticColl;
|
||||
|
||||
BleuScoreState::BleuScoreState(): m_words(1),
|
||||
m_source_length(0),
|
||||
@ -94,6 +95,8 @@ BleuScoreFeature::BleuScoreFeature(const std::string &line)
|
||||
m_smoothing_scheme(PLUS_POINT_ONE)
|
||||
{
|
||||
std::cerr << "Initializing BleuScoreFeature." << std::endl;
|
||||
s_staticColl.push_back(this);
|
||||
|
||||
m_tuneable = false;
|
||||
|
||||
ReadParameters();
|
||||
|
@ -61,6 +61,9 @@ public:
|
||||
class BleuScoreFeature : public StatefulFeatureFunction
|
||||
{
|
||||
public:
|
||||
static const std::vector<BleuScoreFeature*>& GetColl() {
|
||||
return s_staticColl;
|
||||
}
|
||||
|
||||
typedef boost::unordered_map<size_t, RefValue > RefCounts;
|
||||
typedef boost::unordered_map<size_t, NGrams> Matches;
|
||||
@ -150,6 +153,8 @@ public:
|
||||
}
|
||||
|
||||
private:
|
||||
static std::vector<BleuScoreFeature*> s_staticColl;
|
||||
|
||||
bool m_enabled;
|
||||
bool m_sentence_bleu;
|
||||
bool m_simple_history_bleu;
|
||||
|
@ -15,12 +15,12 @@ namespace Moses
|
||||
|
||||
multiset<string> FeatureFunction::description_counts;
|
||||
|
||||
std::vector<FeatureFunction*> FeatureFunction::m_producers;
|
||||
std::vector<FeatureFunction*> FeatureFunction::s_staticColl;
|
||||
|
||||
FeatureFunction &FeatureFunction::FindFeatureFunction(const std::string& name)
|
||||
{
|
||||
for (size_t i = 0; i < m_producers.size(); ++i) {
|
||||
FeatureFunction &ff = *m_producers[i];
|
||||
for (size_t i = 0; i < s_staticColl.size(); ++i) {
|
||||
FeatureFunction &ff = *s_staticColl[i];
|
||||
if (ff.GetScoreProducerDescription() == name) {
|
||||
return ff;
|
||||
}
|
||||
@ -53,7 +53,7 @@ Initialize(const std::string &line)
|
||||
ParseLine(line);
|
||||
|
||||
ScoreComponentCollection::RegisterScoreProducer(this);
|
||||
m_producers.push_back(this);
|
||||
s_staticColl.push_back(this);
|
||||
}
|
||||
|
||||
FeatureFunction::~FeatureFunction() {}
|
||||
|
@ -27,7 +27,7 @@ class FeatureFunction
|
||||
{
|
||||
protected:
|
||||
/**< all the score producers in this run */
|
||||
static std::vector<FeatureFunction*> m_producers;
|
||||
static std::vector<FeatureFunction*> s_staticColl;
|
||||
|
||||
std::string m_description, m_argLine;
|
||||
std::vector<std::vector<std::string> > m_args;
|
||||
@ -41,7 +41,7 @@ protected:
|
||||
|
||||
public:
|
||||
static const std::vector<FeatureFunction*>& GetFeatureFunctions() {
|
||||
return m_producers;
|
||||
return s_staticColl;
|
||||
}
|
||||
static FeatureFunction &FindFeatureFunction(const std::string& name);
|
||||
|
||||
|
@ -19,8 +19,7 @@ InputFeature::InputFeature(const std::string &line)
|
||||
|
||||
void InputFeature::Load()
|
||||
{
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const PhraseDictionary *pt = staticData.GetTranslationScoreProducer(0);
|
||||
const PhraseDictionary *pt = PhraseDictionary::GetColl()[0];
|
||||
const PhraseDictionaryTreeAdaptor *ptBin = dynamic_cast<const PhraseDictionaryTreeAdaptor*>(pt);
|
||||
|
||||
m_legacy = (ptBin != NULL);
|
||||
|
@ -34,10 +34,13 @@ using namespace std;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
std::vector<GenerationDictionary*> GenerationDictionary::s_staticColl;
|
||||
|
||||
GenerationDictionary::GenerationDictionary(const std::string &line)
|
||||
: DecodeFeature(line)
|
||||
{
|
||||
s_staticColl.push_back(this);
|
||||
|
||||
ReadParameters();
|
||||
}
|
||||
|
||||
|
@ -46,12 +46,18 @@ class GenerationDictionary : public DecodeFeature
|
||||
{
|
||||
typedef std::map<const Word* , OutputWordCollection, WordComparer> Collection;
|
||||
protected:
|
||||
static std::vector<GenerationDictionary*> s_staticColl;
|
||||
|
||||
Collection m_collection;
|
||||
// 1st = source
|
||||
// 2nd = target
|
||||
std::string m_filePath;
|
||||
|
||||
public:
|
||||
static const std::vector<GenerationDictionary*>& GetColl() {
|
||||
return s_staticColl;
|
||||
}
|
||||
|
||||
GenerationDictionary(const std::string &line);
|
||||
virtual ~GenerationDictionary();
|
||||
|
||||
|
@ -335,7 +335,7 @@ void Hypothesis::CleanupArcList()
|
||||
|
||||
if (!distinctNBest && m_arcList->size() > nBestSize * 5) {
|
||||
// prune arc list only if there too many arcs
|
||||
nth_element(m_arcList->begin()
|
||||
NTH_ELEMENT4(m_arcList->begin()
|
||||
, m_arcList->begin() + nBestSize - 1
|
||||
, m_arcList->end()
|
||||
, CompareHypothesisTotalScore());
|
||||
|
@ -119,8 +119,10 @@ LMResult LanguageModelDALM::GetValue(const vector<const Word*> &contextFactor, S
|
||||
ret.score = score;
|
||||
|
||||
// hash of n-1 words to use as state
|
||||
size_t startPos = (contextFactor.size() < m_nGramOrder) ? 0 : 1;
|
||||
|
||||
size_t hash = 0;
|
||||
for (size_t i = 1; i < contextFactor.size(); ++i) {
|
||||
for (size_t i = startPos; i < contextFactor.size(); ++i) {
|
||||
const Word &word = *contextFactor[i];
|
||||
const Factor *factor = word.GetFactor(m_factorType);
|
||||
boost::hash_combine(hash, factor);
|
||||
|
@ -464,7 +464,7 @@ void OutputWordGraph(std::ostream &outputWordGraphStream, const Hypothesis *hypo
|
||||
|
||||
// phrase table scores
|
||||
const StaticData &staticData = StaticData::Instance();
|
||||
const std::vector<PhraseDictionary*> &phraseTables = staticData.GetPhraseDictionaries();
|
||||
const std::vector<PhraseDictionary*> &phraseTables = PhraseDictionary::GetColl();
|
||||
std::vector<PhraseDictionary*>::const_iterator iterPhraseTable;
|
||||
for (iterPhraseTable = phraseTables.begin() ; iterPhraseTable != phraseTables.end() ; ++iterPhraseTable) {
|
||||
const PhraseDictionary *phraseTable = *iterPhraseTable;
|
||||
@ -687,11 +687,11 @@ void Manager::OutputFeatureWeightsForSLF(std::ostream &outputSearchGraphStream)
|
||||
featureIndex = OutputFeatureWeightsForSLF(featureIndex, slf[i], outputSearchGraphStream);
|
||||
}
|
||||
}
|
||||
const vector<PhraseDictionary*>& pds = staticData.GetPhraseDictionaries();
|
||||
const vector<PhraseDictionary*>& pds = PhraseDictionary::GetColl();
|
||||
for( size_t i=0; i<pds.size(); i++ ) {
|
||||
featureIndex = OutputFeatureWeightsForSLF(featureIndex, pds[i], outputSearchGraphStream);
|
||||
}
|
||||
const vector<const GenerationDictionary*>& gds = staticData.GetGenerationDictionaries();
|
||||
const vector<GenerationDictionary*>& gds = GenerationDictionary::GetColl();
|
||||
for( size_t i=0; i<gds.size(); i++ ) {
|
||||
featureIndex = OutputFeatureWeightsForSLF(featureIndex, gds[i], outputSearchGraphStream);
|
||||
}
|
||||
@ -726,11 +726,11 @@ void Manager::OutputFeatureValuesForSLF(const Hypothesis* hypo, bool zeros, std:
|
||||
featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, slf[i], outputSearchGraphStream);
|
||||
}
|
||||
}
|
||||
const vector<PhraseDictionary*>& pds = staticData.GetPhraseDictionaries();
|
||||
const vector<PhraseDictionary*>& pds = PhraseDictionary::GetColl();
|
||||
for( size_t i=0; i<pds.size(); i++ ) {
|
||||
featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, pds[i], outputSearchGraphStream);
|
||||
}
|
||||
const vector<const GenerationDictionary*>& gds = staticData.GetGenerationDictionaries();
|
||||
const vector<GenerationDictionary*>& gds = GenerationDictionary::GetColl();
|
||||
for( size_t i=0; i<gds.size(); i++ ) {
|
||||
featureIndex = OutputFeatureValuesForSLF(featureIndex, zeros, hypo, gds[i], outputSearchGraphStream);
|
||||
}
|
||||
@ -760,11 +760,11 @@ void Manager::OutputFeatureValuesForHypergraph(const Hypothesis* hypo, std::ostr
|
||||
featureIndex = OutputFeatureValuesForHypergraph(featureIndex, hypo, slf[i], outputSearchGraphStream);
|
||||
}
|
||||
}
|
||||
const vector<PhraseDictionary*>& pds = staticData.GetPhraseDictionaries();
|
||||
const vector<PhraseDictionary*>& pds = PhraseDictionary::GetColl();
|
||||
for( size_t i=0; i<pds.size(); i++ ) {
|
||||
featureIndex = OutputFeatureValuesForHypergraph(featureIndex, hypo, pds[i], outputSearchGraphStream);
|
||||
}
|
||||
const vector<const GenerationDictionary*>& gds = staticData.GetGenerationDictionaries();
|
||||
const vector<GenerationDictionary*>& gds = GenerationDictionary::GetColl();
|
||||
for( size_t i=0; i<gds.size(); i++ ) {
|
||||
featureIndex = OutputFeatureValuesForHypergraph(featureIndex, hypo, gds[i], outputSearchGraphStream);
|
||||
}
|
||||
|
@ -47,7 +47,7 @@ protected:
|
||||
m_inputFeature = staticData.GetInputFeature();
|
||||
|
||||
if (m_inputFeature) {
|
||||
const PhraseDictionary *firstPt = staticData.GetPhraseDictionaries()[0];
|
||||
const PhraseDictionary *firstPt = PhraseDictionary::GetColl()[0];
|
||||
if (firstPt == m_obj) {
|
||||
m_numInputScores = m_inputFeature->GetNumScoreComponents();
|
||||
}
|
||||
@ -324,7 +324,7 @@ public:
|
||||
m_obj->m_tableLimit : costs.size());
|
||||
|
||||
// find the nth phrase according to future cost
|
||||
std::nth_element(costs.begin(),nth ,costs.end());
|
||||
NTH_ELEMENT3(costs.begin(),nth ,costs.end());
|
||||
|
||||
// add n top phrases to the return list
|
||||
for(std::vector<std::pair<float,size_t> >::iterator
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user