This commit is contained in:
Ulrich Germann 2014-06-27 01:12:22 +01:00
commit 5c77f17833
155 changed files with 16959 additions and 4810 deletions

View File

@ -66,10 +66,9 @@ int main (int argc, char * const argv[])
PhraseNode &rootNode = onDiskWrapper.GetRootSourceNode();
size_t lineNum = 0;
char line[100000];
string line;
//while(getline(inStream, line))
while(inStream.getline(line, 100000)) {
while(getline(inStream, line)) {
lineNum++;
if (lineNum%1000 == 0) cerr << "." << flush;
if (lineNum%10000 == 0) cerr << ":" << flush;
@ -107,8 +106,13 @@ bool Flush(const OnDiskPt::SourcePhrase *prevSourcePhrase, const OnDiskPt::Sourc
return ret;
}
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, const std::string &lineStr, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
char line[lineStr.size() + 1];
strcpy(line, lineStr.c_str());
stringstream sparseFeatures, property;
size_t scoreInd = 0;
// MAIN LOOP
@ -118,6 +122,7 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
2 = scores
3 = align
4 = count
7 = properties
*/
char *tok = strtok (line," ");
OnDiskPt::PhrasePtr out(new Phrase());
@ -148,29 +153,20 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
targetPhrase.CreateAlignFromString(tok);
break;
}
case 4:
++stage;
break;
/* case 5: {
// count info. Only store the 2nd one
float val = Moses::Scan<float>(tok);
misc[0] = val;
++stage;
break;
}*/
case 4: {
// store only the 3rd one (rule count)
float val = Moses::Scan<float>(tok);
misc[0] = val;
break;
}
case 5: {
// count info. Only store the 2nd one
//float val = Moses::Scan<float>(tok);
//misc[0] = val;
++stage;
// sparse features
sparseFeatures << tok << " ";
break;
}
case 6: {
// store only the 3rd one (rule count)
float val = Moses::Scan<float>(tok);
misc[0] = val;
++stage;
break;
property << tok << " ";
break;
}
default:
cerr << "ERROR in line " << line << endl;
@ -183,6 +179,8 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
} // while (tok != NULL)
assert(scoreInd == numScores);
targetPhrase.SetSparseFeatures(Moses::Trim(sparseFeatures.str()));
targetPhrase.SetProperty(Moses::Trim(property.str()));
targetPhrase.SortAlign();
return out;
} // Tokenize()

View File

@ -29,7 +29,7 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
, const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
, OnDiskPt::OnDiskWrapper &onDiskWrapper, int retSourceTarget);
OnDiskPt::PhrasePtr Tokenize(OnDiskPt::SourcePhrase &sourcePhrase, OnDiskPt::TargetPhrase &targetPhrase
, char *line, OnDiskPt::OnDiskWrapper &onDiskWrapper
, const std::string &lineStr, OnDiskPt::OnDiskWrapper &onDiskWrapper
, int numScores
, std::vector<float> &misc);

View File

@ -31,7 +31,7 @@ using namespace std;
namespace OnDiskPt
{
int OnDiskWrapper::VERSION_NUM = 5;
int OnDiskWrapper::VERSION_NUM = 7;
OnDiskWrapper::OnDiskWrapper()
{

View File

@ -162,10 +162,14 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// allocate mem
size_t numScores = onDiskWrapper.GetNumScores()
,numAlign = GetAlign().size();
size_t sparseFeatureSize = m_sparseFeatures.size();
size_t propSize = m_property.size();
size_t memNeeded = sizeof(UINT64); // file pos (phrase id)
memNeeded += sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign; // align
memNeeded += sizeof(float) * numScores; // scores
size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
+ sizeof(float) * numScores // scores
+ sizeof(UINT64) + sparseFeatureSize // sparse features string
+ sizeof(UINT64) + propSize; // property string
char *mem = (char*) malloc(memNeeded);
//memset(mem, 0, memNeeded);
@ -183,11 +187,33 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// scores
memUsed += WriteScoresToMemory(mem + memUsed);
// sparse features
memUsed += WriteStringToMemory(mem + memUsed, m_sparseFeatures);
// property string
memUsed += WriteStringToMemory(mem + memUsed, m_property);
//DebugMem(mem, memNeeded);
assert(memNeeded == memUsed);
return mem;
}
size_t TargetPhrase::WriteStringToMemory(char *mem, const std::string &str) const
{
size_t memUsed = 0;
UINT64 *memTmp = (UINT64*) mem;
size_t strSize = str.size();
memTmp[0] = strSize;
memUsed += sizeof(UINT64);
const char *charStr = str.c_str();
memcpy(mem + memUsed, charStr, strSize);
memUsed += strSize;
return memUsed;
}
size_t TargetPhrase::WriteAlignToMemory(char *mem) const
{
size_t memUsed = 0;
@ -279,6 +305,13 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
// scores
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
// sparse features
ret->GetScoreBreakdown().Assign(&phraseDict, m_sparseFeatures);
// property
ret->SetProperties(m_property);
ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply());
return ret;
@ -299,9 +332,36 @@ UINT64 TargetPhrase::ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPC
memUsed += ReadScoresFromFile(fileTPColl);
assert((memUsed + filePos) == (UINT64)fileTPColl.tellg());
// sparse features
memUsed += ReadStringFromFile(fileTPColl, m_sparseFeatures);
// properties
memUsed += ReadStringFromFile(fileTPColl, m_property);
return memUsed;
}
// Reads one length-prefixed string record from fileTPColl into outStr.
// The record is a UINT64 byte count followed by that many raw characters.
// outStr is always overwritten; it is empty when the stored length is 0.
// Returns the number of bytes consumed (length prefix + payload).
UINT64 TargetPhrase::ReadStringFromFile(std::fstream &fileTPColl, std::string &outStr)
{
  UINT64 bytesRead = 0;

  // Zero-initialised so a failed read cannot leave an indeterminate length.
  UINT64 strSize = 0;
  fileTPColl.read((char*) &strSize, sizeof(UINT64));
  bytesRead += sizeof(UINT64);

  // Reset first: an empty record must not leave a previous value behind.
  outStr.clear();

  if (strSize) {
    // Read straight into the string's own storage. This keeps every stored
    // byte (including embedded NULs) and needs no temporary malloc buffer.
    outStr.resize(strSize);
    fileTPColl.read(&outStr[0], strSize);
    bytesRead += strSize;
  }

  return bytesRead;
}
UINT64 TargetPhrase::ReadFromFile(std::fstream &fileTP)
{
UINT64 bytesRead = 0;

View File

@ -50,15 +50,18 @@ class TargetPhrase: public Phrase
protected:
AlignType m_align;
PhrasePtr m_sourcePhrase;
std::string m_sparseFeatures, m_property;
std::vector<float> m_scores;
UINT64 m_filePos;
size_t WriteAlignToMemory(char *mem) const;
size_t WriteScoresToMemory(char *mem) const;
size_t WriteStringToMemory(char *mem, const std::string &str) const;
UINT64 ReadAlignFromFile(std::fstream &fileTPColl);
UINT64 ReadScoresFromFile(std::fstream &fileTPColl);
UINT64 ReadStringFromFile(std::fstream &fileTPColl, std::string &outStr);
public:
TargetPhrase() {
@ -110,6 +113,15 @@ public:
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
// Replaces the stored property string with a copy of value.
void SetProperty(const std::string &value)
{
  m_property.assign(value);
}
// Replaces the stored sparse-feature string with a copy of value.
void SetSparseFeatures(const std::string &value)
{
  m_sparseFeatures.assign(value);
}
};
}

View File

@ -73,7 +73,7 @@ def gather_necessary_lines(logfile, date):
def append_date_to_table(resline):
"""Appends past dates to the html"""
cur_html = '<td>' + str(resline.current) + '</td>'
cur_html = '<td>' + str(resline.previous) + '</td>'
if resline.percentage > 0.05: #If we have improvement of more than 5%
cur_html = cur_html + '<td class="better">' + str(resline.percentage) + '</td>'
@ -129,7 +129,7 @@ def produce_html(path, global_config):
#Get the lines from the config file
(ll1, ll2) = getLastTwoLines(filenam, global_config.testlogs)
logLine1 = processLogLine(ll1)
logLine2 = processLogLine(ll2)
logLine2 = processLogLine(ll2) #This is the line from the latest revision
#Generate html
res1 = Result(logLine1.testname, logLine1.real, logLine2.real,\

View File

@ -0,0 +1,157 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.602770742">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.602770742" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.602770742" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.602770742." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1436139469" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.622899770" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/CreateOnDiskPt}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1448999623" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2139008298" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2008193341" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.627728792" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1832148270" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1681469807" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.425758466" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.285185442" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.587301391" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2116328611" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.2129089003" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1464765114" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.606542044" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.813817495" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.libs.1325292383" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="oolm"/>
<listOptionValue builtIn="false" value="lattice"/>
<listOptionValue builtIn="false" value="misc"/>
<listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_serialization"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="dl"/>
</option>
<option id="gnu.cpp.link.option.paths.815001500" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2077999464" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1309273058" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1393504995" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.168814843">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.168814843" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.168814843" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.168814843." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.844577457" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1635721038" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/CreateOnDiskPt}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.361379130" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.799410017" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1404799808" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.696270987" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1052942304" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2139553528" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1633770352" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1936692829" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.2077864052" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1045097629" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.455462639" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.868037913" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1997666824" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.552535001" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.546084937" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="CreateOnDiskPt.cdt.managedbuild.target.gnu.exe.348559778" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.168814843;cdt.managedbuild.config.gnu.exe.release.168814843.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1404799808;cdt.managedbuild.tool.gnu.cpp.compiler.input.2139553528">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.602770742;cdt.managedbuild.config.gnu.exe.debug.602770742.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.587301391;cdt.managedbuild.tool.gnu.c.compiler.input.1464765114">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.168814843;cdt.managedbuild.config.gnu.exe.release.168814843.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.1633770352;cdt.managedbuild.tool.gnu.c.compiler.input.1045097629">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.602770742;cdt.managedbuild.config.gnu.exe.debug.602770742.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2008193341;cdt.managedbuild.tool.gnu.cpp.compiler.input.285185442">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope"/>
</cproject>

View File

@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>CreateOnDiskPt</name>
<comment></comment>
<projects>
<project>lm</project>
<project>moses</project>
<project>OnDiskPt</project>
<project>search</project>
<project>util</project>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>Main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.cpp</locationURI>
</link>
<link>
<name>Main.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.h</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -1,8 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.2091728208">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2091728208" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@ -14,41 +14,41 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1312813804" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1457158442" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/consolidate}/Debug" id="cdt.managedbuild.builder.gnu.cross.401817170" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.584773180" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.548826159" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.69309976" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1869389417" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1684035985" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1978964587" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1174628687" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1899244069" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2091728208" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2091728208." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.69362991" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.641760346" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/consolidate}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1286696537" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1571215005" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1186248186" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1416850495" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.534201039" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1369007077" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1468157552" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.988122551" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.580092188" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.1224797947" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.82249493" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.83105790" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.937329669" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.461173729" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1950007837" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.110628197" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.libs.1393924562" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_iostreams"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
</option>
<option id="gnu.cpp.link.option.paths.845281969" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<option id="gnu.cpp.link.option.paths.1967422094" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1562981657" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1093223502" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1813579853" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.660034723" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.2016181080" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1334927727" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.197989377" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -56,8 +56,8 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1197533473">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1197533473" moduleId="org.eclipse.cdt.core.settings" name="Release">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.185559773">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.185559773" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@ -69,31 +69,31 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1197533473" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1197533473." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1193312581" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1614674218" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/consolidate}/Release" id="cdt.managedbuild.builder.gnu.cross.1921548268" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1402792534" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.172258714" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.949623548" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1960225725" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.185559773" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.185559773." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.33298530" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1524270442" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/consolidate}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1812036307" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1942293389" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.520681695" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.649091161" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1279967053" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.624630717" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1697856596" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1575999400" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.732263649" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1685852561" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.233526141" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1882834640" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.1438334736" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1338220126" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1332869586" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.484647585" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2140954002" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.2105674082" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.1531731895" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.286541559" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.620666274" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1478840357" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.412043972" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1075374533" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.231041028" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -103,30 +103,23 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="consolidate.cdt.managedbuild.target.gnu.cross.exe.1166003694" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
<project id="consolidate.cdt.managedbuild.target.gnu.exe.1024637209" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686;cdt.managedbuild.config.gnu.cross.exe.debug.1847651686.;cdt.managedbuild.tool.gnu.cross.c.compiler.584773180;cdt.managedbuild.tool.gnu.c.compiler.input.1869389417">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2091728208;cdt.managedbuild.config.gnu.exe.debug.2091728208.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1626949654;cdt.managedbuild.tool.gnu.cpp.compiler.input.1468157552">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1197533473;cdt.managedbuild.config.gnu.cross.exe.release.1197533473.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1697856596;cdt.managedbuild.tool.gnu.cpp.compiler.input.1685852561">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2091728208;cdt.managedbuild.config.gnu.exe.debug.2091728208.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.82249493;cdt.managedbuild.tool.gnu.c.compiler.input.461173729">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1847651686;cdt.managedbuild.config.gnu.cross.exe.debug.1847651686.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1684035985;cdt.managedbuild.tool.gnu.cpp.compiler.input.1369007077">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.185559773;cdt.managedbuild.config.gnu.exe.release.185559773.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.233526141;cdt.managedbuild.tool.gnu.c.compiler.input.1338220126">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1197533473;cdt.managedbuild.config.gnu.cross.exe.release.1197533473.;cdt.managedbuild.tool.gnu.cross.c.compiler.1402792534;cdt.managedbuild.tool.gnu.c.compiler.input.1960225725">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.185559773;cdt.managedbuild.config.gnu.exe.release.185559773.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.520681695;cdt.managedbuild.tool.gnu.cpp.compiler.input.624630717">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/consolidate"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/consolidate"/>
</configuration>
</storageModule>
<storageModule moduleId="refreshScope"/>
</cproject>

View File

@ -1,59 +1,54 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.1975272196">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1975272196" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1035891586" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.242178856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-ghkm/Debug}" id="cdt.managedbuild.builder.gnu.cross.430400318" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.251687262" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.962699619" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.230503798" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.433137197" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.971749711" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.984190691" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1374841264" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../phrase-extract&quot;"/>
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1975272196" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1975272196." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1513645956" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.621141597" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/extract-ghkm}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1641243676" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.150240237" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.494510261" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.520735766" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.730994342" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1461708548" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1669405610" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1026620601" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1419857560" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.paths.668926503" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.2091468346" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.849972124" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.154971011" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.600284918" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2129236570" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1041890522" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.674199351" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.libs.1221354875" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1684298294" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<option id="gnu.cpp.link.option.paths.1494157787" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1468265945" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.320160974" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2021657841" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1689419664" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.882941613" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.387904024" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -61,44 +56,44 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" moduleId="org.eclipse.cdt.core.settings" name="Release">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.1834059581">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1834059581" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.2000920404" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1106451881" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-ghkm/Release}" id="cdt.managedbuild.builder.gnu.cross.727887705" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.819016498" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1057468997" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1130475273" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.164617278" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1834059581" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1834059581." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.154645030" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.483189041" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/extract-ghkm}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.882065438" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1816735709" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.788831102" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.1367749352" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.1361465069" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.162097682" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.406333630" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1059243022" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.394449415" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.573463904" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.361552728" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.769108402" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1068655225" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1213865062" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.764325642" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1636823200" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.1458872383" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.961080011" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1299258961" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.896866692" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.276294580" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1285290074" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1686210477" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -108,30 +103,23 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extract-ghkm.cdt.managedbuild.target.gnu.cross.exe.1830080171" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
<project id="extract-ghkm.cdt.managedbuild.target.gnu.exe.283582370" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641;cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1975272196;cdt.managedbuild.config.gnu.exe.debug.1975272196.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.494510261;cdt.managedbuild.tool.gnu.cpp.compiler.input.1669405610">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.c.compiler.251687262;cdt.managedbuild.tool.gnu.c.compiler.input.433137197">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1834059581;cdt.managedbuild.config.gnu.exe.release.1834059581.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.394449415;cdt.managedbuild.tool.gnu.c.compiler.input.769108402">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.c.compiler.819016498;cdt.managedbuild.tool.gnu.c.compiler.input.164617278">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1975272196;cdt.managedbuild.config.gnu.exe.debug.1975272196.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.849972124;cdt.managedbuild.tool.gnu.c.compiler.input.2129236570">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268;cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1834059581;cdt.managedbuild.config.gnu.exe.release.1834059581.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.788831102;cdt.managedbuild.tool.gnu.cpp.compiler.input.162097682">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/extract-ghkm"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/extract-ghkm"/>
</configuration>
</storageModule>
<storageModule moduleId="refreshScope"/>
</cproject>

View File

@ -26,49 +26,19 @@
</natures>
<linkedResources>
<link>
<name>Alignment.cpp</name>
<name>Hole.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/Hole.h</locationURI>
</link>
<link>
<name>Alignment.h</name>
<name>HoleCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.cpp</locationURI>
</link>
<link>
<name>AlignmentGraph.cpp</name>
<name>HoleCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.cpp</locationURI>
</link>
<link>
<name>AlignmentGraph.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.h</locationURI>
</link>
<link>
<name>ComposedRule.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.cpp</locationURI>
</link>
<link>
<name>ComposedRule.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.h</locationURI>
</link>
<link>
<name>Exception.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Exception.h</locationURI>
</link>
<link>
<name>ExtractGHKM.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.cpp</locationURI>
</link>
<link>
<name>ExtractGHKM.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.h</locationURI>
</link>
<link>
<name>InputFileStream.cpp</name>
@ -80,31 +50,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI>
</link>
<link>
<name>Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Jamfile</locationURI>
</link>
<link>
<name>Main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Main.cpp</locationURI>
</link>
<link>
<name>Node.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.cpp</locationURI>
</link>
<link>
<name>Node.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.h</locationURI>
</link>
<link>
<name>Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Options.h</locationURI>
</link>
<link>
<name>OutputFileStream.cpp</name>
<type>1</type>
@ -116,54 +61,24 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
<name>ParseTree.cpp</name>
<name>SentenceAlignment.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp</locationURI>
</link>
<link>
<name>ParseTree.h</name>
<name>SentenceAlignment.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h</locationURI>
</link>
<link>
<name>ScfgRule.cpp</name>
<name>SentenceAlignmentWithSyntax.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.cpp</locationURI>
</link>
<link>
<name>ScfgRule.h</name>
<name>SentenceAlignmentWithSyntax.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.h</locationURI>
</link>
<link>
<name>ScfgRuleWriter.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp</locationURI>
</link>
<link>
<name>ScfgRuleWriter.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.h</locationURI>
</link>
<link>
<name>Span.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.cpp</locationURI>
</link>
<link>
<name>Span.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.h</locationURI>
</link>
<link>
<name>Subgraph.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.cpp</locationURI>
</link>
<link>
<name>Subgraph.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
</link>
<link>
<name>SyntaxTree.cpp</name>
@ -186,14 +101,9 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
</link>
<link>
<name>XmlTreeParser.cpp</name>
<name>extract-rules-main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.cpp</locationURI>
</link>
<link>
<name>XmlTreeParser.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp</locationURI>
</link>
<link>
<name>tables-core.cpp</name>

View File

@ -5,12 +5,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -27,18 +27,21 @@
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.855436310" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.506549229" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1497326561" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<option id="gnu.cpp.compiler.option.include.paths.1497326561" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:/${ProjName}}&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.606353571" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.740521305" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.1946120010" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="z"/>
</option>
<option id="gnu.cpp.link.option.paths.1563475751" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<option id="gnu.cpp.link.option.paths.1563475751" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.106010037" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
@ -52,6 +55,9 @@
</tool>
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="util/sorted_uniform_test.cc|util/sized_iterator_test.cc|util/read_compressed_test.cc|util/probing_hash_table_test.cc|util/joint_sort_test.cc|util/multi_intersection_test.cc|util/file_piece_test.cc|util/bit_packing_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
@ -60,12 +66,12 @@
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
@ -130,4 +136,5 @@
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -0,0 +1,189 @@
/*
* AlignedSentence.cpp
*
* Created on: 18 Feb 2014
* Author: s0565741
*/
#include <sstream>
#include "moses/Util.h"
#include "AlignedSentence.h"
#include "Parameter.h"
using namespace std;
/////////////////////////////////////////////////////////////////////////////////
// Builds a fully populated sentence pair.
//   lineNum   - line number of the sentence pair, stored in m_lineNum
//   source    - source sentence, split into words by PopulateWordVec()
//   target    - target sentence, split into words by PopulateWordVec()
//   alignment - alignment string, parsed by PopulateAlignment()
// Both word vectors are filled before the alignment is read because
// PopulateAlignment() indexes into m_source and m_target.
AlignedSentence::AlignedSentence(int lineNum,
const std::string &source,
const std::string &target,
const std::string &alignment)
:m_lineNum(lineNum)
{
PopulateWordVec(m_source, source);
PopulateWordVec(m_target, target);
PopulateAlignment(alignment);
}
// Hands both word vectors to Moses::RemoveAllInColl; this is expected to
// delete the Word objects that were allocated with new when the phrases
// were populated.
AlignedSentence::~AlignedSentence() {
Moses::RemoveAllInColl(m_source);
Moses::RemoveAllInColl(m_target);
}
// Splits `line` into tokens with Moses::Tokenize and stores one newly
// allocated Word per token in `vec`, in sentence order. Each Word is
// constructed with its position in the sentence and its surface string.
// `vec` is resized to exactly the number of tokens.
void AlignedSentence::PopulateWordVec(Phrase &vec, const std::string &line)
{
  std::vector<string> tokens;
  Moses::Tokenize(tokens, line);

  vec.resize(tokens.size());
  const size_t numWords = vec.size();
  for (size_t pos = 0; pos < numWords; ++pos) {
    vec[pos] = new Word(pos, tokens[pos]);
  }
}
// Parses an alignment string and links the aligned words to each other.
//
// `line` is tokenized with Moses::Tokenize; every token is then split on
// "-" into a (source position, target position) pair. For each pair the
// source word and the target word register each other via AddAlignment().
//
// m_source and m_target must already be populated.
//
// Malformed input (a token that does not yield exactly two positions, or a
// position outside the corresponding sentence) is reported on stderr and
// the process is aborted. These checks are explicit rather than assert()s
// so that they also protect builds compiled with NDEBUG from indexing
// outside the word vectors.
void AlignedSentence::PopulateAlignment(const std::string &line)
{
  vector<string> alignStr;
  Moses::Tokenize(alignStr, line);

  for (size_t i = 0; i < alignStr.size(); ++i) {
    vector<int> alignPair;
    Moses::Tokenize(alignPair, alignStr[i], "-");
    if (alignPair.size() != 2) {
      cerr << "ERROR0:malformed alignment point \"" << alignStr[i]
           << "\" in AlignedSentence=" << Debug() << endl;
      abort();
    }

    const int sourcePos = alignPair[0];
    const int targetPos = alignPair[1];

    // negative positions are rejected explicitly instead of relying on the
    // implicit signed-to-unsigned conversion in the comparison
    if (sourcePos < 0 || static_cast<size_t>(sourcePos) >= m_source.size()) {
      cerr << "ERROR1:AlignedSentence=" << Debug() << endl;
      cerr << "m_source=" << m_source.size() << endl;
      abort();
    }
    if (targetPos < 0 || static_cast<size_t>(targetPos) >= m_target.size()) {
      cerr << "ERROR2:AlignedSentence=" << Debug() << endl;
      cerr << "m_target=" << m_target.size() << endl;
      abort();
    }

    Word *sourceWord = m_source[sourcePos];
    Word *targetWord = m_target[targetPos];

    // alignment links are stored on both sides
    sourceWord->AddAlignment(targetWord);
    targetWord->AddAlignment(sourceWord);
  }
}
std::string AlignedSentence::Debug() const
{
stringstream out;
out << "m_lineNum:";
out << m_lineNum;
out << endl;
out << "m_source:";
out << m_source.Debug();
out << endl;
out << "m_target:";
out << m_target.Debug();
out << endl;
out << "consistent phrases:" << endl;
out << m_consistentPhrases.Debug();
out << endl;
return out.str();
}
std::vector<int> AlignedSentence::GetSourceAlignmentCount() const
{
vector<int> ret(m_source.size());
for (size_t i = 0; i < m_source.size(); ++i) {
const Word &word = *m_source[i];
ret[i] = word.GetAlignmentIndex().size();
}
return ret;
}
// Extracts the consistent phrase pairs of this sentence pair and then lets
// the phrase collection attach the hiero non-terminal to them.
// The order matters: AddHieroNonTerms() works on the phrases that
// CreateConsistentPhrases() has just added.
void AlignedSentence::Create(const Parameter &params)
{
CreateConsistentPhrases(params);
m_consistentPhrases.AddHieroNonTerms(params);
}
void AlignedSentence::CreateConsistentPhrases(const Parameter &params)
{
int countT = m_target.size();
int countS = m_source.size();
m_consistentPhrases.Initialize(countS);
// check alignments for target phrase startT...endT
for(int lengthT=1;
lengthT <= params.maxSpan && lengthT <= countT;
lengthT++) {
for(int startT=0; startT < countT-(lengthT-1); startT++) {
// that's nice to have
int endT = startT + lengthT - 1;
// find find aligned source words
// first: find minimum and maximum source word
int minS = 9999;
int maxS = -1;
vector< int > usedS = GetSourceAlignmentCount();
for(int ti=startT; ti<=endT; ti++) {
const Word &word = *m_target[ti];
const std::set<int> &alignment = word.GetAlignmentIndex();
std::set<int>::const_iterator iterAlign;
for(iterAlign = alignment.begin(); iterAlign != alignment.end(); ++iterAlign) {
int si = *iterAlign;
if (si<minS) {
minS = si;
}
if (si>maxS) {
maxS = si;
}
usedS[ si ]--;
}
}
// unaligned phrases are not allowed
if( maxS == -1 )
continue;
// source phrase has to be within limits
if( maxS-minS >= params.maxSpan )
continue;
// check if source words are aligned to out of bound target words
bool out_of_bounds = false;
for(int si=minS; si<=maxS && !out_of_bounds; si++)
if (usedS[si]>0) {
out_of_bounds = true;
}
// if out of bound, you gotta go
if (out_of_bounds)
continue;
// done with all the checks, lets go over all consistent phrase pairs
// start point of source phrase may retreat over unaligned
for(int startS=minS;
(startS>=0 &&
startS>maxS - params.maxSpan && // within length limit
(startS==minS || m_source[startS]->GetAlignment().size()==0)); // unaligned
startS--) {
// end point of source phrase may advance over unaligned
for(int endS=maxS;
(endS<countS && endS<startS + params.maxSpan && // within length limit
(endS==maxS || m_source[endS]->GetAlignment().size()==0)); // unaligned
endS++) {
// take note that this is a valid phrase alignment
m_consistentPhrases.Add(startS, endS, startT, endT, params);
}
}
}
}
}

View File

@ -0,0 +1,51 @@
/*
* AlignedSentence.h
*
* Created on: 18 Feb 2014
* Author: s0565741
*/
#pragma once
#include <string>
#include <set>
#include "ConsistentPhrases.h"
#include "Phrase.h"
#include "moses/TypeDef.h"
class Parameter;
// A word-aligned sentence pair together with the consistent phrase pairs
// extracted from it. The source and target sentences are stored as Phrase
// objects whose Word elements carry the alignment links.
class AlignedSentence {
public:
// Records only the line number and leaves both phrases empty; a subclass
// can use this when it populates the phrases later itself.
AlignedSentence(int lineNum)
:m_lineNum(lineNum)
{}
// Populates source, target and alignment immediately from the given strings.
AlignedSentence(int lineNum,
const std::string &source,
const std::string &target,
const std::string &alignment);
virtual ~AlignedSentence();
// Extracts the consistent phrases; virtual so subclasses can extend it.
virtual void Create(const Parameter &params);
// Returns the source phrase for Moses::Input, the target phrase otherwise.
const Phrase &GetPhrase(Moses::FactorDirection direction) const
{ return (direction == Moses::Input) ? m_source : m_target; }
// Read-only access to the phrase pairs collected so far.
const ConsistentPhrases &GetConsistentPhrases() const
{ return m_consistentPhrases; }
// Human-readable dump of the sentence pair, for diagnostics.
virtual std::string Debug() const;
// Line number given to the constructor (public data member).
int m_lineNum;
protected:
Phrase m_source, m_target;
ConsistentPhrases m_consistentPhrases;
// Fills m_consistentPhrases from the populated phrases and alignment.
void CreateConsistentPhrases(const Parameter &params);
// Tokenizes line and stores one Word per token in vec.
void PopulateWordVec(Phrase &vec, const std::string &line);
// m_source and m_target MUST be populated before calling this
void PopulateAlignment(const std::string &line);
// Number of alignment links for each source position.
std::vector<int> GetSourceAlignmentCount() const;
};

View File

@ -0,0 +1,183 @@
/*
* AlignedSentenceSyntax.cpp
*
* Created on: 26 Feb 2014
* Author: hieu
*/
#include "AlignedSentenceSyntax.h"
#include "Parameter.h"
#include "pugixml.hpp"
#include "moses/Util.h"
using namespace std;
// Stores the raw source, target and alignment strings without parsing them.
// The base class is constructed with the line number only, so the phrases
// stay empty here; the strings are consumed later by Create().
AlignedSentenceSyntax::AlignedSentenceSyntax(int lineNum,
const std::string &source,
const std::string &target,
const std::string &alignment)
:AlignedSentence(lineNum)
,m_sourceStr(source)
,m_targetStr(target)
,m_alignmentStr(alignment)
{
}
// Intentionally empty: this class adds only value members (strings and
// syntax trees), which are destroyed automatically.
AlignedSentenceSyntax::~AlignedSentenceSyntax() {
}
// Fills `phrase` and `tree` for one side (source or target) of the pair.
//
//   isSyntax        - true if `line` carries XML syntax annotation
//   mixedSyntaxType - 0: keep only the parsed labels;
//                     non-zero: also set the hiero label on the tree;
//                     2: additionally call AddToAll() with the hiero label
//   params          - supplies the hiero non-terminal (params.hieroNonTerm)
//   line            - the sentence; taken by value because it is wrapped in
//                     an enclosing XML element before parsing
//
// Without syntax the line is simply tokenized and the tree only gets the
// hiero label.
void AlignedSentenceSyntax::Populate(bool isSyntax, int mixedSyntaxType, const Parameter &params,
                                     string line, Phrase &phrase, SyntaxTree &tree)
{
  // plain text side
  if (!isSyntax) {
    PopulateWordVec(phrase, line);
    tree.SetHieroLabel(params.hieroNonTerm);
    return;
  }

  // annotated side: wrap in a root element so the whole sentence gets a span
  line = "<xml><tree label=\"X\">" + line + "</tree></xml>";
  XMLParse(phrase, tree, line, params);

  if (mixedSyntaxType == 0) {
    return;
  }

  // mixed syntax. Always add [X] where there isn't 1
  tree.SetHieroLabel(params.hieroNonTerm);
  if (mixedSyntaxType == 2) {
    tree.AddToAll(params.hieroNonTerm);
  }
}
// Parses the stored strings and extracts labelled consistent phrases.
// The steps depend on each other and must run in this order:
//   1. populate source and target phrases (and their syntax trees),
//   2. read the alignment, which indexes into the populated phrases,
//   3. extract the consistent phrase pairs,
//   4. attach non-terminal labels from the syntax trees to those phrases.
void AlignedSentenceSyntax::Create(const Parameter &params)
{
Populate(params.sourceSyntax, params.mixedSyntaxType, params, m_sourceStr,
m_source, m_sourceTree);
Populate(params.targetSyntax, params.mixedSyntaxType, params, m_targetStr,
m_target, m_targetTree);
PopulateAlignment(m_alignmentStr);
CreateConsistentPhrases(params);
// create labels
CreateNonTerms();
}
// Replaces special characters in `text` with entity references, in place.
// The replacements are applied one after another in table order; "&" must
// stay first so that the ampersands introduced by later replacements are
// not escaped a second time.
void Escape(string &text)
{
  static const char *const kReplacements[][2] = {
    {"&",  "&amp;"},
    {"|",  "&#124;"},
    {"<",  "&lt;"},
    {">",  "&gt;"},
    {"'",  "&apos;"},
    {"\"", "&quot;"},
    {"[",  "&#91;"},
    {"]",  "&#93;"}
  };
  const size_t numReplacements = sizeof(kReplacements) / sizeof(kReplacements[0]);

  for (size_t i = 0; i < numReplacements; ++i) {
    text = Moses::Replace(text, kReplacements[i][0], kReplacements[i][1]);
  }
}
// Recursively walks the children of `parentNode`, appending the words found
// to `output` and recording every labelled element as a span in `tree`.
//
//   output     - phrase that receives one new Word per token, in document order
//   tree       - receives a "[label]" entry for each element that has a
//                non-empty "label" attribute, covering the words produced
//                inside that element
//   parentNode - node whose children are processed
//   params     - forwarded to SyntaxTree::Add()
//
// Children with a name (elements) are descended into; the text carried by a
// child itself is escaped with Escape() and tokenized.
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const pugi::xml_node &parentNode,
                                     const Parameter &params)
{
  for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) {
    const string nodeName = childNode.name();

    // span label
    string label;
    const int startPos = output.size();

    if (!nodeName.empty()) {
      label = childNode.attribute("label").as_string();

      // recursively call this function. For proper recursive trees
      XMLParse(output, tree, childNode, params);
    }

    // fill phrase vector
    string text = childNode.value();
    Escape(text);

    std::vector<string> toks;
    Moses::Tokenize(toks, text);

    for (size_t i = 0; i < toks.size(); ++i) {
      // the word's position is its index in the output phrase
      Word *word = new Word(output.size(), toks[i]);
      output.push_back(word);
    }

    // is it a labelled span?
    // NOTE(review): for a labelled element that produced no words, endPos is
    // startPos - 1; confirm that SyntaxTree::Add copes with such a span.
    const int endPos = output.size() - 1;

    // fill syntax labels
    if (!label.empty()) {
      label = "[" + label + "]";
      tree.Add(startPos, endPos, label, params);
    }
  }
}
// Parses `input` as an XML document and converts the content of its
// top-level <xml> element into words (`output`) and labelled spans (`tree`).
//
// A parse failure is reported on stderr together with the sentence's line
// number; processing then continues with whatever part of the document
// pugixml was able to build, exactly as before, so callers see no change
// in control flow.
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const std::string input,
                                     const Parameter &params)
{
  pugi::xml_document doc;
  const pugi::xml_parse_result result = doc.load(input.c_str(),
      pugi::parse_default | pugi::parse_comments);

  if (!result) {
    // previously the result was ignored and malformed XML went unnoticed
    cerr << "WARNING: XML parse error in sentence " << m_lineNum
         << ": " << result.description()
         << " (offset " << result.offset << ")" << endl;
  }

  pugi::xml_node topNode = doc.child("xml");
  XMLParse(output, tree, topNode, params);
}
void AlignedSentenceSyntax::CreateNonTerms()
{
for (int sourceStart = 0; sourceStart < m_source.size(); ++sourceStart) {
for (int sourceEnd = sourceStart; sourceEnd < m_source.size(); ++sourceEnd) {
ConsistentPhrases::Coll &coll = m_consistentPhrases.GetColl(sourceStart, sourceEnd);
const SyntaxTree::Labels &sourceLabels = m_sourceTree.Find(sourceStart, sourceEnd);
ConsistentPhrases::Coll::iterator iter;
for (iter = coll.begin(); iter != coll.end(); ++iter) {
ConsistentPhrase &cp = **iter;
int targetStart = cp.corners[2];
int targetEnd = cp.corners[3];
const SyntaxTree::Labels &targetLabels = m_targetTree.Find(targetStart, targetEnd);
CreateNonTerms(cp, sourceLabels, targetLabels);
}
}
}
}
// Adds one non-terminal to `cp` for every combination of a source label and
// a target label (the cross product of the two label collections).
// Nothing is added if either collection is empty.
void AlignedSentenceSyntax::CreateNonTerms(ConsistentPhrase &cp,
                                           const SyntaxTree::Labels &sourceLabels,
                                           const SyntaxTree::Labels &targetLabels)
{
  typedef SyntaxTree::Labels::const_iterator LabelIter;

  for (LabelIter src = sourceLabels.begin(); src != sourceLabels.end(); ++src) {
    for (LabelIter tgt = targetLabels.begin(); tgt != targetLabels.end(); ++tgt) {
      cp.AddNonTerms(*src, *tgt);
    }
  }
}

View File

@ -0,0 +1,46 @@
/*
* AlignedSentenceSyntax.h
*
* Created on: 26 Feb 2014
* Author: hieu
*/
#pragma once
#include "AlignedSentence.h"
#include "SyntaxTree.h"
#include "pugixml.hpp"
// An AlignedSentence whose source and/or target side may carry XML syntax
// annotation. The raw strings are kept until Create() parses them into
// phrases and syntax trees and labels the consistent phrases.
class AlignedSentenceSyntax : public AlignedSentence
{
public:
// Stores the strings only; no parsing happens until Create().
AlignedSentenceSyntax(int lineNum,
const std::string &source,
const std::string &target,
const std::string &alignment);
virtual ~AlignedSentenceSyntax();
// Replaces the base-class behaviour: parses both sides, reads the
// alignment, extracts consistent phrases and attaches syntax labels.
void Create(const Parameter &params);
//virtual std::string Debug() const;
protected:
// Unparsed input, as passed to the constructor.
std::string m_sourceStr, m_targetStr, m_alignmentStr;
// Labelled spans for each side, filled by Populate().
SyntaxTree m_sourceTree, m_targetTree;
// Parses an XML string and delegates to the node overload below.
void XMLParse(Phrase &output,
SyntaxTree &tree,
const std::string input,
const Parameter &params);
// Recursively converts the children of parentNode into words and spans.
void XMLParse(Phrase &output,
SyntaxTree &tree,
const pugi::xml_node &parentNode,
const Parameter &params);
// Labels every consistent phrase using both syntax trees.
void CreateNonTerms();
// Adds the cross product of source and target labels to one phrase.
void CreateNonTerms(ConsistentPhrase &cp,
const SyntaxTree::Labels &sourceLabels,
const SyntaxTree::Labels &targetLabels);
// Fills phrase and tree for one side of the sentence pair.
void Populate(bool isSyntax, int mixedSyntaxType, const Parameter &params,
std::string line, Phrase &phrase, SyntaxTree &tree);
};

View File

@ -0,0 +1,66 @@
/*
* ConsistentPhrase.cpp
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#include <sstream>
#include "ConsistentPhrase.h"
#include "Word.h"
#include "NonTerm.h"
#include "Parameter.h"
using namespace std;
// Creates a phrase pair covering source words [sourceStart, sourceEnd] and
// target words [targetStart, targetEnd]. The four positions are stored in
// `corners` in exactly that order. m_hieroNonTerm is constructed for this
// phrase with params.hieroNonTerm as both its source and its target label.
ConsistentPhrase::ConsistentPhrase(
int sourceStart, int sourceEnd,
int targetStart, int targetEnd,
const Parameter &params)
:corners(4)
,m_hieroNonTerm(*this, params.hieroNonTerm, params.hieroNonTerm)
{
corners[0] = sourceStart;
corners[1] = sourceEnd;
corners[2] = targetStart;
corners[3] = targetEnd;
}
// Intentionally empty: all members are value types that are destroyed
// automatically.
ConsistentPhrase::~ConsistentPhrase() {
}
// Orders phrases by comparing their `corners` vectors, i.e. by source
// start, then source end, then target start, then target end.
bool ConsistentPhrase::operator<(const ConsistentPhrase &other) const
{
return corners < other.corners;
}
// Appends a non-terminal with the given source and target labels to this
// phrase. No duplicate check is made; calling this twice with the same
// labels stores two entries.
void ConsistentPhrase::AddNonTerms(const std::string &source,
const std::string &target)
{
m_nonTerms.push_back(NonTerm(*this, source, target));
}
// Returns true if the target span of `other` shares at least one position
// with the target span of this phrase. Both spans are inclusive
// (corners[2] = target start, corners[3] = target end).
bool ConsistentPhrase::TargetOverlap(const ConsistentPhrase &other) const
{
  const bool otherEndsBeforeThis = other.corners[3] < corners[2];
  const bool otherStartsAfterThis = other.corners[2] > corners[3];
  return !otherEndsBeforeThis && !otherStartsAfterThis;
}
std::string ConsistentPhrase::Debug() const
{
stringstream out;
out << "[" << corners[0] << "-" << corners[1]
<< "][" << corners[2] << "-" << corners[3] << "]";
out << "NT:";
for (size_t i = 0; i < m_nonTerms.size(); ++i) {
const NonTerm &nonTerm = m_nonTerms[i];
out << nonTerm.GetLabel(Moses::Input) << ":" << nonTerm.GetLabel(Moses::Output);
}
return out.str();
}

View File

@ -0,0 +1,51 @@
/*
* ConsistentPhrase.h
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#pragma once
#include <cassert>
#include <vector>
#include <iostream>
#include "moses/TypeDef.h"
#include "NonTerm.h"
// A phrase pair (source span + target span) that is consistent with the
// word alignment, together with the non-terminal labels attached to it.
class ConsistentPhrase
{
public:
typedef std::vector<NonTerm> NonTerms;
// Inclusive span boundaries, set by the constructor:
// [0] source start, [1] source end, [2] target start, [3] target end.
std::vector<int> corners;
// Copying is not supported: declared but deliberately left undefined.
ConsistentPhrase(const ConsistentPhrase &copy); // do not implement
ConsistentPhrase(int sourceStart, int sourceEnd,
int targetStart, int targetEnd,
const Parameter &params);
virtual ~ConsistentPhrase();
// Number of words in the source span (Moses::Input) or in the target span
// (any other direction); both ends are inclusive, hence the + 1.
int GetWidth(Moses::FactorDirection direction) const
{ return (direction == Moses::Input) ? corners[1] - corners[0] + 1 : corners[3] - corners[2] + 1; }
// Appends a non-terminal with the given source / target labels.
void AddNonTerms(const std::string &source,
const std::string &target);
// All non-terminals added through AddNonTerms(), in insertion order.
const NonTerms &GetNonTerms() const
{ return m_nonTerms;}
// The non-terminal created in the constructor from params.hieroNonTerm.
const NonTerm &GetHieroNonTerm() const
{ return m_hieroNonTerm;}
// True if the target spans of the two phrases share a position.
bool TargetOverlap(const ConsistentPhrase &other) const;
// Ordering by the corners vector.
bool operator<(const ConsistentPhrase &other) const;
// One-line textual description, for diagnostics.
std::string Debug() const;
protected:
NonTerms m_nonTerms;
NonTerm m_hieroNonTerm;
};

View File

@ -0,0 +1,103 @@
/*
* ConsistentPhrases.cpp
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#include <sstream>
#include <cassert>
#include "ConsistentPhrases.h"
#include "NonTerm.h"
#include "Parameter.h"
#include "moses/Util.h"
using namespace std;
// Creates an empty collection. Initialize() must size the table before
// Add() or GetColl() index into it.
ConsistentPhrases::ConsistentPhrases()
{
}
// Passes every per-span set to Moses::RemoveAllInColl so that the
// ConsistentPhrase objects allocated in Add() are released.
ConsistentPhrases::~ConsistentPhrases() {
  for (size_t start = 0; start < m_coll.size(); ++start) {
    std::vector<Coll> &row = m_coll[start];
    for (size_t offset = 0; offset < row.size(); ++offset) {
      Moses::RemoveAllInColl(row[offset]);
    }
  }
}
// Sizes the lookup table for a source sentence of `size` words.
// Row `start` gets size - start entries, one for every source span that
// begins at `start` and ends inside the sentence.
void ConsistentPhrases::Initialize(size_t size)
{
  m_coll.resize(size);

  for (size_t start = 0; start < size; ++start) {
    const size_t numSpans = size - start;
    m_coll[start].resize(numSpans);
  }
}
void ConsistentPhrases::Add(int sourceStart, int sourceEnd,
int targetStart, int targetEnd,
const Parameter &params)
{
Coll &coll = m_coll[sourceStart][sourceEnd - sourceStart];
ConsistentPhrase *cp = new ConsistentPhrase(sourceStart, sourceEnd,
targetStart, targetEnd,
params);
pair<Coll::iterator, bool> inserted = coll.insert(cp);
assert(inserted.second);
}
// Read-only access to the set of phrases whose source span is exactly
// [sourceStart, sourceEnd]. The span must lie inside the table sized by
// Initialize(); no bounds check is made.
const ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd) const
{
  return m_coll[sourceStart][sourceEnd - sourceStart];
}
// Mutable access to the set of phrases whose source span is exactly
// [sourceStart, sourceEnd]. The span must lie inside the table sized by
// Initialize(); no bounds check is made.
ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd)
{
  return m_coll[sourceStart][sourceEnd - sourceStart];
}
std::string ConsistentPhrases::Debug() const
{
std::stringstream out;
for (int start = 0; start < m_coll.size(); ++start) {
const std::vector<Coll> &allSourceStart = m_coll[start];
for (int size = 0; size < allSourceStart.size(); ++size) {
const Coll &coll = allSourceStart[size];
Coll::const_iterator iter;
for (iter = coll.begin(); iter != coll.end(); ++iter) {
const ConsistentPhrase &consistentPhrase = **iter;
out << consistentPhrase.Debug() << endl;
}
}
}
return out.str();
}
void ConsistentPhrases::AddHieroNonTerms(const Parameter &params)
{
// add [X] labels everywhere
for (int i = 0; i < m_coll.size(); ++i) {
vector<Coll> &inner = m_coll[i];
for (int j = 0; j < inner.size(); ++j) {
ConsistentPhrases::Coll &coll = inner[j];
ConsistentPhrases::Coll::iterator iter;
for (iter = coll.begin(); iter != coll.end(); ++iter) {
ConsistentPhrase &cp = **iter;
cp.AddNonTerms(params.hieroNonTerm, params.hieroNonTerm);
}
}
}
}

View File

@ -0,0 +1,40 @@
/*
* ConsistentPhrases.h
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#pragma once
#include <set>
#include <vector>
#include <iostream>
#include "ConsistentPhrase.h"
class Word;
class Parameter;
// Owns all consistent phrase pairs of one sentence pair, indexed by their
// source span.
class ConsistentPhrases {
public:
// Phrases sharing one source span. The set holds pointers, so its ordering
// and uniqueness are by pointer value.
typedef std::set<ConsistentPhrase*> Coll;
ConsistentPhrases();
virtual ~ConsistentPhrases();
// Sizes the table for a source sentence of `size` words; call before Add().
void Initialize(size_t size);
// Creates a phrase for the given spans and stores it under its source span.
void Add(int sourceStart, int sourceEnd,
int targetStart, int targetEnd,
const Parameter &params);
// Adds the hiero non-terminal (params.hieroNonTerm) to every stored phrase.
void AddHieroNonTerms(const Parameter &params);
// Phrases whose source span is exactly [sourceStart, sourceEnd].
const Coll &GetColl(int sourceStart, int sourceEnd) const;
Coll &GetColl(int sourceStart, int sourceEnd);
// One line of text per stored phrase, for diagnostics.
std::string Debug() const;
protected:
// Indexed as m_coll[sourceStart][sourceEnd - sourceStart].
std::vector< std::vector<Coll> > m_coll;
};

View File

@ -1,37 +0,0 @@
/*
* Global.cpp
* extract
*
* Created by Hieu Hoang on 01/02/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include "Global.h"
// Definition of the global debug flag; off by default.
bool g_debug = false;
// Sets the default value of every extraction option.
// The initialisers are listed in the order in which the members are
// declared in the class, which is also the order in which they are
// initialised.
Global::Global()
  : minHoleSpanSourceDefault(2)
  , maxHoleSpanSourceDefault(7)
  , minHoleSpanSourceSyntax(1)
  , maxHoleSpanSourceSyntax(1000)
  , maxSymbols(5)
  , glueGrammarFlag(false)
  , unknownWordLabelFlag(false)
  , maxNonTerm(3)
  , maxNonTermDefault(2)
  , sourceSyntax(true)
  , targetSyntax(false)
  , mixed(true)
  , maxUnaligned(5)
  , uppermostOnly(true)
  , allowDefaultNonTermEdge(true)
  , gzOutput(false)
    // disabled options, kept for reference:
    // int minHoleSize(1)
    // int minSubPhraseSize(1) // minimum size of a remaining lexical phrase
    // bool zipFiles(false)
{}

View File

@ -1,45 +0,0 @@
#pragma once
/*
* Global.h
* extract
*
* Created by Hieu Hoang on 01/02/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <set>
#include <map>
#include <string>
// Bag of rule-extraction settings passed (by const reference) to the
// Lattice / Rule / RuleCollection code. Defaults are set in Global::Global().
class Global
{
public:
// Hole-span limits on the source side; the "Syntax" pair is used when either
// side of a non-terminal has a syntactic label, the "Default" pair otherwise
// (see Lattice::CreateArcsUsing1Hole for the max values).
int minHoleSpanSourceDefault;
int maxHoleSpanSourceDefault;
int minHoleSpanSourceSyntax;
int maxHoleSpanSourceSyntax;
int maxSymbols;
bool glueGrammarFlag;
bool unknownWordLabelFlag;
int maxNonTerm;
int maxNonTermDefault;
bool sourceSyntax;
bool targetSyntax;
bool mixed;
int maxUnaligned;
bool uppermostOnly;
bool allowDefaultNonTermEdge;
bool gzOutput;
Global();
// Declared only; no definition appears in Global.cpp.
// NOTE(review): presumably left undefined on purpose to forbid copying - confirm.
Global(const Global&);
};
extern bool g_debug;
#define DEBUG_OUTPUT() void DebugOutput() const;

View File

@ -1,180 +0,0 @@
/*
* Lattice.cpp
* extract
*
* Created by Hieu Hoang on 18/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <cassert>
#include "Lattice.h"
#include "LatticeNode.h"
#include "Tunnel.h"
#include "TunnelCollection.h"
#include "SyntaxTree.h"
#include "SentenceAlignment.h"
#include "tables-core.h"
#include "Rule.h"
#include "RuleCollection.h"
using namespace std;
// Allocate sourceSize + 1 empty stacks, addressed by source start position.
Lattice::Lattice(size_t sourceSize)
  : m_stacks(sourceSize + 1)
{}
// Hand every stack to RemoveAllInColl so its nodes are released.
Lattice::~Lattice()
{
  for (size_t pos = 0; pos < m_stacks.size(); ++pos) {
    RemoveAllInColl(m_stacks[pos]);
  }
}
void Lattice::CreateArcs(size_t startPos, const TunnelCollection &tunnelColl, const SentenceAlignment &sentence, const Global &global)
{
// term
Stack &startStack = GetStack(startPos);
LatticeNode *node = new LatticeNode(startPos, &sentence);
startStack.push_back(node);
// non-term
for (size_t endPos = startPos + 1; endPos <= sentence.source.size(); ++endPos)
{
const TunnelList &tunnels = tunnelColl.GetTunnels(startPos, endPos - 1);
TunnelList::const_iterator iterHole;
for (iterHole = tunnels.begin(); iterHole != tunnels.end(); ++iterHole)
{
const Tunnel &tunnel = *iterHole;
CreateArcsUsing1Hole(tunnel, sentence, global);
}
}
}
void Lattice::CreateArcsUsing1Hole(const Tunnel &tunnel, const SentenceAlignment &sentence, const Global &global)
{
size_t startPos = tunnel.GetRange(0).GetStartPos()
, endPos = tunnel.GetRange(0).GetEndPos();
size_t numSymbols = tunnel.GetRange(0).GetWidth();
assert(numSymbols > 0);
Stack &startStack = GetStack(startPos);
// non-terms. cartesian product of source & target labels
assert(startPos == tunnel.GetRange(0).GetStartPos() && endPos == tunnel.GetRange(0).GetEndPos());
size_t startT = tunnel.GetRange(1).GetStartPos()
,endT = tunnel.GetRange(1).GetEndPos();
const SyntaxNodes &nodesS = sentence.sourceTree.GetNodes(startPos, endPos);
const SyntaxNodes &nodesT = sentence.targetTree.GetNodes(startT, endT );
SyntaxNodes::const_iterator iterS, iterT;
for (iterS = nodesS.begin(); iterS != nodesS.end(); ++iterS)
{
const SyntaxNode *syntaxNodeS = *iterS;
for (iterT = nodesT.begin(); iterT != nodesT.end(); ++iterT)
{
const SyntaxNode *syntaxNodeT = *iterT;
bool isSyntax = syntaxNodeS->IsSyntax() || syntaxNodeT->IsSyntax();
size_t maxSourceNonTermSpan = isSyntax ? global.maxHoleSpanSourceSyntax : global.maxHoleSpanSourceDefault;
if (maxSourceNonTermSpan >= endPos - startPos)
{
LatticeNode *node = new LatticeNode(tunnel, syntaxNodeS, syntaxNodeT);
startStack.push_back(node);
}
}
}
}
// Mutable access to the stack of nodes starting at startPos.
Stack &Lattice::GetStack(size_t startPos)
{
  assert(startPos < m_stacks.size());
  Stack &stack = m_stacks[startPos];
  return stack;
}
// Read-only access to the stack of nodes starting at startPos.
const Stack &Lattice::GetStack(size_t startPos) const
{
  assert(startPos < m_stacks.size());
  const Stack &stack = m_stacks[startPos];
  return stack;
}
void Lattice::CreateRules(size_t startPos, const SentenceAlignment &sentence, const Global &global)
{
const Stack &startStack = GetStack(startPos);
Stack::const_iterator iterStack;
for (iterStack = startStack.begin(); iterStack != startStack.end(); ++iterStack)
{
const LatticeNode *node = *iterStack;
Rule *initRule = new Rule(node);
if (initRule->CanRecurse(global, sentence.GetTunnelCollection()))
{ // may or maynot be valid, but can continue to build on this rule
initRule->CreateRules(m_rules, *this, sentence, global);
}
if (initRule->IsValid(global, sentence.GetTunnelCollection()))
{ // add to rule collection
m_rules.Add(global, initRule, sentence);
}
else
{
delete initRule;
}
}
}
// Collect the non-terminal nodes whose source span is exactly sourceRange.
Stack Lattice::GetNonTermNode(const Range &sourceRange) const
{
  Stack matches;
  const Stack &candidates = GetStack(sourceRange.GetStartPos());

  for (size_t i = 0; i < candidates.size(); ++i) {
    LatticeNode *candidate = candidates[i];
    const Range &span = candidate->GetSourceRange();
    assert(span.GetStartPos() == sourceRange.GetStartPos());

    if (candidate->IsTerminal()) {
      continue;
    }
    if (span.GetEndPos() == sourceRange.GetEndPos()) {
      matches.push_back(candidate);
    }
  }

  return matches;
}
// Print every node of every stack, each followed by a space.
std::ostream& operator<<(std::ostream &out, const Lattice &obj)
{
  for (size_t pos = 0; pos < obj.m_stacks.size(); ++pos) {
    const Stack &stack = obj.m_stacks[pos];
    for (size_t i = 0; i < stack.size(); ++i) {
      out << *stack[i] << " ";
    }
  }
  return out;
}

View File

@ -1,47 +0,0 @@
#pragma once
/*
* Lattice.h
* extract
*
* Created by Hieu Hoang on 18/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <iostream>
#include <vector>
#include "RuleCollection.h"
class Global;
class LatticeNode;
class Tunnel;
class TunnelCollection;
class SentenceAlignment;
typedef std::vector<LatticeNode*> Stack;
// Lattice over the source sentence: one stack of LatticeNode pointers per
// source start position, plus the collection of rules built from them.
class Lattice
{
friend std::ostream& operator<<(std::ostream&, const Lattice&);
// m_stacks[pos] holds the nodes whose source span starts at pos;
// the destructor passes each stack to RemoveAllInColl.
std::vector<Stack> m_stacks;
// Rules produced by CreateRules().
RuleCollection m_rules;
// Mutable stack access (private); asserts the position is in range.
Stack &GetStack(size_t endPos);
// Adds non-terminal nodes for one tunnel (one per source/target label pair).
void CreateArcsUsing1Hole(const Tunnel &tunnel, const SentenceAlignment &sentence, const Global &global);
public:
// Creates sourceSize + 1 empty stacks.
Lattice(size_t sourceSize);
~Lattice();
// Adds the terminal node at startPos and the non-terminal nodes of every tunnel starting there.
void CreateArcs(size_t startPos, const TunnelCollection &tunnelColl, const SentenceAlignment &sentence, const Global &global);
// Builds rules starting from every node in the stack at startPos.
void CreateRules(size_t startPos, const SentenceAlignment &sentence, const Global &global);
// Read-only stack access; asserts the position is in range.
const Stack &GetStack(size_t startPos) const;
const RuleCollection &GetRules() const
{ return m_rules; }
// Returns (by value) the non-terminal nodes whose source span equals sourceRange.
Stack GetNonTermNode(const Range &sourceRange) const;
};

View File

@ -1,149 +0,0 @@
/*
* LatticeNode.cpp
* extract
*
* Created by Hieu Hoang on 18/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <sstream>
#include "LatticeNode.h"
#include "SyntaxTree.h"
#include "Tunnel.h"
#include "SentenceAlignment.h"
#include "SymbolSequence.h"
size_t LatticeNode::s_count = 0;
using namespace std;
// Terminal node: covers the single source word at `pos` of `sentence`.
// It has no tunnel and no syntax labels.
LatticeNode::LatticeNode(size_t pos, const SentenceAlignment *sentence)
  : m_isTerminal(true),
    m_sourceRange(pos, pos),
    m_tunnel(NULL),
    m_sourceTreeNode(NULL),
    m_targetTreeNode(NULL),
    m_sentence(sentence)
{
  ++s_count;
}
// Non-terminal node: covers the tunnel's source range and carries one
// source label and one target label. No sentence pointer is stored.
LatticeNode::LatticeNode(const Tunnel &tunnel, const SyntaxNode *sourceTreeNode, const SyntaxNode *targetTreeNode)
  : m_isTerminal(false),
    m_sourceRange(tunnel.GetRange(0)),
    m_tunnel(&tunnel),
    m_sourceTreeNode(sourceTreeNode),
    m_targetTreeNode(targetTreeNode),
    m_sentence(NULL)
{
  ++s_count;
}
// True when the source label or the target label is syntactic.
// Only meaningful for non-terminals (asserted).
bool LatticeNode::IsSyntax() const
{
  assert(!m_isTerminal);
  return m_sourceTreeNode->IsSyntax() || m_targetTreeNode->IsSyntax();
}
// A node always counts as exactly one symbol, whatever the direction.
size_t LatticeNode::GetNumSymbols(size_t /*direction*/) const
{
  const size_t numSymbols = 1;
  return numSymbols;
}
// Three-way comparison (-1 / 0 / +1).
//  - a terminal sorts before a non-terminal;
//  - two terminals are ordered by source start position;
//  - two non-terminals are compared per side, and a side is only looked at
//    when THIS node's label on that side is syntactic: first the tunnel
//    range on that side, then the label.
int LatticeNode::Compare(const LatticeNode &otherNode) const
{
  if (m_isTerminal != otherNode.m_isTerminal) {
    return m_isTerminal ? -1 : 1;
  }

  if (m_isTerminal) {
    const size_t mine = m_sourceRange.GetStartPos();
    const size_t theirs = otherNode.m_sourceRange.GetStartPos();
    if (mine == theirs) {
      return 0;
    }
    return (mine < theirs) ? -1 : +1;
  }

  // both are non-terminals from here on
  assert(!m_isTerminal);
  assert(!otherNode.m_isTerminal);

  if (m_sourceTreeNode->IsSyntax()) {
    const int spanCmp = m_tunnel->Compare(*otherNode.m_tunnel, 0);
    if (spanCmp != 0) {
      return spanCmp;
    }
    if (m_sourceTreeNode->GetLabel() != otherNode.m_sourceTreeNode->GetLabel()) {
      return (m_sourceTreeNode->GetLabel() < otherNode.m_sourceTreeNode->GetLabel()) ? -1 : +1;
    }
  }

  if (m_targetTreeNode->IsSyntax()) {
    const int spanCmp = m_tunnel->Compare(*otherNode.m_tunnel, 1);
    if (spanCmp != 0) {
      return spanCmp;
    }
    if (m_targetTreeNode->GetLabel() != otherNode.m_targetTreeNode->GetLabel()) {
      return (m_targetTreeNode->GetLabel() < otherNode.m_targetTreeNode->GetLabel()) ? -1 : +1;
    }
  }

  return 0;
}
void LatticeNode::CreateSymbols(size_t direction, SymbolSequence &symbols) const
{
if (m_isTerminal)
{
/*
const std::vector<std::string> &words = (direction == 0 ? m_sentence->source : m_sentence->target);
size_t startPos = m_tunnel.GetStart(direction)
,endPos = m_tunnel.GetEnd(direction);
for (size_t pos = startPos; pos <= endPos; ++pos)
{
Symbol symbol(words[pos], pos);
symbols.Add(symbol);
}
*/
}
else
{ // output both
Symbol symbol(m_sourceTreeNode->GetLabel(), m_targetTreeNode->GetLabel()
, m_tunnel->GetRange(0).GetStartPos(), m_tunnel->GetRange(0).GetEndPos()
, m_tunnel->GetRange(1).GetStartPos(), m_tunnel->GetRange(1).GetEndPos()
, m_sourceTreeNode->IsSyntax(), m_targetTreeNode->IsSyntax());
symbols.Add(symbol);
}
}
std::ostream& operator<<(std::ostream &out, const LatticeNode &obj)
{
if (obj.m_isTerminal)
{
assert(obj.m_sourceRange.GetWidth() == 1);
size_t pos = obj.m_sourceRange.GetStartPos();
const SentenceAlignment &sentence = *obj.m_sentence;
out << obj.m_sourceRange << "=" << sentence.source[pos];
}
else
{
assert(obj.m_tunnel);
out << obj.GetTunnel() << "=" << obj.m_sourceTreeNode->GetLabel() << obj.m_targetTreeNode->GetLabel() << " ";
}
return out;
}

View File

@ -1,77 +0,0 @@
#pragma once
/*
* LatticeNode.h
* extract
*
* Created by Hieu Hoang on 18/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <vector>
#include <iostream>
#include <cassert>
#include "Range.h"
class Tunnel;
class SyntaxNode;
class SentenceAlignment;
class SymbolSequence;
// One node of the extraction lattice: either a terminal (a single source
// word) or a non-terminal (a tunnel plus a source and a target label).
class LatticeNode
{
friend std::ostream& operator<<(std::ostream&, const LatticeNode&);
// true for terminals, false for non-terminals
bool m_isTerminal;
// for terms & non-term
Range m_sourceRange;
// non-terms. source range should be same as m_sourceRange
// (NULL for terminals)
const Tunnel *m_tunnel;
public:
// Incremented by both constructors; never decremented in LatticeNode.cpp.
static size_t s_count;
// Label nodes for the source / target side; NULL for terminals.
const SyntaxNode *m_sourceTreeNode, *m_targetTreeNode;
// Sentence the word comes from; set for terminals, NULL for non-terminals.
const SentenceAlignment *m_sentence;
// for terms
LatticeNode(size_t pos, const SentenceAlignment *sentence);
// for non-terms
LatticeNode(const Tunnel &tunnel, const SyntaxNode *sourceTreeNode, const SyntaxNode *targetTreeNode);
bool IsTerminal() const
{ return m_isTerminal; }
// True if either label is syntactic; asserts the node is a non-terminal.
bool IsSyntax() const;
// Always returns 1.
size_t GetNumSymbols(size_t direction) const;
// NOTE(review): no definition of ToString() appears in LatticeNode.cpp - confirm it is unused.
std::string ToString() const;
// Three-way comparison returning -1, 0 or +1.
int Compare(const LatticeNode &otherNode) const;
// Appends a Symbol for non-terminals; does nothing for terminals.
void CreateSymbols(size_t direction, SymbolSequence &symbols) const;
// Only valid for non-terminals (asserts the tunnel pointer is set).
const Tunnel &GetTunnel() const
{
assert(m_tunnel);
return *m_tunnel;
}
const Range &GetSourceRange() const
{
return m_sourceRange;
}
// direction 0 = source label node, anything else = target; asserts it is set.
const SyntaxNode &GetSyntaxNode(size_t direction) const
{
const SyntaxNode *node = direction == 0 ? m_sourceTreeNode : m_targetTreeNode;
assert(node);
return *node;
}
};

View File

@ -0,0 +1,174 @@
#include <iostream>
#include <cstdlib>
#include <boost/program_options.hpp>
#include "Main.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "AlignedSentence.h"
#include "AlignedSentenceSyntax.h"
#include "Parameter.h"
#include "Rules.h"
using namespace std;
bool g_debug = false;
// Entry point of extract-mixed-syntax.
//
// Usage: <prog> target source alignment extract [options...]
//
// Reads the target, source and alignment files line by line in lock-step,
// extracts rules from each sentence pair and writes them to <extract> and
// <extract>.inv (with ".gz" appended to both when --GZOutput is given).
// If a glue-grammar path is set, a fixed glue grammar is written at the end.
//
// Returns EXIT_SUCCESS on success or after printing the usage text, and
// EXIT_FAILURE on an option-parsing error or when the source / alignment
// file has fewer lines than the target file.
int main(int argc, char** argv)
{
  cerr << "Starting" << endl;

  Parameter params;

  namespace po = boost::program_options;
  po::options_description desc("Options");
  desc.add_options()
  ("help", "Print help messages")
  ("MaxSpan", po::value<int>()->default_value(params.maxSpan), "Max (source) span of a rule. ie. number of words in the source")
  ("GlueGrammar", po::value<string>()->default_value(params.gluePath), "Output glue grammar to here")
  ("SentenceOffset", po::value<long>()->default_value(params.sentenceOffset), "Starting sentence id. Not used")
  ("GZOutput", "Compress extract files")
  ("MaxNonTerm", po::value<int>()->default_value(params.maxNonTerm), "Maximum number of non-terms allowed per rule")
  ("MaxHieroNonTerm", po::value<int>()->default_value(params.maxHieroNonTerm), "Maximum number of Hiero non-term. Usually, --MaxNonTerm is the normal constraint")
  ("MinHoleSource", po::value<int>()->default_value(params.minHoleSource), "Minimum source span for a non-term.")
  ("SourceSyntax", "Source sentence is a parse tree")
  ("TargetSyntax", "Target sentence is a parse tree")
  ("MixedSyntaxType", po::value<int>()->default_value(params.mixedSyntaxType), "Hieu's Mixed syntax type. 0(default)=no mixed syntax, 1=add [X] only if no syntactic label. 2=add [X] everywhere")
  ("MultiLabel", po::value<int>()->default_value(params.multiLabel), "What to do with multiple labels on the same span. 0(default)=keep them all, 1=keep only top-most, 2=keep only bottom-most")
  ("HieroSourceLHS", "Always use Hiero source LHS? Default = 0")
  ("MaxSpanFreeNonTermSource", po::value<int>()->default_value(params.maxSpanFreeNonTermSource), "Max number of words covered by beginning/end NT. Default = 0 (no limit)")
  ("NoNieceTerminal", "Don't extract rule if 1 of the non-term covers the same word as 1 of the terminals")
  ("MaxScope", po::value<int>()->default_value(params.maxScope), "maximum scope (see Hopkins and Langmead (2010)). Default is HIGH")
  ("SpanLength", "Property - span length of RHS each non-term")
  ("NonTermContext", "Property - left and right, inside and outside words of each non-term");

  po::variables_map vm;
  try {
    po::store(po::parse_command_line(argc, argv, desc),
              vm); // can throw

    // --help, or not enough positional arguments: print usage and stop.
    // Four positional arguments are required (argv[1]..argv[4] are read below).
    if ( vm.count("help") || argc < 5 ) {
      std::cout << argv[0] << " target source alignment extract [options...]" << std::endl
                << desc << std::endl;
      return EXIT_SUCCESS;
    }

    po::notify(vm); // throws on error, so do after help in case
    // there are any problems
  } catch(po::error& e) {
    std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
    std::cerr << desc << std::endl;
    return EXIT_FAILURE;
  }

  if (vm.count("MaxSpan")) params.maxSpan = vm["MaxSpan"].as<int>();
  if (vm.count("GZOutput")) params.gzOutput = true;
  if (vm.count("GlueGrammar")) params.gluePath = vm["GlueGrammar"].as<string>();
  if (vm.count("SentenceOffset")) params.sentenceOffset = vm["SentenceOffset"].as<long>();
  if (vm.count("MaxNonTerm")) params.maxNonTerm = vm["MaxNonTerm"].as<int>();
  if (vm.count("MaxHieroNonTerm")) params.maxHieroNonTerm = vm["MaxHieroNonTerm"].as<int>();
  if (vm.count("MinHoleSource")) params.minHoleSource = vm["MinHoleSource"].as<int>();

  if (vm.count("SourceSyntax")) params.sourceSyntax = true;
  if (vm.count("TargetSyntax")) params.targetSyntax = true;
  if (vm.count("MixedSyntaxType")) params.mixedSyntaxType = vm["MixedSyntaxType"].as<int>();
  if (vm.count("MultiLabel")) params.multiLabel = vm["MultiLabel"].as<int>();
  if (vm.count("HieroSourceLHS")) params.hieroSourceLHS = true;
  if (vm.count("MaxSpanFreeNonTermSource")) params.maxSpanFreeNonTermSource = vm["MaxSpanFreeNonTermSource"].as<int>();
  if (vm.count("NoNieceTerminal")) params.nieceTerminal = false;
  if (vm.count("MaxScope")) params.maxScope = vm["MaxScope"].as<int>();

  // properties
  if (vm.count("SpanLength")) params.spanLength = true;
  if (vm.count("NonTermContext")) params.nonTermContext = true;

  // input files;
  string pathTarget = argv[1];
  string pathSource = argv[2];
  string pathAlignment = argv[3];

  // output files
  string pathExtract = argv[4];
  string pathExtractInv = pathExtract + ".inv";
  if (params.gzOutput) {
    pathExtract += ".gz";
    pathExtractInv += ".gz";
  }

  Moses::InputFileStream strmTarget(pathTarget);
  Moses::InputFileStream strmSource(pathSource);
  Moses::InputFileStream strmAlignment(pathAlignment);
  Moses::OutputFileStream extractFile(pathExtract);
  Moses::OutputFileStream extractInvFile(pathExtractInv);

  // MAIN LOOP: the target file drives the loop; the other two must keep up
  int lineNum = 1;
  string lineTarget, lineSource, lineAlignment;
  while (getline(strmTarget, lineTarget)) {
    if (lineNum % 10000 == 0) {
      cerr << lineNum << " ";
    }

    // Test the stream state directly: a stream cannot be assigned to a bool
    // from C++11 on. Fail with a message rather than throwing a string
    // literal that nothing catches.
    if (!getline(strmSource, lineSource)) {
      cerr << "ERROR: Couldn't read source (line " << lineNum << ")" << endl;
      return EXIT_FAILURE;
    }
    if (!getline(strmAlignment, lineAlignment)) {
      cerr << "ERROR: Couldn't read alignment (line " << lineNum << ")" << endl;
      return EXIT_FAILURE;
    }

    // a parse tree on either side needs the syntax-aware sentence type
    AlignedSentence *alignedSentence;
    if (params.sourceSyntax || params.targetSyntax) {
      alignedSentence = new AlignedSentenceSyntax(lineNum, lineSource, lineTarget, lineAlignment);
    } else {
      alignedSentence = new AlignedSentence(lineNum, lineSource, lineTarget, lineAlignment);
    }

    alignedSentence->Create(params);
    //cerr << alignedSentence->Debug();

    Rules rules(*alignedSentence);
    rules.Extend(params);
    rules.Consolidate(params);
    //cerr << rules.Debug();

    rules.Output(extractFile, true, params);
    rules.Output(extractInvFile, false, params);

    delete alignedSentence;

    ++lineNum;
  }

  if (!params.gluePath.empty()) {
    Moses::OutputFileStream glueFile(params.gluePath);
    CreateGlueGrammar(glueFile);
  }

  cerr << "Finished" << endl;
  return EXIT_SUCCESS;
}
// Write the three fixed glue-grammar rules to glueFile, one per line.
void CreateGlueGrammar(Moses::OutputFileStream &glueFile)
{
  static const char *const glueRules[] = {
    "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0",
    "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0",
    "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0"
  };
  const size_t numRules = sizeof(glueRules) / sizeof(glueRules[0]);

  for (size_t i = 0; i < numRules; ++i) {
    glueFile << glueRules[i] << endl;
  }
}

View File

@ -0,0 +1,12 @@
/*
* Main.h
*
* Created on: 28 Feb 2014
* Author: hieu
*/
#pragma once
#include "OutputFileStream.h"
void CreateGlueGrammar(Moses::OutputFileStream &glueFile);

View File

@ -1,13 +1,17 @@
all: extract
all: extract-mixed-syntax
clean:
rm -f *.o extract-mixed-syntax
.cpp.o:
g++ -O6 -g -c $<
g++ -O4 -g -c -I../../../boost/include -I../../../ $<
extract: tables-core.o extract.o SyntaxTree.o XmlTree.o Tunnel.o Lattice.o LatticeNode.o SentenceAlignment.o Global.o InputFileStream.o TunnelCollection.o RuleCollection.o Rule.o Symbol.o SymbolSequence.o Range.o OutputFileStream.o
OBJECTS = AlignedSentence.o ConsistentPhrase.o ConsistentPhrases.o InputFileStream.o \
Main.o OutputFileStream.o Parameter.o Phrase.o Rule.o Rules.o RuleSymbol.o \
SyntaxTree.o Word.o NonTerm.o RulePhrase.o AlignedSentenceSyntax.o pugixml.o
g++ tables-core.o extract.o SyntaxTree.o XmlTree.o Tunnel.o Lattice.o LatticeNode.o SentenceAlignment.o Global.o InputFileStream.o TunnelCollection.o RuleCollection.o Rule.o Symbol.o SymbolSequence.o Range.o OutputFileStream.o -lz -lboost_iostreams-mt -o extract-mixed-syntax
extract-mixed-syntax: $(OBJECTS)
g++ $(OBJECTS) -L../../../boost/lib64 -lz -lboost_iostreams-mt -lboost_program_options-mt -o extract-mixed-syntax

View File

@ -0,0 +1,65 @@
/*
* NonTerm.cpp
*
* Created on: 22 Feb 2014
* Author: hieu
*/
#include <sstream>
#include "NonTerm.h"
#include "Word.h"
#include "ConsistentPhrase.h"
#include "Parameter.h"
using namespace std;
// Store a pointer to the consistent phrase this non-terminal covers,
// together with copies of its source-side and target-side labels.
NonTerm::NonTerm(const ConsistentPhrase &consistentPhrase,
                 const std::string &source,
                 const std::string &target)
  : m_consistentPhrase(&consistentPhrase),
    m_source(source),
    m_target(target)
{}
// Nothing to release here; the destructor body is empty.
NonTerm::~NonTerm()
{}
std::string NonTerm::Debug() const
{
stringstream out;
out << m_source << m_target;
out << m_consistentPhrase->Debug();
return out.str();
}
// Write the source label immediately followed by the target label.
void NonTerm::Output(std::ostream &out) const
{
  out << m_source;
  out << m_target;
}
// Write only the label for the requested side.
void NonTerm::Output(std::ostream &out, Moses::FactorDirection direction) const
{
  const std::string &label = GetLabel(direction);
  out << label;
}
// Source label for Moses::Input, target label for any other direction.
const std::string &NonTerm::GetLabel(Moses::FactorDirection direction) const
{
  if (direction == Moses::Input) {
    return m_source;
  }
  return m_target;
}
// True when the label on the given side equals params.hieroNonTerm.
bool NonTerm::IsHiero(Moses::FactorDirection direction, const Parameter &params) const
{
  return NonTerm::GetLabel(direction) == params.hieroNonTerm;
}
// True only when both the source and the target label are the Hiero label.
bool NonTerm::IsHiero(const Parameter &params) const
{
  if (!IsHiero(Moses::Input, params)) {
    return false;
  }
  return IsHiero(Moses::Output, params);
}
// Width of the covered consistent phrase on the given side.
int NonTerm::GetWidth(Moses::FactorDirection direction) const
{
  const ConsistentPhrase &phrase = GetConsistentPhrase();
  return phrase.GetWidth(direction);
}

View File

@ -0,0 +1,47 @@
/*
* NonTerm.h
*
* Created on: 22 Feb 2014
* Author: hieu
*/
#pragma once
#include <string>
#include "RuleSymbol.h"
#include "moses/TypeDef.h"
class ConsistentPhrase;
class Parameter;
// A non-terminal rule symbol: a consistent phrase plus one label for the
// source side and one for the target side.
class NonTerm : public RuleSymbol
{
public:
// Keeps a pointer to consistentPhrase (not a copy) and copies both labels.
NonTerm(const ConsistentPhrase &consistentPhrase,
const std::string &source,
const std::string &target);
virtual ~NonTerm();
const ConsistentPhrase &GetConsistentPhrase() const
{ return *m_consistentPhrase; }
// Width of the covered phrase on the given side.
int GetWidth(Moses::FactorDirection direction) const;
virtual bool IsNonTerm() const
{ return true; }
// Source label concatenated with target label.
std::string GetString() const
{ return m_source + m_target; }
// Both labels followed by the covered phrase's Debug() text.
virtual std::string Debug() const;
// Writes both labels, source first.
virtual void Output(std::ostream &out) const;
// Writes only the label of the given side.
void Output(std::ostream &out, Moses::FactorDirection direction) const;
// Source label for Moses::Input, target label otherwise.
const std::string &GetLabel(Moses::FactorDirection direction) const;
// One side equals params.hieroNonTerm / both sides do.
bool IsHiero(Moses::FactorDirection direction, const Parameter &params) const;
bool IsHiero(const Parameter &params) const;
protected:
// Pointer to the covered phrase; the destructor does not delete it.
const ConsistentPhrase *m_consistentPhrase;
std::string m_source, m_target;
};

View File

@ -0,0 +1,41 @@
/*
* Parameter.cpp
*
* Created on: 17 Feb 2014
* Author: hieu
*/
#include "Parameter.h"
// Built-in defaults for every extraction setting. main() shows several of
// them as option defaults and overrides them from the command line.
Parameter::Parameter()
  : maxSpan(10),
    maxNonTerm(2),
    maxHieroNonTerm(999),
    maxSymbolsTarget(999),
    maxSymbolsSource(5),
    minHoleSource(2),
    sentenceOffset(0),
    nonTermConsecSource(false),
    requireAlignedWord(true),
    fractionalCounting(true),
    gzOutput(false),
    hieroNonTerm("[X]"),
    gluePath(),                 // empty: no glue grammar is written
    sourceSyntax(false),
    targetSyntax(false),
    mixedSyntaxType(0),
    multiLabel(0),
    nonTermConsecSourceMixed(true),
    hieroSourceLHS(false),
    maxSpanFreeNonTermSource(0),
    nieceTerminal(true),
    maxScope(UNDEFINED),
    spanLength(false),
    nonTermContext(false)
{
}
// Empty destructor body; the members clean up after themselves.
Parameter::~Parameter()
{}

View File

@ -0,0 +1,51 @@
/*
* Parameter.h
*
* Created on: 17 Feb 2014
* Author: hieu
*/
#pragma once
#include <string>
#include <limits>
#define UNDEFINED std::numeric_limits<int>::max()
// Settings for extract-mixed-syntax. Defaults are set in Parameter.cpp;
// most fields are overridden from the command line in main().
class Parameter
{
public:
Parameter();
virtual ~Parameter();
// --MaxSpan: max (source) span of a rule, in source words
int maxSpan;
// --MaxNonTerm: maximum number of non-terminals per rule
int maxNonTerm;
// --MaxHieroNonTerm: maximum number of Hiero non-terminals
int maxHieroNonTerm;
int maxSymbolsTarget;
int maxSymbolsSource;
// --MinHoleSource: minimum source span for a non-terminal
int minHoleSource;
// --SentenceOffset: starting sentence id (described as "Not used" in the help text)
long sentenceOffset;
bool nonTermConsecSource;
bool requireAlignedWord;
bool fractionalCounting;
// --GZOutput: compress the extract files
bool gzOutput;
// label used for Hiero non-terminals ("[X]" by default)
std::string hieroNonTerm;
// --GlueGrammar: where to write the glue grammar; empty = don't write one
std::string gluePath;
// --SourceSyntax / --TargetSyntax: that side of the input is a parse tree
bool sourceSyntax, targetSyntax;
// --MixedSyntaxType (0, 1 or 2) and --MultiLabel (0, 1 or 2); see the help text in main()
int mixedSyntaxType, multiLabel;
bool nonTermConsecSourceMixed;
// --HieroSourceLHS
bool hieroSourceLHS;
// --MaxSpanFreeNonTermSource: 0 = no limit
int maxSpanFreeNonTermSource;
// cleared by --NoNieceTerminal
bool nieceTerminal;
// --MaxScope; UNDEFINED (the largest int) by default
int maxScope;
// properties (--SpanLength, --NonTermContext)
bool spanLength;
bool nonTermContext;
};

View File

@ -0,0 +1,14 @@
#include <sstream>
#include "Phrase.h"
std::string Phrase::Debug() const
{
std::stringstream out;
for (size_t i = 0; i < size(); ++i) {
Word &word = *at(i);
out << word.Debug() << " ";
}
return out.str();
}

View File

@ -0,0 +1,19 @@
#pragma once
#include <vector>
#include "Word.h"
// a vector of terminals
// A phrase is simply a vector of Word pointers with a Debug() helper.
// NOTE(review): nothing here deletes the pointed-to words - confirm who owns them.
class Phrase : public std::vector<Word*>
{
public:
// Empty phrase.
Phrase()
{}
// Phrase with `size` value-initialised (null) pointer slots.
Phrase(size_t size)
:std::vector<Word*>(size)
{}
// Debug() text of every word, each followed by a space.
std::string Debug() const;
};

View File

@ -1,74 +0,0 @@
/*
* Range.cpp
* extract
*
* Created by Hieu Hoang on 22/02/2011.
* Copyright 2011 __MyCompanyName__. All rights reserved.
*
*/
#include "Range.h"
using namespace std;
// Set this range to the union of a and b: the smaller start and the larger
// end. A position equal to NOT_FOUND is ignored in favour of the other one.
void Range::Merge(const Range &a, const Range &b)
{
  // start: take the other side when one is missing, else the minimum
  m_startPos = (a.m_startPos == NOT_FOUND) ? b.m_startPos
               : (b.m_startPos == NOT_FOUND) ? a.m_startPos
               : min(a.m_startPos, b.m_startPos);

  // end: take the other side when one is missing, else the maximum
  m_endPos = (a.m_endPos == NOT_FOUND) ? b.m_endPos
             : (b.m_endPos == NOT_FOUND) ? a.m_endPos
             : max(a.m_endPos, b.m_endPos);
}
// Order by start position, then by end position. Returns -1, 0 or +1.
int Range::Compare(const Range &other) const
{
  if (m_startPos != other.m_startPos) {
    return (m_startPos < other.m_startPos) ? -1 : +1;
  }
  if (m_endPos != other.m_endPos) {
    return (m_endPos < other.m_endPos) ? -1 : +1;
  }
  return 0;
}
// Two inclusive ranges overlap unless one ends before the other starts.
bool Range::Overlap(const Range &other) const
{
  return other.m_endPos >= m_startPos && other.m_startPos <= m_endPos;
}
// Print as "[start-end]".
std::ostream& operator<<(std::ostream &out, const Range &range)
{
  return out << "[" << range.m_startPos << "-" << range.m_endPos << "]";
}

View File

@ -1,57 +0,0 @@
/*
* Range.h
* extract
*
* Created by Hieu Hoang on 22/02/2011.
* Copyright 2011 __MyCompanyName__. All rights reserved.
*
*/
#pragma once
#include <string>
#include <iostream>
#include <limits>
// Marker for "no position": the largest value a size_t can hold.
#define NOT_FOUND std::numeric_limits<size_t>::max()

// An inclusive [start, end] span of positions.
class Range
{
  friend std::ostream& operator<<(std::ostream&, const Range&);

  size_t m_startPos, m_endPos;

public:
  // An unset range: both ends are NOT_FOUND.
  Range()
    : m_startPos(NOT_FOUND), m_endPos(NOT_FOUND)
  {}

  Range(const Range &copy)
    : m_startPos(copy.m_startPos), m_endPos(copy.m_endPos)
  {}

  Range(size_t startPos, size_t endPos)
    : m_startPos(startPos), m_endPos(endPos)
  {}

  size_t GetStartPos() const {
    return m_startPos;
  }

  size_t GetEndPos() const {
    return m_endPos;
  }

  // Number of positions covered; both ends are inclusive.
  size_t GetWidth() const {
    return m_endPos - m_startPos + 1;
  }

  void SetStartPos(size_t startPos) {
    m_startPos = startPos;
  }

  void SetEndPos(size_t endPos) {
    m_endPos = endPos;
  }

  // Make this range the union of a and b (NOT_FOUND ends are ignored).
  void Merge(const Range &a, const Range &b);

  // Order by start, then end; returns -1, 0 or +1.
  int Compare(const Range &other) const;

  // True when the two inclusive ranges share at least one position.
  bool Overlap(const Range &other) const;
};

File diff suppressed because it is too large Load Diff

View File

@ -1,96 +1,87 @@
#pragma once
/*
* Rule.h
* extract
*
* Created by Hieu Hoang on 19/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
* Rule.h
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#pragma once
#include <vector>
#include <iostream>
#include "LatticeNode.h"
#include "SymbolSequence.h"
#include "Global.h"
#include "Phrase.h"
#include "RulePhrase.h"
#include "moses/TypeDef.h"
class Lattice;
class SentenceAlignment;
class Global;
class RuleCollection;
class SyntaxNode;
class TunnelCollection;
class Range;
class ConsistentPhrase;
class AlignedSentence;
class NonTerm;
class Parameter;
class RuleElement
{
protected:
const LatticeNode *m_latticeNode;
class Rule {
public:
std::pair<size_t, size_t> m_alignmentPos;
RuleElement(const RuleElement &copy);
RuleElement(const LatticeNode &latticeNode)
:m_latticeNode(&latticeNode)
,m_alignmentPos(NOT_FOUND, NOT_FOUND)
{}
typedef std::set<std::pair<int,int> > Alignments;
const LatticeNode &GetLatticeNode() const
{ return *m_latticeNode; }
Rule(const Rule &copy); // do not implement
};
// original rule with no non-term
Rule(const NonTerm &lhsNonTerm, const AlignedSentence &alignedSentence);
class Rule
{
protected:
typedef std::vector<RuleElement> CollType;
CollType m_coll;
const LatticeNode *m_lhs;
SymbolSequence m_source, m_target;
bool IsHole(const TunnelCollection &tunnelColl) const;
bool NonTermOverlap() const;
const LatticeNode &GetLatticeNode(size_t ind) const;
void CreateSymbols(const Global &global, bool &isValid, const SentenceAlignment &sentence);
public:
// init
Rule(const LatticeNode *latticeNode);
// create new rule by appending node to prev rule
Rule(const Rule &prevRule, const LatticeNode *latticeNode);
// create copy with lhs
Rule(const Global &global, bool &isValid, const Rule &copy, const LatticeNode *lhs, const SentenceAlignment &sentence);
// can continue to add to this rule
bool CanRecurse(const Global &global, const TunnelCollection &tunnelColl) const;
// extend a rule, adding 1 new non-term
Rule(const Rule &copy, const NonTerm &nonTerm);
virtual ~Rule();
// can add this to the set of rules
bool IsValid(const Global &global, const TunnelCollection &tunnelColl) const;
bool IsValid() const
{ return m_isValid; }
size_t GetNumSymbols() const;
bool AdjacentDefaultNonTerms() const;
bool MaxNonTerm(const Global &global) const;
bool MoreDefaultNonTermThanTerm() const;
bool SourceHasEdgeDefaultNonTerm() const;
bool CanRecurse() const
{ return m_canRecurse; }
void CreateRules(RuleCollection &rules
, const Lattice &lattice
, const SentenceAlignment &sentence
, const Global &global);
int Compare(const Rule &compare) const;
bool operator<(const Rule &compare) const;
Range GetSourceRange() const;
DEBUG_OUTPUT();
const NonTerm &GetLHS() const
{ return m_lhs; }
void Output(std::ostream &out) const;
void OutputInv(std::ostream &out) const;
const ConsistentPhrase &GetConsistentPhrase() const;
int GetNextSourcePosForNonTerm() const;
void SetCount(float count)
{ m_count = count; }
float GetCount() const
{ return m_count; }
const Alignments &GetAlignments() const
{ return m_alignments; }
std::string Debug() const;
void Output(std::ostream &out, bool forward, const Parameter &params) const;
void Prevalidate(const Parameter &params);
void CreateTarget(const Parameter &params);
const RulePhrase &GetPhrase(Moses::FactorDirection direction) const
{ return (direction == Moses::Input) ? m_source : m_target; }
protected:
const NonTerm &m_lhs;
const AlignedSentence &m_alignedSentence;
RulePhrase m_source, m_target;
float m_count;
Alignments m_alignments;
// in source order
std::vector<const NonTerm*> m_nonterms;
bool m_isValid, m_canRecurse;
void CreateSource();
void CreateAlignments();
void CreateAlignments(int sourcePos, const std::set<const Word *> &targetWords);
void CreateAlignments(int sourcePos, const RuleSymbol *targetSought);
bool ContainTerm(const ConsistentPhrase &cp, const std::set<const Word*> &terms) const;
int CalcScope() const; // not yet correctly calculated
void NonTermContext(size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const;
};

View File

@ -1,102 +0,0 @@
/*
* RuleCollection.cpp
* extract
*
* Created by Hieu Hoang on 19/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include "RuleCollection.h"
#include "Rule.h"
#include "SentenceAlignment.h"
#include "tables-core.h"
#include "Lattice.h"
#include "SyntaxTree.h"
using namespace std;
// Hands the whole rule set to RemoveAllInColl.
// NOTE(review): presumably deletes every Rule and clears the set - confirm in tables-core.h.
RuleCollection::~RuleCollection()
{
RemoveAllInColl(m_coll);
}
void RuleCollection::Add(const Global &global, Rule *rule, const SentenceAlignment &sentence)
{
Range spanS = rule->GetSourceRange();
// cartesian product of lhs
Stack nontermNodes = sentence.GetLattice().GetNonTermNode(spanS);
Stack::const_iterator iterStack;
for (iterStack = nontermNodes.begin(); iterStack != nontermNodes.end(); ++iterStack)
{
const LatticeNode &node = **iterStack;
assert(!node.IsTerminal());
bool isValid;
// create rules with LHS
//cerr << "old:" << *rule << endl;
Rule *newRule = new Rule(global, isValid, *rule, &node, sentence);
if (!isValid)
{ // lhs doesn't match non-term spans
delete newRule;
continue;
}
/*
stringstream s;
s << *newRule;
if (s.str().find("Wiederaufnahme der [X] ||| resumption of the [X] ||| ||| 1") == 0)
{
cerr << "READY:" << *newRule << endl;
g_debug = true;
}
else {
g_debug = false;
}
*/
typedef set<const Rule*, CompareRule>::iterator Iterator;
pair<Iterator,bool> ret = m_coll.insert(newRule);
if (ret.second)
{
//cerr << "ACCEPTED:" << *newRule << endl;
//cerr << "";
}
else
{
//cerr << "REJECTED:" << *newRule << endl;
delete newRule;
}
}
delete rule;
}
void RuleCollection::Output(std::ostream &out) const
{
RuleCollection::CollType::const_iterator iter;
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter)
{
const Rule &rule = **iter;
rule.Output(out);
out << endl;
}
}
void RuleCollection::OutputInv(std::ostream &out) const
{
RuleCollection::CollType::const_iterator iter;
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter)
{
const Rule &rule = **iter;
rule.OutputInv(out);
out << endl;
}
}

View File

@ -1,55 +0,0 @@
#pragma once
/*
* RuleCollection.h
* extract
*
* Created by Hieu Hoang on 19/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <set>
#include <iostream>
#include "Rule.h"
class SentenceAlignment;
// Ordering functor for std::set<const Rule*, CompareRule>: orders the
// pointed-to rules with Rule::operator<, which (per the original note)
// doesn't compare default non-terminals.
struct CompareRule
{
  // const-qualified because an associative container may call its comparator
  // on a const object; a non-const operator() is rejected by newer standard
  // libraries.
  bool operator() (const Rule *a, const Rule *b) const
  {
    return (*a) < (*b);
  }
};
// Duplicate-free store of extracted rules. Rules are held by pointer and
// ordered by CompareRule, i.e. by Rule::operator<.
class RuleCollection
{
protected:
typedef std::set<const Rule*, CompareRule> CollType;
CollType m_coll;
public:
// NOTE(review): defined outside this header; presumably frees the stored
// rules - confirm against the implementation.
~RuleCollection();
// Adds the rules derived from `rule` for `sentence`. The implementation
// deletes `rule` before returning, so callers must not use it afterwards.
void Add(const Global &global, Rule *rule, const SentenceAlignment &sentence);
// Number of distinct rules currently stored.
size_t GetSize() const
{ return m_coll.size(); }
// Write all rules, one per line, via Rule::Output / Rule::OutputInv.
void Output(std::ostream &out) const;
void OutputInv(std::ostream &out) const;
};

View File

@ -0,0 +1,50 @@
/*
* RulePhrase.cpp
*
* Created on: 26 Feb 2014
* Author: hieu
*/
#include <sstream>
#include "RulePhrase.h"
#include "RuleSymbol.h"
using namespace std;
extern bool g_debug;
int RulePhrase::Compare(const RulePhrase &other) const
{
if (GetSize() != other.GetSize()) {
return GetSize() < other.GetSize() ? -1 : +1;
}
for (size_t i = 0; i < m_coll.size(); ++i) {
const RuleSymbol &symbol = *m_coll[i];
const RuleSymbol &otherSymbol = *other.m_coll[i];
int compare = symbol.Compare(otherSymbol);
if (compare) {
return compare;
}
}
return 0;
}
void RulePhrase::Output(std::ostream &out) const
{
for (size_t i = 0; i < m_coll.size(); ++i) {
const RuleSymbol &symbol = *m_coll[i];
symbol.Output(out);
out << " ";
}
}
// Returns exactly the text Output() would write, as a string.
std::string RulePhrase::Debug() const
{
  std::stringstream buffer;
  Output(buffer);
  return buffer.str();
}

View File

@ -0,0 +1,49 @@
/*
* RulePhrase.h
*
* Created on: 26 Feb 2014
* Author: hieu
*/
#ifndef RULEPHRASE_H_
#define RULEPHRASE_H_
#include <vector>
#include <cstddef>
#include <iostream>
class RuleSymbol;
// a phrase of terms and non-terms for 1 side of a rule
// Symbols are stored as raw pointers; nothing in this class deletes them.
class RulePhrase
{
public:
typedef std::vector<const RuleSymbol*> Coll;
// Symbols in phrase order (public data member).
Coll m_coll;
// Number of symbols in the phrase.
size_t GetSize() const
{ return m_coll.size(); }
// Appends `symbol` to the end of the phrase.
void Add(const RuleSymbol *symbol)
{
m_coll.push_back(symbol);
}
// Unchecked element access: `index` must be < GetSize().
const RuleSymbol* operator[](size_t index) const {
return m_coll[index];
}
// First / last symbol; the phrase must not be empty.
const RuleSymbol* Front() const {
return m_coll.front();
}
const RuleSymbol* Back() const {
return m_coll.back();
}
// Three-way comparison: by length first, then symbol by symbol.
int Compare(const RulePhrase &other) const;
// Writes every symbol followed by a space.
void Output(std::ostream &out) const;
// Same text as Output(), returned as a string.
std::string Debug() const;
};
#endif /* RULEPHRASE_H_ */

View File

@ -0,0 +1,36 @@
/*
* RuleSymbol.cpp
*
* Created on: 21 Feb 2014
* Author: hieu
*/
#include "RuleSymbol.h"
using namespace std;
// Nothing to set up here.
RuleSymbol::RuleSymbol()
{
}
// Nothing to release here.
RuleSymbol::~RuleSymbol()
{
}
int RuleSymbol::Compare(const RuleSymbol &other) const
{
if (IsNonTerm() != other.IsNonTerm()) {
return IsNonTerm() ? -1 : +1;
}
string str = GetString();
string otherStr = other.GetString();
if (str == otherStr) {
return 0;
}
else {
return (str < otherStr) ? -1 : +1;
}
}

View File

@ -0,0 +1,31 @@
/*
* RuleSymbol.h
*
* Created on: 21 Feb 2014
* Author: hieu
*/
#ifndef RULESYMBOL_H_
#define RULESYMBOL_H_
#include <iostream>
#include <string>
// base class - terminal or non-term
// Abstract interface: subclasses supply the kind, the printable forms and
// the string used for ordering; Compare() is implemented once on top of them.
class RuleSymbol {
public:
RuleSymbol();
virtual ~RuleSymbol();
// True for non-terminal symbols, false for terminals.
virtual bool IsNonTerm() const = 0;
// Human-readable description for debugging.
virtual std::string Debug() const = 0;
// Writes the symbol to `out`.
virtual void Output(std::ostream &out) const = 0;
// String that Compare() uses to order two symbols of the same kind.
virtual std::string GetString() const = 0;
// Three-way comparison: non-terminals first, then by GetString().
int Compare(const RuleSymbol &other) const;
};
#endif /* RULESYMBOL_H_ */

View File

@ -0,0 +1,227 @@
/*
* Rules.cpp
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#include <sstream>
#include "Rules.h"
#include "ConsistentPhrase.h"
#include "ConsistentPhrases.h"
#include "AlignedSentence.h"
#include "Rule.h"
#include "Parameter.h"
#include "moses/Util.h"
using namespace std;
extern bool g_debug;
// Binds this rule set to the aligned sentence it extracts rules from.
// Only a reference is stored, so `alignedSentence` must outlive this object.
Rules::Rules(const AlignedSentence &alignedSentence)
  : m_alignedSentence(alignedSentence)
{}
// Hands m_keepRules to Moses::RemoveAllInColl.
// NOTE(review): presumably deletes every rule in the set - confirm in moses/Util.h.
Rules::~Rules()
{
  Moses::RemoveAllInColl(m_keepRules);
}
void Rules::CreateRules(const ConsistentPhrase &cp,
const Parameter &params)
{
if (params.hieroSourceLHS) {
const NonTerm &nonTerm = cp.GetHieroNonTerm();
CreateRule(nonTerm, params);
}
else {
const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
for (size_t i = 0; i < nonTerms.size(); ++i) {
const NonTerm &nonTerm = nonTerms[i];
CreateRule(nonTerm, params);
}
}
}
void Rules::CreateRule(const NonTerm &nonTerm,
const Parameter &params)
{
Rule *rule = new Rule(nonTerm, m_alignedSentence);
rule->Prevalidate(params);
rule->CreateTarget(params);
if (rule->CanRecurse()) {
Extend(*rule, params);
}
if (rule->IsValid()) {
m_keepRules.insert(rule);
}
else {
delete rule;
}
}
void Rules::Extend(const Parameter &params)
{
const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
size_t size = m_alignedSentence.GetPhrase(Moses::Input).size();
for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
for (size_t sourceEnd = sourceStart; sourceEnd < size; ++sourceEnd) {
const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
ConsistentPhrases::Coll::const_iterator iter;
for (iter = cps.begin(); iter != cps.end(); ++iter) {
const ConsistentPhrase &cp = **iter;
CreateRules(cp, params);
}
}
}
}
void Rules::Extend(const Rule &rule, const Parameter &params)
{
const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
int sourceMin = rule.GetNextSourcePosForNonTerm();
int ruleStart = rule.GetConsistentPhrase().corners[0];
int ruleEnd = rule.GetConsistentPhrase().corners[1];
for (int sourceStart = sourceMin; sourceStart <= ruleEnd; ++sourceStart) {
for (int sourceEnd = sourceStart; sourceEnd <= ruleEnd; ++sourceEnd) {
if (sourceStart == ruleStart && sourceEnd == ruleEnd) {
// don't cover whole rule with 1 non-term
continue;
}
const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
Extend(rule, cps, params);
}
}
}
// Extends `rule` with every consistent phrase in `cps`.
void Rules::Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter &params)
{
  ConsistentPhrases::Coll::const_iterator cpIter = cps.begin();
  while (cpIter != cps.end()) {
    const ConsistentPhrase &phrase = **cpIter;
    Extend(rule, phrase, params);
    ++cpIter;
  }
}
void Rules::Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter &params)
{
const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
for (size_t i = 0; i < nonTerms.size(); ++i) {
const NonTerm &nonTerm = nonTerms[i];
Rule *newRule = new Rule(rule, nonTerm);
newRule->Prevalidate(params);
newRule->CreateTarget(params);
if (newRule->CanRecurse()) {
// recursively extend
Extend(*newRule, params);
}
if (newRule->IsValid()) {
m_keepRules.insert(newRule);
}
else {
delete newRule;
}
}
}
std::string Rules::Debug() const
{
stringstream out;
std::set<Rule*>::const_iterator iter;
out << "m_keepRules:" << endl;
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
const Rule &rule = **iter;
out << rule.Debug() << endl;
}
return out.str();
}
void Rules::Output(std::ostream &out, bool forward, const Parameter &params) const
{
std::set<Rule*, CompareRules>::const_iterator iter;
for (iter = m_mergeRules.begin(); iter != m_mergeRules.end(); ++iter) {
const Rule &rule = **iter;
rule.Output(out, forward, params);
out << endl;
}
}
void Rules::Consolidate(const Parameter &params)
{
if (params.fractionalCounting) {
CalcFractionalCount();
}
else {
std::set<Rule*>::iterator iter;
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
Rule &rule = **iter;
rule.SetCount(1);
}
}
MergeRules(params);
}
void Rules::MergeRules(const Parameter &params)
{
typedef std::set<Rule*, CompareRules> MergeRules;
std::set<Rule*>::const_iterator iterOrig;
for (iterOrig = m_keepRules.begin(); iterOrig != m_keepRules.end(); ++iterOrig) {
Rule *origRule = *iterOrig;
pair<MergeRules::iterator, bool> inserted = m_mergeRules.insert(origRule);
if (!inserted.second) {
// already there, just add count
Rule &rule = **inserted.first;
float newCount = rule.GetCount() + origRule->GetCount();
rule.SetCount(newCount);
}
}
}
void Rules::CalcFractionalCount()
{
typedef std::set<Rule*> RuleColl;
typedef std::map<const ConsistentPhrase*, RuleColl> RuleByConsistentPhrase;
RuleByConsistentPhrase allRules;
// sort by source AND target ranges
std::set<Rule*>::const_iterator iter;
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
Rule *rule = *iter;
const ConsistentPhrase &cp = rule->GetConsistentPhrase();
RuleColl &ruleColl = allRules[&cp];
ruleColl.insert(rule);
}
// fractional count
RuleByConsistentPhrase::iterator iterOuter;
for (iterOuter = allRules.begin(); iterOuter != allRules.end(); ++iterOuter) {
RuleColl &rules = iterOuter->second;
RuleColl::iterator iterInner;
for (iterInner = rules.begin(); iterInner != rules.end(); ++iterInner) {
Rule &rule = **iterInner;
rule.SetCount(1.0f / (float) rules.size());
}
}
}

View File

@ -0,0 +1,72 @@
/*
* Rules.h
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#pragma once
#include <set>
#include <iostream>
#include "ConsistentPhrases.h"
#include "Rule.h"
extern bool g_debug;
class AlignedSentence;
class Parameter;
// Strict weak ordering over rules held by pointer, used as the comparator of
// the merged-rule set. Two rules are equivalent when their source phrase,
// target phrase, alignments and LHS string all match; the fields are
// compared in that order.
struct CompareRules {
  // Must be const-qualified: ordered associative containers call their
  // comparator through a const object (const lookups, and a C++17 requirement
  // enforced by standard libraries).
  bool operator()(const Rule *a, const Rule *b) const
  {
    int compare;

    // 1. source side
    compare = a->GetPhrase(Moses::Input).Compare(b->GetPhrase(Moses::Input));
    if (compare) return compare < 0;

    // 2. target side
    compare = a->GetPhrase(Moses::Output).Compare(b->GetPhrase(Moses::Output));
    if (compare) return compare < 0;

    // 3. word alignments
    if (a->GetAlignments() != b->GetAlignments()) {
      return a->GetAlignments() < b->GetAlignments();
    }

    // 4. left-hand-side label
    if (a->GetLHS().GetString() != b->GetLHS().GetString()) {
      return a->GetLHS().GetString() < b->GetLHS().GetString();
    }

    // equivalent rules
    return false;
  }
};
// Extracts all rules for one aligned sentence pair.
// Typical use, as the interface suggests: Extend(params) to generate rules,
// Consolidate(params) to assign counts and merge duplicates, then Output().
class Rules {
public:
// Stores a reference only; `alignedSentence` must outlive this object.
Rules(const AlignedSentence &alignedSentence);
virtual ~Rules();
// Generates rules for every consistent phrase of the sentence.
void Extend(const Parameter &params);
// Assigns counts (fractional or 1 per rule) and merges equivalent rules.
void Consolidate(const Parameter &params);
// Dump of m_keepRules for debugging.
std::string Debug() const;
// Writes the merged rules, one per line.
void Output(std::ostream &out, bool forward, const Parameter &params) const;
protected:
const AlignedSentence &m_alignedSentence;
// Every valid rule created, keyed by pointer identity.
std::set<Rule*> m_keepRules;
// Subset of m_keepRules that is unique under CompareRules; filled by
// MergeRules() with the same pointers that m_keepRules holds.
std::set<Rule*, CompareRules> m_mergeRules;
// Recursive extension of an existing rule with further non-terminals.
void Extend(const Rule &rule, const Parameter &params);
void Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter &params);
void Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter &params);
// create original rules
void CreateRules(const ConsistentPhrase &cp,
const Parameter &params);
void CreateRule(const NonTerm &nonTerm,
const Parameter &params);
// Folds m_keepRules into m_mergeRules, summing counts of equivalent rules.
void MergeRules(const Parameter &params);
// Gives each rule 1/N where N is the number of rules sharing its consistent phrase.
void CalcFractionalCount();
};

View File

@ -1,331 +0,0 @@
/*
* SentenceAlignment.cpp
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <set>
#include <map>
#include <sstream>
#include "SentenceAlignment.h"
#include "XmlTree.h"
#include "tables-core.h"
#include "TunnelCollection.h"
#include "Lattice.h"
#include "LatticeNode.h"
using namespace std;
extern std::set< std::string > targetLabelCollection, sourceLabelCollection;
extern std::map< std::string, int > targetTopLabelCollection, sourceTopLabelCollection;
// Starts without a tunnel collection or lattice; FindTunnels() and
// CreateLattice() allocate them later.
SentenceAlignment::SentenceAlignment()
  : m_tunnelCollection(NULL)
  , m_lattice(NULL)
{
}
// Releases the owned tunnel collection and lattice (either may still be NULL).
SentenceAlignment::~SentenceAlignment()
{
  delete m_tunnelCollection;
  delete m_lattice;
}
// Populates this object from one sentence pair and its word alignment.
//
// targetString / sourceString: raw sentences; XML syntax annotation is
//   stripped into targetTree / sourceTree when the corresponding
//   global.targetSyntax / global.sourceSyntax flag is set.
// alignmentString: whitespace-separated "s-t" pairs of 0-based positions.
// sentenceID: used only in diagnostics.
//
// Returns 1 on success. Returns 0, after writing a diagnostic to cerr, when
// either sentence is empty, an alignment point is malformed, or an alignment
// point lies outside the sentences.
int SentenceAlignment::Create( const std::string &targetString, const std::string &sourceString, const std::string &alignmentString, int sentenceID, const Global &global )
{
  // tokenizing English (and potentially extract syntax spans)
  if (global.targetSyntax) {
    string targetStringCPP = string(targetString);
    ProcessAndStripXMLTags( targetStringCPP, targetTree, targetLabelCollection , targetTopLabelCollection );
    target = tokenize( targetStringCPP.c_str() );
  }
  else {
    target = tokenize( targetString.c_str() );
  }

  // tokenizing source (and potentially extract syntax spans)
  if (global.sourceSyntax) {
    string sourceStringCPP = string(sourceString);
    ProcessAndStripXMLTags( sourceStringCPP, sourceTree, sourceLabelCollection , sourceTopLabelCollection );
    source = tokenize( sourceStringCPP.c_str() );
  }
  else {
    source = tokenize( sourceString.c_str() );
  }

  // check if sentences are empty
  if (target.empty() || source.empty()) {
    cerr << "no target (" << target.size() << ") or source (" << source.size() << ") words in sentence " << sentenceID << endl;
    cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
    return 0;
  }

  // prepare data structures for alignments: one counter per source word and
  // one (initially empty) list of aligned source positions per target word
  for (size_t i = 0; i < source.size(); i++) {
    alignedCountS.push_back( 0 );
  }
  for (size_t i = 0; i < target.size(); i++) {
    alignedToT.push_back( vector< int >() );
  }

  // reading in alignments
  vector<string> alignmentSequence = tokenize( alignmentString.c_str() );
  for (size_t i = 0; i < alignmentSequence.size(); i++) {
    int s,t;
    // sscanf returns the number of fields converted (or EOF), so only a
    // result of exactly 2 means that both s and t were read
    if (sscanf(alignmentSequence[i].c_str(), "%d-%d", &s, &t) != 2) {
      cerr << "WARNING: " << alignmentSequence[i] << " is a bad alignment point in sentence " << sentenceID << endl;
      cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
      return 0;
    }

    // reject negative positions explicitly rather than relying on the
    // implicit signed-to-unsigned conversion in the size comparison
    if (s < 0 || t < 0 ||
        static_cast<size_t>(t) >= target.size() ||
        static_cast<size_t>(s) >= source.size()) {
      cerr << "WARNING: sentence " << sentenceID << " has alignment point (" << s << ", " << t << ") out of bounds (" << source.size() << ", " << target.size() << ")\n";
      cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
      return 0;
    }

    alignedToT[t].push_back( s );
    alignedCountS[s]++;
  }

  bool mixed = global.mixed;
  sourceTree.AddDefaultNonTerms(global.sourceSyntax, mixed, source.size());
  targetTree.AddDefaultNonTerms(global.targetSyntax, mixed, target.size());

  return 1;
}
/*
void SentenceAlignment::InitTightest(Outer &tightest, size_t len)
{
tightest.resize(len);
for (size_t posOuter = 0; posOuter < len; ++posOuter)
{
Inner &inner = tightest[posOuter];
size_t innerSize = len - posOuter;
inner.resize(innerSize);
}
}
void SentenceAlignment::CalcTightestSpan(Outer &tightest)
{
size_t len = tightest.size();
for (size_t startPos = 0; startPos < len; ++startPos)
{
for (size_t endPos = startPos + 1; endPos < len; ++endPos)
{
const Range &prevRange = GetTightest(tightest, startPos, endPos - 1);
const Range &smallRange = GetTightest(tightest, endPos, endPos);
Range &newRange = GetTightest(tightest, startPos, endPos);
newRange.Merge(prevRange, smallRange);
//cerr << "[" << startPos << "-" << endPos << "] --> [" << newRange.GetStartPos() << "-" << newRange.GetEndPos() << "]";
}
}
}
Range &SentenceAlignment::GetTightest(Outer &tightest, size_t startPos, size_t endPos)
{
assert(endPos < tightest.size());
assert(endPos >= startPos);
Inner &inner = tightest[startPos];
size_t ind = endPos - startPos;
Range &ret = inner[ind];
return ret;
}
void SentenceAlignment::SetAlignment(size_t source, size_t target)
{
SetAlignment(m_s2tTightest, source, target);
SetAlignment(m_t2sTightest, target, source);
}
void SentenceAlignment::SetAlignment(Outer &tightest, size_t thisPos, size_t thatPos)
{
Range &range = GetTightest(tightest, thisPos, thisPos);
if (range.GetStartPos() == NOT_FOUND)
{ // not yet set, do them both
assert(range.GetEndPos() == NOT_FOUND);
range.SetStartPos(thatPos);
range.SetEndPos(thatPos);
}
else
{
assert(range.GetEndPos() != NOT_FOUND);
range.SetStartPos( (range.GetStartPos() > thatPos) ? thatPos : range.GetStartPos() );
range.SetEndPos( (range.GetEndPos() < thatPos) ? thatPos : range.GetEndPos() );
}
}
*/
// Enumerates all phrase pairs ("tunnels") that are consistent with the word
// alignment and records them in a freshly allocated m_tunnelCollection.
// For every target span up to maxSpan words it finds the minimal source span
// aligned to it, rejects spans whose source words also align outside the
// target span, and then widens the source span over unaligned words.
// NOTE(review): a second call would overwrite m_tunnelCollection without
// freeing the previous one.
void SentenceAlignment::FindTunnels(const Global &global )
{
int countT = target.size();
int countS = source.size();
// a single limit, the larger of the two configured hole spans, bounds both
// the target span length and the source span width below
int maxSpan = max(global.maxHoleSpanSourceDefault, global.maxHoleSpanSourceSyntax);
m_tunnelCollection = new TunnelCollection(countS);
// hand the per-word alignment counts to the collection
m_tunnelCollection->alignedCountS = alignedCountS;
m_tunnelCollection->alignedCountT.resize(alignedToT.size());
for (size_t ind = 0; ind < alignedToT.size(); ind++)
{
m_tunnelCollection->alignedCountT[ind] = alignedToT[ind].size();
}
// phrase repository for creating hiero phrases
// check alignments for target phrase startT...endT
for(int lengthT=1;
lengthT <= maxSpan && lengthT <= countT;
lengthT++) {
for(int startT=0; startT < countT-(lengthT-1); startT++) {
// inclusive end position of the target span
int endT = startT + lengthT - 1;
// if there is target side syntax, there has to be a node
if (global.targetSyntax && !targetTree.HasNode(startT,endT))
continue;
// find aligned source words
// first: find minimum and maximum source word
// 9999 is a "no source word seen yet" sentinel; it is only read after
// maxS != -1 confirmed that at least one alignment point was found
int minS = 9999;
int maxS = -1;
// usedS starts as the total alignment count of each source word and is
// decremented for every link into [startT, endT]; whatever remains is
// the number of links that leave the target span
vector< int > usedS = alignedCountS;
for(int ti=startT;ti<=endT;ti++) {
for(int i=0;i<alignedToT[ti].size();i++) {
int si = alignedToT[ti][i];
// cerr << "point (" << si << ", " << ti << ")\n";
if (si<minS) { minS = si; }
if (si>maxS) { maxS = si; }
usedS[ si ]--;
}
}
// unaligned phrases are not allowed
if( maxS == -1 )
continue;
// source phrase has to be within limits
if( maxS-minS >= maxSpan )
{
continue;
}
// check if source words are aligned to out of bound target words
bool out_of_bounds = false;
for(int si=minS;si<=maxS && !out_of_bounds;si++)
{
if (usedS[si]>0) {
out_of_bounds = true;
}
}
// inconsistent with the alignment: skip this target span
if (out_of_bounds)
continue;
// NOTE(review): the first argument (1 here, 0 further down) presumably
// selects the target vs. source side - confirm in TunnelCollection
if (m_tunnelCollection->NumUnalignedWord(1, startT, endT) >= global.maxUnaligned)
continue;
// done with all the checks, lets go over all consistent phrase pairs
// start point of source phrase may retreat over unaligned
for(int startS=minS;
(startS>=0 &&
startS>maxS - maxSpan && // within length limit
(startS==minS || alignedCountS[startS]==0)); // unaligned
startS--)
{
// end point of source phrase may advance over unaligned
for(int endS=maxS;
(endS<countS && endS<startS + maxSpan && // within length limit
(endS==maxS || alignedCountS[endS]==0)); // unaligned
endS++)
{
if (m_tunnelCollection->NumUnalignedWord(0, startS, endS) >= global.maxUnaligned)
continue;
// take note that this is a valid phrase alignment
m_tunnelCollection->Add(startS, endS, startT, endT);
}
}
}
}
//cerr << *tunnelCollection << endl;
}
void SentenceAlignment::CreateLattice(const Global &global)
{
size_t countS = source.size();
m_lattice = new Lattice(countS);
for (size_t startPos = 0; startPos < countS; ++startPos)
{
//cerr << "creating arcs for " << startPos << "=";
m_lattice->CreateArcs(startPos, *m_tunnelCollection, *this, global);
//cerr << LatticeNode::s_count << endl;
}
}
void SentenceAlignment::CreateRules(const Global &global)
{
size_t countS = source.size();
for (size_t startPos = 0; startPos < countS; ++startPos)
{
//cerr << "creating rules for " << startPos << "\n";
m_lattice->CreateRules(startPos, *this, global);
}
}
// Writes every word of `vec` followed by a single space (so a non-empty
// sentence ends with a trailing space; an empty one writes nothing).
void OutputSentenceStr(std::ostream &out, const std::vector<std::string> &vec)
{
  std::vector<std::string>::const_iterator word = vec.begin();
  while (word != vec.end()) {
    out << *word << " ";
    ++word;
  }
}
// Debug dump: "<target words> ==> <source words>" on one line, followed by
// the tunnel collection and the lattice when they exist.
std::ostream& operator<<(std::ostream &out, const SentenceAlignment &obj)
{
  OutputSentenceStr(out, obj.target);
  out << " ==> ";
  OutputSentenceStr(out, obj.source);
  out << endl;

  // both pointers stay NULL until FindTunnels() / CreateLattice() have run,
  // so each one is checked before it is dereferenced
  if (obj.m_tunnelCollection)
    out << *obj.m_tunnelCollection;
  if (obj.m_lattice)
    out << endl << *obj.m_lattice;

  return out;
}

View File

@ -1,69 +0,0 @@
#pragma once
/*
* SentenceAlignment.h
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <vector>
#include <cassert>
#include <iostream>
#include "SyntaxTree.h"
#include "Global.h"
#include "Range.h"
class TunnelCollection;
class Lattice;
// One word-aligned sentence pair plus the structures derived from it
// (syntax trees, tunnel collection, lattice).
// NOTE(review): the class owns two raw pointers that its destructor deletes
// but declares no copy constructor or assignment operator; copying an
// instance would lead to a double delete.
class SentenceAlignment
{
friend std::ostream& operator<<(std::ostream&, const SentenceAlignment&);
public:
// tokenised sentences
std::vector<std::string> target;
std::vector<std::string> source;
// alignedCountS[s]: number of alignment points that involve source word s
std::vector<int> alignedCountS;
// alignedToT[t]: source positions aligned to target word t
std::vector< std::vector<int> > alignedToT;
SyntaxTree sourceTree, targetTree;
//typedef std::vector<Range> Inner;
//typedef std::vector<Inner> Outer;
//Outer m_s2tTightest, m_t2sTightest;
SentenceAlignment();
~SentenceAlignment();
// Parses the sentence pair and its alignment; returns 1 on success, 0 on error.
int Create(const std::string &targetString, const std::string &sourceString, const std::string &alignmentString, int sentenceID, const Global &global);
// void clear() { delete(alignment); };
// Allocates m_tunnelCollection and fills it with the consistent phrase pairs.
void FindTunnels( const Global &global ) ;
// Allocates m_lattice; uses m_tunnelCollection, so call FindTunnels() first.
void CreateLattice(const Global &global);
// Uses m_lattice, so call CreateLattice() first.
void CreateRules(const Global &global);
// Asserts that FindTunnels() has already run.
const TunnelCollection &GetTunnelCollection() const
{
assert(m_tunnelCollection);
return *m_tunnelCollection;
}
// Asserts that CreateLattice() has already run.
const Lattice &GetLattice() const
{
assert(m_lattice);
return *m_lattice;
}
protected:
// Owned; NULL until created, deleted by the destructor.
TunnelCollection *m_tunnelCollection;
Lattice *m_lattice;
/*
void CalcTightestSpan(Outer &tightest);
void InitTightest(Outer &tightest, size_t len);
Range &GetTightest(Outer &tightest, size_t startPos, size_t endPos);
void SetAlignment(size_t source, size_t target);
void SetAlignment(Outer &tightest, size_t thisPos, size_t thatPos);
*/
};

View File

@ -1,101 +0,0 @@
/*
* Symbol.cpp
* extract
*
* Created by Hieu Hoang on 21/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <cassert>
#include "Symbol.h"
using namespace std;
// Terminal symbol: `label` is the word, `pos` its position, which is stored
// in m_span[0].first (the other span fields stay value-initialised).
Symbol::Symbol(const std::string &label, size_t pos)
  :m_label(label)
  ,m_span(2)
  ,m_isTerminal(true)
  // not meaningful for terminals, but initialised so that copying a Symbol
  // never reads an indeterminate bool
  ,m_isSourceSyntax(false)
  ,m_isTargetSyntax(false)
{
  m_span[0].first = pos;
}
// Non-terminal symbol with a source and a target label. m_span[0] holds the
// source span [startS, endS] and m_span[1] the target span [startT, endT].
Symbol::Symbol(const std::string &labelS, const std::string &labelT
               , size_t startS, size_t endS
               , size_t startT, size_t endT
               , bool isSourceSyntax, bool isTargetSyntax)
  :m_label(labelS)
  ,m_labelT(labelT)
  ,m_span(2)
  ,m_isTerminal(false)
  ,m_isSourceSyntax(isSourceSyntax)
  ,m_isTargetSyntax(isTargetSyntax)
{
  m_span[0].first = startS;
  m_span[0].second = endS;
  m_span[1].first = startT;
  m_span[1].second = endT;
}
// Three-way comparison of one side (source or target) of two non-terminals.
// A syntax-labelled non-terminal sorts before a default one. Two syntax
// non-terminals are ordered by span, then by label. Two default
// non-terminals always compare equal, whatever their spans and labels.
int CompareNonTerm(bool thisIsSyntax, bool otherIsSyntax
                   , const std::pair<size_t, size_t> &thisSpan, const std::pair<size_t, size_t> &otherSpan
                   , std::string thisLabel, std::string otherLabel)
{
  // exactly one of the two carries a syntax label
  if (thisIsSyntax && !otherIsSyntax) return -1;
  if (!thisIsSyntax && otherIsSyntax) return +1;

  // both are default non-terminals: nothing further to compare
  if (!thisIsSyntax) return 0;

  // both are syntax non-terminals: span first, label second
  if (thisSpan < otherSpan) return -1;
  if (otherSpan < thisSpan) return +1;
  if (thisLabel < otherLabel) return -1;
  if (otherLabel < thisLabel) return +1;
  return 0;
}
int Symbol::Compare(const Symbol &other) const
{
if (m_isTerminal != other.m_isTerminal)
return m_isTerminal ? -1 : +1;
assert(m_isTerminal == other.m_isTerminal);
if (m_isTerminal)
{ // compare labels & pos
if (m_span[0].first != other.m_span[0].first)
return (m_span[0].first < other.m_span[0].first) ? -1 : +1;
if (m_label != other.m_label)
return (m_label < other.m_label) ? -1 : +1;
}
else
{ // non terms
int ret = CompareNonTerm(m_isSourceSyntax, other.m_isSourceSyntax
,m_span[0], other.m_span[0]
,m_label, other.m_label);
if (ret != 0)
return ret;
ret = CompareNonTerm(m_isTargetSyntax, other.m_isTargetSyntax
,m_span[1], other.m_span[1]
,m_label, other.m_label);
if (ret != 0)
return ret;
}
return 0;
}
// Prints the label of a terminal, or the source label immediately followed
// by the target label for a non-terminal.
std::ostream& operator<<(std::ostream &out, const Symbol &obj)
{
  if (!obj.m_isTerminal) {
    out << obj.m_label + obj.m_labelT;
    return out;
  }
  out << obj.m_label;
  return out;
}

View File

@ -1,36 +0,0 @@
#pragma once
/*
* Symbol.h
* extract
*
* Created by Hieu Hoang on 21/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <string>
#include <iostream>
#include <vector>
// One symbol of a rule: either a terminal (a word at a position) or a
// non-terminal with a source and a target label and span.
class Symbol
{
friend std::ostream& operator<<(std::ostream &out, const Symbol &obj);
protected:
std::string m_label, m_labelT; // m_labelT only for non-term
// Always two entries: [0] is the source span, [1] the target span.
// A terminal only sets m_span[0].first (its position).
std::vector<std::pair<size_t, size_t> > m_span;
// The two *Syntax flags are consulted for non-terminals only.
bool m_isTerminal, m_isSourceSyntax, m_isTargetSyntax;
public:
// for terminals
Symbol(const std::string &label, size_t pos);
// for non-terminals
Symbol(const std::string &labelS, const std::string &labelT
, size_t startS, size_t endS
, size_t startT, size_t endT
, bool isSourceSyntax, bool isTargetSyntax);
// Three-way comparison (-1 / 0 / +1); terminals sort before non-terminals.
int Compare(const Symbol &other) const;
};

View File

@ -1,56 +0,0 @@
/*
* SymbolSequence.cpp
* extract
*
* Created by Hieu Hoang on 21/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <cassert>
#include <sstream>
#include "SymbolSequence.h"
using namespace std;
int SymbolSequence::Compare(const SymbolSequence &other) const
{
int ret;
size_t thisSize = GetSize();
size_t otherSize = other.GetSize();
if (thisSize != otherSize)
{
ret = (thisSize < otherSize) ? -1 : +1;
return ret;
}
else
{
assert(thisSize == otherSize);
for (size_t ind = 0; ind < thisSize; ++ind)
{
const Symbol &thisSymbol = GetSymbol(ind);
const Symbol &otherSymbol = other.GetSymbol(ind);
ret = thisSymbol.Compare(otherSymbol);
if (ret != 0)
{
return ret;
}
}
}
assert(ret == 0);
return ret;
}
std::ostream& operator<<(std::ostream &out, const SymbolSequence &obj)
{
SymbolSequence::CollType::const_iterator iterSymbol;
for (iterSymbol = obj.m_coll.begin(); iterSymbol != obj.m_coll.end(); ++iterSymbol)
{
const Symbol &symbol = *iterSymbol;
out << symbol << " ";
}
return out;
}

View File

@ -1,42 +0,0 @@
#pragma once
/*
* SymbolSequence.h
* extract
*
* Created by Hieu Hoang on 21/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <iostream>
#include <vector>
#include "Symbol.h"
// Ordered sequence of Symbol values (stored by value) with a three-way
// comparison, used for one side of a rule.
class SymbolSequence
{
friend std::ostream& operator<<(std::ostream &out, const SymbolSequence &obj);
protected:
typedef std::vector<Symbol> CollType;
CollType m_coll;
public:
typedef CollType::iterator iterator;
typedef CollType::const_iterator const_iterator;
// Read-only iteration over the symbols in sequence order.
const_iterator begin() const { return m_coll.begin(); }
const_iterator end() const { return m_coll.end(); }
// Appends a copy of `symbol`.
void Add(const Symbol &symbol)
{
m_coll.push_back(symbol);
}
// Number of symbols in the sequence.
size_t GetSize() const
{ return m_coll.size(); }
// Unchecked access: `ind` must be < GetSize().
const Symbol &GetSymbol(size_t ind) const
{ return m_coll[ind]; }
// Removes all symbols.
void Clear()
{ m_coll.clear(); }
// Three-way comparison: by length first, then symbol by symbol.
int Compare(const SymbolSequence &other) const;
};

View File

@ -1,245 +1,47 @@
// $Id: SyntaxTree.cpp 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include <cassert>
#include <iostream>
#include "SyntaxTree.h"
//#include "extract.h"
#include "Global.h"
//extern const Global g_debug;
extern const Global *g_global;
#include "Parameter.h"
using namespace std;
bool SyntaxNode::IsSyntax() const
void SyntaxTree::Add(int startPos, int endPos, const std::string &label, const Parameter &params)
{
bool ret = GetLabel() != "[X]";
return ret;
}
//cerr << "add " << label << " to " << "[" << startPos << "-" << endPos << "]" << endl;
// The default left-hand side is an "[X]" node over span [0,0]; the shared
// "no nodes" result starts out empty.
SyntaxTree::SyntaxTree()
  : m_defaultLHS(0, 0, "[X]")
{
  m_emptyNode.clear();
}
Range range(startPos, endPos);
Labels &labels = m_coll[range];
// Frees every node recorded in m_nodes.
SyntaxTree::~SyntaxTree()
{
  // loop through all m_nodes, delete them
  for (size_t idx = 0; idx < m_nodes.size(); ++idx) {
    delete m_nodes[idx];
  }
}
bool HasDuplicates(const SyntaxNodes &nodes)
{
string prevLabel;
SyntaxNodes::const_iterator iter;
for (iter = nodes.begin(); iter != nodes.end(); ++iter)
{
const SyntaxNode &node = **iter;
string label = node.GetLabel();
if (label == prevLabel)
return true;
}
return false;
}
void SyntaxTree::AddNode( int startPos, int endPos, std::string label )
{
SyntaxNode* newNode = new SyntaxNode( startPos, endPos, "[" + label + "]");
m_nodes.push_back( newNode );
SyntaxNodes &nodesChart = m_index[ startPos ][ endPos ];
if (!g_global->uppermostOnly)
{
nodesChart.push_back( newNode );
//assert(!HasDuplicates(m_index[ startPos ][ endPos ]));
}
else
{
if (nodesChart.size() > 0)
{
assert(nodesChart.size() == 1);
//delete nodes[0];
nodesChart.resize(0);
bool add = true;
if (labels.size()) {
if (params.multiLabel == 1) {
// delete the label in collection and add new
assert(labels.size() == 1);
labels.clear();
}
assert(nodesChart.size() == 0);
nodesChart.push_back( newNode );
}
}
ParentNodes SyntaxTree::Parse() {
ParentNodes parents;
int size = m_index.size();
// looping through all spans of size >= 2
for( int length=2; length<=size; length++ )
{
for( int startPos = 0; startPos <= size-length; startPos++ )
{
if (HasNode( startPos, startPos+length-1 ))
{
// processing one (parent) span
//std::cerr << "# " << startPos << "-" << (startPos+length-1) << ":";
SplitPoints splitPoints;
splitPoints.push_back( startPos );
//std::cerr << " " << startPos;
int first = 1;
int covered = 0;
while( covered < length )
{
// find largest covering subspan (child)
// starting at last covered position
for( int midPos=length-first; midPos>covered; midPos-- )
{
if( HasNode( startPos+covered, startPos+midPos-1 ) )
{
covered = midPos;
splitPoints.push_back( startPos+covered );
// std::cerr << " " << ( startPos+covered );
first = 0;
}
}
}
// std::cerr << std::endl;
parents.push_back( splitPoints );
}
else if (params.multiLabel == 2) {
// ignore this label
add = false;
}
}
return parents;
}
bool SyntaxTree::HasNode( int startPos, int endPos ) const
{
return GetNodes( startPos, endPos).size() > 0;
}
// Returns the nodes indexed under the exact span [startPos, endPos], or the
// shared m_emptyNode list when nothing is indexed under that span.
const SyntaxNodes &SyntaxTree::GetNodes( int startPos, int endPos ) const
{
  SyntaxTreeIndexIterator byStart = m_index.find( startPos );
  if (byStart != m_index.end()) {
    SyntaxTreeIndexIterator2 byEnd = byStart->second.find( endPos );
    if (byEnd != byStart->second.end()) {
      return byEnd->second;
    }
  }
  return m_emptyNode;
}
// for printing out tree
// Returns the text that operator<< produces for this tree.
std::string SyntaxTree::ToString() const
{
  std::stringstream rendered;
  rendered << *this;
  return rendered.str();
}
void SyntaxTree::AddDefaultNonTerms(size_t phraseSize)
{
for (size_t startPos = 0; startPos <= phraseSize; ++startPos)
{
for (size_t endPos = startPos; endPos < phraseSize; ++endPos)
{
AddNode(startPos, endPos, "X");
}
if (add) {
labels.push_back(label);
}
}
void SyntaxTree::AddDefaultNonTerms(bool isSyntax, bool mixed, size_t phraseSize)
void SyntaxTree::AddToAll(const std::string &label)
{
if (isSyntax)
{
AddDefaultNonTerms(!mixed, phraseSize);
}
else
{ // add X everywhere
AddDefaultNonTerms(phraseSize);
Coll::iterator iter;
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
Labels &labels = iter->second;
labels.push_back(label);
}
}
void SyntaxTree::AddDefaultNonTerms(bool addEverywhere, size_t phraseSize)
const SyntaxTree::Labels &SyntaxTree::Find(int startPos, int endPos) const
{
//cerr << "GetNumWords()=" << GetNumWords() << endl;
//assert(phraseSize == GetNumWords() || GetNumWords() == 1); // 1 if syntax sentence doesn't have any xml. TODO fix syntax tree obj
for (size_t startPos = 0; startPos <= phraseSize; ++startPos)
{
for (size_t endPos = startPos; endPos <= phraseSize; ++endPos)
{
const SyntaxNodes &nodes = GetNodes(startPos, endPos);
if (!addEverywhere && nodes.size() > 0)
{ // only add if no label
continue;
}
AddNode(startPos, endPos, "X");
}
}
Coll::const_iterator iter;
iter = m_coll.find(Range(startPos, endPos));
return (iter == m_coll.end()) ? m_defaultLabels : iter->second;
}
// Returns a copy of the nodes over [startPos, endPos]; when there are none,
// the result contains only the default LHS node.
const SyntaxNodes SyntaxTree::GetNodesForLHS( int startPos, int endPos ) const
{
  SyntaxNodes candidates(GetNodes(startPos, endPos));
  if (candidates.empty()) {
    candidates.push_back(&m_defaultLHS);
  }
  return candidates;
}
// Prints the tree as one text row per span length. Each row is indented by
// `length` spaces; every span with a node shows its first label padded with
// '#' and cut to 7 characters, every other span shows "-------".
std::ostream& operator<<(std::ostream& os, const SyntaxTree& t)
{
  int size = t.m_index.size();
  for (size_t length=1; length<=size; length++) {
    // row indentation
    size_t pad = 0;
    while (pad < length) {
      os << " ";
      ++pad;
    }

    for (size_t start=0; start<=size-length; start++) {
      const size_t last = start+(length-1);
      if (!t.HasNode( start, last )) {
        os << "------- ";
        continue;
      }
      std::string label = t.GetNodes( start, last )[0]->GetLabel() + "#######";
      os << label.substr(0,7) << " ";
    }
    os << std::endl;
  }
  return os;
}

View File

@ -1,96 +1,32 @@
#pragma once
// $Id: SyntaxTree.h 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <string>
#include <vector>
#include <map>
#include <sstream>
#include <string>
class SyntaxNode;
class Parameter;
typedef std::vector<const SyntaxNode*> SyntaxNodes;
class SyntaxNode {
protected:
int m_start, m_end;
std::string m_label;
SyntaxNodes m_children;
SyntaxNode* m_parent;
class SyntaxTree
{
public:
SyntaxNode( int startPos, int endPos, const std::string &label)
:m_start(startPos)
,m_end(endPos)
,m_label(label)
{}
int GetStart() const
{ return m_start; }
int GetEnd() const
{ return m_end; }
const std::string &GetLabel() const
{ return m_label; }
bool IsSyntax() const;
typedef std::pair<int, int> Range;
typedef std::vector<std::string> Labels;
typedef std::map<Range, Labels> Coll;
void Add(int startPos, int endPos, const std::string &label, const Parameter &params);
void AddToAll(const std::string &label);
const Labels &Find(int startPos, int endPos) const;
void SetHieroLabel(const std::string &label) {
m_defaultLabels.push_back(label);
}
protected:
Coll m_coll;
Labels m_defaultLabels;
};
typedef std::vector< int > SplitPoints;
typedef std::vector< SplitPoints > ParentNodes;
class SyntaxTree {
protected:
SyntaxNodes m_nodes;
SyntaxNode* m_top;
SyntaxNode m_defaultLHS;
typedef std::map< int, SyntaxNodes > SyntaxTreeIndex2;
typedef SyntaxTreeIndex2::const_iterator SyntaxTreeIndexIterator2;
typedef std::map< int, SyntaxTreeIndex2 > SyntaxTreeIndex;
typedef SyntaxTreeIndex::const_iterator SyntaxTreeIndexIterator;
SyntaxTreeIndex m_index;
SyntaxNodes m_emptyNode;
friend std::ostream& operator<<(std::ostream&, const SyntaxTree&);
public:
SyntaxTree();
~SyntaxTree();
void AddNode( int startPos, int endPos, std::string label );
ParentNodes Parse();
bool HasNode( int startPos, int endPos ) const;
const SyntaxNodes &GetNodes( int startPos, int endPos ) const;
const SyntaxNodes &GetAllNodes() const { return m_nodes; } ;
size_t GetNumWords() const { return m_index.size(); }
std::string ToString() const;
void AddDefaultNonTerms(bool isSyntax, bool addEverywhere, size_t phraseSize);
void AddDefaultNonTerms(bool mixed, size_t phraseSize);
void AddDefaultNonTerms(size_t phraseSize);
const SyntaxNodes GetNodesForLHS( int startPos, int endPos ) const;
};
std::ostream& operator<<(std::ostream&, const SyntaxTree&);

View File

@ -1,38 +0,0 @@
/*
* Tunnel.cpp
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include "Tunnel.h"
// Orders two tunnels by their source ranges first; the target ranges are
// only consulted when the source ranges compare as equal (result 0).
int Tunnel::Compare(const Tunnel &other) const
{
  const int sourceOrder = m_sourceRange.Compare(other.m_sourceRange);
  if (sourceOrder == 0) {
    return m_targetRange.Compare(other.m_targetRange);
  }
  return sourceOrder;
}
int Tunnel::Compare(const Tunnel &other, size_t direction) const
{
const Range &thisRange = (direction == 0) ? m_sourceRange : m_targetRange;
const Range &otherRange = (direction == 0) ? other.m_sourceRange : other.m_targetRange;
int ret = thisRange.Compare(otherRange);
return ret;
}
// Writes the tunnel as "<source range>==><target range>".
std::ostream& operator<<(std::ostream &out, const Tunnel &tunnel)
{
  out << tunnel.m_sourceRange;
  out << "==>";
  out << tunnel.m_targetRange;
  return out;
}

View File

@ -1,49 +0,0 @@
#pragma once
/*
* Tunnel.h
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <vector>
#include <cassert>
#include <string>
#include <iostream>
#include "Range.h"
// A pair of ranges: one on the source side and one on the target side.
// (Original author's note: "for unaligned source terminal".)
class Tunnel
{
  friend std::ostream& operator<<(std::ostream&, const Tunnel&);

public:
  // Both ranges are default-constructed.
  Tunnel()
  {}

  Tunnel(const Range &sourceRange, const Range &targetRange)
    : m_sourceRange(sourceRange)
    , m_targetRange(targetRange)
  {}

  // Member-wise copy of both ranges.
  Tunnel(const Tunnel &copy)
    : m_sourceRange(copy.m_sourceRange)
    , m_targetRange(copy.m_targetRange)
  {}

  // direction 0 selects the source range, any other value the target range.
  const Range &GetRange(size_t direction) const {
    if (direction == 0) {
      return m_sourceRange;
    }
    return m_targetRange;
  }

  // Full comparison: source range first, then target range.
  int Compare(const Tunnel &other) const;
  // Comparison restricted to the side selected by direction.
  int Compare(const Tunnel &other, size_t direction) const;

protected:
  Range m_sourceRange, m_targetRange;
};
typedef std::vector<Tunnel> TunnelList;

View File

@ -1,70 +0,0 @@
/*
* TunnelCollection.cpp
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include "TunnelCollection.h"
#include "Range.h"
using namespace std;
// Counts the positions in [startPos, endPos] (inclusive) whose alignment
// counter is zero. direction 0 reads alignedCountS, direction 1 reads
// alignedCountT; for any other direction nothing is counted and 0 is
// returned.
size_t TunnelCollection::NumUnalignedWord(size_t direction, size_t startPos, size_t endPos) const
{
  assert(startPos <= endPos);

  const bool useSource = (direction == 0);
  // the bound for every non-zero direction is checked against alignedCountT
  assert(endPos < (useSource ? alignedCountS.size() : alignedCountT.size()));

  size_t numUnaligned = 0;
  for (size_t pos = startPos; pos <= endPos; ++pos) {
    const bool unaligned = useSource
                           ? (alignedCountS[pos] == 0)
                           : (direction == 1 && alignedCountT[pos] == 0);
    if (unaligned) {
      ++numUnaligned;
    }
  }
  return numUnaligned;
}
void TunnelCollection::Add(int startS, int endS, int startT, int endT)
{
// m_phraseExist[startS][endS - startS].push_back(Tunnel(startT, endT));
m_coll[startS][endS - startS].push_back(Tunnel(Range(startS, endS), Range(startT, endT)));
}
std::ostream& operator<<(std::ostream &out, const TunnelCollection &TunnelCollection)
{
size_t size = TunnelCollection.GetSize();
for (size_t startPos = 0; startPos < size; ++startPos)
{
for (size_t endPos = startPos; endPos < size; ++endPos)
{
const TunnelList &tunnelList = TunnelCollection.GetTunnels(startPos, endPos);
TunnelList::const_iterator iter;
for (iter = tunnelList.begin(); iter != tunnelList.end(); ++iter)
{
const Tunnel &tunnel = *iter;
out << tunnel << " ";
}
}
}
return out;
}

View File

@ -1,61 +0,0 @@
#pragma once
/*
* TunnelCollection.h
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <vector>
#include "Tunnel.h"
// Repository of extracted phrase pairs, which are potential tunnels in
// larger phrase pairs.
class TunnelCollection
{
  friend std::ostream& operator<<(std::ostream&, const TunnelCollection&);

protected:
  // Indexed by source start position, then by source span length minus one;
  // each cell holds the list of tunnels recorded for that source span.
  std::vector< std::vector<TunnelList> > m_coll;

public:
  // Per-position alignment counters for the source (S) and target (T)
  // side; NumUnalignedWord() treats a zero entry as "unaligned".
  std::vector<int> alignedCountS, alignedCountT;

  // Declared only; no definition is visible alongside this class.
  TunnelCollection(const TunnelCollection &);

  // size is the length of the source sentence. Start position p gets
  // (size - p) empty tunnel lists, one per possible span length.
  TunnelCollection(size_t size)
    : m_coll(size)
  {
    for (size_t startPos = 0; startPos < size; ++startPos) {
      m_coll[startPos].resize(size - startPos);
    }
  }

  void Add(int startS, int endS, int startT, int endT);

  // Tunnels recorded for the source span [startS, endS]; no bounds checks.
  const TunnelList &GetTunnels(int startS, int endS) const {
    return m_coll[startS][endS - startS];
  }

  // Number of source start positions (the size passed to the constructor).
  const size_t GetSize() const {
    return m_coll.size();
  }

  size_t NumUnalignedWord(size_t direction, size_t startPos, size_t endPos) const;
};

View File

@ -0,0 +1,56 @@
/*
* Word.cpp
*
* Created on: 18 Feb 2014
* Author: s0565741
*/
#include <limits>
#include "Word.h"
using namespace std;
// Builds a terminal from its position and its surface string.
Word::Word(int pos, const std::string &str)
  : m_pos(pos)
  , m_str(str)
{
}
// Nothing to release: the alignment set stores non-owning pointers only.
Word::~Word()
{
}
void Word::AddAlignment(const Word *other)
{
m_alignment.insert(other);
}
std::set<int> Word::GetAlignmentIndex() const
{
std::set<int> ret;
std::set<const Word *>::const_iterator iter;
for (iter = m_alignment.begin(); iter != m_alignment.end(); ++iter) {
const Word &otherWord = **iter;
int otherPos = otherWord.GetPos();
ret.insert(otherPos);
}
return ret;
}
// Writes the surface string of the word, with no separator or newline.
void Word::Output(std::ostream &out) const
{
  const std::string &surface = m_str;
  out << surface;
}
// Debug representation: a copy of the surface string.
std::string Word::Debug() const
{
  std::string text(m_str);
  return text;
}
// Three-way comparison of the surface strings via std::string::compare;
// positions and alignments are ignored.
int Word::CompareString(const Word &other) const
{
  const std::string &otherStr = other.m_str;
  return m_str.compare(otherStr);
}

View File

@ -0,0 +1,47 @@
/*
* Word.h
*
* Created on: 18 Feb 2014
* Author: s0565741
*/
#pragma once
#include <string>
#include <set>
#include "RuleSymbol.h"
// A terminal symbol: a surface string plus its original sentence position
// and the set of words it is aligned to.
class Word : public RuleSymbol
{
public:
  Word(const Word&); // do not implement
  Word(int pos, const std::string &str);
  virtual ~Word();

  // A Word is always a terminal.
  virtual bool IsNonTerm() const {
    return false;
  }

  // Copy of the surface string.
  std::string GetString() const {
    return m_str;
  }

  int GetPos() const {
    return m_pos;
  }

  // Alignment links are stored as non-owning pointers.
  void AddAlignment(const Word *other);
  const std::set<const Word *> &GetAlignment() const {
    return m_alignment;
  }
  // Positions of all aligned words.
  std::set<int> GetAlignmentIndex() const;

  void Output(std::ostream &out) const;
  std::string Debug() const;

  // Compares surface strings only.
  int CompareString(const Word &other) const;

protected:
  int m_pos; // original position in sentence, NOT in lattice
  std::string m_str;
  std::set<const Word *> m_alignment;
};

View File

@ -1,344 +0,0 @@
// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <vector>
#include <string>
#include <set>
#include <iostream>
#include <stdlib.h>
#include "SyntaxTree.h"
using namespace std;
// Splits str on any character contained in delimiters. Runs of delimiters
// count as one separator, and leading/trailing delimiters produce no empty
// tokens. An empty or all-delimiter input yields an empty vector.
inline std::vector<std::string> Tokenize(const std::string& str,
    const std::string& delimiters = " \t")
{
  std::vector<std::string> tokens;

  // start of the next token, or npos when only delimiters remain
  std::string::size_type tokenStart = str.find_first_not_of(delimiters);
  while (tokenStart != std::string::npos) {
    const std::string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);
    if (tokenEnd == std::string::npos) {
      // last token runs to the end of the string
      tokens.push_back(str.substr(tokenStart));
      break;
    }
    tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
    tokenStart = str.find_first_not_of(delimiters, tokenEnd);
  }
  return tokens;
}
// Returns str without leading and trailing characters that occur in
// dropChars. A string consisting only of such characters becomes empty.
const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r")
{
  const std::string::size_type first = str.find_first_not_of(dropChars);
  if (first == std::string::npos) {
    // nothing but droppable characters (or empty input)
    return std::string();
  }
  const std::string::size_type last = str.find_last_not_of(dropChars);
  return str.substr(first, last - first + 1);
}
// Extracts the value of attributeName from the contents of an XML tag.
//
// tag            tag text, e.g.  label="NP" span="1-2"
// attributeName  attribute to look up, e.g. "label"
//
// Returns the text between the double quotes following attributeName=,
// with backslash-escaped quotes left in place (they are not unescaped).
// Returns "" when the attribute is absent, when its value is empty, or
// when the closing quote is missing (a message is then written to cerr).
std::string ParseXmlTagAttribute(const std::string& tag, const std::string& attributeName)
{
  /*TODO deal with unescaping \"*/
  const std::string tagOpen = attributeName + "=\"";
  std::string::size_type contentsStart = tag.find(tagOpen);
  if (contentsStart == std::string::npos) return "";
  contentsStart += tagOpen.size();

  // Search from contentsStart itself: starting one character later would
  // skip the closing quote of an empty value (attr="") and swallow the
  // text of the following attribute.
  std::string::size_type contentsEnd = tag.find('"', contentsStart);
  if (contentsEnd == std::string::npos) {
    std::cerr << "Malformed XML attribute: "<< tag;
    return "";
  }

  // A quote preceded by a backslash is escaped; keep looking for the real
  // closing quote. contentsEnd - 1 is always a valid index because tagOpen
  // has at least two characters.
  std::string::size_type possibleEnd;
  while (tag.at(contentsEnd-1) == '\\'
         && (possibleEnd = tag.find('"', contentsEnd+1)) != std::string::npos) {
    contentsEnd = possibleEnd;
  }
  return tag.substr(contentsStart, contentsEnd-contentsStart);
}
/**
 * Remove the enclosing "<" and ">" from an XML tag.
 *
 * Strings shorter than two characters, and strings that do not both start
 * with "<" and end with ">", are returned unchanged.
 *
 * \param str xml token to be stripped
 */
std::string TrimXml(const std::string& str)
{
  const std::string::size_type length = str.size();
  const bool isBracketed = length >= 2 && str[0] == '<' && str[length - 1] == '>';
  return isBracketed ? str.substr(1, length - 2) : str;
}
/**
 * Check if the token is an XML tag, i.e. starts with "<".
 * An empty token is not a tag.
 *
 * \param tag token to be checked
 */
bool isXmlTag(const std::string& tag)
{
  if (tag.empty()) {
    return false;
  }
  return tag[0] == '<';
}
/**
 * Split up the input character string into tokens made up of
 * either XML tags or text.
 * example: this <b> is a </b> test .
 * => (this ), (<b>), ( is a ), (</b>), ( test .)
 *
 * If a "<" has no matching ">", an error is written to cerr and only the
 * tokens completed before that point are returned (the text directly in
 * front of the broken tag is not included).
 *
 * \param str input string
 */
inline std::vector<std::string> TokenizeXml(const std::string& str)
{
  std::vector<std::string> tokens; // tokens to be returned
  std::string::size_type cursor = 0; // first character not yet consumed

  // walk through the string, one tag (plus the text before it) per pass
  while (cursor != str.size()) {
    const std::string::size_type tagOpen = str.find('<', cursor);
    if (tagOpen == std::string::npos) {
      // no more tags: the rest is one text token
      tokens.push_back(str.substr(cursor));
      break;
    }

    // sanity check: there has to be a closing ">"
    const std::string::size_type tagClose = str.find('>', tagOpen);
    if (tagClose == std::string::npos) {
      std::cerr << "ERROR: malformed XML: " << str << std::endl;
      return tokens;
    }

    // text in front of the tag, if there is any
    if (tagOpen > cursor) {
      tokens.push_back(str.substr(cursor, tagOpen - cursor));
    }
    // the tag itself, brackets included
    tokens.push_back(str.substr(tagOpen, tagClose - tagOpen + 1));
    cursor = tagClose + 1;
  }
  return tokens;
}
/**
* Process a sentence with xml annotation: every completed tag becomes a
* labelled node in the syntax tree, and the markup is removed from the line.
*
* \param line in: sentence with xml markup; out: sentence without the xml.
*        Left untouched when it contains no "<" or when an error is reported.
* \param tree receives one AddNode(startPos, endPos-1, label) call per completed tag
* \param labelCollection receives the "label" attribute of every completed tag
* \param topLabelCollection occurrence counts of the labels of the nodes
*        that span the whole sentence (word 0 to the last word)
* \return false, after printing a message to cerr, when the markup is
*         malformed; true otherwise
*/
/*TODO (inherited note, refers to translation options which this function does
not create): we'd only have to return a vector of XML options if we dropped linking. 2-d vector
is so we can link things up afterwards. We can't create TranslationOptions as we
parse because we don't have the completed source parsed until after this function
removes all the markup from it (CreateFromString in Sentence::Read).
*/
bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &labelCollection, map< string, int > &topLabelCollection ) {
//parse XML markup in translation line
// no xml tag? we're done.
if (line.find_first_of('<') == string::npos) { return true; }
// break up input into a vector of xml tags and text
// example: (this), (<b>), (is a), (</b>), (test .)
vector<string> xmlTokens = TokenizeXml(line);
// we need to store opened tags, until they are closed
// tags are stored as triples (tagname, startpos, contents)
typedef pair< string, pair< size_t, string > > OpenedTag;
vector< OpenedTag > tagStack; // stack that contains active opened tags
string cleanLine; // return string (text without xml)
size_t wordPos = 0; // position in sentence (in terms of number of words)
// NOTE(review): isLinked is never read in this function
bool isLinked = false;
// loop through the tokens
for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++)
{
// not a xml tag, but regular text (may contain many words)
if(!isXmlTag(xmlTokens[xmlTokenPos]))
{
// add a space at boundary, if necessary
if (cleanLine.size()>0 &&
cleanLine[cleanLine.size() - 1] != ' ' &&
xmlTokens[xmlTokenPos][0] != ' ')
{
cleanLine += " ";
}
cleanLine += xmlTokens[xmlTokenPos]; // add to output
// the whole cleaned line is re-tokenized here, so wordPos is the number
// of words seen so far, i.e. the position of the next word
wordPos = Tokenize(cleanLine).size(); // count all the words
}
// process xml tag
else
{
// *** get essential information about tag ***
// strip extra boundary spaces and "<" and ">"
string tag = Trim(TrimXml(xmlTokens[xmlTokenPos]));
// cerr << "XML TAG IS: " << tag << std::endl;
if (tag.size() == 0)
{
cerr << "ERROR: empty tag name: " << line << endl;
return false;
}
// check if unary (e.g., "<wall/>")
bool isUnary = ( tag[tag.size() - 1] == '/' );
// check if opening tag (e.g. "<a>", not "</a>")
bool isClosed = ( tag[0] == '/' );
bool isOpen = !isClosed;
if (isClosed && isUnary)
{
cerr << "ERROR: can't have both closed and unary tag <" << tag << ">: " << line << endl;
return false;
}
if (isClosed)
tag = tag.substr(1); // remove "/" at the beginning
if (isUnary)
tag = tag.substr(0,tag.size()-1); // remove "/" at the end
// find the tag name and contents: the name is everything up to the first
// space, the contents (attributes) are everything after it
string::size_type endOfName = tag.find_first_of(' ');
string tagName = tag;
string tagContent = "";
if (endOfName != string::npos) {
tagName = tag.substr(0,endOfName);
tagContent = tag.substr(endOfName+1);
}
// *** process new tag ***
// a unary tag is pushed here and popped again right below
if (isOpen || isUnary)
{
// put the tag on the tag stack
OpenedTag openedTag = make_pair( tagName, make_pair( wordPos, tagContent ) );
tagStack.push_back( openedTag );
// cerr << "XML TAG " << tagName << " (" << tagContent << ") added to stack, now size " << tagStack.size() << endl;
}
// *** process completed tag ***
if (isClosed || isUnary)
{
// pop last opened tag from stack;
if (tagStack.size() == 0)
{
cerr << "ERROR: tag " << tagName << " closed, but not opened" << ":" << line << endl;
return false;
}
OpenedTag openedTag = tagStack.back();
tagStack.pop_back();
// tag names have to match
if (openedTag.first != tagName)
{
cerr << "ERROR: tag " << openedTag.first << " closed by tag " << tagName << ": " << line << endl;
return false;
}
// assemble remaining information about tag; the attributes come from the
// opening tag (this tagContent shadows the one parsed above), and the span
// is [startPos, endPos) in words
size_t startPos = openedTag.second.first;
string tagContent = openedTag.second.second;
size_t endPos = wordPos;
// span attribute overwrites position; "i-j" is inclusive on both ends
string span = ParseXmlTagAttribute(tagContent,"span");
if (! span.empty())
{
vector<string> ij = Tokenize(span, "-");
if (ij.size() != 1 && ij.size() != 2) {
cerr << "ERROR: span attribute must be of the form \"i-j\" or \"i\": " << line << endl;
return false;
}
startPos = atoi(ij[0].c_str());
if (ij.size() == 1) endPos = startPos + 1;
else endPos = atoi(ij[1].c_str()) + 1;
}
// cerr << "XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl;
// a tag without a span attribute that encloses no words ends up here
if (startPos >= endPos)
{
cerr << "ERROR: tag " << tagName << " must span at least one word (" << startPos << "-" << endPos << "): " << line << endl;
return false;
}
string label = ParseXmlTagAttribute(tagContent,"label");
labelCollection.insert( label );
// report what we have processed so far (disabled debug output)
if (0) {
cerr << "XML TAG NAME IS: '" << tagName << "'" << endl;
cerr << "XML TAG LABEL IS: '" << label << "'" << endl;
cerr << "XML SPAN IS: " << startPos << "-" << (endPos-1) << endl;
}
// the tree stores inclusive end positions
tree.AddNode( startPos, endPos-1, label );
}
}
}
// we are done. check if there are tags that are still open
if (tagStack.size() > 0)
{
cerr << "ERROR: some opened tags were never closed: " << line << endl;
return false;
}
// collect top labels: labels of all nodes covering the whole sentence
// NOTE(review): wordPos-1 wraps around when the line contains no words -- confirm GetNodes copes with that
const SyntaxNodes &topNodes = tree.GetNodes( 0, wordPos-1 );
for( SyntaxNodes::const_iterator node = topNodes.begin(); node != topNodes.end(); node++ )
{
const SyntaxNode *n = *node;
const string &label = n->GetLabel();
if (topLabelCollection.find( label ) == topLabelCollection.end())
topLabelCollection[ label ] = 0;
topLabelCollection[ label ]++;
}
// return de-xml'ed sentence in line
line = cleanLine;
return true;
}

View File

@ -1,35 +0,0 @@
#pragma once
// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <string>
#include <vector>
#include <set>
#include <map>
#include "SyntaxTree.h"
// Value of attributeName="..." inside the text of a tag.
std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName);
// Removes the enclosing "<" and ">" of a tag; other strings are returned unchanged.
std::string TrimXml(const std::string& str);
// True if the token starts with "<".
bool isXmlTag(const std::string& tag);
// Splits a string into alternating text and "<...>" tag tokens.
// NOTE(review): declared inline here while no definition is given in this header -- confirm no other translation unit calls it.
inline std::vector<std::string> TokenizeXml(const std::string& str);
// Strips the XML markup from line and records the tags as nodes in tree;
// returns false when the markup is malformed.
bool ProcessAndStripXMLTags(std::string &line, SyntaxTree &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection );

View File

@ -1,310 +0,0 @@
// $Id: extract.cpp 2828 2010-02-01 16:07:58Z hieuhoang1972 $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cstdio>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <cstring>
#include <sstream>
#include <iostream>
#include "extract.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "Lattice.h"
#ifdef WIN32
// Include Visual Leak Detector
#include <vld.h>
#endif
using namespace std;
void writeGlueGrammar(const string &, Global &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection);
// Returns the value following option argv[i] and advances i onto it.
// Exits with an error instead of reading past the end of argv.
static const char *requireOptionValue(int argc, char* argv[], int &i)
{
  if (i + 1 >= argc) {
    cerr << "extract: syntax error, option '" << argv[i] << "' requires a value" << endl;
    exit(1);
  }
  return argv[++i];
}

// Reads the integer value of option argv[i] and exits with an error unless
// it is at least 1. nameInMessage is the option name shown in that error.
static int requireMinOne(int argc, char* argv[], int &i, const char *nameInMessage)
{
  const int value = atoi(requireOptionValue(argc, argv, i));
  if (value < 1) {
    cerr << "extract error: " << nameInMessage << " should be at least 1" << endl;
    exit(1);
  }
  return value;
}

// Entry point of the rule extractor.
//
// Positional arguments: target corpus, source corpus, alignment file and
// the name of the extract file to write; options follow from argv[5] on.
// For every sentence pair that can be created, the tunnels, the lattice
// and the rules are built, and the rules are written to the extract file
// and to its ".inv" counterpart (both get a ".gz" suffix with --GZOutput).
// Exits with status 1 on any command line error.
int main(int argc, char* argv[])
{
  cerr << "Extract v2.0, written by Philipp Koehn\n"
       << "rule extraction from an aligned parallel corpus\n";
  //time_t starttime = time(NULL);

  Global *global = new Global();
  g_global = global;
  int sentenceOffset = 0;

  if (argc < 5) {
    cerr << "syntax: extract-mixed-syntax corpus.target corpus.source corpus.align extract "
         << " [ --Hierarchical | --Orientation"
         << " | --GlueGrammar FILE | --UnknownWordLabel FILE"
         << " | --OnlyDirect"
         << " | --MinHoleSpanSourceDefault[" << global->minHoleSpanSourceDefault << "]"
         << " | --MaxHoleSpanSourceDefault[" << global->maxHoleSpanSourceDefault << "]"
         << " | --MinHoleSpanSourceSyntax[" << global->minHoleSpanSourceSyntax << "]"
         << " | --MaxHoleSpanSourceSyntax[" << global->maxHoleSpanSourceSyntax << "]"
         << " | --MaxSymbols[" << global->maxSymbols<< "]"
         << " | --MaxNonTerm[" << global->maxNonTerm << "]"
         << " | --SourceSyntax | --TargetSyntax"
         << " | --UppermostOnly[" << g_global->uppermostOnly << "]"
         << endl;
    exit(1);
  }

  const char *fileNameT = argv[1];
  const char *fileNameS = argv[2];
  const char *fileNameA = argv[3];
  string fileNameGlueGrammar;
  string fileNameUnknownWordLabel;
  string fileNameExtract = string(argv[4]);

  const int optionInd = 5;
  for (int i = optionInd; i < argc; i++) {
    const char *option = argv[i];

    if (strcmp(option,"--MinHoleSpanSourceDefault") == 0) {
      global->minHoleSpanSourceDefault = requireMinOne(argc, argv, i, "--minHoleSourceDefault");
    } else if (strcmp(option,"--MaxHoleSpanSourceDefault") == 0) {
      global->maxHoleSpanSourceDefault = requireMinOne(argc, argv, i, "--maxHoleSourceDefault");
    } else if (strcmp(option,"--MinHoleSpanSourceSyntax") == 0) {
      global->minHoleSpanSourceSyntax = requireMinOne(argc, argv, i, "--minHoleSourceSyntax");
    } else if (strcmp(option,"--UppermostOnly") == 0) {
      global->uppermostOnly = atoi(requireOptionValue(argc, argv, i));
    } else if (strcmp(option,"--MaxHoleSpanSourceSyntax") == 0) {
      global->maxHoleSpanSourceSyntax = requireMinOne(argc, argv, i, "--maxHoleSourceSyntax");
    }
    // maximum number of words in hierarchical phrase; the usage text
    // spells the option --MaxSymbols, the historical spelling is --maxSymbols
    else if (strcmp(option,"--MaxSymbols") == 0 || strcmp(option,"--maxSymbols") == 0) {
      global->maxSymbols = requireMinOne(argc, argv, i, "--maxSymbols");
    }
    // maximum number of non-terminals
    else if (strcmp(option,"--MaxNonTerm") == 0) {
      global->maxNonTerm = requireMinOne(argc, argv, i, "--MaxNonTerm");
    } else if (strcmp(option,"--TargetSyntax") == 0) {
      global->targetSyntax = true;
    } else if (strcmp(option,"--SourceSyntax") == 0) {
      global->sourceSyntax = true;
    }
    // do not create many part00xx files!
    else if (strcmp(option,"--NoFileLimit") == 0) {
      // now default
    } else if (strcmp(option,"--GlueGrammar") == 0) {
      global->glueGrammarFlag = true;
      if (++i >= argc) {
        cerr << "ERROR: Option --GlueGrammar requires a file name" << endl;
        exit(1); // a usage error must not look like success
      }
      fileNameGlueGrammar = string(argv[i]);
      cerr << "creating glue grammar in '" << fileNameGlueGrammar << "'" << endl;
    } else if (strcmp(option,"--UnknownWordLabel") == 0) {
      global->unknownWordLabelFlag = true;
      if (++i >= argc) {
        cerr << "ERROR: Option --UnknownWordLabel requires a file name" << endl;
        exit(1); // a usage error must not look like success
      }
      fileNameUnknownWordLabel = string(argv[i]);
      cerr << "creating unknown word labels in '" << fileNameUnknownWordLabel << "'" << endl;
    }
    // TODO: this should be a useful option
    //else if (strcmp(argv[i],"--ZipFiles") == 0) {
    //  zipFiles = true;
    //}
    else if (strcmp(option,"--Mixed") == 0) {
      global->mixed = true;
    } else if (strcmp(option,"--AllowDefaultNonTermEdge") == 0) {
      global->allowDefaultNonTermEdge = atoi(requireOptionValue(argc, argv, i));
    } else if (strcmp(option, "--GZOutput") == 0) {
      global->gzOutput = true;
    } else if (strcmp(option,"--MaxSpan") == 0) {
      // ignore the option together with its value
      ++i;
    } else if (strcmp(option,"--SentenceOffset") == 0) {
      if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
        cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
        exit(1);
      }
      sentenceOffset = atoi(argv[++i]);
    } else {
      cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
      exit(1);
    }
  }

  // open input files
  Moses::InputFileStream tFile(fileNameT);
  Moses::InputFileStream sFile(fileNameS);
  Moses::InputFileStream aFile(fileNameA);

  // open output files
  string fileNameExtractInv = fileNameExtract + ".inv";
  if (global->gzOutput) {
    fileNameExtract += ".gz";
    fileNameExtractInv += ".gz";
  }
  Moses::OutputFileStream extractFile;
  Moses::OutputFileStream extractFileInv;
  extractFile.Open(fileNameExtract.c_str());
  extractFileInv.Open(fileNameExtractInv.c_str());

  // loop through all sentence pairs; the target file decides when to stop
  int i = sentenceOffset;
  while (true) {
    i++;
    if (i % 1000 == 0) {
      cerr << i << " " << flush;
    }

    string targetString;
    string sourceString;
    string alignmentString;
    // test the stream directly: converting it to a bool variable is not
    // allowed since C++11
    if (!getline(tFile, targetString)) {
      break;
    }
    getline(sFile, sourceString);
    getline(aFile, alignmentString);
    //cerr << endl << targetString << endl << sourceString << endl << alignmentString << endl;

    SentenceAlignment sentencePair;
    if (sentencePair.Create( targetString, sourceString, alignmentString, i, *global )) {
      //cerr << sentence.sourceTree << endl;
      //cerr << sentence.targetTree << endl;
      sentencePair.FindTunnels(*g_global);
      //cerr << sentencePair << endl;
      sentencePair.CreateLattice(*g_global);
      //cerr << sentencePair << endl;
      sentencePair.CreateRules(*g_global);
      //cerr << sentence.lattice->GetRules().GetSize() << endl;
      sentencePair.GetLattice().GetRules().Output(extractFile);
      sentencePair.GetLattice().GetRules().OutputInv(extractFileInv);
    }
  }

  tFile.Close();
  sFile.Close();
  aFile.Close();
  extractFile.Close();
  extractFileInv.Close();

  if (global->glueGrammarFlag) {
    writeGlueGrammar(fileNameGlueGrammar, *global, targetLabelCollection, targetTopLabelCollection);
  }

  delete global;
  return 0;
}
// Writes the glue grammar to fileName.
//
// Without target syntax, three fixed rules over [X] and [S] are written.
// With target syntax, a top label is chosen first: the shortest prefix of
// "QQQQQQ" that is not in targetLabelCollection (or "QQQQQQ" itself when
// all prefixes are taken). Then the sentence start/end rules, one top rule
// per entry of targetTopLabelCollection, one glue rule per entry of
// targetLabelCollection and a final glue rule for [X] are written.
void writeGlueGrammar( const string & fileName, Global &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection )
{
  ofstream grammarFile;
  grammarFile.open(fileName.c_str());

  if (!options.targetSyntax) {
    grammarFile << "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" << endl
                << "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" << endl
                << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl;
    grammarFile.close();
    return;
  }

  // choose a top label that is not already a label
  string topLabel = "QQQQQQ";
  for (unsigned int prefixLen = 1; prefixLen <= topLabel.length(); ++prefixLen) {
    const string candidate = topLabel.substr(0, prefixLen);
    if (targetLabelCollection.count(candidate) == 0) {
      topLabel = candidate;
      break;
    }
  }
  const string top = "[" + topLabel + "]";        // e.g. [Q]
  const string xTop = "[X][" + topLabel + "]";    // e.g. [X][Q]

  // basic rules
  grammarFile << "<s> [X] ||| <s> " << top << " ||| 1 ||| " << endl
              << xTop << " </s> [X] ||| " << xTop << " </s> " << top << " ||| 1 ||| 0-0 " << endl;

  // top rules
  map<string,int>::const_iterator topIter;
  for (topIter = targetTopLabelCollection.begin(); topIter != targetTopLabelCollection.end(); ++topIter) {
    const string &label = topIter->first;
    grammarFile << "<s> [X][" << label << "] </s> [X] ||| <s> [X][" << label << "] </s> " << top << " ||| 1 ||| 1-1" << endl;
  }

  // glue rules
  set<string>::const_iterator labelIter;
  for (labelIter = targetLabelCollection.begin(); labelIter != targetLabelCollection.end(); ++labelIter) {
    grammarFile << xTop << " [X][" << *labelIter << "] [X] ||| " << xTop << " [X][" << *labelIter << "] " << top << " ||| 2.718 ||| 0-0 1-1" << endl;
  }

  // glue rule for unknown word...
  grammarFile << xTop << " [X][X] [X] ||| " << xTop << " [X][X] " << top << " ||| 2.718 ||| 0-0 1-1 " << endl;

  grammarFile.close();
}

View File

@ -1,34 +0,0 @@
#pragma once
#include <vector>
#include <list>
#include <map>
#include <set>
#include <string>
#include <fstream>
#include <algorithm>
#include "SyntaxTree.h"
#include "XmlTree.h"
#include "Tunnel.h"
#include "TunnelCollection.h"
#include "SentenceAlignment.h"
#include "Global.h"
std::vector<std::string> tokenize( const char [] );
// Reads one _DELIM-terminated line from stream _IS into the character
// buffer _LINE, which holds _SIZE characters.
// - A fail state that is neither bad nor eof is cleared so that reading can
//   continue.
// - If exactly _SIZE-1 characters were extracted, the line is taken to have
//   filled the buffer: a message is printed to cerr and the program exits
//   with status 1.
// The macro expands to a brace-enclosed block, so it cannot be used as an
// expression. cerr, endl and exit must be visible unqualified where it is
// expanded.
#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
_IS.getline(_LINE, _SIZE, _DELIM); \
if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear(); \
if (_IS.gcount() == _SIZE-1) { \
cerr << "Line too long! Buffer overflow. Delete lines >=" \
<< _SIZE << " chars or raise LINE_MAX_LENGTH in phrase-extract/extract.cpp" \
<< endl; \
exit(1); \
} \
}
// Buffer size named in the SAFE_GETLINE error message.
#define LINE_MAX_LENGTH 1000000
// Program-wide settings; main() points this at the Global object it creates.
// NOTE(review): this and the collections below are definitions, not extern
// declarations -- confirm this header is included by one translation unit only.
const Global *g_global;
// Labels seen on the target/source side; the target set is passed to writeGlueGrammar().
std::set< std::string > targetLabelCollection, sourceLabelCollection;
// Label -> count maps for top labels; the target map is passed to writeGlueGrammar().
std::map< std::string, int > targetTopLabelCollection, sourceTopLabelCollection;

View File

@ -0,0 +1,27 @@
#!/usr/bin/perl
# Filter a "|||"-separated table read from STDIN by source-side length.
#
# Usage: <this script> MAX_NUM_WORDS < table > filtered-table
#
# A line is copied to STDOUT when the number of space-separated tokens that
# precede its first "|||" token is <= MAX_NUM_WORDS. Lines are written back
# unchanged (newline-terminated).
use strict;
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");
# Maximum number of source-side tokens a line may have and still be printed.
my $maxNumWords = $ARGV[0];
while (my $line = <STDIN>) {
    chomp($line);
    my @toks = split(/ /,$line);
    # Count the tokens in front of the first "|||".
    # The scan is bounded by the token count: a line that contains no "|||"
    # at all used to spin forever (the token became undef, which never equals
    # "|||"). Such a line is now treated as consisting of source tokens only.
    my $numSourceWords = 0;
    while ($numSourceWords < scalar(@toks) && $toks[$numSourceWords] ne "|||") {
        ++$numSourceWords;
    }
    if ($numSourceWords <= $maxNumWords) {
        print "$line\n";
    }
}

View File

@ -0,0 +1,33 @@
#! /usr/bin/perl -w
# Count how many lines of two files are equal at the same line position,
# ignoring leading and trailing whitespace, and print that count to STDOUT.
#
# Usage: <this script> FILE1 FILE2
#
# The comparison runs over the lines of FILE1; when FILE2 is shorter, its
# missing lines are compared as empty strings.
use strict;
sub trim($);
my $file1 = $ARGV[0];
my $file2 = $ARGV[1];
die "Usage: $0 file1 file2\n" unless defined($file1) && defined($file2);
# Fail loudly instead of silently reporting 0 when a file cannot be read.
open (FILE1, "<", $file1) or die "Cannot open $file1: $!\n";
open (FILE2, "<", $file2) or die "Cannot open $file2: $!\n";
my $countEqual = 0;
while (my $line1 = <FILE1>) {
    my $line2 = <FILE2>;
    # FILE2 ran out of lines: compare against the empty string rather than
    # handing undef to trim() (which only produced warnings before).
    $line2 = "" unless defined($line2);
    if (trim($line1) eq trim($line2)) {
        ++$countEqual;
    }
}
close(FILE1);
close(FILE2);
print $countEqual ."\n";
######################
# Return a copy of the argument with whitespace removed from both ends.
sub trim($) {
    my $string = shift;
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}

View File

@ -0,0 +1,29 @@
#! /usr/bin/perl -w
# Copy selected lines from STDIN to STDOUT.
#
# Usage: <this script> LINE_NUMBER_FILE < input > selected-lines
#
# LINE_NUMBER_FILE holds one 1-based line number per line. The input is read
# in a single forward pass and the next wanted number is fetched only after
# the previous one matched, so the numbers must be in increasing order.
use strict;
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");
my $fileLineNum = $ARGV[0];
die "Usage: $0 line-number-file < input > selected-lines\n" unless defined($fileLineNum);
# Abort when the list cannot be read; previously this silently printed nothing.
open (FILE_LINE_NUM, "<", $fileLineNum) or die "Cannot open $fileLineNum: $!\n";
my $nextLineNum = <FILE_LINE_NUM>;
my $lineNum = 1;
while (my $line = <STDIN>) {
    # $nextLineNum becomes undef once the list is exhausted.
    if (defined($nextLineNum) && $lineNum == $nextLineNum) {
        # matches. output line
        chomp($line);
        print "$line\n";
        # next line number
        $nextLineNum = <FILE_LINE_NUM>;
    }
    ++$lineNum;
}
close(FILE_LINE_NUM);

View File

@ -0,0 +1,108 @@
#! /usr/bin/perl -w
# For every sentence triple read in parallel from the files "source",
# "target" and "alignment" in the current directory:
#   1. write the triple into its own work directory,
#   2. run the extract / score / consolidate commands on it, producing
#      $tmpDir/pt,
#   3. run the decoder on that source sentence with TranslationModel0 pointed
#      at $tmpDir/pt and a ConstrainedDecoding feature pointed at the target.
#
# Arguments: iniPath isHiero decoderExec extractExec tmpName
#   iniPath      passed to the decoder with -f
#   isHiero      1 selects the hierarchical command variants
#   decoderExec  decoder binary name, looked up under $MOSES_DIR/bin
#   extractExec  extractor binary name, looked up under $MOSES_DIR/bin
#   tmpName      sub-directory of the working directory for the work dirs
use strict;
my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];
my $WORK_DIR = `pwd`;
chomp($WORK_DIR);
# NOTE(review): hard-coded, user-specific installation path. The "~" is only
# expanded because every use goes through a shell command.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu";
$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";
# Use gsplit / gsort when `<tool> --help` prints anything, else split / sort.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
$SPLIT_EXEC = 'gsplit';
}
else {
$SPLIT_EXEC = 'split';
}
my $SORT_EXEC = `gsort --help 2>/dev/null`;
if($SORT_EXEC) {
$SORT_EXEC = 'gsort';
}
else {
$SORT_EXEC = 'sort';
}
my $hieroFlag = "";
if ($isHiero == 1) {
$hieroFlag = "--Hierarchical";
}
print STDERR "WORK_DIR=$WORK_DIR \n";
my $cmd;
# NOTE(review): none of the open() calls in this script check for failure.
open (SOURCE, "source");
open (TARGET, "target");
open (ALIGNMENT, "alignment");
my $lineNum = 0;
my ($source, $target, $alignment);
# The loop is driven by the source file; target and alignment are read in
# lock-step and are assumed to have at least as many lines.
while ($source = <SOURCE>) {
chomp($source);
$target = <TARGET>; chomp($target);
$alignment = <ALIGNMENT>; chomp($alignment);
#print STDERR "$source ||| $target ||| $alignment \n";
# write out 1 line
my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
`mkdir -p $tmpDir`;
open (SOURCE1, ">$tmpDir/source");
open (TARGET1, ">$tmpDir/target");
open (ALIGNMENT1, ">$tmpDir/alignment");
print SOURCE1 "$source\n";
print TARGET1 "$target\n";
print ALIGNMENT1 "$alignment\n";
close (SOURCE1);
close (TARGET1);
close (ALIGNMENT1);
# train
if ($isHiero == 1) {
$cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract --GZOutput";
}
else {
# pb
$cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract 7 --GZOutput";
}
# The extract command line built above is handed to extract-parallel.perl.
$cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
print STDERR "Executing: $cmd\n";
`$cmd`;
# Score both directions, then consolidate the two halves into $tmpDir/pt.
# The output and exit status of these commands are not inspected.
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
`$cmd`;
# decode
$cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source -feature-add \"ConstrainedDecoding path=$tmpDir/target\"";
print STDERR "Executing: $cmd\n";
`$cmd`;
# Cleanup is disabled, so the per-line work directories are left on disk.
# `rm -rf $tmpDir`;
++$lineNum;
}
close(SOURCE);
close(TARGET);
close(ALIGNMENT);

View File

@ -0,0 +1,151 @@
#! /usr/bin/perl -w
# Leave-one-out experiment over the files "source", "target" and "alignment"
# in the current directory. For every 0-based line index in
# [startLine, endLine):
#   1. write that sentence triple to *.1 files in its own work directory,
#   2. write copies of the three files with that one line removed
#      (*.corpus) and run extract / score / consolidate on them, producing
#      $tmpDir/pt,
#   3. decode the held-out source sentence with TranslationModel0 pointed at
#      $tmpDir/pt, writing a distinct n-best list of size 10000,
#   4. append the `wc -l` of the n-best list to the file "out" and delete the
#      work directory.
#
# Arguments: iniPath isHiero decoderExec extractExec tmpName startLine endLine
use strict;
# Forward declarations; both subs are defined at the end of the file.
sub Write1Line;
sub WriteCorpus1Holdout;
my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];
my $startLine = $ARGV[5];
my $endLine = $ARGV[6];
print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";
my $WORK_DIR = `pwd`;
chomp($WORK_DIR);
# NOTE(review): hard-coded, user-specific installation path. The "~" is only
# expanded because every use goes through a shell command.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";
$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";
# Use gsplit / gsort when `<tool> --help` prints anything, else split / sort.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
$SPLIT_EXEC = 'gsplit';
}
else {
$SPLIT_EXEC = 'split';
}
my $SORT_EXEC = `gsort --help 2>/dev/null`;
if($SORT_EXEC) {
$SORT_EXEC = 'gsort';
}
else {
$SORT_EXEC = 'sort';
}
my $hieroFlag = "";
if ($isHiero == 1) {
$hieroFlag = "--Hierarchical";
}
print STDERR "WORK_DIR=$WORK_DIR \n";
my $cmd;
# NOTE(review): none of these open() calls check for failure.
open (SOURCE, "source");
open (TARGET, "target");
open (ALIGNMENT, "alignment");
# The number of iterations comes from the line count of "source"; target and
# alignment are assumed to have at least as many lines.
my $numLines = `cat source | wc -l`;
for (my $lineNum = 0; $lineNum < $numLines; ++$lineNum) {
# All three files are read on every iteration, also for skipped lines, so the
# handles stay in step with $lineNum.
my $source = <SOURCE>; chomp($source);
my $target = <TARGET>; chomp($target);
my $alignment = <ALIGNMENT>; chomp($alignment);
if ($lineNum < $startLine || $lineNum >= $endLine) {
next;
}
#print STDERR "$source ||| $target ||| $alignment \n";
# write out 1 line
my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
`mkdir -p $tmpDir`;
Write1Line($source, $tmpDir, "source.1");
Write1Line($target, $tmpDir, "target.1");
Write1Line($alignment, $tmpDir, "alignment.1");
# Each call re-reads the whole input file and drops line $lineNum.
WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");
# train
if ($isHiero == 1) {
$cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
}
else {
# pb
$cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
}
# The extract command line built above is handed to extract-parallel.perl.
$cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
print STDERR "Executing: $cmd\n";
`$cmd`;
# Score both directions, then consolidate the two halves into $tmpDir/pt.
# The output and exit status of these commands are not inspected.
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
`$cmd`;
# decode
$cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -n-best-list $tmpDir/nbest 10000 distinct -v 2";
print STDERR "Executing: $cmd\n";
`$cmd`;
# count the number of translation in nbest list
# (appended to the file "out" in the current directory)
$cmd = "wc -l $tmpDir/nbest >> out";
`$cmd`;
`rm -rf $tmpDir`;
}
close(SOURCE);
close(TARGET);
close(ALIGNMENT);
######################
# Write $line followed by a newline to the file $tmpDir/$fileName, replacing
# any previous content. Dies when the file cannot be created or closed, so the
# commands that consume it never run on a missing or truncated file.
sub Write1Line
{
    my ($line, $tmpDir, $fileName) = @_;
    my $path = "$tmpDir/$fileName";
    # Lexical handle + 3-argument open: no global bareword handle, and the
    # path is never interpreted as an open mode.
    open (my $handle, ">", $path) or die "Cannot write $path: $!\n";
    print $handle "$line\n";
    close ($handle) or die "Cannot close $path: $!\n";
}
# Copy the file $inFilePath to $tmpDir/$outFileName, leaving out the single
# line whose 0-based index equals $holdoutLineNum. Dies when either file
# cannot be opened or the output cannot be closed.
sub WriteCorpus1Holdout
{
    my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;
    my $outFilePath = "$tmpDir/$outFileName";
    open (my $inFile, "<", $inFilePath) or die "Cannot read $inFilePath: $!\n";
    open (my $outFile, ">", $outFilePath) or die "Cannot write $outFilePath: $!\n";
    my $lineNum = 0;
    while (my $line = <$inFile>) {
        chomp($line);
        # Every line except the held-out one is written back, newline-terminated.
        if ($lineNum != $holdoutLineNum) {
            print $outFile "$line\n";
        }
        ++$lineNum;
    }
    close ($outFile) or die "Cannot close $outFilePath: $!\n";
    close ($inFile);
}

View File

@ -0,0 +1,147 @@
#! /usr/bin/perl -w
# Leave-one-out constrained decoding over the files "source", "target" and
# "alignment" in the current directory. For every 0-based line index in
# [startLine, endLine):
#   1. write that sentence triple to *.1 files in its own work directory,
#   2. write copies of the three files with that one line removed
#      (*.corpus) and run extract / score / consolidate on them, producing
#      $tmpDir/pt,
#   3. decode the held-out source sentence with TranslationModel0 pointed at
#      $tmpDir/pt and a ConstrainedDecoding feature pointed at the held-out
#      target sentence,
#   4. delete the work directory.
#
# Arguments: iniPath isHiero decoderExec extractExec tmpName startLine endLine
use strict;
# Forward declarations; both subs are defined at the end of the file.
sub Write1Line;
sub WriteCorpus1Holdout;
my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];
my $startLine = $ARGV[5];
my $endLine = $ARGV[6];
print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";
my $WORK_DIR = `pwd`;
chomp($WORK_DIR);
# NOTE(review): hard-coded, user-specific installation path. The "~" is only
# expanded because every use goes through a shell command.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";
$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";
# Use gsplit / gsort when `<tool> --help` prints anything, else split / sort.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
$SPLIT_EXEC = 'gsplit';
}
else {
$SPLIT_EXEC = 'split';
}
my $SORT_EXEC = `gsort --help 2>/dev/null`;
if($SORT_EXEC) {
$SORT_EXEC = 'gsort';
}
else {
$SORT_EXEC = 'sort';
}
my $hieroFlag = "";
if ($isHiero == 1) {
$hieroFlag = "--Hierarchical";
}
print STDERR "WORK_DIR=$WORK_DIR \n";
my $cmd;
# NOTE(review): none of these open() calls check for failure.
open (SOURCE, "source");
open (TARGET, "target");
open (ALIGNMENT, "alignment");
# The number of iterations comes from the line count of "source"; target and
# alignment are assumed to have at least as many lines.
my $numLines = `cat source | wc -l`;
for (my $lineNum = 0; $lineNum < $numLines; ++$lineNum) {
# All three files are read on every iteration, also for skipped lines, so the
# handles stay in step with $lineNum.
my $source = <SOURCE>; chomp($source);
my $target = <TARGET>; chomp($target);
my $alignment = <ALIGNMENT>; chomp($alignment);
if ($lineNum < $startLine || $lineNum >= $endLine) {
next;
}
#print STDERR "$source ||| $target ||| $alignment \n";
# write out 1 line
my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
`mkdir -p $tmpDir`;
Write1Line($source, $tmpDir, "source.1");
Write1Line($target, $tmpDir, "target.1");
Write1Line($alignment, $tmpDir, "alignment.1");
# Each call re-reads the whole input file and drops line $lineNum.
WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");
# train
if ($isHiero == 1) {
$cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
}
else {
# pb
$cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
}
# The extract command line built above is handed to extract-parallel.perl.
$cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
print STDERR "Executing: $cmd\n";
`$cmd`;
# Score both directions, then consolidate the two halves into $tmpDir/pt.
# The output and exit status of these commands are not inspected.
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
`$cmd`;
# decode
$cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -feature-add \"ConstrainedDecoding path=$tmpDir/target.1\" -v 2";
print STDERR "Executing: $cmd\n";
`$cmd`;
`rm -rf $tmpDir`;
}
close(SOURCE);
close(TARGET);
close(ALIGNMENT);
######################
# Write $line followed by a newline to the file $tmpDir/$fileName, replacing
# any previous content. Dies when the file cannot be created or closed, so the
# commands that consume it never run on a missing or truncated file.
sub Write1Line
{
    my ($line, $tmpDir, $fileName) = @_;
    my $path = "$tmpDir/$fileName";
    # Lexical handle + 3-argument open: no global bareword handle, and the
    # path is never interpreted as an open mode.
    open (my $handle, ">", $path) or die "Cannot write $path: $!\n";
    print $handle "$line\n";
    close ($handle) or die "Cannot close $path: $!\n";
}
# Copy the file $inFilePath to $tmpDir/$outFileName, leaving out the single
# line whose 0-based index equals $holdoutLineNum. Dies when either file
# cannot be opened or the output cannot be closed.
sub WriteCorpus1Holdout
{
    my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;
    my $outFilePath = "$tmpDir/$outFileName";
    open (my $inFile, "<", $inFilePath) or die "Cannot read $inFilePath: $!\n";
    open (my $outFile, ">", $outFilePath) or die "Cannot write $outFilePath: $!\n";
    my $lineNum = 0;
    while (my $line = <$inFile>) {
        chomp($line);
        # Every line except the held-out one is written back, newline-terminated.
        if ($lineNum != $holdoutLineNum) {
            print $outFile "$line\n";
        }
        ++$lineNum;
    }
    close ($outFile) or die "Cannot close $outFilePath: $!\n";
    close ($inFile);
}

View File

@ -0,0 +1,17 @@
#! /usr/bin/perl -w
# Launch ../../scripts/reachable.perl in the background once per block of
# $SPLIT_LINES lines of the file "source" in the current directory. Each job
# gets the half-open line range [startLine, endLine) and writes its stdout and
# stderr to out.reachable.<startLine>.
#
# Arguments: iniPath (forwarded to reachable.perl as its first argument)
use strict;
my $iniPath = $ARGV[0];
# Number of lines handled by one background job.
my $SPLIT_LINES = 200;
my $lineCount = `cat source | wc -l`;
print STDERR "lineCount=$lineCount \n";
for (my $startLine = 0; $startLine < $lineCount; $startLine += $SPLIT_LINES) {
    my $endLine = $startLine + $SPLIT_LINES;
    # system() with a single string runs the command through /bin/sh. The bash
    # shorthand "&>file" is not POSIX: a plain sh reads "cmd &>file &" as
    # "run cmd in the background, then truncate file", so nothing was logged.
    # ">file 2>&1" captures both streams in every shell.
    my $cmd = "../../scripts/reachable.perl $iniPath 1 moses_chart extract-rules tmp-reachable $startLine $endLine >out.reachable.$startLine 2>&1 &";
    print STDERR "Executing: $cmd \n";
    system($cmd);
}

View File

@ -0,0 +1,69 @@
/**
* pugixml parser - version 1.2
* --------------------------------------------------------
* Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at http://pugixml.org/
*
* This library is distributed under the MIT License. See notice at the end
* of this file.
*
* This work is based on the pugxml parser, which is:
* Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
*/
#ifndef HEADER_PUGICONFIG_HPP
#define HEADER_PUGICONFIG_HPP
// Uncomment this to enable wchar_t mode
// #define PUGIXML_WCHAR_MODE
// Uncomment this to disable XPath
// #define PUGIXML_NO_XPATH
// Uncomment this to disable STL
// #define PUGIXML_NO_STL
// Uncomment this to disable exceptions
// #define PUGIXML_NO_EXCEPTIONS
// Set this to control attributes for public classes/functions, i.e.:
// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
// Uncomment this to switch to header-only version
// #define PUGIXML_HEADER_ONLY
// #include "pugixml.cpp"
// Tune these constants to adjust memory-related behavior
// #define PUGIXML_MEMORY_PAGE_SIZE 32768
// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
#endif
/**
* Copyright (c) 2006-2012 Arseny Kapoulkine
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,110 +0,0 @@
// $Id: tables-core.cpp 3131 2010-04-13 16:29:55Z pjwilliams $
//#include "beammain.h"
//#include "SafeGetLine.h"
#include "tables-core.h"
#define TABLE_LINE_MAX_LENGTH 1000
#define UNKNOWNSTR "UNK"
// as in beamdecoder/tables.cpp
// Split a NUL-terminated string into words separated by runs of spaces and/or
// tabs. Leading and trailing separators are ignored, so no empty word is ever
// produced; an empty or all-separator input yields an empty vector.
std::vector<std::string> tokenize( const char* input ) {
  std::vector<std::string> words;
  const char *cursor = input;
  while (*cursor != '\0') {
    // skip the separators in front of the next word
    while (*cursor == ' ' || *cursor == '\t')
      ++cursor;
    if (*cursor == '\0')
      break;
    // consume the word up to the next separator or the end of the input
    const char *wordBegin = cursor;
    while (*cursor != '\0' && *cursor != ' ' && *cursor != '\t')
      ++cursor;
    words.push_back( std::string( wordBegin, cursor - wordBegin ) );
  }
  return words;
}
// Return the id of `word`, registering it first when it is not yet known.
// A new word receives the current size of `vocab` as its id and is appended
// to `vocab`, so ids are the indices into `vocab`.
WORD_ID Vocabulary::storeIfNew( const WORD& word ) {
  map<WORD, WORD_ID>::iterator known = lookup.find( word );
  if( known == lookup.end() ) {
    // unseen word: the next free index becomes its id
    WORD_ID freshId = vocab.size();
    vocab.push_back( word );
    lookup[ word ] = freshId;
    return freshId;
  }
  return known->second;
}
// Look up the id of `word` without modifying the vocabulary.
// Returns 0 when the word is unknown. Note that 0 is also the id
// storeIfNew() hands to the first word stored in an empty vocabulary, so the
// return value alone cannot distinguish the two cases.
WORD_ID Vocabulary::getWordID( const WORD& word ) {
  map<WORD, WORD_ID>::iterator known = lookup.find( word );
  if( known != lookup.end() )
    return known->second;
  return 0;
}
// Return the id of `phrase`, registering it first when it is not yet known.
// A new phrase receives the current size of `phraseTable` as its id and is
// appended to `phraseTable`, so ids are the indices into `phraseTable`.
PHRASE_ID PhraseTable::storeIfNew( const PHRASE& phrase ) {
  map< PHRASE, PHRASE_ID >::iterator known = lookup.find( phrase );
  if( known == lookup.end() ) {
    // unseen phrase: the next free index becomes its id
    PHRASE_ID freshId = phraseTable.size();
    phraseTable.push_back( phrase );
    lookup[ phrase ] = freshId;
    return freshId;
  }
  return known->second;
}
// Look up the id of `phrase` without modifying the table.
// Returns 0 when the phrase is unknown. Note that 0 is also the id
// storeIfNew() hands to the first phrase stored in an empty table, so the
// return value alone cannot distinguish the two cases.
PHRASE_ID PhraseTable::getPhraseID( const PHRASE& phrase ) {
  map< PHRASE, PHRASE_ID >::iterator known = lookup.find( phrase );
  if( known != lookup.end() )
    return known->second;
  return 0;
}
// Forget every stored phrase: empties both the id-indexed phrase list and the
// phrase -> id map.
void PhraseTable::clear() {
  phraseTable.clear();
  lookup.clear();
}
// Fill the table with the default cost -|d| for the distortions
// d = -10 .. 9 (the upper bound 10 itself is not included).
void DTable::init() {
  int distortion = -10;
  while (distortion < 10) {
    dtable[distortion] = -abs( distortion );
    ++distortion;
  }
}
/*
void DTable::load( const string& fileName ) {
ifstream inFile;
inFile.open(fileName.c_str());
istream *inFileP = &inFile;
char line[TABLE_LINE_MAX_LENGTH];
int i=0;
while(true) {
i++;
SAFE_GETLINE((*inFileP), line, TABLE_LINE_MAX_LENGTH, '\n', __FILE__);
if (inFileP->eof()) break;
vector<string> token = tokenize( line );
if (token.size() < 2) {
cerr << "line " << i << " in " << fileName << " too short, skipping\n";
continue;
}
int d = atoi( token[0].c_str() );
double prob = log( atof( token[1].c_str() ) );
dtable[ d ] = prob;
}
}
*/
// Return the stored value for `distortion`, or log(0.00001) as a floor when
// the table has no entry for it. The table itself is not modified.
double DTable::get( int distortion ) {
  map< int, double >::iterator entry = dtable.find( distortion );
  if (entry != dtable.end())
    return entry->second;
  return log( 0.00001 );
}

View File

@ -1,72 +0,0 @@
#pragma once
// $Id: tables-core.h 2416 2009-07-30 11:07:38Z hieuhoang1972 $
#include <iostream>
#include <fstream>
#include <assert.h>
#include <stdlib.h>
#include <string>
#include <queue>
#include <map>
#include <cmath>
using namespace std;
#define TABLE_LINE_MAX_LENGTH 1000
#define UNKNOWNSTR "UNK"
vector<string> tokenize( const char[] );
//! Delete every pointer held by a collection (map values excluded: the
//! element itself must be a deletable pointer, as in vector, set or list)
//! and then empty the collection.
template<class COLL>
void RemoveAllInColl(COLL &coll)
{
  typename COLL::const_iterator cursor = coll.begin();
  while (cursor != coll.end()) {
    delete *cursor;
    ++cursor;
  }
  coll.clear();
}
typedef string WORD;
typedef unsigned int WORD_ID;
//! Word <-> id table. `vocab` holds the words indexed by id and `lookup`
//! maps each word back to its id.
class Vocabulary {
public:
  map<WORD, WORD_ID> lookup;  //!< word -> id
  vector< WORD > vocab;       //!< id -> word
  //! Return the id of the word, adding it when unseen.
  WORD_ID storeIfNew( const WORD& );
  //! Return the id of the word, or 0 when it is unknown.
  WORD_ID getWordID( const WORD& );
  //! Return a mutable reference to the word with the given id. The id is not
  //! range-checked. The method is const yet hands out a non-const reference;
  //! const_cast makes that const-stripping explicit (it was hidden in a
  //! C-style cast) while keeping the signature callers rely on.
  inline WORD &getWord( WORD_ID id ) const { return const_cast<WORD&>( vocab[ id ] ); }
};
// A phrase is a sequence of word ids; phrases are themselves numbered.
typedef vector< WORD_ID > PHRASE;
typedef unsigned int PHRASE_ID;
//! Phrase <-> id table. `phraseTable` holds the phrases indexed by id and
//! `lookup` maps each phrase back to its id.
class PhraseTable {
public:
// phrase -> id
map< PHRASE, PHRASE_ID > lookup;
// id -> phrase
vector< PHRASE > phraseTable;
// Return the id of the phrase, adding it when unseen.
PHRASE_ID storeIfNew( const PHRASE& );
// Return the id of the phrase, or 0 when it is unknown.
PHRASE_ID getPhraseID( const PHRASE& );
// Remove all phrases from both containers.
void clear();
// Return a mutable reference to the phrase with the given id; the id is not
// range-checked.
inline PHRASE &getPhrase( const PHRASE_ID id ) { return phraseTable[ id ]; }
};
// List of (phrase id, value) pairs.
typedef vector< pair< PHRASE_ID, double > > PHRASEPROBVEC;
//! Plain container of two phrase-id-keyed maps; it declares no member
//! functions. Presumably a translation table keyed by source phrase id with
//! (target phrase id, score) entries -- TODO confirm against the users.
class TTable {
public:
// phrase id -> list of (phrase id, single double) pairs
map< PHRASE_ID, vector< pair< PHRASE_ID, double > > > ttable;
// phrase id -> list of (phrase id, vector of doubles) pairs
map< PHRASE_ID, vector< pair< PHRASE_ID, vector< double > > > > ttableMulti;
};
//! Table of one double value per integer distortion.
class DTable {
public:
// distortion -> value
map< int, double > dtable;
// Fill the table with default values.
void init();
// Load the table from the named file.
// NOTE(review): the definition in tables-core.cpp appears to be commented
// out, so calling this would fail at link time -- confirm.
void load( const string& );
// Return the value stored for a distortion, with a fallback for missing keys.
double get( int );
};

View File

@ -1,134 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.499747849" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.798364121" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-ordering}/Debug" id="cdt.managedbuild.builder.gnu.cross.1976289814" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1699460827" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1324749613" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1750299246" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.719498215" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1317297964" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.251118848" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.99297656" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.106920816" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1327002489" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1844372739" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1178164658" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.1434184833" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
</option>
<option id="gnu.cpp.link.option.paths.974811544" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.904916320" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1005231499" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1318928675" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.604255673" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.818331963">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.818331963" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.818331963" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.818331963." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1489025499" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1052477856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-ordering}/Release" id="cdt.managedbuild.builder.gnu.cross.33925527" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1505710417" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1884790737" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.197048136" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.106898878" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.157115446" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1920378037" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.37950410" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.683027595" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1197641703" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1356351201" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2053623412" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1988048517" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1494470963" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1553727957" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extract-ordering.cdt.managedbuild.target.gnu.cross.exe.1840421491" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.818331963;cdt.managedbuild.config.gnu.cross.exe.release.818331963.;cdt.managedbuild.tool.gnu.cross.c.compiler.1505710417;cdt.managedbuild.tool.gnu.c.compiler.input.106898878">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.818331963;cdt.managedbuild.config.gnu.cross.exe.release.818331963.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.157115446;cdt.managedbuild.tool.gnu.cpp.compiler.input.683027595">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127;cdt.managedbuild.config.gnu.cross.exe.debug.1624346127.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1317297964;cdt.managedbuild.tool.gnu.cpp.compiler.input.1327002489">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127;cdt.managedbuild.config.gnu.cross.exe.debug.1624346127.;cdt.managedbuild.tool.gnu.cross.c.compiler.1699460827;cdt.managedbuild.tool.gnu.c.compiler.input.719498215">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/extract-ordering"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/extract-ordering"/>
</configuration>
</storageModule>
</cproject>

View File

@ -1,74 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>extract-ordering</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>InputFileStream.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.cpp</locationURI>
</link>
<link>
<name>InputFileStream.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI>
</link>
<link>
<name>OutputFileStream.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.cpp</locationURI>
</link>
<link>
<name>OutputFileStream.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
<name>SentenceAlignment.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp</locationURI>
</link>
<link>
<name>SentenceAlignment.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h</locationURI>
</link>
<link>
<name>extract-ordering-main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ordering-main.cpp</locationURI>
</link>
<link>
<name>tables-core.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.cpp</locationURI>
</link>
<link>
<name>tables-core.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/tables-core.h</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -1,8 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.1909818145">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1909818145" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@ -14,45 +14,41 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.124769989" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.266544803" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-rules}/Debug" id="cdt.managedbuild.builder.gnu.cross.335858926" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1376077469" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.947547329" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.426953885" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<option id="gnu.c.compiler.option.include.paths.1671695899" name="Include paths (-I)" superClass="gnu.c.compiler.option.include.paths"/>
<option id="gnu.c.compiler.option.include.files.1838960067" name="Include files (-include)" superClass="gnu.c.compiler.option.include.files"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.985831394" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.53480540" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1726371873" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.899893408" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1099087456" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1909818145" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1909818145." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.702289239" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.769221744" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/extract-rules}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1538811811" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.417385938" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.274036343" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1227466042" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.640603457" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.231971122" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.88958138" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.61884195" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1616232021" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1411857637" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.109133121" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
</option>
<option id="gnu.cpp.link.option.paths.1030374421" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.212337827" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.831633145" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.1948518292" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1036034505" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.982611610" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.165444158" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.1351410350" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.272393234" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<option id="gnu.cpp.link.option.libs.1356683866" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="z"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1569179988" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1391783790" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2066621509" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1945638157" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1433595017" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.879628838" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -60,8 +56,8 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544" moduleId="org.eclipse.cdt.core.settings" name="Release">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.130284564">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.130284564" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@ -73,31 +69,31 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1113964425" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1722595316" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-rules}/Release" id="cdt.managedbuild.builder.gnu.cross.691589832" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.593530229" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1320426973" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.947026588" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1217031668" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.130284564" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.130284564." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.933956450" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1114636926" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/extract-rules}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.1972638661" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1382194499" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.605692631" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.1543139461" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.307019882" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.771498068" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1401773863" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1504181086" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1645775798" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1484987112" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1332689416" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1372281360" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.2028047264" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1645644335" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1807515346" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.44234391" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1468234013" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.586184465" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.1438048814" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.733316869" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.467923425" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1673313707" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.518252425" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.225998350" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1649512548" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -107,30 +103,23 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extract-rules.cdt.managedbuild.target.gnu.cross.exe.1916763759" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
<project id="extract-rules.cdt.managedbuild.target.gnu.exe.1608401758" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292;cdt.managedbuild.config.gnu.cross.exe.debug.1438215292.;cdt.managedbuild.tool.gnu.cross.c.compiler.1376077469;cdt.managedbuild.tool.gnu.c.compiler.input.985831394">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.130284564;cdt.managedbuild.config.gnu.exe.release.130284564.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.1332689416;cdt.managedbuild.tool.gnu.c.compiler.input.1645644335">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292;cdt.managedbuild.config.gnu.cross.exe.debug.1438215292.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.53480540;cdt.managedbuild.tool.gnu.cpp.compiler.input.88958138">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.130284564;cdt.managedbuild.config.gnu.exe.release.130284564.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.605692631;cdt.managedbuild.tool.gnu.cpp.compiler.input.771498068">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1200693544;cdt.managedbuild.config.gnu.cross.exe.release.1200693544.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1401773863;cdt.managedbuild.tool.gnu.cpp.compiler.input.1484987112">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1909818145;cdt.managedbuild.config.gnu.exe.debug.1909818145.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.212337827;cdt.managedbuild.tool.gnu.c.compiler.input.1036034505">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1200693544;cdt.managedbuild.config.gnu.cross.exe.release.1200693544.;cdt.managedbuild.tool.gnu.cross.c.compiler.593530229;cdt.managedbuild.tool.gnu.c.compiler.input.1217031668">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1909818145;cdt.managedbuild.config.gnu.exe.debug.1909818145.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.274036343;cdt.managedbuild.tool.gnu.cpp.compiler.input.61884195">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/extract-rules"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/extract-rules"/>
</configuration>
</storageModule>
<storageModule moduleId="refreshScope"/>
</cproject>

View File

@ -1 +0,0 @@
/Debug

View File

@ -25,26 +25,6 @@
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>ExtractedRule.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/ExtractedRule.h</locationURI>
</link>
<link>
<name>Hole.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/Hole.h</locationURI>
</link>
<link>
<name>HoleCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.cpp</locationURI>
</link>
<link>
<name>HoleCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.h</locationURI>
</link>
<link>
<name>InputFileStream.cpp</name>
<type>1</type>
@ -65,11 +45,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
<name>RuleExtractionOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/RuleExtractionOptions.h</locationURI>
</link>
<link>
<name>SentenceAlignment.cpp</name>
<type>1</type>
@ -111,14 +86,9 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
</link>
<link>
<name>extract-rules-main.cpp</name>
<name>extract-main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp</locationURI>
</link>
<link>
<name>gzfilebuf.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/gzfilebuf.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-main.cpp</locationURI>
</link>
<link>
<name>tables-core.cpp</name>

View File

@ -1,8 +1,8 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.2119725657">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@ -14,43 +14,42 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.671913278" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1231657738" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract}/Debug" id="cdt.managedbuild.builder.gnu.cross.571044108" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.332036857" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1292572253" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1873227592" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1165888615" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1342023600" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.698819695" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1451916947" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1702398011" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2119725657" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2119725657." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1708444053" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.645190133" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/extract}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.1816006533" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.876593881" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1859867372" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.1585316374" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.535775760" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.874182289" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.579278848" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1355287045" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1856691234" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1699542791" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.1880730637" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="z"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1202195555" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.1840757183" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.876682032" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.676382830" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.83617569" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.943560690" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.libs.599256050" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<listOptionValue builtIn="false" value="z"/>
</option>
<option id="gnu.cpp.link.option.paths.298225069" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<option id="gnu.cpp.link.option.paths.1223834298" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value=""/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1339210059" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1129315792" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.976825054" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1971927463" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.704926167" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.942430539" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1676263707" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -58,8 +57,8 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.140124152">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.140124152" moduleId="org.eclipse.cdt.core.settings" name="Release">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.1230189043">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1230189043" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
@ -71,31 +70,31 @@
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.140124152" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.140124152." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1250240843" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.597335968" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract}/Release" id="cdt.managedbuild.builder.gnu.cross.95066247" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2096762162" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.88795016" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.383328020" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.681105644" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1230189043" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1230189043." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.280378247" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1881910636" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/extract}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.872962284" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1342549060" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1229278587" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.509799885" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.682561415" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1043901368" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1806684544" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.553394848" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1420596769" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1726759263" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.1628542348" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1033362550" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.429156793" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.389761516" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.234409052" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.320346578" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2045242811" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1299282565" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.461289078" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1586085606" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.417132714" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1944597759" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.203400619" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.1190745343" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.221147938" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -105,21 +104,21 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extract.cdt.managedbuild.target.gnu.cross.exe.1220534104" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
<project id="extract.cdt.managedbuild.target.gnu.exe.1053550598" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.140124152;cdt.managedbuild.config.gnu.cross.exe.release.140124152.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1806684544;cdt.managedbuild.tool.gnu.cpp.compiler.input.1726759263">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1230189043;cdt.managedbuild.config.gnu.exe.release.1230189043.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.1628542348;cdt.managedbuild.tool.gnu.c.compiler.input.389761516">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.140124152;cdt.managedbuild.config.gnu.cross.exe.release.140124152.;cdt.managedbuild.tool.gnu.cross.c.compiler.2096762162;cdt.managedbuild.tool.gnu.c.compiler.input.681105644">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2119725657;cdt.managedbuild.config.gnu.exe.debug.2119725657.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.1202195555;cdt.managedbuild.tool.gnu.c.compiler.input.676382830">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.386290689;cdt.managedbuild.config.gnu.cross.exe.debug.386290689.;cdt.managedbuild.tool.gnu.cross.c.compiler.332036857;cdt.managedbuild.tool.gnu.c.compiler.input.1165888615">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1230189043;cdt.managedbuild.config.gnu.exe.release.1230189043.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1229278587;cdt.managedbuild.tool.gnu.cpp.compiler.input.1043901368">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.386290689;cdt.managedbuild.config.gnu.cross.exe.debug.386290689.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1342023600;cdt.managedbuild.tool.gnu.cpp.compiler.input.579278848">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2119725657;cdt.managedbuild.config.gnu.exe.debug.2119725657.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1859867372;cdt.managedbuild.tool.gnu.cpp.compiler.input.1355287045">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>

View File

@ -46,6 +46,7 @@
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="rt"/>
<listOptionValue builtIn="false" value="pthread"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.656319745" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
@ -136,4 +137,5 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
</cproject>

View File

@ -1,54 +1,54 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.2107801703">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2107801703" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1899954923" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1645930772" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/manual-label/Debug}" id="cdt.managedbuild.builder.gnu.cross.1703642277" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1938374607" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1888648788" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1838052643" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.798368516" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.950686503" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.153015988" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.418888584" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.406065865" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2107801703" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2107801703." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.502948364" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.1431969079" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/manual-label}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.2101075234" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1118840081" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2037265673" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.400985496" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1160903812" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.404589863" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="${workspace_loc:}/../.."/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.596589558" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.967940596" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1741441821" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1626431978" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.1886912770" superClass="gnu.cpp.link.option.libs" valueType="libs">
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.789243964" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2033266575" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.1568929819" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.676866714" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.254144861" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.319879082" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.132164474" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.1017214824" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
</option>
<option id="gnu.cpp.link.option.paths.1541583695" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1367999206" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1672776758" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.31522559" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.826957235" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.350181339" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1104732611" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.372096550" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -56,44 +56,44 @@
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815" moduleId="org.eclipse.cdt.core.settings" name="Release">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.649050588">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.649050588" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1335379815." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.97427761" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.564169339" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/manual-label/Release}" id="cdt.managedbuild.builder.gnu.cross.663164336" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2104943437" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.2135645103" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.764935013" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1841809129" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.649050588" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.649050588." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1107402972" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1038954684" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/manual-label}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.100518450" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2005888378" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1743303968" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.968169340" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.977676916" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1889240027" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1180544943" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1877584345" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.935490779" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1084298301" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.924128295" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1914416581" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.826081780" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2048171432" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.355530813" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.940299092" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.17718999" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.940327646" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.369758737" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1186766936" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1527322008" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.480337803" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1788533940" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.266174128" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.558116084" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
@ -103,22 +103,23 @@
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="manual-label.cdt.managedbuild.target.gnu.cross.exe.2117548180" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
<project id="manual-label.cdt.managedbuild.target.gnu.exe.1701243340" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1335379815;cdt.managedbuild.config.gnu.cross.exe.release.1335379815.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1180544943;cdt.managedbuild.tool.gnu.cpp.compiler.input.1084298301">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.649050588;cdt.managedbuild.config.gnu.exe.release.649050588.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1743303968;cdt.managedbuild.tool.gnu.cpp.compiler.input.1889240027">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639;cdt.managedbuild.config.gnu.cross.exe.debug.1096604639.;cdt.managedbuild.tool.gnu.cross.c.compiler.1938374607;cdt.managedbuild.tool.gnu.c.compiler.input.798368516">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.649050588;cdt.managedbuild.config.gnu.exe.release.649050588.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.924128295;cdt.managedbuild.tool.gnu.c.compiler.input.2048171432">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1335379815;cdt.managedbuild.config.gnu.cross.exe.release.1335379815.;cdt.managedbuild.tool.gnu.cross.c.compiler.2104943437;cdt.managedbuild.tool.gnu.c.compiler.input.1841809129">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2107801703;cdt.managedbuild.config.gnu.exe.debug.2107801703.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2037265673;cdt.managedbuild.tool.gnu.cpp.compiler.input.967940596">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1096604639;cdt.managedbuild.config.gnu.cross.exe.debug.1096604639.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.950686503;cdt.managedbuild.tool.gnu.cpp.compiler.input.596589558">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2107801703;cdt.managedbuild.config.gnu.exe.debug.2107801703.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.789243964;cdt.managedbuild.tool.gnu.c.compiler.input.676866714">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope"/>
</cproject>

View File

@ -1,30 +1,12 @@
#include <list>
#include "DeEn.h"
#include "Main.h"
#include "moses/Util.h"
using namespace std;
extern bool g_debug;
bool IsA(const Phrase &source, int pos, int offset, int factor, const string &str)
{
pos += offset;
if (pos >= source.size() || pos < 0) {
return false;
}
const string &word = source[pos][factor];
vector<string> soughts = Moses::Tokenize(str, " ");
for (int i = 0; i < soughts.size(); ++i) {
string &sought = soughts[i];
bool found = (word == sought);
if (found) {
return true;
}
}
return false;
}
bool Contains(const Phrase &source, int start, int end, int factor, const string &str)
{
for (int pos = start; pos <= end; ++pos) {
@ -38,8 +20,6 @@ bool Contains(const Phrase &source, int start, int end, int factor, const string
void LabelDeEn(const Phrase &source, ostream &out)
{
typedef pair<int,int> Range;
typedef list<Range> Ranges;
Ranges ranges;
// find ranges to label
@ -48,39 +28,19 @@ void LabelDeEn(const Phrase &source, ostream &out)
if (IsA(source, start, -1, 1, "VAFIN")
&& IsA(source, end, +1, 1, "VVINF VVPP")
&& !Contains(source, start, end, 1, "VAFIN VVINF VVPP VVFIN")) {
Range range(start, end);
Range range(start, end, "reorder-label");
ranges.push_back(range);
}
else if ((start == 0 || IsA(source, start, -1, 1, "$,"))
&& IsA(source, end, +1, 0, "zu")
&& IsA(source, end, +2, 1, "VVINF")
&& !Contains(source, start, end, 1, "$,")) {
Range range(start, end);
Range range(start, end, "reorder-label");
ranges.push_back(range);
}
}
}
// output sentence, with labels
for (int pos = 0; pos < source.size(); ++pos) {
// output beginning of label
for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
const Range &range = *iter;
if (range.first == pos) {
out << "<tree label=\"reorder-label\"> ";
}
}
const Word &word = source[pos];
out << word[0] << " ";
for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
const Range &range = *iter;
if (range.second == pos) {
out << "</tree> ";
}
}
}
out << endl;
OutputWithLabels(source, ranges, out);
}

View File

@ -1,10 +1,5 @@
#pragma once
#include <iostream>
#include <vector>
#include <string>
typedef std::vector<std::string> Word;
typedef std::vector<Word> Phrase;
#include "Main.h"
void LabelDeEn(const Phrase &source, std::ostream &out);

View File

@ -0,0 +1,201 @@
/*
* EnOpenNLPChunker.cpp
*
* Created on: 28 Feb 2014
* Author: hieu
*/
#include <stdlib.h>
#include <stdio.h>
#include <algorithm>
#include <fstream>
#include "EnOpenNLPChunker.h"
#include "moses/Util.h"
using namespace std;
// Remembers the OpenNLP installation directory. Process() later appends
// "/bin/opennlp" and "/models/..." to this path when it builds the shell
// command, so the path is expected to have no trailing slash requirement of
// its own (the separator is always added).
EnOpenNLPChunker::EnOpenNLPChunker(const std::string &openNLPPath)
:m_openNLPPath(openNLPPath)
{
// Nothing else to initialise; the path is the only state.
}
// The only data member is a std::string, which cleans up after itself, so the
// destructor body is intentionally empty.
EnOpenNLPChunker::~EnOpenNLPChunker() {
// Nothing to release.
}
void EnOpenNLPChunker::Process(std::istream &in, std::ostream &out, const vector<string> &filterList)
{
// read all input to a temp file
char *ptr = tmpnam(NULL);
string inStr(ptr);
ofstream inFile(ptr);
string line;
while (getline(in, line)) {
Unescape(line);
inFile << line << endl;
}
inFile.close();
ptr = tmpnam(NULL);
string outStr(ptr);
// execute chunker
string cmd = "cat " + inStr + " | "
+ m_openNLPPath + "/bin/opennlp POSTagger "
+ m_openNLPPath + "/models/en-pos-maxent.bin | "
+ m_openNLPPath + "/bin/opennlp ChunkerME "
+ m_openNLPPath + "/models/en-chunker.bin > "
+ outStr;
//g << "Executing:" << cmd << endl;
int ret = system(cmd.c_str());
// read result of chunker and output as Moses xml trees
ifstream outFile(outStr.c_str());
size_t lineNum = 0;
while (getline(outFile, line)) {
//cerr << line << endl;
MosesReformat(line, out, filterList);
out << endl;
++lineNum;
}
outFile.close();
// clean up temporary files
remove(inStr.c_str());
remove(outStr.c_str());
}
void EnOpenNLPChunker::MosesReformat(const string &line, std::ostream &out, const vector<string> &filterList)
{
//cerr << "REFORMATING:" << line << endl;
bool inLabel = false;
vector<string> toks;
Moses::Tokenize(toks, line);
for (size_t i = 0; i < toks.size(); ++i) {
const string &tok = toks[i];
if (tok.substr(0, 1) == "[" && tok.substr(1,1) != "_") {
// start of chunk
string label = tok.substr(1);
if (UseLabel(label, filterList)) {
out << "<tree label=\"" << label << "\">";
inLabel = true;
}
}
else if (tok.substr(tok.size()-1, 1) == "]") {
// end of chunk
if (tok.size() > 1) {
if (tok.substr(1,1) == "_") {
// just a word that happens to be ]
vector<string> factors;
Moses::Tokenize(factors, tok, "_");
assert(factors.size() == 2);
Escape(factors[0]);
out << factors[0] << " ";
}
else {
// a word and end of tree
string word = tok.substr(0, tok.size()-1);
vector<string> factors;
Moses::Tokenize(factors, word, "_");
assert(factors.size() == 2);
Escape(factors[0]);
out << factors[0] << " ";
}
if (inLabel) {
out << "</tree> ";
inLabel = false;
}
}
else {
if (inLabel) {
out << "</tree> ";
inLabel = false;
}
}
}
else {
// lexical item
vector<string> factors;
Moses::Tokenize(factors, tok, "_");
if (factors.size() == 2) {
Escape(factors[0]);
out << factors[0] << " ";
}
else if (factors.size() == 1) {
// word is _
assert(tok.substr(0, 2) == "__");
out << "_ ";
}
else {
throw "Unknown format:" + tok;
}
}
}
}
// Returns a copy of `original` in which every non-overlapping occurrence of
// `before`, scanning left to right, has been replaced by `after`.
//
// An empty `before` matches nothing and yields `original` unchanged (the
// previous implementation never advanced past an empty match and looped
// forever on non-empty input).
std::string
replaceAll( std::string const& original,
            std::string const& before,
            std::string const& after )
{
  if ( before.empty() ) {
    return original;
  }

  std::string retval;
  retval.reserve( original.size() );

  std::string::size_type current = 0;
  std::string::size_type next = original.find( before, current );

  while ( next != std::string::npos ) {
    // copy the untouched stretch, then the replacement
    retval.append( original, current, next - current );
    retval.append( after );
    current = next + before.size();
    next = original.find( before, current );
  }

  // tail after the last match (or the whole string if there was none)
  retval.append( original, current, std::string::npos );
  return retval;
}
// Replaces the characters & | < > ' " [ ] in `line`, in place, with their
// entity forms.
void EnOpenNLPChunker::Escape(string &line)
{
  // Applied top to bottom. "&" has to be handled first, otherwise the
  // ampersands introduced by the later replacements would be escaped again.
  static const char *const kReplacements[][2] = {
    { "&",  "&amp;"  },
    { "|",  "&#124;" },
    { "<",  "&lt;"   },
    { ">",  "&gt;"   },
    { "'",  "&apos;" },
    { "\"", "&quot;" },
    { "[",  "&#91;"  },
    { "]",  "&#93;"  }
  };
  const size_t count = sizeof(kReplacements) / sizeof(kReplacements[0]);

  for (size_t i = 0; i < count; ++i) {
    line = replaceAll(line, kReplacements[i][0], kReplacements[i][1]);
  }
}
// Turns the entities &#124; &lt; &gt; &quot; &apos; &#91; &#93; &amp; in
// `line`, in place, back into the literal characters they stand for.
void EnOpenNLPChunker::Unescape(string &line)
{
  // Applied top to bottom. "&amp;" is deliberately last so that text such as
  // "&amp;lt;" decodes to "&lt;" and is not decoded a second time.
  static const char *const kReplacements[][2] = {
    { "&#124;", "|"  },
    { "&lt;",   "<"  },
    { "&gt;",   ">"  },
    { "&quot;", "\"" },
    { "&apos;", "'"  },
    { "&#91;",  "["  },
    { "&#93;",  "]"  },
    { "&amp;",  "&"  }
  };
  const size_t count = sizeof(kReplacements) / sizeof(kReplacements[0]);

  for (size_t i = 0; i < count; ++i) {
    line = replaceAll(line, kReplacements[i][0], kReplacements[i][1]);
  }
}
// Decides whether a chunk label should be emitted: an empty filter list lets
// every label through, otherwise the label must equal one of its entries.
bool EnOpenNLPChunker::UseLabel(const std::string &label, const std::vector<std::string> &filterList) const
{
  return filterList.empty()
         || std::find(filterList.begin(), filterList.end(), label) != filterList.end();
}

View File

@ -0,0 +1,29 @@
/*
* EnOpenNLPChunker.h
*
* Created on: 28 Feb 2014
* Author: hieu
*/
#pragma once
#include <vector>
#include <string>
#include <iostream>
// Chunks English text by shelling out to the Apache OpenNLP command-line
// tools (POSTagger followed by ChunkerME) and prints the chunks as Moses XML
// <tree label="..."> markup.
class EnOpenNLPChunker {
public:
// openNLPPath: OpenNLP installation directory. The implementation runs
// <openNLPPath>/bin/opennlp with models taken from <openNLPPath>/models.
EnOpenNLPChunker(const std::string &openNLPPath);
virtual ~EnOpenNLPChunker();
// Reads all lines from `in`, runs them through the tagger and chunker via
// temporary files, and writes one reformatted line per chunker output line
// to `out`. An empty filterList keeps every chunk label; otherwise only the
// listed labels are emitted.
void Process(std::istream &in, std::ostream &out, const std::vector<std::string> &filterList);
protected:
// Directory passed to the constructor; never modified afterwards.
const std::string m_openNLPPath;
// Replaces & | < > ' " [ ] in `line` with entities, in place.
void Escape(std::string &line);
// Inverse of Escape(): turns those entities back into characters, in place.
void Unescape(std::string &line);
// Converts one line of chunker output to Moses XML and writes it to `out`
// (without a trailing newline).
void MosesReformat(const std::string &line, std::ostream &out, const std::vector<std::string> &filterList);
// True if filterList is empty or contains `label`.
bool UseLabel(const std::string &label, const std::vector<std::string> &filterList) const;
};

View File

@ -0,0 +1,226 @@
#include <iostream>
#include <list>
#include <limits>
#include <algorithm>
#include "EnPhrasalVerb.h"
#include "moses/Util.h"
using namespace std;
// Marks the words that sit between a phrasal verb and its separated particle
// ("turn the radio off") and prints the sentence with those spans wrapped in
// <tree label="reorder-label"> via OutputWithLabels().
//
// For every position whose factor 0 is one of the verb forms in the rule
// table, Found() looks ahead for one of that rule's particles. When a
// particle is found with at least one word in between, the words strictly
// between verb and particle become a labelled range.
//
// revision == 1 additionally drops a range when any word inside it carries a
// verb tag (VB VBD VBG VBN VBP VBZ) in factor 1; other revisions keep it.
void EnPhrasalVerb(const Phrase &source, int revision, ostream &out)
{
  // Space-separated verb forms and the particles that may complete them.
  // The verb lists are mutually exclusive, so the first matching rule is the
  // only matching rule.
  struct Rule {
    const char *verbs;
    const char *particles;
  };
  static const Rule rules[] = {
    { "ask asked asking",            "out" },
    { "back backed backing",         "up" },
    { "blow blown blew",             "up" },
    { "break broke broken",          "down up in" },
    { "bring brought bringing",      "down up in" },
    { "call called calling",         "back up off" },
    { "check checked checking",      "out in" },
    { "cheer cheered cheering",      "up" },
    { "clean cleaned cleaning",      "up" },
    { "cross crossed crossing",      "out" },
    { "cut cutting",                 "down off out" },
    { "do did done",                 "over up" },
    { "drop dropped dropping",       "off" },
    { "figure figured figuring",     "out" },
    { "fill filled filling",         "in out up" },
    { "find found finding",          "out" },
    { "get got getting gotten",      "across over back" },
    { "give given gave giving",      "away back out up" },
    { "hand handed handing",         "down in over" },
    { "hold held holding",           "back up" },
    { "keep kept keeping",           "from up" },
    { "let letting",                 "down in" },
    { "look looked looking",         "over up" },
    { "make made making",            "up" },
    { "mix mixed mixing",            "up" },
    { "pass passed passing",         "out up" },
    { "pay payed paying",            "back" },
    { "pick picked picking",         "out" },
    { "point pointed pointing",      "out" },
    { "put putting",                 "down off out together on" },
    { "send sending",                "back" },
    { "set setting",                 "up" },
    { "sort sorted sorting",         "out" },
    { "switch switched switching",   "off on" },
    { "take took taking",            "apart back off out" },
    { "tear torn tearing",           "up" },
    { "think thought thinking",      "over" },
    // was "thrown threw thrown throwing": the base form was never matched
    { "throw threw thrown throwing", "away" },
    { "turn turned turning",         "down off on" },
    { "try tried trying",            "on out" },
    { "use used using",              "up" },
    { "warm warmed warming",         "up" },
    { "work worked working",         "out" }
  };
  const size_t numRules = sizeof(rules) / sizeof(rules[0]);
  const size_t notFound = std::numeric_limits<size_t>::max();

  Ranges ranges;

  // find ranges to label
  const int numWords = static_cast<int>(source.size());
  for (int start = 0; start < numWords; ++start) {
    size_t end = notFound;
    for (size_t r = 0; r < numRules; ++r) {
      if (IsA(source, start, 0, 0, rules[r].verbs)) {
        end = Found(source, start, 0, rules[r].particles);
        break;
      }
    }

    // need a particle, and at least one word between it and the verb
    if (end == notFound || end <= static_cast<size_t>(start) + 1) {
      continue;
    }

    const int first = start + 1;
    const int last = static_cast<int>(end) - 1;

    // revision 1: skip spans that contain another verb
    const bool verbInBetween = revision == 1
                               && Exist(source, first, last, 1, "VB VBD VBG VBN VBP VBZ");
    if (!verbInBetween) {
      ranges.push_back(Range(first, last, "reorder-label"));
    }
  }

  OutputWithLabels(source, ranges, out);
}
bool Exist(const Phrase &source, int start, int end, int factor, const std::string &str)
{
vector<string> soughts = Moses::Tokenize(str, " ");
for (size_t i = start; i <= end; ++i) {
const Word &word = source[i];
bool found = Found(word, factor, soughts);
if (found) {
return true;
}
}
return false;
}
size_t Found(const Phrase &source, int pos, int factor, const std::string &str)
{
const size_t MAX_RANGE = 10;
vector<string> soughts = Moses::Tokenize(str, " ");
vector<string> puncts = Moses::Tokenize(". : , ;", " ");
size_t maxEnd = std::min(source.size(), (size_t) pos + MAX_RANGE);
for (size_t i = pos + 1; i < maxEnd; ++i) {
const Word &word = source[i];
bool found;
found = Found(word, factor, puncts);
if (found) {
return std::numeric_limits<size_t>::max();
}
found = Found(word, factor, soughts);
if (found) {
return i;
}
}
return std::numeric_limits<size_t>::max();
}
bool Found(const Word &word, int factor, const vector<string> &soughts)
{
const string &element = word[factor];
for (size_t i = 0; i < soughts.size(); ++i) {
const string &sought = soughts[i];
bool found = (element == sought);
if (found) {
return true;
}
}
return false;
}

View File

@ -0,0 +1,11 @@
#pragma once
#include "Main.h"
// roll your own identification of phrasal verbs
// Writes `source` to `out` with the words between a listed verb and its
// separated particle wrapped in a "reorder-label" tree. With revision == 1 a
// span is skipped when it contains a word whose factor 1 is a verb tag.
void EnPhrasalVerb(const Phrase &source, int revision, std::ostream &out);
// True if some word in source[start..end] (inclusive) has, in `factor`, one of
// the space-separated values in `str`.
bool Exist(const Phrase &source, int start, int end, int factor, const std::string &str);
// Index of the first word after `pos` (within a fixed look-ahead window) whose
// `factor` is one of the space-separated values in `str`;
// std::numeric_limits<size_t>::max() if punctuation (. : , ;) comes first or
// nothing matches.
size_t Found(const Phrase &source, int pos, int factor, const std::string &str);
// True if word[factor] equals one of `soughts`.
bool Found(const Word &word, int factor, const std::vector<std::string> &soughts);

View File

@ -0,0 +1,29 @@
#include "LabelByInitialLetter.h"
#include "Main.h"
using namespace std;
void LabelByInitialLetter(const Phrase &source, std::ostream &out)
{
Ranges ranges;
for (int start = 0; start < source.size(); ++start) {
const string &startWord = source[start][0];
string startChar = startWord.substr(0,1);
for (int end = start + 1; end < source.size(); ++end) {
const string &endWord = source[end][0];
string endChar = endWord.substr(0,1);
if (startChar == endChar) {
Range range(start, end, startChar + "-label");
ranges.push_back(range);
}
}
}
OutputWithLabels(source, ranges, out);
}

View File

@ -0,0 +1,6 @@
#pragma once
#include "Main.h"
// Prints `source` to `out`, wrapping every span whose first and last word
// start with the same character in a tree labelled "<that character>-label".
void LabelByInitialLetter(const Phrase &source, std::ostream &out);

View File

@ -0,0 +1,195 @@
#include <iostream>
#include <cstdlib>
#include <boost/program_options.hpp>
#include "moses/Util.h"
#include "Main.h"
#include "DeEn.h"
#include "EnPhrasalVerb.h"
#include "EnOpenNLPChunker.h"
#include "LabelByInitialLetter.h"
using namespace std;
bool g_debug = false;
Phrase Tokenize(const string &line);
int main(int argc, char** argv)
{
cerr << "Starting" << endl;
namespace po = boost::program_options;
po::options_description desc("Options");
desc.add_options()
("help", "Print help messages")
("input,i", po::value<string>(), "Input file. Otherwise it will read from standard in")
("output,o", po::value<string>(), "Output file. Otherwise it will print from standard out")
("source-language,s", po::value<string>()->required(), "Source Language")
("target-language,t", po::value<string>()->required(), "Target Language")
("revision,r", po::value<int>()->default_value(0), "Revision")
("filter", po::value<string>(), "Only use labels from this comma-separated list")
("opennlp", po::value<string>()->default_value(""), "Path to Apache OpenNLP toolkit")
;
po::variables_map vm;
try
{
po::store(po::parse_command_line(argc, argv, desc),
vm); // can throw
/** --help option
*/
if ( vm.count("help") )
{
std::cout << "Basic Command Line Parameter App" << std::endl
<< desc << std::endl;
return EXIT_SUCCESS;
}
po::notify(vm); // throws on error, so do after help in case
// there are any problems
}
catch(po::error& e)
{
std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
std::cerr << desc << std::endl;
return EXIT_FAILURE;
}
istream *inStrm = &cin;
if (vm.count("input")) {
string inStr = vm["input"].as<string>();
cerr << "inStr=" << inStr << endl;
ifstream *inFile = new ifstream(inStr.c_str());
inStrm = inFile;
}
ostream *outStrm = &cout;
if (vm.count("output")) {
string outStr = vm["output"].as<string>();
cerr << "outStr=" << outStr << endl;
ostream *outFile = new ofstream(outStr.c_str());
outStrm = outFile;
}
vector<string> filterList;
if (vm.count("filter")) {
string filter = vm["filter"].as<string>();
Moses::Tokenize(filterList, filter, ",");
}
string sourceLang = vm["source-language"].as<string>();
string targetLang = vm["target-language"].as<string>();
int revision = vm["revision"].as<int>();
cerr << sourceLang << " " << targetLang << " " << revision << endl;
if (sourceLang == "en" && revision == 2) {
if (vm.count("opennlp") == 0) {
throw "Need path to openNLP toolkit";
}
string openNLPPath = vm["opennlp"].as<string>();
EnOpenNLPChunker chunker(openNLPPath);
chunker.Process(*inStrm, *outStrm, filterList);
}
else {
// process line-by-line
string line;
size_t lineNum = 1;
while (getline(*inStrm, line)) {
//cerr << lineNum << ":" << line << endl;
if (lineNum % 1000 == 0) {
cerr << lineNum << " ";
}
Phrase source = Tokenize(line);
if (revision == 600 ) {
LabelByInitialLetter(source, *outStrm);
}
else if (sourceLang == "de" && targetLang == "en") {
LabelDeEn(source, *outStrm);
}
else if (sourceLang == "en") {
if (revision == 0 || revision == 1) {
EnPhrasalVerb(source, revision, *outStrm);
}
else if (revision == 2) {
string openNLPPath = vm["opennlp-path"].as<string>();
EnOpenNLPChunker chunker(openNLPPath);
}
}
++lineNum;
}
}
cerr << "Finished" << endl;
return EXIT_SUCCESS;
}
// Splits an input line into words (Moses::Tokenize with its default
// delimiter) and each word into its factors on '|'.
Phrase Tokenize(const string &line)
{
  const vector<string> tokens = Moses::Tokenize(line);

  Phrase phrase;
  for (vector<string>::const_iterator tok = tokens.begin(); tok != tokens.end(); ++tok) {
    phrase.push_back(Moses::Tokenize(*tok, "|"));
  }
  return phrase;
}
bool IsA(const Phrase &source, int pos, int offset, int factor, const string &str)
{
pos += offset;
if (pos >= source.size() || pos < 0) {
return false;
}
const string &word = source[pos][factor];
vector<string> soughts = Moses::Tokenize(str, " ");
for (int i = 0; i < soughts.size(); ++i) {
string &sought = soughts[i];
bool found = (word == sought);
if (found) {
return true;
}
}
return false;
}
void OutputWithLabels(const Phrase &source, const Ranges ranges, ostream &out)
{
// output sentence, with labels
for (int pos = 0; pos < source.size(); ++pos) {
// output beginning of label
for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
const Range &range = *iter;
if (range.range.first == pos) {
out << "<tree label=\"" + range.label + "\"> ";
}
}
const Word &word = source[pos];
out << word[0] << " ";
for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
const Range &range = *iter;
if (range.range.second == pos) {
out << "</tree> ";
}
}
}
out << endl;
}

View File

@ -0,0 +1,27 @@
#pragma once
#include <iostream>
#include <vector>
#include <string>
#include <list>
// A word is the list of its factors, as split on '|' by the tokenizer in
// Main.cpp. Callers use factor 0 as the printed form; factor 1 is compared
// against POS-style tags (presumably the POS tag - confirm against the input
// format).
typedef std::vector<std::string> Word;
// A sentence: its words in order.
typedef std::vector<Word> Phrase;
/**
 * A labelled span over word positions.
 *
 * `range.first` holds the start position and `range.second` the end position
 * passed to the constructor; `label` is the text attached to the span.
 */
struct Range {
  std::pair<int,int> range;  // (start, end) exactly as given to the constructor
  std::string label;         // label text for this span

  // Build a span from its two positions and its label.
  Range(int start, int end, const std::string &l)
    : range(start, end), label(l)
  {
  }
};
// Collection of labelled spans, kept in a std::list.
typedef std::list<Range> Ranges;
// Returns true when factor `factor` of the word at index (pos + offset) in
// `source` equals one of the space-separated alternatives listed in `str`.
// Returns false when (pos + offset) lies outside `source` or nothing matches.
bool IsA(const Phrase &source, int pos, int offset, int factor, const std::string &str);
// Writes factor 0 of every word in `source` to `out`, wrapping the words of
// each span in `ranges` with <tree label="..."> ... </tree>, and ends the line.
// NOTE(review): `ranges` is taken by value, so the list is copied on each call.
void OutputWithLabels(const Phrase &source, const Ranges ranges, std::ostream &out);

View File

@ -4,10 +4,11 @@ clean:
rm -f *.o manual-label
.cpp.o:
g++ -I../../../ -O6 -g -c $<
g++ -I../../../boost/include -I../../../ -O3 -g -c $<
manual-label: DeEn.o manual-label.o
OBJECTS = DeEn.o EnOpenNLPChunker.o EnPhrasalVerb.o Main.o LabelByInitialLetter.o
g++ DeEn.o manual-label.o -lz -lboost_program_options-mt -o manual-label
manual-label: $(OBJECTS)
g++ $(OBJECTS) -L../../../boost/lib64 -lz -lboost_program_options-mt -o manual-label

View File

@ -1,88 +0,0 @@
#include <iostream>
#include <cstdlib>
#include <boost/program_options.hpp>
#include "moses/Util.h"
#include "DeEn.h"
using namespace std;
// Global debug switch, off by default. It is not read anywhere in this file;
// presumably consulted by other translation units (e.g. DeEn.cpp) — confirm.
bool g_debug = false;
// Forward declaration; the definition follows main() below.
Phrase Tokenize(const string &line);
int main(int argc, char** argv)
{
cerr << "Starting" << endl;
namespace po = boost::program_options;
po::options_description desc("Options");
desc.add_options()
("help", "Print help messages")
("add", "additional options")
("source-language,s", po::value<string>()->required(), "Source Language")
("target-language,t", po::value<string>()->required(), "Target Language");
po::variables_map vm;
try
{
po::store(po::parse_command_line(argc, argv, desc),
vm); // can throw
/** --help option
*/
if ( vm.count("help") )
{
std::cout << "Basic Command Line Parameter App" << std::endl
<< desc << std::endl;
return EXIT_SUCCESS;
}
po::notify(vm); // throws on error, so do after help in case
// there are any problems
}
catch(po::error& e)
{
std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
std::cerr << desc << std::endl;
return EXIT_FAILURE;
}
string sourceLang = vm["source-language"].as<string>();
string targetLang = vm["target-language"].as<string>();
cerr << sourceLang << " " << targetLang << endl;
string line;
size_t lineNum = 1;
while (getline(cin, line)) {
//cerr << lineNum << ":" << line << endl;
if (lineNum % 1000 == 0) {
cerr << lineNum << " ";
}
Phrase source = Tokenize(line);
LabelDeEn(source, cout);
++lineNum;
}
cerr << "Finished" << endl;
return EXIT_SUCCESS;
}
/**
 * Split an input line into words and every word into its '|'-separated
 * factors.
 *
 * @param line  raw input line
 * @return      the words of the line, each as a list of factor strings
 */
Phrase Tokenize(const string &line)
{
  vector<string> rawWords = Moses::Tokenize(line);
  const size_t numWords = rawWords.size();

  Phrase phrase;
  size_t wordInd = 0;
  while (wordInd < numWords) {
    // one Word per token: the token's factors
    phrase.push_back(Moses::Tokenize(rawWords[wordInd], "|"));
    ++wordInd;
  }
  return phrase;
}

View File

@ -1,177 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.377583226" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.2071063316" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/mira/Debug}" id="cdt.managedbuild.builder.gnu.cross.881204887" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1218877049" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1094111510" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.2142370493" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1560615310" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool command="g++" id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.115638939" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1315998281" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.778416356" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.preprocessor.def.317569168" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="WITH_THREADS"/>
</option>
<option id="gnu.cpp.compiler.option.include.paths.1743631842" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../..&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1454738757" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1480777831" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.485611005" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.1007486529" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="moses"/>
<listOptionValue builtIn="false" value="irstlm"/>
<listOptionValue builtIn="false" value="dstruct"/>
<listOptionValue builtIn="false" value="flm"/>
<listOptionValue builtIn="false" value="oolm"/>
<listOptionValue builtIn="false" value="lattice"/>
<listOptionValue builtIn="false" value="misc"/>
<listOptionValue builtIn="false" value="dalm"/>
<listOptionValue builtIn="false" value="search"/>
<listOptionValue builtIn="false" value="RandLM"/>
<listOptionValue builtIn="false" value="OnDiskPt"/>
<listOptionValue builtIn="false" value="lm"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_serialization"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
<listOptionValue builtIn="false" value="pthread"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
<listOptionValue builtIn="false" value="dl"/>
<listOptionValue builtIn="false" value="rt"/>
</option>
<option id="gnu.cpp.link.option.paths.132082917" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../irstlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../DALM/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../nplm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../randlm/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../cmph/lib&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/macosx&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686-m64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../srilm/lib/i686&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/moses/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/lm/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/OnDiskPt/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/search/Debug&quot;"/>
<listOptionValue builtIn="false" value="/opt/local/lib"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1827477602" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1554055737" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1335019965" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1106765201" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.2038764866." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1722081106" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.36030994" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/mira/Release}" id="cdt.managedbuild.builder.gnu.cross.329863268" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.299271422" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1049770857" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1354488968" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.674520633" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.568828285" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1042930447" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.305563840" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1424960921" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.460791828" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.945282347" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.561813601" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1813861310" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.991451934" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1702585996" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="mira.cdt.managedbuild.target.gnu.cross.exe.1862989567" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092;cdt.managedbuild.config.gnu.cross.exe.debug.1385309092.;cdt.managedbuild.tool.gnu.cross.c.compiler.1218877049;cdt.managedbuild.tool.gnu.c.compiler.input.1560615310">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.2038764866;cdt.managedbuild.config.gnu.cross.exe.release.2038764866.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.568828285;cdt.managedbuild.tool.gnu.cpp.compiler.input.1424960921">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1385309092;cdt.managedbuild.config.gnu.cross.exe.debug.1385309092.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.115638939;cdt.managedbuild.tool.gnu.cpp.compiler.input.1454738757">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.2038764866;cdt.managedbuild.config.gnu.cross.exe.release.2038764866.;cdt.managedbuild.tool.gnu.cross.c.compiler.299271422;cdt.managedbuild.tool.gnu.c.compiler.input.674520633">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/mira"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/mira"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -1,81 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>mira</name>
<comment></comment>
<projects>
<project>mert_lib</project>
<project>moses</project>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>Decoder.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mira/Decoder.cpp</locationURI>
</link>
<link>
<name>Decoder.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mira/Decoder.h</locationURI>
</link>
<link>
<name>Hildreth.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mira/Hildreth.cpp</locationURI>
</link>
<link>
<name>Hildreth.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mira/Hildreth.h</locationURI>
</link>
<link>
<name>HypothesisQueue.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mira/HypothesisQueue.cpp</locationURI>
</link>
<link>
<name>HypothesisQueue.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mira/HypothesisQueue.h</locationURI>
</link>
<link>
<name>Main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mira/Main.cpp</locationURI>
</link>
<link>
<name>Main.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mira/Main.h</locationURI>
</link>
<link>
<name>MiraOptimiser.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mira/MiraOptimiser.cpp</locationURI>
</link>
<link>
<name>Perceptron.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/mira/Perceptron.cpp</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -1301,6 +1301,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SoftMatchingFeature.h</locationURI>
</link>
<link>
<name>FF/SourceGHKMTreeInputMatchFeature.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SourceGHKMTreeInputMatchFeature.cpp</locationURI>
</link>
<link>
<name>FF/SourceGHKMTreeInputMatchFeature.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SourceGHKMTreeInputMatchFeature.h</locationURI>
</link>
<link>
<name>FF/SourceWordDeletionFeature.cpp</name>
<type>1</type>
@ -1311,6 +1321,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SourceWordDeletionFeature.h</locationURI>
</link>
<link>
<name>FF/SpanLength.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SpanLength.cpp</locationURI>
</link>
<link>
<name>FF/SpanLength.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SpanLength.h</locationURI>
</link>
<link>
<name>FF/StatefulFeatureFunction.cpp</name>
<type>1</type>
@ -1331,6 +1351,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/StatelessFeatureFunction.h</locationURI>
</link>
<link>
<name>FF/SyntaxRHS.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SyntaxRHS.cpp</locationURI>
</link>
<link>
<name>FF/SyntaxRHS.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/FF/SyntaxRHS.h</locationURI>
</link>
<link>
<name>FF/TargetBigramFeature.cpp</name>
<type>1</type>
@ -1626,6 +1656,11 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/Factory.h</locationURI>
</link>
<link>
<name>PP/PhraseProperty.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/PhraseProperty.cpp</locationURI>
</link>
<link>
<name>PP/PhraseProperty.h</name>
<type>1</type>
@ -1641,6 +1676,16 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/SourceLabelsPhraseProperty.h</locationURI>
</link>
<link>
<name>PP/SpanLengthPhraseProperty.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/SpanLengthPhraseProperty.cpp</locationURI>
</link>
<link>
<name>PP/SpanLengthPhraseProperty.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/moses/PP/SpanLengthPhraseProperty.h</locationURI>
</link>
<link>
<name>PP/TreeStructurePhraseProperty.h</name>
<type>1</type>

Some files were not shown because too many files have changed in this diff Show More