mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 12:52:29 +03:00
Merge branch 'master' of https://github.com/moses-smt/mosesdecoder
This commit is contained in:
commit
e49ffb8efa
@ -11,12 +11,12 @@
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
@ -72,13 +72,13 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
|
@ -1,132 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.2107801703">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.2107801703" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.2107801703" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.2107801703." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.502948364" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.1431969079" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/manual-label}/Debug" id="cdt.managedbuild.target.gnu.builder.exe.debug.2101075234" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1118840081" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2037265673" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.400985496" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.1160903812" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.404589863" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="${workspace_loc:}/../.."/>
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost&quot;"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.967940596" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.789243964" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.2033266575" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.exe.debug.option.debugging.level.1568929819" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.676866714" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.254144861" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.319879082" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
|
||||
<option id="gnu.cpp.link.option.paths.132164474" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.libs.1017214824" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="boost_program_options"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1672776758" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1104732611" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.372096550" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.649050588">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.649050588" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.649050588" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.649050588." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.1107402972" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1038954684" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/manual-label}/Release" id="cdt.managedbuild.target.gnu.builder.exe.release.100518450" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.base.2005888378" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1743303968" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
|
||||
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.968169340" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.977676916" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1889240027" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.924128295" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.1914416581" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.exe.release.option.debugging.level.826081780" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2048171432" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.940327646" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.369758737" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1186766936" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.266174128" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.558116084" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="manual-label.cdt.managedbuild.target.gnu.exe.1701243340" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.649050588;cdt.managedbuild.config.gnu.exe.release.649050588.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.1743303968;cdt.managedbuild.tool.gnu.cpp.compiler.input.1889240027">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.649050588;cdt.managedbuild.config.gnu.exe.release.649050588.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.924128295;cdt.managedbuild.tool.gnu.c.compiler.input.2048171432">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2107801703;cdt.managedbuild.config.gnu.exe.debug.2107801703.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.2037265673;cdt.managedbuild.tool.gnu.cpp.compiler.input.967940596">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.2107801703;cdt.managedbuild.config.gnu.exe.debug.2107801703.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.789243964;cdt.managedbuild.tool.gnu.c.compiler.input.676866714">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/manual-label"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/manual-label"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
</cproject>
|
@ -1,27 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>manual-label</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
@ -1,46 +0,0 @@
|
||||
#include <list>
|
||||
#include "DeEn.h"
|
||||
#include "Main.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
extern bool g_debug;
|
||||
|
||||
bool Contains(const Phrase &source, int start, int end, int factor, const string &str)
|
||||
{
|
||||
for (int pos = start; pos <= end; ++pos) {
|
||||
bool found = IsA(source, pos, 0, factor, str);
|
||||
if (found) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void LabelDeEn(const Phrase &source, ostream &out)
|
||||
{
|
||||
Ranges ranges;
|
||||
|
||||
// find ranges to label
|
||||
for (int start = 0; start < source.size(); ++start) {
|
||||
for (int end = start; end < source.size(); ++end) {
|
||||
if (IsA(source, start, -1, 1, "VAFIN")
|
||||
&& IsA(source, end, +1, 1, "VVINF VVPP")
|
||||
&& !Contains(source, start, end, 1, "VAFIN VVINF VVPP VVFIN")) {
|
||||
Range range(start, end, "reorder-label");
|
||||
ranges.push_back(range);
|
||||
}
|
||||
else if ((start == 0 || IsA(source, start, -1, 1, "$,"))
|
||||
&& IsA(source, end, +1, 0, "zu")
|
||||
&& IsA(source, end, +2, 1, "VVINF")
|
||||
&& !Contains(source, start, end, 1, "$,")) {
|
||||
Range range(start, end, "reorder-label");
|
||||
ranges.push_back(range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OutputWithLabels(source, ranges, out);
|
||||
}
|
||||
|
@ -1,5 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "Main.h"
|
||||
|
||||
void LabelDeEn(const Phrase &source, std::ostream &out);
|
@ -1,202 +0,0 @@
|
||||
/*
|
||||
* EnApacheChunker.cpp
|
||||
*
|
||||
* Created on: 28 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include "EnOpenNLPChunker.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace boost::algorithm;
|
||||
|
||||
// Stores the OpenNLP installation directory; Process() later appends
// "/bin/opennlp" and "/models/..." to it when building the chunker command.
EnOpenNLPChunker::EnOpenNLPChunker(const std::string &openNLPPath)
  : m_openNLPPath(openNLPPath)
{
  // nothing else to initialise
}
|
||||
|
||||
// The class owns no resources beyond its string member, so there is nothing
// to release explicitly.
EnOpenNLPChunker::~EnOpenNLPChunker()
{
}
|
||||
|
||||
void EnOpenNLPChunker::Process(std::istream &in, std::ostream &out, const vector<string> &filterList)
|
||||
{
|
||||
const boost::filesystem::path
|
||||
inPath = boost::filesystem::unique_path(),
|
||||
outPath = boost::filesystem::unique_path();
|
||||
// read all input to a temp file
|
||||
ofstream inFile(inPath.c_str());
|
||||
|
||||
string line;
|
||||
while (getline(in, line)) {
|
||||
Unescape(line);
|
||||
inFile << line << endl;
|
||||
}
|
||||
inFile.close();
|
||||
|
||||
// execute chunker
|
||||
string cmd = "cat " + inPath.native() + " | "
|
||||
+ m_openNLPPath + "/bin/opennlp POSTagger "
|
||||
+ m_openNLPPath + "/models/en-pos-maxent.bin | "
|
||||
+ m_openNLPPath + "/bin/opennlp ChunkerME "
|
||||
+ m_openNLPPath + "/models/en-chunker.bin > "
|
||||
+ outPath.native();
|
||||
//g << "Executing:" << cmd << endl;
|
||||
int ret = system(cmd.c_str());
|
||||
|
||||
// read result of chunker and output as Moses xml trees
|
||||
ifstream outFile(outPath.c_str());
|
||||
|
||||
size_t lineNum = 0;
|
||||
while (getline(outFile, line)) {
|
||||
//cerr << line << endl;
|
||||
MosesReformat(line, out, filterList);
|
||||
out << endl;
|
||||
++lineNum;
|
||||
}
|
||||
outFile.close();
|
||||
|
||||
// clean up temporary files
|
||||
remove(inPath.c_str());
|
||||
remove(outPath.c_str());
|
||||
}
|
||||
|
||||
void EnOpenNLPChunker::MosesReformat(const string &line, std::ostream &out, const vector<string> &filterList)
|
||||
{
|
||||
//cerr << "REFORMATING:" << line << endl;
|
||||
bool inLabel = false;
|
||||
vector<string> toks;
|
||||
Moses::Tokenize(toks, line);
|
||||
for (size_t i = 0; i < toks.size(); ++i) {
|
||||
const string &tok = toks[i];
|
||||
|
||||
if (tok.substr(0, 1) == "[" && tok.substr(1,1) != "_") {
|
||||
// start of chunk
|
||||
string label = tok.substr(1);
|
||||
if (UseLabel(label, filterList)) {
|
||||
out << "<tree label=\"" << label << "\">";
|
||||
inLabel = true;
|
||||
}
|
||||
}
|
||||
else if (ends_with(tok, "]")) {
|
||||
// end of chunk
|
||||
if (tok.size() > 1) {
|
||||
if (tok.substr(1,1) == "_") {
|
||||
// just a word that happens to be ]
|
||||
vector<string> factors;
|
||||
Moses::Tokenize(factors, tok, "_");
|
||||
assert(factors.size() == 2);
|
||||
|
||||
Escape(factors[0]);
|
||||
out << factors[0] << " ";
|
||||
}
|
||||
else {
|
||||
// a word and end of tree
|
||||
string word = tok.substr(0, tok.size()-1);
|
||||
|
||||
vector<string> factors;
|
||||
Moses::Tokenize(factors, word, "_");
|
||||
assert(factors.size() == 2);
|
||||
|
||||
Escape(factors[0]);
|
||||
out << factors[0] << " ";
|
||||
}
|
||||
|
||||
if (inLabel) {
|
||||
out << "</tree> ";
|
||||
inLabel = false;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (inLabel) {
|
||||
out << "</tree> ";
|
||||
inLabel = false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
else {
|
||||
// lexical item
|
||||
vector<string> factors;
|
||||
Moses::Tokenize(factors, tok, "_");
|
||||
if (factors.size() == 2) {
|
||||
Escape(factors[0]);
|
||||
out << factors[0] << " ";
|
||||
}
|
||||
else if (factors.size() == 1) {
|
||||
// word is _
|
||||
assert(tok.substr(0, 2) == "__");
|
||||
out << "_ ";
|
||||
}
|
||||
else {
|
||||
throw "Unknown format:" + tok;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Returns a copy of `original` in which every non-overlapping occurrence of
// `before`, scanning left to right, is replaced by `after`.
//
//   original - text to search
//   before   - substring to look for; when empty, `original` is returned
//              unchanged (an empty pattern matches everywhere without
//              consuming input, which previously made the loop spin forever)
//   after    - replacement text; may be empty to delete the matches
std::string
replaceAll( std::string const& original,
            std::string const& before,
            std::string const& after )
{
  if ( before.empty() ) {
    return original;
  }

  std::string retval;
  std::string::size_type current = 0;
  std::string::size_type next = original.find( before, current );
  while ( next != std::string::npos ) {
    // copy the untouched text in front of the match, then the replacement
    retval.append( original, current, next - current );
    retval.append( after );
    // resume after the match so replacements are never rescanned
    current = next + before.size();
    next = original.find( before, current );
  }
  // tail after the last match (or the whole string when nothing matched)
  retval.append( original, current, std::string::npos );
  return retval;
}
|
||||
|
||||
// Replaces, in place, the characters that are reserved in Moses input with
// their entity form. "&" is handled first so that the ampersands introduced
// by the later substitutions are not escaped a second time.
//
// The replacement literals had been entity-decoded in this copy of the file
// (e.g. "&" -> "&", and an unterminated """), which made the function a
// no-op at best and ill-formed at worst; they are restored here.
void EnOpenNLPChunker::Escape(string &line)
{
  line = replaceAll(line, "&", "&amp;");
  line = replaceAll(line, "|", "&#124;");
  line = replaceAll(line, "<", "&lt;");
  line = replaceAll(line, ">", "&gt;");
  line = replaceAll(line, "'", "&apos;");
  line = replaceAll(line, "\"", "&quot;");
  line = replaceAll(line, "[", "&#91;");
  line = replaceAll(line, "]", "&#93;");
}
|
||||
|
||||
// Inverse of Escape(): turns the Moses entity forms back into the literal
// characters, in place. "&amp;" is decoded last so that text such as
// "&amp;lt;" becomes "&lt;" rather than "<".
//
// The entity literals had been decoded in this copy of the file (every call
// mapped a character to itself, and """ did not compile); they are restored
// here.
void EnOpenNLPChunker::Unescape(string &line)
{
  line = replaceAll(line, "&#124;", "|");
  line = replaceAll(line, "&lt;", "<");
  line = replaceAll(line, "&gt;", ">");
  line = replaceAll(line, "&quot;", "\"");
  line = replaceAll(line, "&apos;", "'");
  line = replaceAll(line, "&#91;", "[");
  line = replaceAll(line, "&#93;", "]");
  line = replaceAll(line, "&amp;", "&");
}
|
||||
|
||||
bool EnOpenNLPChunker::UseLabel(const std::string &label, const std::vector<std::string> &filterList) const
|
||||
{
|
||||
if (filterList.size() == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < filterList.size(); ++i) {
|
||||
if (label == filterList[i]) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
@ -1,29 +0,0 @@
|
||||
/*
|
||||
* EnApacheChunker.h
|
||||
*
|
||||
* Created on: 28 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
// Front end to the OpenNLP command-line POS tagger and chunker: feeds text
// through the external tools and rewrites their output as Moses XML trees.
class EnOpenNLPChunker
{
public:
  // openNLPPath: directory under which Process() looks for "bin/opennlp"
  // and the "models/" files.
  EnOpenNLPChunker(const std::string &openNLPPath);

  virtual ~EnOpenNLPChunker();

  // Chunks every line of `in` and writes the reformatted lines to `out`.
  // `filterList` is passed on to MosesReformat().
  void Process(std::istream &in,
               std::ostream &out,
               const std::vector<std::string> &filterList);

protected:
  // Root of the OpenNLP installation, fixed at construction.
  const std::string m_openNLPPath;

  // In-place character substitutions applied to words written to the output
  // (Escape) and to input lines before they are sent to OpenNLP (Unescape).
  void Escape(std::string &line);
  void Unescape(std::string &line);

  // Converts one line of chunker output into Moses XML-tree markup.
  void MosesReformat(const std::string &line,
                     std::ostream &out,
                     const std::vector<std::string> &filterList);

  // True when `label` should be emitted: the filter list is empty or
  // contains `label`.
  bool UseLabel(const std::string &label,
                const std::vector<std::string> &filterList) const;
};
|
||||
|
@ -1,226 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <limits>
|
||||
#include <algorithm>
|
||||
#include "EnPhrasalVerb.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void EnPhrasalVerb(const Phrase &source, int revision, ostream &out)
|
||||
{
|
||||
Ranges ranges;
|
||||
|
||||
// find ranges to label
|
||||
for (int start = 0; start < source.size(); ++start) {
|
||||
size_t end = std::numeric_limits<size_t>::max();
|
||||
|
||||
if (IsA(source, start, 0, 0, "ask asked asking")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "back backed backing")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "blow blown blew")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "break broke broken")) {
|
||||
end = Found(source, start, 0, "down up in");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "bring brought bringing")) {
|
||||
end = Found(source, start, 0, "down up in");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "call called calling")) {
|
||||
end = Found(source, start, 0, "back up off");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "check checked checking")) {
|
||||
end = Found(source, start, 0, "out in");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "cheer cheered cheering")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "clean cleaned cleaning")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "cross crossed crossing")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "cut cutting")) {
|
||||
end = Found(source, start, 0, "down off out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "do did done")) {
|
||||
end = Found(source, start, 0, "over up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "drop dropped dropping")) {
|
||||
end = Found(source, start, 0, "off");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "figure figured figuring")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "fill filled filling")) {
|
||||
end = Found(source, start, 0, "in out up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "find found finding")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "get got getting gotten")) {
|
||||
end = Found(source, start, 0, "across over back");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "give given gave giving")) {
|
||||
end = Found(source, start, 0, "away back out up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "hand handed handing")) {
|
||||
end = Found(source, start, 0, "down in over");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "hold held holding")) {
|
||||
end = Found(source, start, 0, "back up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "keep kept keeping")) {
|
||||
end = Found(source, start, 0, "from up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "let letting")) {
|
||||
end = Found(source, start, 0, "down in");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "look looked looking")) {
|
||||
end = Found(source, start, 0, "over up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "make made making")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "mix mixed mixing")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "pass passed passing")) {
|
||||
end = Found(source, start, 0, "out up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "pay payed paying")) {
|
||||
end = Found(source, start, 0, "back");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "pick picked picking")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "point pointed pointing")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "put putting")) {
|
||||
end = Found(source, start, 0, "down off out together on");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "send sending")) {
|
||||
end = Found(source, start, 0, "back");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "set setting")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "sort sorted sorting")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "switch switched switching")) {
|
||||
end = Found(source, start, 0, "off on");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "take took taking")) {
|
||||
end = Found(source, start, 0, "apart back off out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "tear torn tearing")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "think thought thinking")) {
|
||||
end = Found(source, start, 0, "over");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "thrown threw thrown throwing")) {
|
||||
end = Found(source, start, 0, "away");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "turn turned turning")) {
|
||||
end = Found(source, start, 0, "down off on");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "try tried trying")) {
|
||||
end = Found(source, start, 0, "on out");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "use used using")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "warm warmed warming")) {
|
||||
end = Found(source, start, 0, "up");
|
||||
}
|
||||
else if (IsA(source, start, 0, 0, "work worked working")) {
|
||||
end = Found(source, start, 0, "out");
|
||||
}
|
||||
|
||||
// found range to label
|
||||
if (end != std::numeric_limits<size_t>::max() &&
|
||||
end > start + 1) {
|
||||
bool add = true;
|
||||
if (revision == 1 && Exist(source,
|
||||
start + 1,
|
||||
end - 1,
|
||||
1,
|
||||
"VB VBD VBG VBN VBP VBZ")) {
|
||||
// there's a verb in between
|
||||
add = false;
|
||||
}
|
||||
|
||||
if (add) {
|
||||
Range range(start + 1, end - 1, "reorder-label");
|
||||
ranges.push_back(range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OutputWithLabels(source, ranges, out);
|
||||
}
|
||||
|
||||
bool Exist(const Phrase &source, int start, int end, int factor, const std::string &str)
|
||||
{
|
||||
vector<string> soughts = Moses::Tokenize(str, " ");
|
||||
for (size_t i = start; i <= end; ++i) {
|
||||
const Word &word = source[i];
|
||||
bool found = Found(word, factor, soughts);
|
||||
if (found) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t Found(const Phrase &source, int pos, int factor, const std::string &str)
|
||||
{
|
||||
const size_t MAX_RANGE = 10;
|
||||
|
||||
vector<string> soughts = Moses::Tokenize(str, " ");
|
||||
vector<string> puncts = Moses::Tokenize(". : , ;", " ");
|
||||
|
||||
|
||||
size_t maxEnd = std::min(source.size(), (size_t) pos + MAX_RANGE);
|
||||
for (size_t i = pos + 1; i < maxEnd; ++i) {
|
||||
const Word &word = source[i];
|
||||
bool found;
|
||||
|
||||
found = Found(word, factor, puncts);
|
||||
if (found) {
|
||||
return std::numeric_limits<size_t>::max();
|
||||
}
|
||||
|
||||
found = Found(word, factor, soughts);
|
||||
if (found) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return std::numeric_limits<size_t>::max();
|
||||
}
|
||||
|
||||
|
||||
bool Found(const Word &word, int factor, const vector<string> &soughts)
|
||||
{
|
||||
const string &element = word[factor];
|
||||
for (size_t i = 0; i < soughts.size(); ++i) {
|
||||
const string &sought = soughts[i];
|
||||
bool found = (element == sought);
|
||||
if (found) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
@ -1,11 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "Main.h"
|
||||
|
||||
// roll your own identification of phrasal verbs
|
||||
void EnPhrasalVerb(const Phrase &source, int revision, std::ostream &out);
|
||||
|
||||
bool Exist(const Phrase &source, int start, int end, int factor, const std::string &str);
|
||||
size_t Found(const Phrase &source, int pos, int factor, const std::string &str);
|
||||
bool Found(const Word &word, int factor, const std::vector<std::string> &soughts);
|
||||
|
@ -1,29 +0,0 @@
|
||||
#include "LabelByInitialLetter.h"
|
||||
#include "Main.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void LabelByInitialLetter(const Phrase &source, std::ostream &out)
|
||||
{
|
||||
Ranges ranges;
|
||||
|
||||
for (int start = 0; start < source.size(); ++start) {
|
||||
const string &startWord = source[start][0];
|
||||
string startChar = startWord.substr(0,1);
|
||||
|
||||
for (int end = start + 1; end < source.size(); ++end) {
|
||||
const string &endWord = source[end][0];
|
||||
string endChar = endWord.substr(0,1);
|
||||
|
||||
if (startChar == endChar) {
|
||||
Range range(start, end, startChar + "-label");
|
||||
ranges.push_back(range);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
OutputWithLabels(source, ranges, out);
|
||||
|
||||
}
|
||||
|
||||
|
@ -1,6 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include "Main.h"
|
||||
|
||||
void LabelByInitialLetter(const Phrase &source, std::ostream &out);
|
||||
|
@ -1,195 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <boost/program_options.hpp>
|
||||
#include "moses/Util.h"
|
||||
#include "Main.h"
|
||||
#include "DeEn.h"
|
||||
#include "EnPhrasalVerb.h"
|
||||
#include "EnOpenNLPChunker.h"
|
||||
#include "LabelByInitialLetter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool g_debug = false;
|
||||
|
||||
Phrase Tokenize(const string &line);
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
cerr << "Starting" << endl;
|
||||
|
||||
namespace po = boost::program_options;
|
||||
po::options_description desc("Options");
|
||||
desc.add_options()
|
||||
("help", "Print help messages")
|
||||
|
||||
("input,i", po::value<string>(), "Input file. Otherwise it will read from standard in")
|
||||
("output,o", po::value<string>(), "Output file. Otherwise it will print from standard out")
|
||||
|
||||
("source-language,s", po::value<string>()->required(), "Source Language")
|
||||
("target-language,t", po::value<string>()->required(), "Target Language")
|
||||
("revision,r", po::value<int>()->default_value(0), "Revision")
|
||||
("filter", po::value<string>(), "Only use labels from this comma-separated list")
|
||||
|
||||
("opennlp", po::value<string>()->default_value(""), "Path to Apache OpenNLP toolkit")
|
||||
|
||||
;
|
||||
|
||||
po::variables_map vm;
|
||||
try
|
||||
{
|
||||
po::store(po::parse_command_line(argc, argv, desc),
|
||||
vm); // can throw
|
||||
|
||||
/** --help option
|
||||
*/
|
||||
if ( vm.count("help") )
|
||||
{
|
||||
std::cout << "Basic Command Line Parameter App" << std::endl
|
||||
<< desc << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
po::notify(vm); // throws on error, so do after help in case
|
||||
// there are any problems
|
||||
}
|
||||
catch(po::error& e)
|
||||
{
|
||||
std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
|
||||
std::cerr << desc << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
istream *inStrm = &cin;
|
||||
if (vm.count("input")) {
|
||||
string inStr = vm["input"].as<string>();
|
||||
cerr << "inStr=" << inStr << endl;
|
||||
ifstream *inFile = new ifstream(inStr.c_str());
|
||||
inStrm = inFile;
|
||||
}
|
||||
|
||||
ostream *outStrm = &cout;
|
||||
if (vm.count("output")) {
|
||||
string outStr = vm["output"].as<string>();
|
||||
cerr << "outStr=" << outStr << endl;
|
||||
ostream *outFile = new ofstream(outStr.c_str());
|
||||
outStrm = outFile;
|
||||
}
|
||||
|
||||
vector<string> filterList;
|
||||
if (vm.count("filter")) {
|
||||
string filter = vm["filter"].as<string>();
|
||||
Moses::Tokenize(filterList, filter, ",");
|
||||
}
|
||||
|
||||
string sourceLang = vm["source-language"].as<string>();
|
||||
string targetLang = vm["target-language"].as<string>();
|
||||
int revision = vm["revision"].as<int>();
|
||||
|
||||
cerr << sourceLang << " " << targetLang << " " << revision << endl;
|
||||
|
||||
if (sourceLang == "en" && revision == 2) {
|
||||
if (vm.count("opennlp") == 0) {
|
||||
throw "Need path to openNLP toolkit";
|
||||
}
|
||||
|
||||
string openNLPPath = vm["opennlp"].as<string>();
|
||||
EnOpenNLPChunker chunker(openNLPPath);
|
||||
chunker.Process(*inStrm, *outStrm, filterList);
|
||||
}
|
||||
else {
|
||||
// process line-by-line
|
||||
string line;
|
||||
size_t lineNum = 1;
|
||||
|
||||
while (getline(*inStrm, line)) {
|
||||
//cerr << lineNum << ":" << line << endl;
|
||||
if (lineNum % 1000 == 0) {
|
||||
cerr << lineNum << " ";
|
||||
}
|
||||
|
||||
Phrase source = Tokenize(line);
|
||||
|
||||
if (revision == 600 ) {
|
||||
LabelByInitialLetter(source, *outStrm);
|
||||
}
|
||||
else if (sourceLang == "de" && targetLang == "en") {
|
||||
LabelDeEn(source, *outStrm);
|
||||
}
|
||||
else if (sourceLang == "en") {
|
||||
if (revision == 0 || revision == 1) {
|
||||
EnPhrasalVerb(source, revision, *outStrm);
|
||||
}
|
||||
else if (revision == 2) {
|
||||
string openNLPPath = vm["opennlp-path"].as<string>();
|
||||
EnOpenNLPChunker chunker(openNLPPath);
|
||||
}
|
||||
}
|
||||
|
||||
++lineNum;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
cerr << "Finished" << endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
// Converts one input line into a Phrase: the line is split into tokens with
// Moses::Tokenize's default delimiters, and each token is split again on "|"
// to give that word's factors.
Phrase Tokenize(const string &line)
{
  const vector<string> tokens = Moses::Tokenize(line);

  Phrase phrase;
  for (vector<string>::const_iterator tok = tokens.begin(); tok != tokens.end(); ++tok) {
    Word factors = Moses::Tokenize(*tok, "|");
    phrase.push_back(factors);
  }

  return phrase;
}
|
||||
|
||||
bool IsA(const Phrase &source, int pos, int offset, int factor, const string &str)
|
||||
{
|
||||
pos += offset;
|
||||
if (pos >= source.size() || pos < 0) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const string &word = source[pos][factor];
|
||||
vector<string> soughts = Moses::Tokenize(str, " ");
|
||||
for (int i = 0; i < soughts.size(); ++i) {
|
||||
string &sought = soughts[i];
|
||||
bool found = (word == sought);
|
||||
if (found) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
void OutputWithLabels(const Phrase &source, const Ranges ranges, ostream &out)
|
||||
{
|
||||
// output sentence, with labels
|
||||
for (int pos = 0; pos < source.size(); ++pos) {
|
||||
// output beginning of label
|
||||
for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
|
||||
const Range &range = *iter;
|
||||
if (range.range.first == pos) {
|
||||
out << "<tree label=\"" + range.label + "\"> ";
|
||||
}
|
||||
}
|
||||
|
||||
const Word &word = source[pos];
|
||||
out << word[0] << " ";
|
||||
|
||||
for (Ranges::const_iterator iter = ranges.begin(); iter != ranges.end(); ++iter) {
|
||||
const Range &range = *iter;
|
||||
if (range.range.second == pos) {
|
||||
out << "</tree> ";
|
||||
}
|
||||
}
|
||||
}
|
||||
out << endl;
|
||||
|
||||
}
|
@ -1,27 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <list>
|
||||
|
||||
typedef std::vector<std::string> Word;
|
||||
typedef std::vector<Word> Phrase;
|
||||
|
||||
// A labelled span of word positions.
struct Range {
  // Stores (start, end) as the position pair and l as the label text.
  Range(int start,int end, const std::string &l)
    : range(std::pair<int,int>(start, end))
    , label(l)
  {
  }

  // first = start position, second = end position
  std::pair<int,int> range;
  // label text attached to the span
  std::string label;
};
|
||||
|
||||
typedef std::list<Range> Ranges;
|
||||
|
||||
bool IsA(const Phrase &source, int pos, int offset, int factor, const std::string &str);
|
||||
void OutputWithLabels(const Phrase &source, const Ranges ranges, std::ostream &out);
|
||||
|
||||
|
@ -1,14 +0,0 @@
|
||||
# Builds the manual-label executable from its object files.

# all and clean name actions, not files; declaring them phony keeps a stray
# file called "all" or "clean" from masking the rule.
.PHONY: all clean

all: manual-label

clean:
	rm -f *.o manual-label

# Suffix rule: compile each .cpp into the matching .o
.cpp.o:
	g++ -I../../../boost/include -I../../../ -O3 -g -c $<

OBJECTS = DeEn.o EnOpenNLPChunker.o EnPhrasalVerb.o Main.o LabelByInitialLetter.o

manual-label: $(OBJECTS)
	g++ $(OBJECTS) -L../../../boost/lib64 -lz -lboost_program_options-mt -o manual-label
|
||||
|
||||
|
@ -1,131 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="manual-label" InternalType="Console">
|
||||
<Plugins>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
"enabled": false,
|
||||
"buildDirectory": "build",
|
||||
"sourceDirectory": "$(ProjectPath)",
|
||||
"generator": "",
|
||||
"buildType": "",
|
||||
"arguments": [],
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="manual-label">
|
||||
<File Name="DeEn.cpp"/>
|
||||
<File Name="DeEn.h"/>
|
||||
<File Name="EnOpenNLPChunker.cpp"/>
|
||||
<File Name="EnOpenNLPChunker.h"/>
|
||||
<File Name="EnPhrasalVerb.cpp"/>
|
||||
<File Name="EnPhrasalVerb.h"/>
|
||||
<File Name="LabelByInitialLetter.cpp"/>
|
||||
<File Name="LabelByInitialLetter.h"/>
|
||||
<File Name="Main.cpp"/>
|
||||
<File Name="Main.h"/>
|
||||
</VirtualDirectory>
|
||||
<Settings Type="Executable">
|
||||
<GlobalSettings>
|
||||
<Compiler Options="" C_Options="" Assembler="">
|
||||
<IncludePath Value="."/>
|
||||
</Compiler>
|
||||
<Linker Options="">
|
||||
<LibraryPath Value="."/>
|
||||
</Linker>
|
||||
<ResourceCompiler Options=""/>
|
||||
</GlobalSettings>
|
||||
<Configuration Name="Debug" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder"/>
|
||||
<IncludePath Value="/Users/hieu/workspace/github/mosesdecoder/boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
|
||||
<Library Value="boost_program_options"/>
|
||||
<Library Value="boost_filesystem"/>
|
||||
<Library Value="boost_system"/>
|
||||
</Linker>
|
||||
<ResourceCompiler Options="" Required="no"/>
|
||||
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
|
||||
<Environment EnvVarSetName="&lt;Use Defaults&gt;" DbgSetName="&lt;Use Defaults&gt;">
|
||||
<![CDATA[]]>
|
||||
</Environment>
|
||||
<Debugger IsRemote="no" RemoteHostName="" RemoteHostPort="" DebuggerPath="" IsExtended="no">
|
||||
<DebuggerSearchPaths/>
|
||||
<PostConnectCommands/>
|
||||
<StartupCommands/>
|
||||
</Debugger>
|
||||
<PreBuild/>
|
||||
<PostBuild/>
|
||||
<CustomBuild Enabled="no">
|
||||
<RebuildCommand/>
|
||||
<CleanCommand/>
|
||||
<BuildCommand/>
|
||||
<PreprocessFileCommand/>
|
||||
<SingleFileCommand/>
|
||||
<MakefileGenerationCommand/>
|
||||
<ThirdPartyToolName>None</ThirdPartyToolName>
|
||||
<WorkingDirectory/>
|
||||
</CustomBuild>
|
||||
<AdditionalRules>
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
<SearchPaths/>
|
||||
</Completion>
|
||||
</Configuration>
|
||||
<Configuration Name="Release" CompilerType="GCC" DebuggerType="LLDB Debugger" Type="Executable" BuildCmpWithGlobalSettings="append" BuildLnkWithGlobalSettings="append" BuildResWithGlobalSettings="append">
|
||||
<Compiler Options="-O2;-Wall" C_Options="-O2;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<Preprocessor Value="NDEBUG"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes"/>
|
||||
<ResourceCompiler Options="" Required="no"/>
|
||||
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Release" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
|
||||
<Environment EnvVarSetName="&lt;Use Defaults&gt;" DbgSetName="&lt;Use Defaults&gt;">
|
||||
<![CDATA[]]>
|
||||
</Environment>
|
||||
<Debugger IsRemote="no" RemoteHostName="" RemoteHostPort="" DebuggerPath="" IsExtended="no">
|
||||
<DebuggerSearchPaths/>
|
||||
<PostConnectCommands/>
|
||||
<StartupCommands/>
|
||||
</Debugger>
|
||||
<PreBuild/>
|
||||
<PostBuild/>
|
||||
<CustomBuild Enabled="no">
|
||||
<RebuildCommand/>
|
||||
<CleanCommand/>
|
||||
<BuildCommand/>
|
||||
<PreprocessFileCommand/>
|
||||
<SingleFileCommand/>
|
||||
<MakefileGenerationCommand/>
|
||||
<ThirdPartyToolName>None</ThirdPartyToolName>
|
||||
<WorkingDirectory/>
|
||||
</CustomBuild>
|
||||
<AdditionalRules>
|
||||
<CustomPostBuild/>
|
||||
<CustomPreBuild/>
|
||||
</AdditionalRules>
|
||||
<Completion EnableCpp11="no">
|
||||
<ClangCmpFlagsC/>
|
||||
<ClangCmpFlags/>
|
||||
<ClangPP/>
|
||||
<SearchPaths/>
|
||||
</Completion>
|
||||
</Configuration>
|
||||
</Settings>
|
||||
<Dependencies Name="Debug"/>
|
||||
<Dependencies Name="Release"/>
|
||||
</CodeLite_Project>
|
@ -11,11 +11,11 @@
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
@ -79,12 +79,12 @@
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1911984684" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
|
@ -220,6 +220,16 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/ConfusionNet.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ContextParameters.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ContextParameters.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/moses/parameters/ContextParameters.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>DecodeGraph.cpp</name>
|
||||
<type>1</type>
|
||||
|
Loading…
Reference in New Issue
Block a user