This commit is contained in:
Hieu Hoang 2012-07-17 11:30:39 +01:00
commit 759aaf4e9d
44 changed files with 668 additions and 14343 deletions

3
NOTICE Normal file
View File

@ -0,0 +1,3 @@
This code includes data from Daniel Naber's Language Tools (Czech abbreviations).
This code includes data from the Czech Wiktionary (also Czech abbreviations).

View File

@ -47,7 +47,7 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@ -55,7 +55,7 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>zdll.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
@ -68,14 +68,14 @@
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>zdll.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>

View File

@ -77,7 +77,7 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
@ -91,7 +91,7 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />

View File

@ -1,22 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>OnDiskPt.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>1</integer>
</dict>
</dict>
<key>SuppressBuildableAutocreation</key>
<dict>
<key>D2AAC045055464E500DB518D</key>
<dict>
<key>primary</key>
<true/>
</dict>
</dict>
</dict>
</plist>

View File

@ -28,14 +28,12 @@
<None Include="..\..\lm\max_order.hh" />
<None Include="..\..\lm\model.hh" />
<None Include="..\..\lm\model_type.hh" />
<None Include="..\..\lm\ngram_query.hh" />
<None Include="..\..\lm\quantize.hh" />
<None Include="..\..\lm\README" />
<None Include="..\..\lm\read_arpa.hh" />
<None Include="..\..\lm\return.hh" />
<None Include="..\..\lm\search_hashed.hh" />
<None Include="..\..\lm\search_trie.hh" />
<None Include="..\..\lm\state.hh" />
<None Include="..\..\lm\test.arpa" />
<None Include="..\..\lm\test.sh" />
<None Include="..\..\lm\test_nounk.arpa" />
@ -49,6 +47,8 @@
<None Include="..\..\lm\word_index.hh" />
<None Include="..\..\util\bit_packing.hh" />
<None Include="..\..\util\check.hh" />
<None Include="..\..\util\COPYING" />
<None Include="..\..\util\COPYING.LESSER" />
<None Include="..\..\util\ersatz_progress.hh" />
<None Include="..\..\util\exception.hh" />
<None Include="..\..\util\file.hh" />
@ -68,7 +68,6 @@
<None Include="..\..\util\sorted_uniform.hh" />
<None Include="..\..\util\string_piece.hh" />
<None Include="..\..\util\tokenize_piece.hh" />
<None Include="..\..\util\usage.hh" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\lm\bhiksha.cc" />
@ -78,7 +77,6 @@
<ClCompile Include="..\..\lm\left_test.cc" />
<ClCompile Include="..\..\lm\lm_exception.cc" />
<ClCompile Include="..\..\lm\model.cc" />
<ClCompile Include="..\..\lm\model_test.cc" />
<ClCompile Include="..\..\lm\ngram_query.cc" />
<ClCompile Include="..\..\lm\quantize.cc" />
<ClCompile Include="..\..\lm\read_arpa.cc" />
@ -97,7 +95,6 @@
<ClCompile Include="..\..\util\getopt.c" />
<ClCompile Include="..\..\util\mmap.cc" />
<ClCompile Include="..\..\util\murmur_hash.cc" />
<ClCompile Include="..\..\util\usage.cc" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}</ProjectGuid>
@ -133,8 +130,8 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)\..\..\lm\msinttypes;C:\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
@ -149,8 +146,9 @@
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)\..\..\lm\msinttypes;C:\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>

View File

@ -1,7 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<Workspace
version = "1.0">
<FileRef
location = "self:moses-chart-cmd.xcodeproj">
</FileRef>
</Workspace>

View File

@ -1,72 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<Scheme
version = "1.3">
<BuildAction
parallelizeBuildables = "YES"
buildImplicitDependencies = "YES">
<BuildActionEntries>
<BuildActionEntry
buildForTesting = "YES"
buildForRunning = "YES"
buildForProfiling = "YES"
buildForArchiving = "YES"
buildForAnalyzing = "YES">
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8DD76F620486A84900D96B5E"
BuildableName = "moses-chart-cmd"
BlueprintName = "moses-chart-cmd"
ReferencedContainer = "container:moses-chart-cmd.xcodeproj">
</BuildableReference>
</BuildActionEntry>
</BuildActionEntries>
</BuildAction>
<TestAction
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
shouldUseLaunchSchemeArgsEnv = "YES"
buildConfiguration = "Debug">
<Testables>
</Testables>
</TestAction>
<LaunchAction
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
launchStyle = "0"
useCustomWorkingDirectory = "NO"
buildConfiguration = "Debug">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8DD76F620486A84900D96B5E"
BuildableName = "moses-chart-cmd"
BlueprintName = "moses-chart-cmd"
ReferencedContainer = "container:moses-chart-cmd.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
<AdditionalOptions>
</AdditionalOptions>
</LaunchAction>
<ProfileAction
shouldUseLaunchSchemeArgsEnv = "YES"
savedToolIdentifier = ""
useCustomWorkingDirectory = "NO"
buildConfiguration = "Release">
<BuildableProductRunnable>
<BuildableReference
BuildableIdentifier = "primary"
BlueprintIdentifier = "8DD76F620486A84900D96B5E"
BuildableName = "moses-chart-cmd"
BlueprintName = "moses-chart-cmd"
ReferencedContainer = "container:moses-chart-cmd.xcodeproj">
</BuildableReference>
</BuildableProductRunnable>
</ProfileAction>
<AnalyzeAction
buildConfiguration = "Debug">
</AnalyzeAction>
<ArchiveAction
buildConfiguration = "Release"
revealArchiveInOrganizer = "YES">
</ArchiveAction>
</Scheme>

View File

@ -1,22 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>moses-chart-cmd.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>4</integer>
</dict>
</dict>
<key>SuppressBuildableAutocreation</key>
<dict>
<key>8DD76F620486A84900D96B5E</key>
<dict>
<key>primary</key>
<true/>
</dict>
</dict>
</dict>
</plist>

View File

@ -47,8 +47,8 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
@ -58,19 +58,20 @@
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
</ClCompile>
<Link>
<AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<RandomizedBaseAddress>false</RandomizedBaseAddress>
<DataExecutionPrevention>
</DataExecutionPrevention>
<TargetMachine>MachineX86</TargetMachine>
<AdditionalLibraryDirectories>C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
@ -78,7 +79,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>
@ -87,6 +88,7 @@
<DataExecutionPrevention>
</DataExecutionPrevention>
<TargetMachine>MachineX86</TargetMachine>
<AdditionalLibraryDirectories>C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>

View File

@ -39,9 +39,7 @@ Global
{E2233DB1-5592-46FE-9420-E529420612FA}.Release|Win32.ActiveCfg = Release|Win32
{E2233DB1-5592-46FE-9420-E529420612FA}.Release|Win32.Build.0 = Release|Win32
{88AE90C9-72D2-42ED-8389-770ACDCD4308}.Debug|Win32.ActiveCfg = Debug|Win32
{88AE90C9-72D2-42ED-8389-770ACDCD4308}.Debug|Win32.Build.0 = Debug|Win32
{88AE90C9-72D2-42ED-8389-770ACDCD4308}.Release|Win32.ActiveCfg = Release|Win32
{88AE90C9-72D2-42ED-8389-770ACDCD4308}.Release|Win32.Build.0 = Release|Win32
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Debug|Win32.ActiveCfg = Debug|Win32
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Debug|Win32.Build.0 = Debug|Win32
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Release|Win32.ActiveCfg = Release|Win32

View File

@ -22,7 +22,6 @@
<ClInclude Include="..\..\moses\src\ChartHypothesis.h" />
<ClInclude Include="..\..\moses\src\ChartHypothesisCollection.h" />
<ClInclude Include="..\..\moses\src\ChartManager.h" />
<ClInclude Include="..\..\moses\src\ChartRuleLookupManager.h" />
<ClInclude Include="..\..\moses\src\ChartTranslationOption.h" />
<ClInclude Include="..\..\moses\src\ChartTranslationOptionCollection.h" />
<ClInclude Include="..\..\moses\src\ChartTranslationOptionList.h" />
@ -32,6 +31,12 @@
<ClInclude Include="..\..\moses\src\ChartTrellisPath.h" />
<ClInclude Include="..\..\moses\src\ChartTrellisPathList.h" />
<ClInclude Include="..\..\moses\src\ConfusionNet.h" />
<ClInclude Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerCYKPlus.h" />
<ClInclude Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerMemory.h" />
<ClInclude Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerOnDisk.h" />
<ClInclude Include="..\..\moses\src\CYKPlusParser\DotChart.h" />
<ClInclude Include="..\..\moses\src\CYKPlusParser\DotChartInMemory.h" />
<ClInclude Include="..\..\moses\src\CYKPlusParser\DotChartOnDisk.h" />
<ClInclude Include="..\..\moses\src\DecodeFeature.h" />
<ClInclude Include="..\..\moses\src\DecodeGraph.h" />
<ClInclude Include="..\..\moses\src\DecodeStep.h" />
@ -39,6 +44,11 @@
<ClInclude Include="..\..\moses\src\DecodeStepTranslation.h" />
<ClInclude Include="..\..\moses\src\Dictionary.h" />
<ClInclude Include="..\..\moses\src\DummyScoreProducers.h" />
<ClInclude Include="..\..\moses\src\DynSAInclude\file.h" />
<ClInclude Include="..\..\moses\src\DynSAInclude\FileHandler.h" />
<ClInclude Include="..\..\moses\src\DynSAInclude\onlineRLM.h" />
<ClInclude Include="..\..\moses\src\DynSAInclude\quantizer.h" />
<ClInclude Include="..\..\moses\src\DynSAInclude\vocab.h" />
<ClInclude Include="..\..\moses\src\DynSuffixArray.h" />
<ClInclude Include="..\..\moses\src\Factor.h" />
<ClInclude Include="..\..\moses\src\FactorCollection.h" />
@ -68,6 +78,7 @@
<ClInclude Include="..\..\moses\src\LM\Joint.h" />
<ClInclude Include="..\..\moses\src\LM\Ken.h" />
<ClInclude Include="..\..\moses\src\LM\MultiFactor.h" />
<ClInclude Include="..\..\moses\src\LM\ORLM.h" />
<ClInclude Include="..\..\moses\src\LM\SingleFactor.h" />
<ClInclude Include="..\..\moses\src\LVoc.h" />
<ClInclude Include="..\..\moses\src\Manager.h" />
@ -97,13 +108,29 @@
<ClInclude Include="..\..\moses\src\RuleTable\LoaderFactory.h" />
<ClInclude Include="..\..\moses\src\RuleTable\LoaderHiero.h" />
<ClInclude Include="..\..\moses\src\RuleTable\LoaderStandard.h" />
<ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionaryALSuffixArray.h" />
<ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionaryNodeSCFG.h" />
<ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionaryOnDisk.h" />
<ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionarySCFG.h" />
<ClInclude Include="..\..\moses\src\RuleTable\Trie.h" />
<ClInclude Include="..\..\moses\src\RuleTable\UTrie.h" />
<ClInclude Include="..\..\moses\src\RuleTable\UTrieNode.h" />
<ClInclude Include="..\..\moses\src\Scope3Parser\ApplicableRuleTrie.h" />
<ClInclude Include="..\..\moses\src\Scope3Parser\IntermediateVarSpanNode.h" />
<ClInclude Include="..\..\moses\src\Scope3Parser\Parser.h" />
<ClInclude Include="..\..\moses\src\Scope3Parser\SentenceMap.h" />
<ClInclude Include="..\..\moses\src\Scope3Parser\StackLattice.h" />
<ClInclude Include="..\..\moses\src\Scope3Parser\StackLatticeBuilder.h" />
<ClInclude Include="..\..\moses\src\Scope3Parser\StackLatticeSearcher.h" />
<ClInclude Include="..\..\moses\src\Scope3Parser\VarSpanNode.h" />
<ClInclude Include="..\..\moses\src\Scope3Parser\VarSpanTrieBuilder.h" />
<ClInclude Include="..\..\moses\src\ScoreComponentCollection.h" />
<ClInclude Include="..\..\moses\src\ScoreIndexManager.h" />
<ClInclude Include="..\..\moses\src\ScoreProducer.h" />
<ClInclude Include="..\..\moses\src\Search.h" />
<ClInclude Include="..\..\moses\src\SearchCubePruning.h" />
<ClInclude Include="..\..\moses\src\SearchNormal.h" />
<ClInclude Include="..\..\moses\src\SearchNormalBatch.h" />
<ClInclude Include="..\..\moses\src\Sentence.h" />
<ClInclude Include="..\..\moses\src\SentenceStats.h" />
<ClInclude Include="..\..\moses\src\SquareMatrix.h" />
@ -149,6 +176,11 @@
<ClCompile Include="..\..\moses\src\ChartTrellisNode.cpp" />
<ClCompile Include="..\..\moses\src\ChartTrellisPath.cpp" />
<ClCompile Include="..\..\moses\src\ConfusionNet.cpp" />
<ClCompile Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerCYKPlus.cpp" />
<ClCompile Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerMemory.cpp" />
<ClCompile Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerOnDisk.cpp" />
<ClCompile Include="..\..\moses\src\CYKPlusParser\DotChartInMemory.cpp" />
<ClCompile Include="..\..\moses\src\CYKPlusParser\DotChartOnDisk.cpp" />
<ClCompile Include="..\..\moses\src\DecodeFeature.cpp" />
<ClCompile Include="..\..\moses\src\DecodeGraph.cpp" />
<ClCompile Include="..\..\moses\src\DecodeStep.cpp" />
@ -156,6 +188,8 @@
<ClCompile Include="..\..\moses\src\DecodeStepTranslation.cpp" />
<ClCompile Include="..\..\moses\src\Dictionary.cpp" />
<ClCompile Include="..\..\moses\src\DummyScoreProducers.cpp" />
<ClCompile Include="..\..\moses\src\DynSAInclude\FileHandler.cpp" />
<ClCompile Include="..\..\moses\src\DynSAInclude\vocab.cpp" />
<ClCompile Include="..\..\moses\src\DynSuffixArray.cpp" />
<ClCompile Include="..\..\moses\src\Factor.cpp" />
<ClCompile Include="..\..\moses\src\FactorCollection.cpp" />
@ -183,6 +217,7 @@
<ClCompile Include="..\..\moses\src\LM\Joint.cpp" />
<ClCompile Include="..\..\moses\src\LM\Ken.cpp" />
<ClCompile Include="..\..\moses\src\LM\MultiFactor.cpp" />
<ClCompile Include="..\..\moses\src\LM\ORLM.cpp" />
<ClCompile Include="..\..\moses\src\LM\SingleFactor.cpp" />
<ClCompile Include="..\..\moses\src\LVoc.cpp" />
<ClCompile Include="..\..\moses\src\Manager.cpp" />
@ -207,13 +242,24 @@
<ClCompile Include="..\..\moses\src\RuleTable\LoaderFactory.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\LoaderHiero.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\LoaderStandard.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionaryALSuffixArray.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionaryNodeSCFG.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionaryOnDisk.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionarySCFG.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\Trie.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\UTrie.cpp" />
<ClCompile Include="..\..\moses\src\RuleTable\UTrieNode.cpp" />
<ClCompile Include="..\..\moses\src\Scope3Parser\ApplicableRuleTrie.cpp" />
<ClCompile Include="..\..\moses\src\Scope3Parser\Parser.cpp" />
<ClCompile Include="..\..\moses\src\Scope3Parser\StackLatticeBuilder.cpp" />
<ClCompile Include="..\..\moses\src\Scope3Parser\VarSpanTrieBuilder.cpp" />
<ClCompile Include="..\..\moses\src\ScoreComponentCollection.cpp" />
<ClCompile Include="..\..\moses\src\ScoreIndexManager.cpp" />
<ClCompile Include="..\..\moses\src\ScoreProducer.cpp" />
<ClCompile Include="..\..\moses\src\Search.cpp" />
<ClCompile Include="..\..\moses\src\SearchCubePruning.cpp" />
<ClCompile Include="..\..\moses\src\SearchNormal.cpp" />
<ClCompile Include="..\..\moses\src\SearchNormalBatch.cpp" />
<ClCompile Include="..\..\moses\src\Sentence.cpp" />
<ClCompile Include="..\..\moses\src\SentenceStats.cpp" />
<ClCompile Include="..\..\moses\src\SquareMatrix.cpp" />
@ -239,6 +285,9 @@
<ClCompile Include="..\..\moses\src\WordsRange.cpp" />
<ClCompile Include="..\..\moses\src\XmlOption.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="..\..\util\file.hh" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{8122157A-0DE5-44FF-8E5B-024ED6ACE7AF}</ProjectGuid>
<RootNamespace>moses</RootNamespace>
@ -270,17 +319,17 @@
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\GnuWin32\include;C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\GnuWin32\include;C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>$(SolutionDir)\..\..\lm\msinttypes;C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
@ -295,9 +344,9 @@
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<AdditionalIncludeDirectories>$(SolutionDir)\..\..\lm\msinttypes;C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>

View File

@ -7,6 +7,8 @@
objects = {
/* Begin PBXBuildFile section */
1E1D824015AC29BB00FE42E9 /* FileHandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */; };
1E1D824115AC29BB00FE42E9 /* FileHandler.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E1D823F15AC29BB00FE42E9 /* FileHandler.h */; };
1E879EA715A346F90051F346 /* SearchNormalBatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */; };
1E879EA815A346F90051F346 /* SearchNormalBatch.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E879EA615A346F90051F346 /* SearchNormalBatch.h */; };
1EAC363514CDC79300DF97C3 /* Loader.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC362C14CDC79300DF97C3 /* Loader.h */; };
@ -72,8 +74,6 @@
1EC7378414B977AB00238410 /* DummyScoreProducers.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7361114B977AA00238410 /* DummyScoreProducers.cpp */; };
1EC7378514B977AB00238410 /* DummyScoreProducers.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361214B977AA00238410 /* DummyScoreProducers.h */; };
1EC7378614B977AB00238410 /* fdstream.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361414B977AA00238410 /* fdstream.h */; };
1EC7378714B977AB00238410 /* file.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7361514B977AA00238410 /* file.cpp */; };
1EC7378814B977AB00238410 /* file.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361614B977AA00238410 /* file.h */; };
1EC7378914B977AB00238410 /* hash.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361714B977AA00238410 /* hash.h */; };
1EC7378A14B977AB00238410 /* onlineRLM.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361914B977AA00238410 /* onlineRLM.h */; };
1EC7378B14B977AB00238410 /* params.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7361B14B977AA00238410 /* params.cpp */; };
@ -301,6 +301,8 @@
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FileHandler.cpp; sourceTree = "<group>"; };
1E1D823F15AC29BB00FE42E9 /* FileHandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FileHandler.h; sourceTree = "<group>"; };
1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SearchNormalBatch.cpp; path = ../../moses/src/SearchNormalBatch.cpp; sourceTree = "<group>"; };
1E879EA615A346F90051F346 /* SearchNormalBatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SearchNormalBatch.h; path = ../../moses/src/SearchNormalBatch.h; sourceTree = "<group>"; };
1EAC362C14CDC79300DF97C3 /* Loader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Loader.h; path = ../../moses/src/RuleTable/Loader.h; sourceTree = "<group>"; };
@ -366,8 +368,6 @@
1EC7361114B977AA00238410 /* DummyScoreProducers.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = DummyScoreProducers.cpp; path = ../../moses/src/DummyScoreProducers.cpp; sourceTree = "<group>"; };
1EC7361214B977AA00238410 /* DummyScoreProducers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DummyScoreProducers.h; path = ../../moses/src/DummyScoreProducers.h; sourceTree = "<group>"; };
1EC7361414B977AA00238410 /* fdstream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fdstream.h; sourceTree = "<group>"; };
1EC7361514B977AA00238410 /* file.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = file.cpp; sourceTree = "<group>"; };
1EC7361614B977AA00238410 /* file.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = file.h; sourceTree = "<group>"; };
1EC7361714B977AA00238410 /* hash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hash.h; sourceTree = "<group>"; };
1EC7361914B977AA00238410 /* onlineRLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = onlineRLM.h; sourceTree = "<group>"; };
1EC7361B14B977AA00238410 /* params.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = params.cpp; sourceTree = "<group>"; };
@ -878,9 +878,9 @@
1EC7361314B977AA00238410 /* DynSAInclude */ = {
isa = PBXGroup;
children = (
1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */,
1E1D823F15AC29BB00FE42E9 /* FileHandler.h */,
1EC7361414B977AA00238410 /* fdstream.h */,
1EC7361514B977AA00238410 /* file.cpp */,
1EC7361614B977AA00238410 /* file.h */,
1EC7361714B977AA00238410 /* hash.h */,
1EC7361914B977AA00238410 /* onlineRLM.h */,
1EC7361B14B977AA00238410 /* params.cpp */,
@ -1012,7 +1012,6 @@
1EC7377D14B977AB00238410 /* Dictionary.h in Headers */,
1EC7378514B977AB00238410 /* DummyScoreProducers.h in Headers */,
1EC7378614B977AB00238410 /* fdstream.h in Headers */,
1EC7378814B977AB00238410 /* file.h in Headers */,
1EC7378914B977AB00238410 /* hash.h in Headers */,
1EC7378A14B977AB00238410 /* onlineRLM.h in Headers */,
1EC7378C14B977AB00238410 /* params.h in Headers */,
@ -1143,6 +1142,7 @@
1EDA809314D19FBF003D2191 /* UTrieNode.h in Headers */,
1EF8F2C4159A61970047B613 /* HypoList.h in Headers */,
1E879EA815A346F90051F346 /* SearchNormalBatch.h in Headers */,
1E1D824115AC29BB00FE42E9 /* FileHandler.h in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@ -1222,7 +1222,6 @@
1EC7377A14B977AB00238410 /* DecodeStepTranslation.cpp in Sources */,
1EC7377C14B977AB00238410 /* Dictionary.cpp in Sources */,
1EC7378414B977AB00238410 /* DummyScoreProducers.cpp in Sources */,
1EC7378714B977AB00238410 /* file.cpp in Sources */,
1EC7378B14B977AB00238410 /* params.cpp in Sources */,
1EC7379314B977AB00238410 /* vocab.cpp in Sources */,
1EC7379514B977AB00238410 /* DynSuffixArray.cpp in Sources */,
@ -1328,6 +1327,7 @@
1EDA809014D19FBF003D2191 /* UTrie.cpp in Sources */,
1EDA809214D19FBF003D2191 /* UTrieNode.cpp in Sources */,
1E879EA715A346F90051F346 /* SearchNormalBatch.cpp in Sources */,
1E1D824015AC29BB00FE42E9 /* FileHandler.cpp in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};

View File

@ -1,22 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SchemeUserState</key>
<dict>
<key>moses.xcscheme</key>
<dict>
<key>orderHint</key>
<integer>2</integer>
</dict>
</dict>
<key>SuppressBuildableAutocreation</key>
<dict>
<key>D2AAC045055464E500DB518D</key>
<dict>
<key>primary</key>
<true/>
</dict>
</dict>
</dict>
</plist>

View File

@ -99,10 +99,15 @@ PTEntry::PTEntry(const std::string& str, int index) :
pos = nextPos + SEPARATOR.size();
nextPos = str.find(SEPARATOR, pos);
this->scores = str.substr(pos,nextPos-pos);
if (nextPos < str.size()) {
this->scores = str.substr(pos,nextPos-pos);
pos = nextPos + SEPARATOR.size();
this->extra = str.substr(pos);
pos = nextPos + SEPARATOR.size();
this->extra = str.substr(pos);
}
else {
this->scores = str.substr(pos,str.size()-pos);
}
int c = 0;
std::string::iterator i=scores.begin();

View File

@ -217,11 +217,11 @@ class Moses():
a, b, prob = line.split(b' ')
if side == 'e2f' and not e2f_filter or a in e2f_filter and b in e2f_filter[a]:
if side == 'e2f' and (not e2f_filter or a in e2f_filter and b in e2f_filter[a]):
self.word_pairs_e2f[a][b][i] = float(prob)
elif side == 'f2e' and not f2e_filter or a in f2e_filter and b in f2e_filter[a]:
elif side == 'f2e' and (not f2e_filter or a in f2e_filter and b in f2e_filter[a]):
self.word_pairs_f2e[a][b][i] = float(prob)

View File

@ -96,10 +96,10 @@ rule boost-lib ( name macro : deps * ) {
#versions of boost do not have -mt tagged versions of all libraries. Sadly,
#boost.jam does not handle this correctly.
if [ test_flags $(L-boost-search)" -lboost_"$(name)"-mt" ] {
lib inner_boost_$(name) : $(deps) : <threading>single $(boost-search) <name>boost_$(name) ;
lib inner_boost_$(name) : $(deps) : <threading>multi $(boost-search) <name>boost_$(name)-mt ;
lib inner_boost_$(name) : : <threading>single $(boost-search) <name>boost_$(name) : : <library>$(deps) ;
lib inner_boost_$(name) : : <threading>multi $(boost-search) <name>boost_$(name)-mt : : <library>$(deps) ;
} else {
lib inner_boost_$(name) : $(deps) : $(boost-search) <name>boost_$(name) ;
lib inner_boost_$(name) : : $(boost-search) <name>boost_$(name) : : <library>$(deps) ;
}
alias boost_$(name) : inner_boost_$(name) : $(boost-auto-shared) : : <link>shared:<define>BOOST_$(macro) $(boost-include) ;

View File

@ -123,7 +123,7 @@ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search,
return ret;
}
// i is the order of the backoff we're looking for.
unsigned char order_minus_2 = 0;
unsigned char order_minus_2 = start - 2;
for (const WordIndex *i = context_rbegin + start - 1; i < context_rend; ++i, ++order_minus_2) {
typename Search::MiddlePointer p(search_.LookupMiddle(order_minus_2, *i, node, independent_left, extend_left));
if (!p.Found()) break;

View File

@ -6,7 +6,7 @@
#include "lm/weights.hh"
#include "util/bit_packing.hh"
#include <inttypes.h>
#include <stdint.h>
namespace lm {
namespace ngram {

View File

@ -37,6 +37,7 @@ void usage()
cerr << "[--prev-scfile|-R] comma separated list of previous scorer data" << endl;
cerr << "[--factors|-f] list of factors passed to the scorer (e.g. 0|2)" << endl;
cerr << "[--filter|-l] filter command used to preprocess the sentences" << endl;
cerr << "[--allow-duplicates|-d] omit the duplicate removal step" << endl;
cerr << "[-v] verbose level" << endl;
cerr << "[--help|-h] print this message and exit" << endl;
exit(1);
@ -56,6 +57,7 @@ static struct option long_options[] = {
{"prev-ffile", required_argument, 0, 'E'},
{"verbose", required_argument, 0, 'v'},
{"help", no_argument, 0, 'h'},
{"allow-duplicates", no_argument, 0, 'd'},
{0, 0, 0, 0}
};
@ -72,6 +74,7 @@ struct ProgramOption {
string prevScoreDataFile;
string prevFeatureDataFile;
bool binmode;
bool allowDuplicates;
int verbosity;
ProgramOption()
@ -86,6 +89,7 @@ struct ProgramOption {
prevScoreDataFile(""),
prevFeatureDataFile(""),
binmode(false),
allowDuplicates(false),
verbosity(0) { }
};
@ -93,7 +97,7 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
int c;
int option_index;
while ((c = getopt_long(argc, argv, "s:r:f:l:n:S:F:R:E:v:hb", long_options, &option_index)) != -1) {
while ((c = getopt_long(argc, argv, "s:r:f:l:n:S:F:R:E:v:hbd", long_options, &option_index)) != -1) {
switch (c) {
case 's':
opt->scorerType = string(optarg);
@ -131,6 +135,9 @@ void ParseCommandOptions(int argc, char** argv, ProgramOption* opt) {
case 'v':
opt->verbosity = atoi(optarg);
break;
case 'd':
opt->allowDuplicates = true;
break;
default:
usage();
}
@ -224,7 +231,9 @@ int main(int argc, char** argv)
PrintUserTime("Nbest entries loaded and scored");
//ADDED_BY_TS
data.removeDuplicates();
if (!option.allowDuplicates) {
data.removeDuplicates();
}
//END_ADDED
data.save(option.featureDataFile, option.scoreDataFile, option.binmode);

View File

@ -1,7 +1,9 @@
// $Id$
// vim:tabstop=2
/***********************************************************************
K-best Batch MIRA for Moses
Copyright (C) 2012, National Research Council Canada / Conseil national
de recherches du Canada
***********************************************************************/
/**

View File

@ -262,13 +262,13 @@ BitmapContainer::BitmapContainer(const WordsBitmap &bitmap
BitmapContainer::~BitmapContainer()
{
// As we have created the square position objects we clean up now.
HypothesisQueueItem *item = NULL;
while (!m_queue.empty()) {
item = m_queue.top();
FREEHYPO(item->GetHypothesis());
delete item;
m_queue.pop();
HypothesisQueueItem *item = m_queue.top();
m_queue.pop();
FREEHYPO( item->GetHypothesis() );
delete item;
}
// Delete all edges.

View File

@ -1,4 +1,10 @@
#include "file.h"
#include "FileHandler.h"
#include <stdio.h>
#ifdef WIN32
#define popen(A, B) _popen(A, B)
#define pclose(A) _pclose(A)
#endif
namespace Moses
{
@ -17,7 +23,7 @@ const std::string FileHandler::kBzip2Command = "bzip2 -f";
const std::string FileHandler::kBunzip2Command = "bunzip2 -f";
FileHandler::FileHandler(const std::string & path, std::ios_base::openmode flags, bool /* checkExists */)
: std::fstream(NULL), path_(path), flags_(flags), buffer_(NULL), fp_(NULL)
: std::fstream((const char*) NULL), path_(path), flags_(flags), buffer_(NULL), fp_(NULL)
{
if( !(flags^(std::ios::in|std::ios::out)) ) {
fprintf(stderr, "ERROR: FileHandler does not support bidirectional files (%s).\n", path_.c_str());
@ -31,8 +37,10 @@ FileHandler::FileHandler(const std::string & path, std::ios_base::openmode flags
FileHandler::~FileHandler()
{
#ifndef NO_PIPES
if( fp_ != 0 )
pclose(fp_);
#endif
if( path_ != FileHandler::kStdInDescriptor &&
path_ != FileHandler::kStdOutDescriptor )
delete buffer_;
@ -45,7 +53,11 @@ fdstreambuf * FileHandler::openCompressedFile(const char * cmd)
//bool isInput = (flags_ & std::ios::in);
//open pipe to file with compression/decompression command
const char * p_type = (flags_ & std::ios::in ? "r" : "w");
#ifndef NO_PIPES
fp_ = popen(cmd, p_type);
#else
fp_ = NULL;
#endif
if( fp_ == NULL ) {
//fprintf(stderr, "ERROR:Failed to open compressed file at %s\n", path_.c_str());
perror("openCompressedFile: ");
@ -152,6 +164,7 @@ bool FileHandler::getCompressionCmds(const std::string & filepath, std::string &
bool FileHandler::reset()
{
#ifndef NO_PIPES
// move to beginning of file
if (fp_ != 0) {
//can't seek on a pipe so reopen
@ -162,6 +175,7 @@ bool FileHandler::reset()
//reinitialize
this->init(buffer_);
} else
#endif
buffer_->pubseekoff(0, std::ios_base::beg); //sets both get and put pointers to beginning of stream
return true;
}

View File

@ -18,7 +18,11 @@
#define INC_RANDLM_FILTER_H
#include <cmath>
#include "file.h"
#include "FileHandler.h"
#ifdef WIN32
// Fallback log2() for Windows builds (presumably because the toolchain in use
// does not provide one -- confirm). Base-2 logarithm via natural logs.
// The argument is parenthesised so that an expression argument such as
// log2(a / b) is evaluated as a whole before the conversion to double,
// instead of the cast binding to the first operand only.
#define log2(X) (log((double)(X))/log((double)2))
#endif
namespace randlm {
@ -40,7 +44,7 @@ namespace randlm {
// current implementation has following constraints
CHECK(cell_width_ > 0 && cell_width_ <= 64 && cell_width_ >= width);
// used for >> division
log_cell_width_ = static_cast<int>(floor(log(cell_width_)/log(2) + 0.000001));
log_cell_width_ = static_cast<int>(floor(log((double)cell_width_)/log((double)2) + 0.000001));
// size of underlying data in Ts
cells_ = ((addresses * width) + cell_width_ - 1) >> log_cell_width_;
// instantiate underlying data

View File

@ -5,7 +5,7 @@
#include <cmath>
#include "types.h"
#include "utils.h"
#include "file.h"
#include "FileHandler.h"
using namespace Moses;
typedef uint64_t P; // largest input range is 2^64

View File

@ -2,6 +2,7 @@
#define INC_DYNAMICLM_H
#include <algorithm>
#include <vector>
#include "perfectHash.h"
#include "RandLMCache.h"
#include "types.h"
@ -109,7 +110,7 @@ bool OnlineRLM<T>::insert(const std::vector<string>& ngram, const int value) {
template<typename T>
bool OnlineRLM<T>::update(const std::vector<string>& ngram, const int value) {
int len = ngram.size();
wordID_t wrdIDs[len];
std::vector<wordID_t> wrdIDs(len);
uint64_t index(this->cells_ + 1);
hpdEntry_t hpdItr;
vocab_->MakeOpen();
@ -118,14 +119,15 @@ bool OnlineRLM<T>::update(const std::vector<string>& ngram, const int value) {
// if updating, minimize false positives by pre-checking if context already in model
bool bIncluded(true);
if(value > 1 && len < (int)order_)
bIncluded = markPrefix(wrdIDs, ngram.size(), true); // mark context
bIncluded = markPrefix(&wrdIDs[0], ngram.size(), true); // mark context
if(bIncluded) { // if context found
bIncluded = PerfectHash<T>::update2(wrdIDs, len, value, hpdItr, index);
bIncluded = PerfectHash<T>::update2(&wrdIDs[0], len, value, hpdItr, index);
if(index < this->cells_) {
markQueried(index);
}
else if(hpdItr != this->dict_.end()) markQueried(hpdItr);
}
return bIncluded;
}
template<typename T>
@ -262,6 +264,7 @@ int OnlineRLM<T>::sbsqQuery(const wordID_t* IDs, const int len, int* codes,
if(val != -1) break; // if anything found
else --fnd; // else decrement found
}
return fnd;
}
@ -275,7 +278,7 @@ float OnlineRLM<T>::getProb(const wordID_t* ngram, int len,
if(!cache_->checkCacheNgram(ngram, len, &logprob, &context)) {
// get full prob and put in cache
int num_fnd(0), den_val(0);
int in[len]; // in[] keeps counts of increasing order numerator
int *in = new int[len]; // in[] keeps counts of increasing order numerator
for(int i = 0; i < len; ++i) in[i] = 0;
for(int i = len - 1; i >= 0; --i) {
if(ngram[i] == vocab_->GetkOOVWordID()) break; // no need to query if OOV
@ -324,10 +327,13 @@ float OnlineRLM<T>::getProb(const wordID_t* ngram, int len,
template<typename T>
const void* OnlineRLM<T>::getContext(const wordID_t* ngram, int len) {
int dummy(0);
float* addresses[len]; // only interested in addresses of cache
float* *addresses = new float*[len]; // only interested in addresses of cache
CHECK(cache_->getCache2(ngram, len, &addresses[0], &dummy) == len);
// return address of cache node
return (const void*)addresses[0];
float *addr0 = addresses[0];
free( addresses );
return (const void*)addr0;
}
template<typename T>

View File

@ -6,7 +6,7 @@
#include <set>
#include <vector>
#include "util/check.hh"
#include "file.h"
#include "FileHandler.h"
#include "utils.h"
#include "types.h"

View File

@ -9,6 +9,10 @@
static const float kFloatErr = 0.00001f;
#ifdef WIN32
// Fallback log2() for Windows builds (presumably because the toolchain in use
// does not provide one -- confirm). Base-2 logarithm via natural logs.
// The argument is parenthesised so that an expression argument such as
// log2(a / b) is evaluated as a whole before the conversion to double,
// instead of the cast binding to the first operand only.
#define log2(X) (log((double)(X))/log((double)2))
#endif
//! @todo ask abby2
class LogQtizer {
public:

View File

@ -7,10 +7,16 @@
#include <vector>
#include <typeinfo>
#include <stdint.h>
// Loop-header helpers: each expands to a for-statement header that walks
// container `c` with an iterator variable named `i`.
//   iterate(c, i)  - forward over an object or reference
//   piterate(c, i) - forward over a pointer-like handle (uses ->)
//   riterate(c, i) - reverse over an object or reference
// The container argument is parenthesised so that expressions such as `*ptr`
// can be passed. Note that `c` is evaluated again on every loop test.
#ifdef WIN32
// This branch spells the iterator type with decltype rather than the
// compiler-specific __typeof__ keyword used below.
#define iterate(c, i) for(decltype((c).begin()) i = (c).begin(); i != (c).end(); ++i)
#define piterate(c, i) for(decltype((c)->begin()) i = (c)->begin(); i != (c)->end(); ++i)
#define riterate(c, i) for(decltype((c).rbegin()) i = (c).rbegin(); i != (c).rend(); ++i)
#else
#define iterate(c, i) for(__typeof__((c).begin()) i = (c).begin(); i != (c).end(); ++i)
#define piterate(c, i) for(__typeof__((c)->begin()) i = (c)->begin(); i != (c)->end(); ++i)
#define riterate(c, i) for(__typeof__((c).rbegin()) i = (c).rbegin(); i != (c).rend(); ++i)
#endif
#define THREADED false
#define THREAD_MAX 2

View File

@ -4,7 +4,7 @@
#include <map>
#include <string>
#include "types.h"
#include "file.h"
#include "FileHandler.h"
#include "utils.h"
#include "../TypeDef.h"
#include "../Word.h"

View File

@ -20,6 +20,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <boost/version.hpp>
#include <boost/thread/locks.hpp>
#include <ostream>
#include <string>
#include "FactorCollection.h"

View File

@ -7,7 +7,7 @@
#include "LM/SingleFactor.h"
#include "DynSAInclude/onlineRLM.h"
//#include "multiOnlineRLM.h"
#include "DynSAInclude/file.h"
#include "DynSAInclude/FileHandler.h"
#include "DynSAInclude/vocab.h"
namespace Moses

View File

@ -355,7 +355,6 @@ bool LexicalReorderingTableTree::Create(std::istream& inFile,
size_t numTokens = 0;
size_t numKeyTokens = 0;
while(getline(inFile, line)) {
//TRACE_ERR(lnc<<":"<<line<<"\n");
++lnc;
if(0 == lnc % 10000) {
TRACE_ERR(".");
@ -465,6 +464,10 @@ bool LexicalReorderingTableTree::Create(std::istream& inFile,
}
cands.push_back(GenericCandidate(tgt_phrases, scores));
}
if (lnc == 0) {
TRACE_ERR("ERROR: empty lexicalised reordering file\n" << std::endl);
return false;
}
//flush remainders
cands.writeBin(ot);
cands.clear();

View File

@ -62,9 +62,9 @@ public:
std::map<int,std::string>::iterator iter;
while ((iter = m_outputs.find(m_nextOutput)) != m_outputs.end()) {
*m_outStream << iter->second << std::flush;
m_outputs.erase(iter);
++m_nextOutput;
std::map<int,std::string>::iterator debugIter = m_debugs.find(iter->first);
m_outputs.erase(iter);
if (debugIter != m_debugs.end()) {
*m_debugStream << debugIter->second << std::flush;
m_debugs.erase(debugIter);

View File

@ -249,15 +249,6 @@ public:
return s_instance;
}
/** delete current static instance and replace with another.
* Used by gui front end
*/
#ifdef WIN32
static void Reset() {
s_instance = StaticData();
}
#endif
//! Load data into static instance. This function is required as LoadData() is not const
static bool LoadDataStatic(Parameter *parameter) {
return s_instance.LoadData(parameter);

View File

@ -109,6 +109,7 @@ public:
#ifdef BOOST_HAS_PTHREADS
pthread_t tid = pthread_self();
#else
typedef void * pthread_t;
pthread_t tid = 0;
#endif
std::cerr << "Executing " << m_id << " in thread id " << tid << std::endl;

View File

@ -331,12 +331,21 @@ PhraseAlignment* findBestAlignment(const PhraseAlignmentCollection &phrasePair )
PhraseAlignment* bestAlignment;
for(size_t i=0; i<phrasePair.size(); i++) {
if (phrasePair[i]->count > bestAlignmentCount) {
bestAlignmentCount = phrasePair[i]->count;
bestAlignment = phrasePair[i];
size_t alignInd;
if (inverseFlag)
{ // count backwards, so that alignments for ties will be the same for both normal & inverse scores
alignInd = phrasePair.size() - i - 1;
}
}
else {
alignInd = i;
}
if (phrasePair[alignInd]->count > bestAlignmentCount) {
bestAlignmentCount = phrasePair[alignInd]->count;
bestAlignment = phrasePair[alignInd];
}
}
return bestAlignment;
}

View File

@ -35,7 +35,7 @@ for (my $i = 8; $i < $#ARGV + 1; ++$i)
my $TMPDIR=dirname($extract) ."/tmp.$$";
mkdir $TMPDIR;
my $totalLines = int(`cat $align | wc -l`);
my $totalLines = int(`cat '$align' | wc -l`);
my $linesPerSplit = int($totalLines / $numParallel) + 1;
print "total=$totalLines line-per-split=$linesPerSplit \n";
@ -46,15 +46,15 @@ my $cmd;
if ($numParallel > 1)
{
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $target $TMPDIR/target.";
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 '$target' '$TMPDIR/target.'";
$pid = RunFork($cmd);
push(@children, $pid);
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $source $TMPDIR/source.";
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 '$source' '$TMPDIR/source.'";
$pid = RunFork($cmd);
push(@children, $pid);
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 $align $TMPDIR/align.";
$cmd = "$splitCmd -d -l $linesPerSplit -a 5 '$align' '$TMPDIR/align.'";
$pid = RunFork($cmd);
push(@children, $pid);
@ -68,15 +68,15 @@ else
{
my $numStr = NumStr(0);
$cmd = "ln -s $target $TMPDIR/target.$numStr";
$cmd = "ln -s '$target' '$TMPDIR/target.$numStr'";
print STDERR "Executing: $cmd \n";
`$cmd`;
$cmd = "ln -s $source $TMPDIR/source.$numStr";
$cmd = "ln -s '$source' '$TMPDIR/source.$numStr'";
print STDERR "Executing: $cmd \n";
`$cmd`;
$cmd = "ln -s $align $TMPDIR/align.$numStr";
$cmd = "ln -s '$align' '$TMPDIR/align.$numStr'";
print STDERR "Executing: $cmd \n";
`$cmd`;
}
@ -90,7 +90,7 @@ for (my $i = 0; $i < $numParallel; ++$i)
if ($pid == 0)
{ # child
my $numStr = NumStr($i);
my $cmd = "$extractCmd $TMPDIR/target.$numStr $TMPDIR/source.$numStr $TMPDIR/align.$numStr $TMPDIR/extract.$numStr $otherExtractArgs \n";
my $cmd = "'$extractCmd' '$TMPDIR/target.$numStr' '$TMPDIR/source.$numStr' '$TMPDIR/align.$numStr' '$TMPDIR/extract.$numStr' $otherExtractArgs \n";
print STDERR $cmd;
`$cmd`;
@ -108,20 +108,21 @@ foreach (@children) {
}
# merge
my $catCmd = "zcat ";
my $catInvCmd = "zcat ";
my $catOCmd = "zcat ";
my $is_osx = ($^O eq "darwin");
my $catCmd = $is_osx?"gunzip -c ":"zcat ";
my $catInvCmd = $catCmd;
my $catOCmd = $catCmd;
for (my $i = 0; $i < $numParallel; ++$i)
{
my $numStr = NumStr($i);
$catCmd .= "$TMPDIR/extract.$numStr.gz ";
$catInvCmd .= "$TMPDIR/extract.$numStr.inv.gz ";
$catOCmd .= "$TMPDIR/extract.$numStr.o.gz ";
$catCmd .= "'$TMPDIR/extract.$numStr.gz' ";
$catInvCmd .= "'$TMPDIR/extract.$numStr.inv.gz' ";
$catOCmd .= "'$TMPDIR/extract.$numStr.o.gz' ";
}
$catCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.sorted.gz \n";
$catInvCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.inv.sorted.gz \n";
$catOCmd .= " | LC_ALL=C $sortCmd -T $TMPDIR | gzip -c > $extract.o.sorted.gz \n";
$catCmd .= " | LC_ALL=C $sortCmd -T '$TMPDIR' | gzip -c > '$extract.sorted.gz' \n";
$catInvCmd .= " | LC_ALL=C $sortCmd -T '$TMPDIR' | gzip -c > '$extract.inv.sorted.gz' \n";
$catOCmd .= " | LC_ALL=C $sortCmd -T '$TMPDIR' | gzip -c > '$extract.o.sorted.gz' \n";
@children = ();
@ -135,7 +136,7 @@ if ($makeTTable)
}
my $numStr = NumStr(0);
if (-e "$TMPDIR/extract.$numStr.o.gz")
if (-e "$TMPDIR/extract.$numStr.o.gz") # raw path: shell quotes inside the string would become part of the file name and the test would never succeed
{
$pid = RunFork($catOCmd);
push(@children, $pid);
@ -147,7 +148,7 @@ foreach (@children) {
}
# delete temporary files
$cmd = "rm -rf $TMPDIR \n";
$cmd = "rm -rf '$TMPDIR' \n";
print STDERR $cmd;
`$cmd`;

View File

@ -44,7 +44,7 @@ my $cmd;
my $fileCount = 0;
if ($numParallel <= 1)
{ # don't do parallel. Just link the extract file into place
$cmd = "ln -s $extractFile $TMPDIR/extract.0.gz";
$cmd = "ln -s '$extractFile' '$TMPDIR/extract.0.gz'";
print STDERR "$cmd \n";
systemCheck($cmd);
@ -121,7 +121,7 @@ for (my $i = 0; $i < $fileCount; ++$i)
my $fileInd = $i % $numParallel;
my $fh = $runFiles[$fileInd];
my $cmd = "$scoreCmd $TMPDIR/extract.$i.gz $lexFile $TMPDIR/phrase-table.half.$numStr.gz $otherExtractArgs\n";
my $cmd = "'$scoreCmd' '$TMPDIR'/extract.$i.gz '$lexFile' '$TMPDIR'/phrase-table.half.$numStr.gz $otherExtractArgs\n";
print $fh $cmd;
}
@ -129,7 +129,7 @@ for (my $i = 0; $i < $fileCount; ++$i)
for (my $i = 0; $i < $numParallel; ++$i)
{
close($runFiles[$i]);
my $path = "$TMPDIR/run.$i.sh";
my $path = "'$TMPDIR'/run.$i.sh";
systemCheck("chmod +x $path");
}
@ -137,7 +137,7 @@ for (my $i = 0; $i < $numParallel; ++$i)
my @children;
for (my $i = 0; $i < $numParallel; ++$i)
{
my $cmd = "$TMPDIR/run.$i.sh";
my $cmd = "'$TMPDIR'/run.$i.sh";
my $pid = RunFork($cmd);
push(@children, $pid);
}
@ -152,17 +152,19 @@ $cmd = "\n\nOH SHIT. This should have been filled in \n\n";
if ($fileCount == 1 && !$doSort)
{
my $numStr = NumStr(0);
$cmd = "mv $TMPDIR/phrase-table.half.$numStr.gz $ptHalf";
$cmd = "mv '$TMPDIR/phrase-table.half.$numStr.gz' '$ptHalf'";
}
else
{
$cmd = "zcat $TMPDIR/phrase-table.half.*.gz";
my $_is_osx = ($^O eq "darwin");
my $_catCmd = $_is_osx?"gunzip -c ":"zcat ";
$cmd = $_catCmd."'$TMPDIR'/phrase-table.half.*.gz";
if ($doSort) {
$cmd .= "| LC_ALL=C $sortCmd -T $TMPDIR ";
$cmd .= "| LC_ALL=C $sortCmd -T '$TMPDIR' ";
}
$cmd .= " | gzip -c > $ptHalf";
$cmd .= " | gzip -c > '$ptHalf'";
}
print STDERR $cmd;
systemCheck($cmd);
@ -213,7 +215,7 @@ if (-e $cocPath)
close(FHCOC);
}
$cmd = "rm -rf $TMPDIR \n";
$cmd = "rm -rf '$TMPDIR' \n";
print STDERR $cmd;
systemCheck($cmd);

View File

@ -45,7 +45,7 @@ binmode(STDOUT, ":utf8");
my $sentence = 0;
my $infile = $INFILE;
$infile =~ s/[\.\/]/_/g;
open(MODEL,"$MOSES -v 0 -f $RECASE_MODEL -i $INFILE -dl 1|");
open(MODEL,"$MOSES -v 0 -f $RECASE_MODEL -i $INFILE -dl 0|");
binmode(MODEL, ":utf8");
while(<MODEL>) {
chomp;

View File

@ -2,6 +2,7 @@
# $Id$
use strict;
use FindBin qw($Bin);
use Getopt::Long "GetOptions";
binmode(STDIN, ":utf8");
@ -59,7 +60,7 @@ if ($HELP || $ERROR) {
--ngram-count=file ... path to ngram-count.sh if not in \$PATH (used only with --lm=SRILM).
= Steps this script will perform =
(1) Truecasing (disabled);
(1) Truecasing;
(2) Language Model Training;
(3) Data Preparation
(4-10) Recaser Model Training;
@ -78,7 +79,8 @@ if ($HELP || $ERROR) {
# main loop
`mkdir -p $DIR`;
&truecase() if 0 && $FIRST_STEP == 1;
&truecase() if $FIRST_STEP == 1;
$CORPUS = "$DIR/aligned.truecased" if (-e "$DIR/aligned.truecased");
&train_lm() if $FIRST_STEP <= 2;
&prepare_data() if $FIRST_STEP <= 3 && $LAST_STEP >= 3;
&train_recase_model() if $FIRST_STEP <= 10 && $LAST_STEP >= 3;
@ -87,7 +89,17 @@ if ($HELP || $ERROR) {
### subs ###
# Step (1): train a truecasing model on the corpus and apply it.
#
# Uses the file-level settings $Bin (directory of the helper scripts, from
# FindBin), $DIR (working directory) and $CORPUS (corpus file).  Writes the
# model to $DIR/truecaser_model and the truecased text to
# $DIR/aligned.truecased.  Dies, reporting the helper's exit status, if
# either command fails.
sub truecase {
    print STDERR "(1) Truecase data @ ".`date`;
    print STDERR "(1) To build model without truecasing, use --first-step 2, and make sure $DIR/aligned.truecased does not exist\n";

    # Every path is single-quoted for the shell so that locations containing
    # spaces are passed as one argument each.
    my $cmd = "'$Bin/train-truecaser.perl' --model '$DIR/truecaser_model' --corpus '$CORPUS'";
    print STDERR $cmd."\n";
    system($cmd) == 0 || die("Training truecaser died with error " . ($? >> 8) . "\n");

    # The second command relies on the shell for the < and > redirections.
    $cmd = "'$Bin/truecase.perl' --model '$DIR/truecaser_model' < '$CORPUS' > '$DIR/aligned.truecased'";
    print STDERR $cmd."\n";
    system($cmd) == 0 || die("Applying truecaser died with error " . ($? >> 8) . "\n");
}
sub train_lm {
@ -162,7 +174,9 @@ sub cleanup {
my $clean_2 = $?;
`rm -f $DIR/lex*`;
my $clean_3 = $?;
if ($clean_1 + $clean_2 + $clean_3 != 0) {
`rm -f $DIR/truecaser_model`;
my $clean_4 = $?;
if ($clean_1 + $clean_2 + $clean_3 + $clean_4 != 0) {
print STDERR "Training successful but some files could not be cleaned.\n";
}
}

View File

@ -0,0 +1,390 @@
Bc
BcA
Ing
Ing.arch
MUDr
MVDr
MgA
Mgr
JUDr
PhDr
RNDr
PharmDr
ThLic
ThDr
Ph.D
Th.D
prof
doc
CSc
DrSc
dr. h. c
PaedDr
Dr
PhMr
DiS
abt
ad
a.i
aj
angl
anon
apod
atd
atp
aut
bd
biogr
b.m
b.p
b.r
cca
cit
cizojaz
c.k
col
čes
čín
čj
ed
facs
fasc
fol
fot
franc
h.c
hist
hl
hrsg
ibid
il
ind
inv.č
jap
jhdt
jv
koed
kol
korej
kl
krit
lat
lit
m.a
maď
mj
mp
násl
např
nepubl
něm
no
nr
n.s
okr
odd
odp
obr
opr
orig
phil
pl
pokrač
pol
port
pozn
př.kr
př.n.l
přel
přeprac
příl
pseud
pt
red
repr
resp
revid
rkp
roč
roz
rozš
samost
sect
sest
seš
sign
sl
srv
stol
sv
šk
šk.ro
špan
tab
t.č
tis
tj
tzv
univ
uspoř
vol
vl.jm
vs
vyd
vyobr
zal
zejm
zkr
zprac
zvl
n.p
např
než
MUDr
abl
absol
adj
adv
ak
ak. sl
akt
alch
amer
anat
angl
anglosas
arab
arch
archit
arg
astr
astrol
att
bás
belg
bibl
biol
boh
bot
bulh
círk
csl
č
čas
čes
dat
děj
dep
dět
dial
dór
dopr
dosl
ekon
epic
etnonym
eufem
f
fam
fem
fil
film
form
fot
fr
fut
fyz
gen
geogr
geol
geom
germ
gram
hebr
herald
hist
hl
hovor
hud
hut
chcsl
chem
ie
imp
impf
ind
indoevr
inf
instr
interj
ión
iron
it
kanad
katalán
klas
kniž
komp
konj
konkr
kuch
lat
lék
les
lid
lit
liturg
lok
log
m
mat
meteor
metr
mod
ms
mysl
n
náb
námoř
neklas
něm
nesklon
nom
ob
obch
obyč
ojed
opt
part
pas
pejor
pers
pf
pl
plpf
práv
prep
předl
přivl
r
rcsl
refl
reg
rkp
ř
řec
s
samohl
sg
sl
souhl
spec
srov
stfr
střv
stsl
subj
subst
superl
sv
sz
táz
tech
telev
teol
trans
typogr
var
vedl
verb
vl. jm
voj
vok
vůb
vulg
výtv
vztaž
zahr
zájm
zast
zejm
zeměd
zkr
mj
dl
atp
sport
Mgr
horn
MVDr
JUDr
RSDr
Bc
PhDr
ThDr
Ing
aj
apod
PharmDr
pomn
ev
slang
nprap
odp
dop
pol
st
stol
p. n. l
před n. l
n. l
př. Kr
po Kr
př. n. l
odd
RNDr
tzv
atd
tzn
resp
tj
p
br
č. j
čj
č. p
čp
a. s
s. r. o
spol. s r. o
p. o
s. p
v. o. s
k. s
o. p. s
o. s
v. r
v z
ml
kr
mld
hod
popř
ap
event
rus
slov
rum
švýc
P. T
zvl
hor
dol
S.O.S

View File

@ -18,6 +18,7 @@ if ($SCRIPTS_ROOTDIR eq '') {
$SCRIPTS_ROOTDIR = dirname(__FILE__);
}
# Strip a trailing "/training" component from the scripts root directory.
$SCRIPTS_ROOTDIR =~ s/\/training$//;
# Wrap the path in single quotes, presumably so it can be interpolated into
# shell command strings when the directory name contains spaces.
# NOTE(review): from here on the variable holds literal quote characters, so it
# is likely no longer valid for Perl-side file tests (-e, -x) or open() --
# confirm every use further down the script.
$SCRIPTS_ROOTDIR = qq{'$SCRIPTS_ROOTDIR'};
#$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_SORT_BUFFER_SIZE, $_SORT_BATCH_SIZE, $_SORT_COMPRESS, $_SORT_PARALLEL, $_CORPUS,
@ -586,7 +587,7 @@ die("ERROR: format for decoding steps is \"t0,g0,t1,g1:t2\", you provided $___DE
sub prepare {
print STDERR "(1) preparing corpus @ ".`date`;
safesystem("mkdir -p $___CORPUS_DIR") or die("ERROR: could not create corpus dir $___CORPUS_DIR");
safesystem("mkdir -p '$___CORPUS_DIR'") or die("ERROR: could not create corpus dir $___CORPUS_DIR");
print STDERR "(1.0) selecting factors @ ".`date`;
my ($factor_f,$factor_e) = split(/\-/,$___ALIGNMENT_FACTORS);
@ -725,7 +726,7 @@ sub reduce_factors {
$realfull .= ".gz";
$reduced =~ s/(\.gz)?$/.gz/;
}
safesystem("ln -s $realfull $reduced")
safesystem("ln -s '$realfull' '$reduced'")
or die "Failed to create symlink $realfull -> $reduced";
return;
}
@ -768,12 +769,12 @@ sub reduce_factors {
print STDERR "\n";
close(OUT);
close(IN);
`rm -f $reduced.lock`;
`rm -f '$reduced.lock'`;
}
sub make_classes {
my ($corpus,$classes) = @_;
my $cmd = "$MKCLS -c50 -n2 -p$corpus -V$classes opt";
my $cmd = "$MKCLS -c50 -n2 -p'$corpus' -V'$classes' opt";
print STDERR "(1.1) running mkcls @ ".`date`."$cmd\n";
if (-e $classes) {
print STDERR " $classes already in place, reusing\n";
@ -802,7 +803,7 @@ sub get_vocabulary {
}
my %VCB;
open(VCB,">$vcb") or die "ERROR: Can't write $vcb";
open(VCB,">", "$vcb") or die "ERROR: Can't write $vcb";
print VCB "1\tUNK\t0\n";
my $id=2;
foreach (reverse sort @NUM) {
@ -972,7 +973,7 @@ sub run_single_giza_on_parts {
if ($i%3==1 && $part < ($___PARTS*$i)/$size && $part<$___PARTS) {
close(PART) if $part;
$part++;
safesystem("mkdir -p $___CORPUS_DIR/part$part") or die("ERROR: could not create $___CORPUS_DIR/part$part");
safesystem("mkdir -p '$___CORPUS_DIR/part$part'") or die("ERROR: could not create $___CORPUS_DIR/part$part");
open(PART,">$___CORPUS_DIR/part$part/$f-$e-int-train.snt")
or die "ERROR: Can't write $___CORPUS_DIR/part$part/$f-$e-int-train.snt";
}
@ -1090,6 +1091,9 @@ sub run_single_giza {
my $GizaOptions;
foreach my $option (sort keys %GizaDefaultOptions){
my $value = $GizaDefaultOptions{$option} ;
if ($value =~ /\s+/) {
$value = qq('$value') #makes '/file name/' from /file name/
}
$GizaOptions .= " -$option $value" ;
}
@ -1115,17 +1119,17 @@ sub run_single_giza {
die "ERROR: Giza did not produce the output file $dir/$f-$e.$___GIZA_EXTENSION. Is your corpus clean (reasonably-sized sentences)?"
if ! -e "$dir/$f-$e.$___GIZA_EXTENSION";
safesystem("rm -f $dir/$f-$e.$___GIZA_EXTENSION.gz") or die;
safesystem("gzip $dir/$f-$e.$___GIZA_EXTENSION") or die;
safesystem("rm -f '$dir/$f-$e.$___GIZA_EXTENSION.gz'") or die;
safesystem("gzip '$dir/$f-$e.$___GIZA_EXTENSION'") or die;
}
sub run_single_snt2cooc {
my($dir,$e,$f,$vcb_e,$vcb_f,$train) = @_;
print STDERR "(2.1a) running snt2cooc $f-$e @ ".`date`."\n";
safesystem("mkdir -p $dir") or die("ERROR");
safesystem("mkdir -p '$dir'") or die("ERROR");
if ($SNT2COOC eq "$_EXTERNAL_BINDIR/snt2cooc.out") {
print "$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc\n";
safesystem("$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc") or die("ERROR");
safesystem("$SNT2COOC '$vcb_e' '$vcb_f' '$train' > '$dir/$f-$e.cooc'") or die("ERROR");
} else {
print "$SNT2COOC $dir/$f-$e.cooc $vcb_e $vcb_f $train\n";
safesystem("$SNT2COOC $dir/$f-$e.cooc $vcb_e $vcb_f $train") or die("ERROR");
@ -1146,22 +1150,22 @@ sub word_align {
my($__ALIGNMENT_CMD,$__ALIGNMENT_INV_CMD);
if (-e "$___GIZA_F2E/$___F-$___E.$___GIZA_EXTENSION.bz2"){
$__ALIGNMENT_CMD="\"$BZCAT $___GIZA_F2E/$___F-$___E.$___GIZA_EXTENSION.bz2\"";
$__ALIGNMENT_CMD="\"$BZCAT '$___GIZA_F2E/$___F-$___E.$___GIZA_EXTENSION.bz2'\"";
} elsif (-e "$___GIZA_F2E/$___F-$___E.$___GIZA_EXTENSION.gz") {
$__ALIGNMENT_CMD="\"$ZCAT $___GIZA_F2E/$___F-$___E.$___GIZA_EXTENSION.gz\"";
$__ALIGNMENT_CMD="\"$ZCAT '$___GIZA_F2E/$___F-$___E.$___GIZA_EXTENSION.gz'\"";
} else {
die "ERROR: Can't read $___GIZA_F2E/$___F-$___E.$___GIZA_EXTENSION.{bz2,gz}\n";
}
if ( -e "$___GIZA_E2F/$___E-$___F.$___GIZA_EXTENSION.bz2"){
$__ALIGNMENT_INV_CMD="\"$BZCAT $___GIZA_E2F/$___E-$___F.$___GIZA_EXTENSION.bz2\"";
$__ALIGNMENT_INV_CMD="\"$BZCAT '$___GIZA_E2F/$___E-$___F.$___GIZA_EXTENSION.bz2'\"";
}elsif (-e "$___GIZA_E2F/$___E-$___F.$___GIZA_EXTENSION.gz"){
$__ALIGNMENT_INV_CMD="\"$ZCAT $___GIZA_E2F/$___E-$___F.$___GIZA_EXTENSION.gz\"";
$__ALIGNMENT_INV_CMD="\"$ZCAT '$___GIZA_E2F/$___E-$___F.$___GIZA_EXTENSION.gz'\"";
}else{
die "ERROR: Can't read $___GIZA_E2F/$___E-$___F.$___GIZA_EXTENSION.{bz2,gz}\n\n";
}
safesystem("mkdir -p $___MODEL_DIR") or die("ERROR: could not create dir $___MODEL_DIR");
safesystem("mkdir -p '$___MODEL_DIR'") or die("ERROR: could not create dir $___MODEL_DIR");
#build arguments for symal
my($__symal_a)="";
@ -1182,7 +1186,7 @@ sub word_align {
safesystem("$GIZA2BAL -d $__ALIGNMENT_INV_CMD -i $__ALIGNMENT_CMD |".
"$SYMAL -alignment=\"$__symal_a\" -diagonal=\"$__symal_d\" ".
"-final=\"$__symal_f\" -both=\"$__symal_b\" > ".
"$___ALIGNMENT_FILE.$___ALIGNMENT")
"'$___ALIGNMENT_FILE.$___ALIGNMENT'")
||
die "ERROR: Can't generate symmetrized alignment file\n"
@ -1389,7 +1393,7 @@ sub extract_phrase {
my @tempfiles = ();
foreach my $f ($alignment_file_e, $alignment_file_f, $alignment_file_a) {
if (! -e $f && -e $f.".gz") {
safesystem("gunzip < $f.gz > $f") or die("Failed to gunzip corpus $f");
safesystem("gunzip < '$f.gz' > '$f'") or die("Failed to gunzip corpus $f");
push @tempfiles, "$f.gz";
}
}
@ -1398,7 +1402,7 @@ sub extract_phrase {
{
my $max_length = &get_max_phrase_length($table_number);
$cmd = "$RULE_EXTRACT $alignment_file_e $alignment_file_f $alignment_file_a $extract_file";
$cmd = "$RULE_EXTRACT '$alignment_file_e' '$alignment_file_f' '$alignment_file_a' '$extract_file'";
$cmd .= " --GlueGrammar $___GLUE_GRAMMAR_FILE" if $_GLUE_GRAMMAR;
$cmd .= " --UnknownWordLabel $_UNKNOWN_WORD_LABEL_FILE" if $_TARGET_SYNTAX && defined($_UNKNOWN_WORD_LABEL_FILE);
$cmd .= " --PCFG" if $_PCFG;
@ -1415,14 +1419,14 @@ sub extract_phrase {
{
if ( $_EPPEX ) {
# eppex sets max_phrase_length itself (as the maximum phrase length for which any Lossy Counter is defined)
$cmd = "$EPPEX $alignment_file_e $alignment_file_f $alignment_file_a $extract_file $_EPPEX";
$cmd = "$EPPEX '$alignment_file_e' '$alignment_file_f' '$alignment_file_a' '$extract_file' $_EPPEX";
}
else {
my $max_length = &get_max_phrase_length($table_number);
print "MAX $max_length $reordering_flag $table_number\n";
$max_length = &get_max_phrase_length(-1) if $reordering_flag;
$cmd = "$PHRASE_EXTRACT $alignment_file_e $alignment_file_f $alignment_file_a $extract_file $max_length";
$cmd = "$PHRASE_EXTRACT '$alignment_file_e' '$alignment_file_f' '$alignment_file_a' '$extract_file' '$max_length'";
}
if ($reordering_flag) {
$cmd .= " orientation";
@ -1530,7 +1534,7 @@ sub score_phrase_phrase_extract {
print STDERR "(6.".($substep++).") creating table half $ttable_file.half.$direction @ ".`date`;
my $cmd = "$PHRASE_SCORE $extract $lexical_file.$direction $ttable_file.half.$direction.gz $inverse";
my $cmd = "$PHRASE_SCORE '$extract' '$lexical_file.$direction' '$ttable_file.half.$direction.gz' $inverse";
$cmd .= " --Hierarchical" if $_HIERARCHICAL;
$cmd .= " --WordAlignment" if $_PHRASE_WORD_ALIGNMENT;
$cmd .= " --KneserNey" if $KNESER_NEY;
@ -1578,7 +1582,7 @@ sub score_phrase_phrase_extract {
# merging the two halves
print STDERR "(6.6) consolidating the two halves @ ".`date`;
return if $___CONTINUE && -e "$ttable_file.gz";
my $cmd = "$PHRASE_CONSOLIDATE $ttable_file.half.f2e.gz $ttable_file.half.e2f.gz /dev/stdout";
my $cmd = "$PHRASE_CONSOLIDATE '$ttable_file.half.f2e.gz' '$ttable_file.half.e2f.gz' /dev/stdout";
$cmd .= " --Hierarchical" if $_HIERARCHICAL;
$cmd .= " --LogProb" if $LOG_PROB;
$cmd .= " --NegLogProb" if $NEG_LOG_PROB;
@ -1589,10 +1593,10 @@ sub score_phrase_phrase_extract {
$cmd .= " --GoodTuring $ttable_file.half.f2e.gz.coc" if $GOOD_TURING;
$cmd .= " --KneserNey $ttable_file.half.f2e.gz.coc" if $KNESER_NEY;
$cmd .= " | gzip -c > $ttable_file.gz";
$cmd .= " | gzip -c > '$ttable_file.gz'";
safesystem($cmd) or die "ERROR: Consolidating the two phrase table halves failed";
if (! $debug) { safesystem("rm -f $ttable_file.half.*") or die("ERROR"); }
if (! $debug) { safesystem("rm -f '$ttable_file.half.'*") or die("ERROR"); }
}
sub score_phrase_memscore {
@ -1606,7 +1610,7 @@ sub score_phrase_memscore {
# The output is sorted to avoid breaking scripts that rely on the
# sorting behaviour of the previous scoring algorithm.
my $cmd = "$MEMSCORE $options | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip >$ttable_file.gz";
my $cmd = "$MEMSCORE $options | LC_ALL=C sort $__SORT_BUFFER_SIZE $__SORT_BATCH_SIZE -T $___TEMP_DIR | gzip >'$ttable_file.gz'";
if (-e "$extract_file.gz") {
$cmd = "$ZCAT $extract_file.gz | ".$cmd;
} else {
@ -1666,7 +1670,7 @@ sub get_reordering {
print STDERR "(7.2) building tables @ ".`date`;
#create cmd string for lexical reordering scoring
my $cmd = "$LEXICAL_REO_SCORER $extract_file.o.sorted.gz $smooth $reo_model_path";
my $cmd = "$LEXICAL_REO_SCORER '$extract_file.o.sorted.gz' $smooth '$reo_model_path'";
$cmd .= " --SmoothWithCounts" if ($smooth =~ /(.+)u$/);
for my $mtype (keys %REORDERING_MODEL_TYPES) {
$cmd .= " --model \"$mtype $REORDERING_MODEL_TYPES{$mtype}";
@ -1764,8 +1768,8 @@ sub get_generation {
}
}
close(GEN);
safesystem("rm -f $file.gz") or die("ERROR");
safesystem("gzip $file") or die("ERROR");
safesystem("rm -f '$file.gz'") or die("ERROR");
safesystem("gzip '$file'") or die("ERROR");
}
### (9) CREATE CONFIGURATION FILE
@ -1776,7 +1780,7 @@ sub create_ini {
&full_path(\$___MODEL_DIR);
&full_path(\$___VCB_E);
&full_path(\$___VCB_F);
`mkdir -p $___MODEL_DIR`;
`mkdir -p '$___MODEL_DIR'`;
open(INI,">$___CONFIG") or die("ERROR: Can't write $___CONFIG");
print INI "#########################
### MOSES CONFIG FILE ###