Merge commit 'upstream/master'

This commit is contained in:
Eleftherios Avramidis 2012-01-14 00:22:57 +01:00
commit caa4ce78e1
42 changed files with 4875 additions and 2665 deletions

View File

@ -23,6 +23,7 @@
#include <string> #include <string>
#include <vector> #include <vector>
#include <iterator> #include <iterator>
#include <cassert>
#include "../moses/src/InputFileStream.h" #include "../moses/src/InputFileStream.h"
#include "../moses/src/Util.h" #include "../moses/src/Util.h"
#include "../moses/src/UserMessage.h" #include "../moses/src/UserMessage.h"

View File

@ -7,39 +7,54 @@
objects = { objects = {
/* Begin PBXBuildFile section */ /* Begin PBXBuildFile section */
1ED4FB4B11BDBAA7004E826A /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FB4911BDBAA7004E826A /* Main.cpp */; }; 1EBA432514B97B35003CC0EA /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA432314B97B35003CC0EA /* Main.cpp */; };
1ED4FB6011BDBAFB004E826A /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1ED4FB5F11BDBAE6004E826A /* libOnDiskPt.a */; }; 1EF0707114B9EE800052152A /* liblm.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EBA45C014B97EF1003CC0EA /* liblm.a */; };
1ED4FB6111BDBB00004E826A /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1ED4FB5611BDBAD7004E826A /* libmoses.a */; }; 1EF0707214B9EE800052152A /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EBA432E14B97CA1003CC0EA /* libmoses.a */; };
1EF0707314B9EE800052152A /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EBA433714B97CA6003CC0EA /* libOnDiskPt.a */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */ /* Begin PBXContainerItemProxy section */
1ED4FB5511BDBAD7004E826A /* PBXContainerItemProxy */ = { 1EBA432D14B97CA1003CC0EA /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 1ED4FB4E11BDBAD7004E826A /* moses.xcodeproj */; containerPortal = 1EBA432614B97CA1003CC0EA /* moses.xcodeproj */;
proxyType = 2; proxyType = 2;
remoteGlobalIDString = D2AAC046055464E500DB518D; remoteGlobalIDString = D2AAC046055464E500DB518D;
remoteInfo = moses; remoteInfo = moses;
}; };
1ED4FB5E11BDBAE6004E826A /* PBXContainerItemProxy */ = { 1EBA433614B97CA6003CC0EA /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 1ED4FB5711BDBAE6004E826A /* OnDiskPt.xcodeproj */; containerPortal = 1EBA432F14B97CA6003CC0EA /* OnDiskPt.xcodeproj */;
proxyType = 2; proxyType = 2;
remoteGlobalIDString = D2AAC046055464E500DB518D; remoteGlobalIDString = D2AAC046055464E500DB518D;
remoteInfo = OnDiskPt; remoteInfo = OnDiskPt;
}; };
1ED4FB7211BDBC05004E826A /* PBXContainerItemProxy */ = { 1EBA45BF14B97EF1003CC0EA /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 1ED4FB4E11BDBAD7004E826A /* moses.xcodeproj */; containerPortal = 1EBA45B414B97EF1003CC0EA /* lm.xcodeproj */;
proxyType = 2;
remoteGlobalIDString = 1EE8C2E91476A48E002496F2;
remoteInfo = lm;
};
1EF0707614B9EE930052152A /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EBA432F14B97CA6003CC0EA /* OnDiskPt.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = OnDiskPt;
};
1EF0707814B9EE980052152A /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EBA432614B97CA1003CC0EA /* moses.xcodeproj */;
proxyType = 1; proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D; remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = moses; remoteInfo = moses;
}; };
1ED4FB7411BDBC09004E826A /* PBXContainerItemProxy */ = { 1EF0707A14B9EE9C0052152A /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 1ED4FB5711BDBAE6004E826A /* OnDiskPt.xcodeproj */; containerPortal = 1EBA45B414B97EF1003CC0EA /* lm.xcodeproj */;
proxyType = 1; proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D; remoteGlobalIDString = 1EE8C2E81476A48E002496F2;
remoteInfo = OnDiskPt; remoteInfo = lm;
}; };
/* End PBXContainerItemProxy section */ /* End PBXContainerItemProxy section */
@ -57,10 +72,11 @@
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
1E4FC4861251FFBF00FB0D9D /* CreateOnDisk */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = CreateOnDisk; sourceTree = BUILT_PRODUCTS_DIR; }; 1E4FC4861251FFBF00FB0D9D /* CreateOnDisk */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = CreateOnDisk; sourceTree = BUILT_PRODUCTS_DIR; };
1ED4FB4911BDBAA7004E826A /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Main.cpp; path = src/Main.cpp; sourceTree = "<group>"; }; 1EBA432314B97B35003CC0EA /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Main.cpp; path = ../../OnDiskPt/Main.cpp; sourceTree = "<group>"; };
1ED4FB4A11BDBAA7004E826A /* Main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Main.h; path = src/Main.h; sourceTree = "<group>"; }; 1EBA432414B97B35003CC0EA /* Main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Main.h; path = ../../OnDiskPt/Main.h; sourceTree = "<group>"; };
1ED4FB4E11BDBAD7004E826A /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = moses.xcodeproj; path = ../moses/moses.xcodeproj; sourceTree = SOURCE_ROOT; }; 1EBA432614B97CA1003CC0EA /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = moses.xcodeproj; sourceTree = "<group>"; };
1ED4FB5711BDBAE6004E826A /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = OnDiskPt.xcodeproj; path = ../OnDiskPt/OnDiskPt.xcodeproj; sourceTree = SOURCE_ROOT; }; 1EBA432F14B97CA6003CC0EA /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = OnDiskPt.xcodeproj; sourceTree = "<group>"; };
1EBA45B414B97EF1003CC0EA /* lm.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = lm.xcodeproj; sourceTree = "<group>"; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
@ -68,8 +84,9 @@
isa = PBXFrameworksBuildPhase; isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
1ED4FB6111BDBB00004E826A /* libmoses.a in Frameworks */, 1EF0707114B9EE800052152A /* liblm.a in Frameworks */,
1ED4FB6011BDBAFB004E826A /* libOnDiskPt.a in Frameworks */, 1EF0707214B9EE800052152A /* libmoses.a in Frameworks */,
1EF0707314B9EE800052152A /* libOnDiskPt.a in Frameworks */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -79,11 +96,12 @@
08FB7794FE84155DC02AAC07 /* CreateOnDisk */ = { 08FB7794FE84155DC02AAC07 /* CreateOnDisk */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1ED4FB5711BDBAE6004E826A /* OnDiskPt.xcodeproj */,
1ED4FB4E11BDBAD7004E826A /* moses.xcodeproj */,
08FB7795FE84155DC02AAC07 /* Source */, 08FB7795FE84155DC02AAC07 /* Source */,
C6859E8C029090F304C91782 /* Documentation */, C6859E8C029090F304C91782 /* Documentation */,
1AB674ADFE9D54B511CA2CBB /* Products */, 1AB674ADFE9D54B511CA2CBB /* Products */,
1EBA432614B97CA1003CC0EA /* moses.xcodeproj */,
1EBA432F14B97CA6003CC0EA /* OnDiskPt.xcodeproj */,
1EBA45B414B97EF1003CC0EA /* lm.xcodeproj */,
); );
name = CreateOnDisk; name = CreateOnDisk;
sourceTree = "<group>"; sourceTree = "<group>";
@ -91,8 +109,8 @@
08FB7795FE84155DC02AAC07 /* Source */ = { 08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1ED4FB4911BDBAA7004E826A /* Main.cpp */, 1EBA432314B97B35003CC0EA /* Main.cpp */,
1ED4FB4A11BDBAA7004E826A /* Main.h */, 1EBA432414B97B35003CC0EA /* Main.h */,
); );
name = Source; name = Source;
sourceTree = "<group>"; sourceTree = "<group>";
@ -105,18 +123,26 @@
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
1ED4FB4F11BDBAD7004E826A /* Products */ = { 1EBA432714B97CA1003CC0EA /* Products */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1ED4FB5611BDBAD7004E826A /* libmoses.a */, 1EBA432E14B97CA1003CC0EA /* libmoses.a */,
); );
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
1ED4FB5811BDBAE6004E826A /* Products */ = { 1EBA433014B97CA6003CC0EA /* Products */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1ED4FB5F11BDBAE6004E826A /* libOnDiskPt.a */, 1EBA433714B97CA6003CC0EA /* libOnDiskPt.a */,
);
name = Products;
sourceTree = "<group>";
};
1EBA45B514B97EF1003CC0EA /* Products */ = {
isa = PBXGroup;
children = (
1EBA45C014B97EF1003CC0EA /* liblm.a */,
); );
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
@ -142,8 +168,9 @@
buildRules = ( buildRules = (
); );
dependencies = ( dependencies = (
1ED4FB7311BDBC05004E826A /* PBXTargetDependency */, 1EF0707B14B9EE9C0052152A /* PBXTargetDependency */,
1ED4FB7511BDBC09004E826A /* PBXTargetDependency */, 1EF0707914B9EE980052152A /* PBXTargetDependency */,
1EF0707714B9EE930052152A /* PBXTargetDependency */,
); );
name = CreateOnDisk; name = CreateOnDisk;
productInstallPath = "$(HOME)/bin"; productInstallPath = "$(HOME)/bin";
@ -170,12 +197,16 @@
projectDirPath = ""; projectDirPath = "";
projectReferences = ( projectReferences = (
{ {
ProductGroup = 1ED4FB4F11BDBAD7004E826A /* Products */; ProductGroup = 1EBA45B514B97EF1003CC0EA /* Products */;
ProjectRef = 1ED4FB4E11BDBAD7004E826A /* moses.xcodeproj */; ProjectRef = 1EBA45B414B97EF1003CC0EA /* lm.xcodeproj */;
}, },
{ {
ProductGroup = 1ED4FB5811BDBAE6004E826A /* Products */; ProductGroup = 1EBA432714B97CA1003CC0EA /* Products */;
ProjectRef = 1ED4FB5711BDBAE6004E826A /* OnDiskPt.xcodeproj */; ProjectRef = 1EBA432614B97CA1003CC0EA /* moses.xcodeproj */;
},
{
ProductGroup = 1EBA433014B97CA6003CC0EA /* Products */;
ProjectRef = 1EBA432F14B97CA6003CC0EA /* OnDiskPt.xcodeproj */;
}, },
); );
projectRoot = ""; projectRoot = "";
@ -186,18 +217,25 @@
/* End PBXProject section */ /* End PBXProject section */
/* Begin PBXReferenceProxy section */ /* Begin PBXReferenceProxy section */
1ED4FB5611BDBAD7004E826A /* libmoses.a */ = { 1EBA432E14B97CA1003CC0EA /* libmoses.a */ = {
isa = PBXReferenceProxy; isa = PBXReferenceProxy;
fileType = archive.ar; fileType = archive.ar;
path = libmoses.a; path = libmoses.a;
remoteRef = 1ED4FB5511BDBAD7004E826A /* PBXContainerItemProxy */; remoteRef = 1EBA432D14B97CA1003CC0EA /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR; sourceTree = BUILT_PRODUCTS_DIR;
}; };
1ED4FB5F11BDBAE6004E826A /* libOnDiskPt.a */ = { 1EBA433714B97CA6003CC0EA /* libOnDiskPt.a */ = {
isa = PBXReferenceProxy; isa = PBXReferenceProxy;
fileType = archive.ar; fileType = archive.ar;
path = libOnDiskPt.a; path = libOnDiskPt.a;
remoteRef = 1ED4FB5E11BDBAE6004E826A /* PBXContainerItemProxy */; remoteRef = 1EBA433614B97CA6003CC0EA /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR;
};
1EBA45C014B97EF1003CC0EA /* liblm.a */ = {
isa = PBXReferenceProxy;
fileType = archive.ar;
path = liblm.a;
remoteRef = 1EBA45BF14B97EF1003CC0EA /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR; sourceTree = BUILT_PRODUCTS_DIR;
}; };
/* End PBXReferenceProxy section */ /* End PBXReferenceProxy section */
@ -207,22 +245,27 @@
isa = PBXSourcesBuildPhase; isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
1ED4FB4B11BDBAA7004E826A /* Main.cpp in Sources */, 1EBA432514B97B35003CC0EA /* Main.cpp in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
/* End PBXSourcesBuildPhase section */ /* End PBXSourcesBuildPhase section */
/* Begin PBXTargetDependency section */ /* Begin PBXTargetDependency section */
1ED4FB7311BDBC05004E826A /* PBXTargetDependency */ = { 1EF0707714B9EE930052152A /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = moses;
targetProxy = 1ED4FB7211BDBC05004E826A /* PBXContainerItemProxy */;
};
1ED4FB7511BDBC09004E826A /* PBXTargetDependency */ = {
isa = PBXTargetDependency; isa = PBXTargetDependency;
name = OnDiskPt; name = OnDiskPt;
targetProxy = 1ED4FB7411BDBC09004E826A /* PBXContainerItemProxy */; targetProxy = 1EF0707614B9EE930052152A /* PBXContainerItemProxy */;
};
1EF0707914B9EE980052152A /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = moses;
targetProxy = 1EF0707814B9EE980052152A /* PBXContainerItemProxy */;
};
1EF0707B14B9EE9C0052152A /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = lm;
targetProxy = 1EF0707A14B9EE9C0052152A /* PBXContainerItemProxy */;
}; };
/* End PBXTargetDependency section */ /* End PBXTargetDependency section */
@ -236,12 +279,15 @@
GCC_ENABLE_FIX_AND_CONTINUE = YES; GCC_ENABLE_FIX_AND_CONTINUE = YES;
GCC_MODEL_TUNING = G5; GCC_MODEL_TUNING = G5;
GCC_OPTIMIZATION_LEVEL = 0; GCC_OPTIMIZATION_LEVEL = 0;
HEADER_SEARCH_PATHS = (
../../,
../../irstlm/include,
);
INSTALL_PATH = /usr/local/bin; INSTALL_PATH = /usr/local/bin;
LIBRARY_SEARCH_PATHS = ( LIBRARY_SEARCH_PATHS = (
../irstlm/lib, ../../irstlm/lib,
../srilm/lib/macosx, ../../srilm/lib/macosx,
../randlm/lib, ../../randlm/lib,
../kenlm,
); );
OTHER_LDFLAGS = ( OTHER_LDFLAGS = (
"-lz", "-lz",
@ -251,7 +297,6 @@
"-loolm", "-loolm",
"-lflm", "-lflm",
"-llattice", "-llattice",
"-lkenlm",
"-lrandlm", "-lrandlm",
); );
PRODUCT_NAME = CreateOnDisk; PRODUCT_NAME = CreateOnDisk;
@ -264,12 +309,15 @@
ALWAYS_SEARCH_USER_PATHS = NO; ALWAYS_SEARCH_USER_PATHS = NO;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_MODEL_TUNING = G5; GCC_MODEL_TUNING = G5;
HEADER_SEARCH_PATHS = (
../../,
../../irstlm/include,
);
INSTALL_PATH = /usr/local/bin; INSTALL_PATH = /usr/local/bin;
LIBRARY_SEARCH_PATHS = ( LIBRARY_SEARCH_PATHS = (
../irstlm/lib, ../../irstlm/lib,
../srilm/lib/macosx, ../../srilm/lib/macosx,
../randlm/lib, ../../randlm/lib,
../kenlm,
); );
OTHER_LDFLAGS = ( OTHER_LDFLAGS = (
"-lz", "-lz",
@ -279,7 +327,6 @@
"-loolm", "-loolm",
"-lflm", "-lflm",
"-llattice", "-llattice",
"-lkenlm",
"-lrandlm", "-lrandlm",
); );
PRODUCT_NAME = CreateOnDisk; PRODUCT_NAME = CreateOnDisk;

View File

@ -7,41 +7,41 @@
objects = { objects = {
/* Begin PBXBuildFile section */ /* Begin PBXBuildFile section */
1ED4FB1911BDBA2B004E826A /* OnDiskWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FB0811BDBA2B004E826A /* OnDiskWrapper.cpp */; }; 1EBA430C14B97ABF003CC0EA /* OnDiskWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA42FA14B97ABF003CC0EA /* OnDiskWrapper.cpp */; };
1ED4FB1A11BDBA2B004E826A /* OnDiskWrapper.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FB0911BDBA2B004E826A /* OnDiskWrapper.h */; }; 1EBA430D14B97ABF003CC0EA /* OnDiskWrapper.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA42FB14B97ABF003CC0EA /* OnDiskWrapper.h */; };
1ED4FB1B11BDBA2B004E826A /* Phrase.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FB0A11BDBA2B004E826A /* Phrase.cpp */; }; 1EBA430E14B97ABF003CC0EA /* Phrase.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA42FC14B97ABF003CC0EA /* Phrase.cpp */; };
1ED4FB1C11BDBA2B004E826A /* Phrase.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FB0B11BDBA2B004E826A /* Phrase.h */; }; 1EBA430F14B97ABF003CC0EA /* Phrase.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA42FD14B97ABF003CC0EA /* Phrase.h */; };
1ED4FB1D11BDBA2B004E826A /* PhraseNode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FB0C11BDBA2B004E826A /* PhraseNode.cpp */; }; 1EBA431014B97ABF003CC0EA /* PhraseNode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA42FE14B97ABF003CC0EA /* PhraseNode.cpp */; };
1ED4FB1E11BDBA2B004E826A /* PhraseNode.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FB0D11BDBA2B004E826A /* PhraseNode.h */; }; 1EBA431114B97ABF003CC0EA /* PhraseNode.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA42FF14B97ABF003CC0EA /* PhraseNode.h */; };
1ED4FB1F11BDBA2B004E826A /* SourcePhrase.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FB0E11BDBA2B004E826A /* SourcePhrase.cpp */; }; 1EBA431214B97ABF003CC0EA /* SourcePhrase.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA430014B97ABF003CC0EA /* SourcePhrase.cpp */; };
1ED4FB2011BDBA2B004E826A /* SourcePhrase.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FB0F11BDBA2B004E826A /* SourcePhrase.h */; }; 1EBA431314B97ABF003CC0EA /* SourcePhrase.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA430114B97ABF003CC0EA /* SourcePhrase.h */; };
1ED4FB2111BDBA2B004E826A /* TargetPhrase.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FB1011BDBA2B004E826A /* TargetPhrase.cpp */; }; 1EBA431414B97ABF003CC0EA /* TargetPhrase.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA430214B97ABF003CC0EA /* TargetPhrase.cpp */; };
1ED4FB2211BDBA2B004E826A /* TargetPhrase.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FB1111BDBA2B004E826A /* TargetPhrase.h */; }; 1EBA431514B97ABF003CC0EA /* TargetPhrase.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA430314B97ABF003CC0EA /* TargetPhrase.h */; };
1ED4FB2311BDBA2B004E826A /* TargetPhraseCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FB1211BDBA2B004E826A /* TargetPhraseCollection.cpp */; }; 1EBA431614B97ABF003CC0EA /* TargetPhraseCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA430414B97ABF003CC0EA /* TargetPhraseCollection.cpp */; };
1ED4FB2411BDBA2B004E826A /* TargetPhraseCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FB1311BDBA2B004E826A /* TargetPhraseCollection.h */; }; 1EBA431714B97ABF003CC0EA /* TargetPhraseCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA430514B97ABF003CC0EA /* TargetPhraseCollection.h */; };
1ED4FB2511BDBA2B004E826A /* Vocab.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FB1411BDBA2B004E826A /* Vocab.cpp */; }; 1EBA431814B97ABF003CC0EA /* Vocab.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA430614B97ABF003CC0EA /* Vocab.cpp */; };
1ED4FB2611BDBA2B004E826A /* Vocab.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FB1511BDBA2B004E826A /* Vocab.h */; }; 1EBA431914B97ABF003CC0EA /* Vocab.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA430714B97ABF003CC0EA /* Vocab.h */; };
1ED4FB2711BDBA2B004E826A /* Word.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ED4FB1611BDBA2B004E826A /* Word.cpp */; }; 1EBA431A14B97ABF003CC0EA /* Word.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA430814B97ABF003CC0EA /* Word.cpp */; };
1ED4FB2811BDBA2B004E826A /* Word.h in Headers */ = {isa = PBXBuildFile; fileRef = 1ED4FB1711BDBA2B004E826A /* Word.h */; }; 1EBA431B14B97ABF003CC0EA /* Word.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA430914B97ABF003CC0EA /* Word.h */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
1ED4FB0811BDBA2B004E826A /* OnDiskWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = OnDiskWrapper.cpp; path = src/OnDiskWrapper.cpp; sourceTree = "<group>"; }; 1EBA42FA14B97ABF003CC0EA /* OnDiskWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = OnDiskWrapper.cpp; path = ../../OnDiskPt/OnDiskWrapper.cpp; sourceTree = "<group>"; };
1ED4FB0911BDBA2B004E826A /* OnDiskWrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OnDiskWrapper.h; path = src/OnDiskWrapper.h; sourceTree = "<group>"; }; 1EBA42FB14B97ABF003CC0EA /* OnDiskWrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = OnDiskWrapper.h; path = ../../OnDiskPt/OnDiskWrapper.h; sourceTree = "<group>"; };
1ED4FB0A11BDBA2B004E826A /* Phrase.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Phrase.cpp; path = src/Phrase.cpp; sourceTree = "<group>"; }; 1EBA42FC14B97ABF003CC0EA /* Phrase.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Phrase.cpp; path = ../../OnDiskPt/Phrase.cpp; sourceTree = "<group>"; };
1ED4FB0B11BDBA2B004E826A /* Phrase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Phrase.h; path = src/Phrase.h; sourceTree = "<group>"; }; 1EBA42FD14B97ABF003CC0EA /* Phrase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Phrase.h; path = ../../OnDiskPt/Phrase.h; sourceTree = "<group>"; };
1ED4FB0C11BDBA2B004E826A /* PhraseNode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseNode.cpp; path = src/PhraseNode.cpp; sourceTree = "<group>"; }; 1EBA42FE14B97ABF003CC0EA /* PhraseNode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseNode.cpp; path = ../../OnDiskPt/PhraseNode.cpp; sourceTree = "<group>"; };
1ED4FB0D11BDBA2B004E826A /* PhraseNode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseNode.h; path = src/PhraseNode.h; sourceTree = "<group>"; }; 1EBA42FF14B97ABF003CC0EA /* PhraseNode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseNode.h; path = ../../OnDiskPt/PhraseNode.h; sourceTree = "<group>"; };
1ED4FB0E11BDBA2B004E826A /* SourcePhrase.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SourcePhrase.cpp; path = src/SourcePhrase.cpp; sourceTree = "<group>"; }; 1EBA430014B97ABF003CC0EA /* SourcePhrase.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SourcePhrase.cpp; path = ../../OnDiskPt/SourcePhrase.cpp; sourceTree = "<group>"; };
1ED4FB0F11BDBA2B004E826A /* SourcePhrase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SourcePhrase.h; path = src/SourcePhrase.h; sourceTree = "<group>"; }; 1EBA430114B97ABF003CC0EA /* SourcePhrase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SourcePhrase.h; path = ../../OnDiskPt/SourcePhrase.h; sourceTree = "<group>"; };
1ED4FB1011BDBA2B004E826A /* TargetPhrase.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TargetPhrase.cpp; path = src/TargetPhrase.cpp; sourceTree = "<group>"; }; 1EBA430214B97ABF003CC0EA /* TargetPhrase.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TargetPhrase.cpp; path = ../../OnDiskPt/TargetPhrase.cpp; sourceTree = "<group>"; };
1ED4FB1111BDBA2B004E826A /* TargetPhrase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TargetPhrase.h; path = src/TargetPhrase.h; sourceTree = "<group>"; }; 1EBA430314B97ABF003CC0EA /* TargetPhrase.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TargetPhrase.h; path = ../../OnDiskPt/TargetPhrase.h; sourceTree = "<group>"; };
1ED4FB1211BDBA2B004E826A /* TargetPhraseCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TargetPhraseCollection.cpp; path = src/TargetPhraseCollection.cpp; sourceTree = "<group>"; }; 1EBA430414B97ABF003CC0EA /* TargetPhraseCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TargetPhraseCollection.cpp; path = ../../OnDiskPt/TargetPhraseCollection.cpp; sourceTree = "<group>"; };
1ED4FB1311BDBA2B004E826A /* TargetPhraseCollection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TargetPhraseCollection.h; path = src/TargetPhraseCollection.h; sourceTree = "<group>"; }; 1EBA430514B97ABF003CC0EA /* TargetPhraseCollection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TargetPhraseCollection.h; path = ../../OnDiskPt/TargetPhraseCollection.h; sourceTree = "<group>"; };
1ED4FB1411BDBA2B004E826A /* Vocab.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Vocab.cpp; path = src/Vocab.cpp; sourceTree = "<group>"; }; 1EBA430614B97ABF003CC0EA /* Vocab.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Vocab.cpp; path = ../../OnDiskPt/Vocab.cpp; sourceTree = "<group>"; };
1ED4FB1511BDBA2B004E826A /* Vocab.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Vocab.h; path = src/Vocab.h; sourceTree = "<group>"; }; 1EBA430714B97ABF003CC0EA /* Vocab.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Vocab.h; path = ../../OnDiskPt/Vocab.h; sourceTree = "<group>"; };
1ED4FB1611BDBA2B004E826A /* Word.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Word.cpp; path = src/Word.cpp; sourceTree = "<group>"; }; 1EBA430814B97ABF003CC0EA /* Word.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Word.cpp; path = ../../OnDiskPt/Word.cpp; sourceTree = "<group>"; };
1ED4FB1711BDBA2B004E826A /* Word.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Word.h; path = src/Word.h; sourceTree = "<group>"; }; 1EBA430914B97ABF003CC0EA /* Word.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Word.h; path = ../../OnDiskPt/Word.h; sourceTree = "<group>"; };
D2AAC046055464E500DB518D /* libOnDiskPt.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libOnDiskPt.a; sourceTree = BUILT_PRODUCTS_DIR; }; D2AAC046055464E500DB518D /* libOnDiskPt.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = libOnDiskPt.a; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */ /* End PBXFileReference section */
@ -69,22 +69,22 @@
08FB7795FE84155DC02AAC07 /* Source */ = { 08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1ED4FB0811BDBA2B004E826A /* OnDiskWrapper.cpp */, 1EBA42FA14B97ABF003CC0EA /* OnDiskWrapper.cpp */,
1ED4FB0911BDBA2B004E826A /* OnDiskWrapper.h */, 1EBA42FB14B97ABF003CC0EA /* OnDiskWrapper.h */,
1ED4FB0A11BDBA2B004E826A /* Phrase.cpp */, 1EBA42FC14B97ABF003CC0EA /* Phrase.cpp */,
1ED4FB0B11BDBA2B004E826A /* Phrase.h */, 1EBA42FD14B97ABF003CC0EA /* Phrase.h */,
1ED4FB0C11BDBA2B004E826A /* PhraseNode.cpp */, 1EBA42FE14B97ABF003CC0EA /* PhraseNode.cpp */,
1ED4FB0D11BDBA2B004E826A /* PhraseNode.h */, 1EBA42FF14B97ABF003CC0EA /* PhraseNode.h */,
1ED4FB0E11BDBA2B004E826A /* SourcePhrase.cpp */, 1EBA430014B97ABF003CC0EA /* SourcePhrase.cpp */,
1ED4FB0F11BDBA2B004E826A /* SourcePhrase.h */, 1EBA430114B97ABF003CC0EA /* SourcePhrase.h */,
1ED4FB1011BDBA2B004E826A /* TargetPhrase.cpp */, 1EBA430214B97ABF003CC0EA /* TargetPhrase.cpp */,
1ED4FB1111BDBA2B004E826A /* TargetPhrase.h */, 1EBA430314B97ABF003CC0EA /* TargetPhrase.h */,
1ED4FB1211BDBA2B004E826A /* TargetPhraseCollection.cpp */, 1EBA430414B97ABF003CC0EA /* TargetPhraseCollection.cpp */,
1ED4FB1311BDBA2B004E826A /* TargetPhraseCollection.h */, 1EBA430514B97ABF003CC0EA /* TargetPhraseCollection.h */,
1ED4FB1411BDBA2B004E826A /* Vocab.cpp */, 1EBA430614B97ABF003CC0EA /* Vocab.cpp */,
1ED4FB1511BDBA2B004E826A /* Vocab.h */, 1EBA430714B97ABF003CC0EA /* Vocab.h */,
1ED4FB1611BDBA2B004E826A /* Word.cpp */, 1EBA430814B97ABF003CC0EA /* Word.cpp */,
1ED4FB1711BDBA2B004E826A /* Word.h */, 1EBA430914B97ABF003CC0EA /* Word.h */,
); );
name = Source; name = Source;
sourceTree = "<group>"; sourceTree = "<group>";
@ -111,14 +111,14 @@
isa = PBXHeadersBuildPhase; isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
1ED4FB1A11BDBA2B004E826A /* OnDiskWrapper.h in Headers */, 1EBA430D14B97ABF003CC0EA /* OnDiskWrapper.h in Headers */,
1ED4FB1C11BDBA2B004E826A /* Phrase.h in Headers */, 1EBA430F14B97ABF003CC0EA /* Phrase.h in Headers */,
1ED4FB1E11BDBA2B004E826A /* PhraseNode.h in Headers */, 1EBA431114B97ABF003CC0EA /* PhraseNode.h in Headers */,
1ED4FB2011BDBA2B004E826A /* SourcePhrase.h in Headers */, 1EBA431314B97ABF003CC0EA /* SourcePhrase.h in Headers */,
1ED4FB2211BDBA2B004E826A /* TargetPhrase.h in Headers */, 1EBA431514B97ABF003CC0EA /* TargetPhrase.h in Headers */,
1ED4FB2411BDBA2B004E826A /* TargetPhraseCollection.h in Headers */, 1EBA431714B97ABF003CC0EA /* TargetPhraseCollection.h in Headers */,
1ED4FB2611BDBA2B004E826A /* Vocab.h in Headers */, 1EBA431914B97ABF003CC0EA /* Vocab.h in Headers */,
1ED4FB2811BDBA2B004E826A /* Word.h in Headers */, 1EBA431B14B97ABF003CC0EA /* Word.h in Headers */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -171,14 +171,14 @@
isa = PBXSourcesBuildPhase; isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
1ED4FB1911BDBA2B004E826A /* OnDiskWrapper.cpp in Sources */, 1EBA430C14B97ABF003CC0EA /* OnDiskWrapper.cpp in Sources */,
1ED4FB1B11BDBA2B004E826A /* Phrase.cpp in Sources */, 1EBA430E14B97ABF003CC0EA /* Phrase.cpp in Sources */,
1ED4FB1D11BDBA2B004E826A /* PhraseNode.cpp in Sources */, 1EBA431014B97ABF003CC0EA /* PhraseNode.cpp in Sources */,
1ED4FB1F11BDBA2B004E826A /* SourcePhrase.cpp in Sources */, 1EBA431214B97ABF003CC0EA /* SourcePhrase.cpp in Sources */,
1ED4FB2111BDBA2B004E826A /* TargetPhrase.cpp in Sources */, 1EBA431414B97ABF003CC0EA /* TargetPhrase.cpp in Sources */,
1ED4FB2311BDBA2B004E826A /* TargetPhraseCollection.cpp in Sources */, 1EBA431614B97ABF003CC0EA /* TargetPhraseCollection.cpp in Sources */,
1ED4FB2511BDBA2B004E826A /* Vocab.cpp in Sources */, 1EBA431814B97ABF003CC0EA /* Vocab.cpp in Sources */,
1ED4FB2711BDBA2B004E826A /* Word.cpp in Sources */, 1EBA431A14B97ABF003CC0EA /* Word.cpp in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -196,8 +196,7 @@
GCC_OPTIMIZATION_LEVEL = 0; GCC_OPTIMIZATION_LEVEL = 0;
HEADER_SEARCH_PATHS = ( HEADER_SEARCH_PATHS = (
/opt/local/include, /opt/local/include,
../kenlm, ../..,
../,
); );
INSTALL_PATH = /usr/local/lib; INSTALL_PATH = /usr/local/lib;
PRODUCT_NAME = OnDiskPt; PRODUCT_NAME = OnDiskPt;
@ -212,8 +211,7 @@
GCC_MODEL_TUNING = G5; GCC_MODEL_TUNING = G5;
HEADER_SEARCH_PATHS = ( HEADER_SEARCH_PATHS = (
/opt/local/include, /opt/local/include,
../kenlm, ../..,
../,
); );
INSTALL_PATH = /usr/local/lib; INSTALL_PATH = /usr/local/lib;
PRODUCT_NAME = OnDiskPt; PRODUCT_NAME = OnDiskPt;

View File

@ -0,0 +1,159 @@
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32">
<Configuration>Debug</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|Win32">
<Configuration>Release</Configuration>
<Platform>Win32</Platform>
</ProjectConfiguration>
</ItemGroup>
<ItemGroup>
<None Include="..\..\lm\bhiksha.hh" />
<None Include="..\..\lm\binary_format.hh" />
<None Include="..\..\lm\blank.hh" />
<None Include="..\..\lm\clean.sh" />
<None Include="..\..\lm\compile.sh" />
<None Include="..\..\lm\config.hh" />
<None Include="..\..\lm\COPYING" />
<None Include="..\..\lm\COPYING.LESSER" />
<None Include="..\..\lm\enumerate_vocab.hh" />
<None Include="..\..\lm\facade.hh" />
<None Include="..\..\lm\Jamfile" />
<None Include="..\..\lm\left.hh" />
<None Include="..\..\lm\LICENSE" />
<None Include="..\..\lm\lm_exception.hh" />
<None Include="..\..\lm\max_order.hh" />
<None Include="..\..\lm\model.hh" />
<None Include="..\..\lm\model_type.hh" />
<None Include="..\..\lm\quantize.hh" />
<None Include="..\..\lm\README" />
<None Include="..\..\lm\read_arpa.hh" />
<None Include="..\..\lm\return.hh" />
<None Include="..\..\lm\search_hashed.hh" />
<None Include="..\..\lm\search_trie.hh" />
<None Include="..\..\lm\test.arpa" />
<None Include="..\..\lm\test.sh" />
<None Include="..\..\lm\test_nounk.arpa" />
<None Include="..\..\lm\trie.hh" />
<None Include="..\..\lm\trie_sort.hh" />
<None Include="..\..\lm\virtual_interface.hh" />
<None Include="..\..\lm\vocab.hh" />
<None Include="..\..\lm\weights.hh" />
<None Include="..\..\lm\word_index.hh" />
<None Include="..\..\util\bit_packing.hh" />
<None Include="..\..\util\check.hh" />
<None Include="..\..\util\COPYING" />
<None Include="..\..\util\COPYING.LESSER" />
<None Include="..\..\util\ersatz_progress.hh" />
<None Include="..\..\util\exception.hh" />
<None Include="..\..\util\file.hh" />
<None Include="..\..\util\file_piece.hh" />
<None Include="..\..\util\getopt.hh" />
<None Include="..\..\util\have.hh" />
<None Include="..\..\util\Jamfile" />
<None Include="..\..\util\joint_sort.hh" />
<None Include="..\..\util\key_value_packing.hh" />
<None Include="..\..\util\LICENSE" />
<None Include="..\..\util\mmap.hh" />
<None Include="..\..\util\murmur_hash.hh" />
<None Include="..\..\util\probing_hash_table.hh" />
<None Include="..\..\util\proxy_iterator.hh" />
<None Include="..\..\util\scoped.hh" />
<None Include="..\..\util\sized_iterator.hh" />
<None Include="..\..\util\sorted_uniform.hh" />
<None Include="..\..\util\string_piece.hh" />
<None Include="..\..\util\tokenize_piece.hh" />
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\..\lm\bhiksha.cc" />
<ClCompile Include="..\..\lm\binary_format.cc" />
<ClCompile Include="..\..\lm\build_binary.cc" />
<ClCompile Include="..\..\lm\config.cc" />
<ClCompile Include="..\..\lm\left_test.cc" />
<ClCompile Include="..\..\lm\lm_exception.cc" />
<ClCompile Include="..\..\lm\model.cc" />
<ClCompile Include="..\..\lm\ngram_query.cc" />
<ClCompile Include="..\..\lm\quantize.cc" />
<ClCompile Include="..\..\lm\read_arpa.cc" />
<ClCompile Include="..\..\lm\search_hashed.cc" />
<ClCompile Include="..\..\lm\search_trie.cc" />
<ClCompile Include="..\..\lm\trie.cc" />
<ClCompile Include="..\..\lm\trie_sort.cc" />
<ClCompile Include="..\..\lm\virtual_interface.cc" />
<ClCompile Include="..\..\lm\vocab.cc" />
<ClCompile Include="..\..\util\bit_packing.cc" />
<ClCompile Include="..\..\util\ersatz_progress.cc" />
<ClCompile Include="..\..\util\exception.cc" />
<ClCompile Include="..\..\util\file.cc" />
<ClCompile Include="..\..\util\file_piece.cc" />
<ClCompile Include="..\..\util\getopt.c" />
<ClCompile Include="..\..\util\mmap.cc" />
<ClCompile Include="..\..\util\murmur_hash.cc" />
</ItemGroup>
<PropertyGroup Label="Globals">
<ProjectGuid>{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}</ProjectGuid>
<Keyword>Win32Proj</Keyword>
<RootNamespace>kenlm</RootNamespace>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
<ConfigurationType>StaticLibrary</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<PropertyGroup />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<PrecompiledHeader>
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<PrecompiledHeader>
</PrecompiledHeader>
<Optimization>MaxSpeed</Optimization>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
</Link>
</ItemDefinitionGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>

View File

@ -0,0 +1,592 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 46;
objects = {
/* Begin PBXBuildFile section */
1EBA44AD14B97E22003CC0EA /* bhiksha.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA442B14B97E22003CC0EA /* bhiksha.cc */; };
1EBA44AE14B97E22003CC0EA /* bhiksha.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA442C14B97E22003CC0EA /* bhiksha.hh */; };
1EBA44D414B97E22003CC0EA /* binary_format.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA447D14B97E22003CC0EA /* binary_format.cc */; };
1EBA44D514B97E22003CC0EA /* binary_format.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA447E14B97E22003CC0EA /* binary_format.hh */; };
1EBA44D614B97E22003CC0EA /* blank.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA447F14B97E22003CC0EA /* blank.hh */; };
1EBA44D814B97E22003CC0EA /* config.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA448314B97E22003CC0EA /* config.cc */; };
1EBA44D914B97E22003CC0EA /* config.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA448414B97E22003CC0EA /* config.hh */; };
1EBA44DA14B97E22003CC0EA /* enumerate_vocab.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA448714B97E22003CC0EA /* enumerate_vocab.hh */; };
1EBA44DB14B97E22003CC0EA /* facade.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA448814B97E22003CC0EA /* facade.hh */; };
1EBA44DC14B97E22003CC0EA /* Jamfile in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA448914B97E22003CC0EA /* Jamfile */; };
1EBA44DD14B97E22003CC0EA /* left_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA448A14B97E22003CC0EA /* left_test.cc */; };
1EBA44DE14B97E22003CC0EA /* left.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA448B14B97E22003CC0EA /* left.hh */; };
1EBA44DF14B97E22003CC0EA /* lm_exception.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA448D14B97E22003CC0EA /* lm_exception.cc */; };
1EBA44E014B97E22003CC0EA /* lm_exception.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA448E14B97E22003CC0EA /* lm_exception.hh */; };
1EBA44E114B97E22003CC0EA /* max_order.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA448F14B97E22003CC0EA /* max_order.hh */; };
1EBA44E214B97E22003CC0EA /* model_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA449014B97E22003CC0EA /* model_test.cc */; };
1EBA44E314B97E22003CC0EA /* model_type.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA449114B97E22003CC0EA /* model_type.hh */; };
1EBA44E414B97E22003CC0EA /* model.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA449214B97E22003CC0EA /* model.cc */; };
1EBA44E514B97E22003CC0EA /* model.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA449314B97E22003CC0EA /* model.hh */; };
1EBA44E614B97E22003CC0EA /* ngram_query.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA449414B97E22003CC0EA /* ngram_query.cc */; };
1EBA44E714B97E22003CC0EA /* ngram_query.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA449514B97E22003CC0EA /* ngram_query.hh */; };
1EBA44E814B97E22003CC0EA /* quantize.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA449614B97E22003CC0EA /* quantize.cc */; };
1EBA44E914B97E22003CC0EA /* quantize.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA449714B97E22003CC0EA /* quantize.hh */; };
1EBA44EA14B97E22003CC0EA /* read_arpa.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA449814B97E22003CC0EA /* read_arpa.cc */; };
1EBA44EB14B97E22003CC0EA /* read_arpa.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA449914B97E22003CC0EA /* read_arpa.hh */; };
1EBA44EC14B97E22003CC0EA /* return.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA449B14B97E22003CC0EA /* return.hh */; };
1EBA44ED14B97E22003CC0EA /* search_hashed.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA449C14B97E22003CC0EA /* search_hashed.cc */; };
1EBA44EE14B97E22003CC0EA /* search_hashed.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA449D14B97E22003CC0EA /* search_hashed.hh */; };
1EBA44EF14B97E22003CC0EA /* search_trie.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA449E14B97E22003CC0EA /* search_trie.cc */; };
1EBA44F014B97E22003CC0EA /* search_trie.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA449F14B97E22003CC0EA /* search_trie.hh */; };
1EBA44F114B97E22003CC0EA /* trie_sort.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA44A314B97E22003CC0EA /* trie_sort.cc */; };
1EBA44F214B97E22003CC0EA /* trie_sort.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA44A414B97E22003CC0EA /* trie_sort.hh */; };
1EBA44F314B97E22003CC0EA /* trie.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA44A514B97E22003CC0EA /* trie.cc */; };
1EBA44F414B97E22003CC0EA /* trie.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA44A614B97E22003CC0EA /* trie.hh */; };
1EBA44F514B97E22003CC0EA /* virtual_interface.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA44A714B97E22003CC0EA /* virtual_interface.cc */; };
1EBA44F614B97E22003CC0EA /* virtual_interface.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA44A814B97E22003CC0EA /* virtual_interface.hh */; };
1EBA44F714B97E22003CC0EA /* vocab.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA44A914B97E22003CC0EA /* vocab.cc */; };
1EBA44F814B97E22003CC0EA /* vocab.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA44AA14B97E22003CC0EA /* vocab.hh */; };
1EBA44F914B97E22003CC0EA /* weights.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA44AB14B97E22003CC0EA /* weights.hh */; };
1EBA44FA14B97E22003CC0EA /* word_index.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA44AC14B97E22003CC0EA /* word_index.hh */; };
1EBA457F14B97E92003CC0EA /* bit_packing_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA453614B97E92003CC0EA /* bit_packing_test.cc */; };
1EBA458014B97E92003CC0EA /* bit_packing.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA453714B97E92003CC0EA /* bit_packing.cc */; };
1EBA458114B97E92003CC0EA /* bit_packing.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA453814B97E92003CC0EA /* bit_packing.hh */; };
1EBA458214B97E92003CC0EA /* check.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA453914B97E92003CC0EA /* check.hh */; };
1EBA458314B97E92003CC0EA /* ersatz_progress.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA453C14B97E92003CC0EA /* ersatz_progress.cc */; };
1EBA458414B97E92003CC0EA /* ersatz_progress.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA453D14B97E92003CC0EA /* ersatz_progress.hh */; };
1EBA458514B97E92003CC0EA /* exception.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA453E14B97E92003CC0EA /* exception.cc */; };
1EBA458614B97E92003CC0EA /* exception.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA453F14B97E92003CC0EA /* exception.hh */; };
1EBA458714B97E92003CC0EA /* file_piece_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454014B97E92003CC0EA /* file_piece_test.cc */; };
1EBA458814B97E92003CC0EA /* file_piece.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454114B97E92003CC0EA /* file_piece.cc */; };
1EBA458914B97E92003CC0EA /* file_piece.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA454214B97E92003CC0EA /* file_piece.hh */; };
1EBA458A14B97E92003CC0EA /* file.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454314B97E92003CC0EA /* file.cc */; };
1EBA458B14B97E92003CC0EA /* file.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA454414B97E92003CC0EA /* file.hh */; };
1EBA458C14B97E92003CC0EA /* getopt.c in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454514B97E92003CC0EA /* getopt.c */; };
1EBA458D14B97E92003CC0EA /* getopt.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA454614B97E92003CC0EA /* getopt.hh */; };
1EBA458E14B97E92003CC0EA /* have.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA454714B97E92003CC0EA /* have.hh */; };
1EBA458F14B97E92003CC0EA /* Jamfile in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454814B97E92003CC0EA /* Jamfile */; };
1EBA459014B97E92003CC0EA /* joint_sort_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454914B97E92003CC0EA /* joint_sort_test.cc */; };
1EBA459114B97E92003CC0EA /* joint_sort.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA454A14B97E92003CC0EA /* joint_sort.hh */; };
1EBA459214B97E92003CC0EA /* key_value_packing_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454B14B97E92003CC0EA /* key_value_packing_test.cc */; };
1EBA459314B97E92003CC0EA /* key_value_packing.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA454C14B97E92003CC0EA /* key_value_packing.hh */; };
1EBA459414B97E92003CC0EA /* mmap.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA454E14B97E92003CC0EA /* mmap.cc */; };
1EBA459514B97E92003CC0EA /* mmap.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA454F14B97E92003CC0EA /* mmap.hh */; };
1EBA459614B97E92003CC0EA /* murmur_hash.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA455014B97E92003CC0EA /* murmur_hash.cc */; };
1EBA459714B97E92003CC0EA /* murmur_hash.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA455114B97E92003CC0EA /* murmur_hash.hh */; };
1EBA459814B97E92003CC0EA /* probing_hash_table_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA455214B97E92003CC0EA /* probing_hash_table_test.cc */; };
1EBA459914B97E92003CC0EA /* probing_hash_table.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA455314B97E92003CC0EA /* probing_hash_table.hh */; };
1EBA459A14B97E92003CC0EA /* proxy_iterator.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA455414B97E92003CC0EA /* proxy_iterator.hh */; };
1EBA459B14B97E92003CC0EA /* scoped.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA455514B97E92003CC0EA /* scoped.hh */; };
1EBA459C14B97E92003CC0EA /* sized_iterator.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA455614B97E92003CC0EA /* sized_iterator.hh */; };
1EBA459D14B97E92003CC0EA /* sorted_uniform_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA455714B97E92003CC0EA /* sorted_uniform_test.cc */; };
1EBA459E14B97E92003CC0EA /* sorted_uniform.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA455814B97E92003CC0EA /* sorted_uniform.hh */; };
1EBA459F14B97E92003CC0EA /* string_piece.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA455914B97E92003CC0EA /* string_piece.hh */; };
1EBA45A014B97E92003CC0EA /* tokenize_piece_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EBA455A14B97E92003CC0EA /* tokenize_piece_test.cc */; };
1EBA45A114B97E92003CC0EA /* tokenize_piece.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EBA455B14B97E92003CC0EA /* tokenize_piece.hh */; };
/* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */
1EBA45A314B97E93003CC0EA /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EBA455C14B97E92003CC0EA /* util.xcodeproj */;
proxyType = 2;
remoteGlobalIDString = 1EE8C2711476A262002496F2;
remoteInfo = util;
};
/* End PBXContainerItemProxy section */
/* Begin PBXFileReference section */
1EBA442B14B97E22003CC0EA /* bhiksha.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bhiksha.cc; path = ../../lm/bhiksha.cc; sourceTree = "<group>"; };
1EBA442C14B97E22003CC0EA /* bhiksha.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = bhiksha.hh; path = ../../lm/bhiksha.hh; sourceTree = "<group>"; };
1EBA447D14B97E22003CC0EA /* binary_format.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = binary_format.cc; path = ../../lm/binary_format.cc; sourceTree = "<group>"; };
1EBA447E14B97E22003CC0EA /* binary_format.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = binary_format.hh; path = ../../lm/binary_format.hh; sourceTree = "<group>"; };
1EBA447F14B97E22003CC0EA /* blank.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = blank.hh; path = ../../lm/blank.hh; sourceTree = "<group>"; };
1EBA448114B97E22003CC0EA /* clean.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = clean.sh; path = ../../lm/clean.sh; sourceTree = "<group>"; };
1EBA448214B97E22003CC0EA /* compile.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = compile.sh; path = ../../lm/compile.sh; sourceTree = "<group>"; };
1EBA448314B97E22003CC0EA /* config.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = config.cc; path = ../../lm/config.cc; sourceTree = "<group>"; };
1EBA448414B97E22003CC0EA /* config.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = config.hh; path = ../../lm/config.hh; sourceTree = "<group>"; };
1EBA448514B97E22003CC0EA /* COPYING */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = COPYING; path = ../../lm/COPYING; sourceTree = "<group>"; };
1EBA448614B97E22003CC0EA /* COPYING.LESSER */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = COPYING.LESSER; path = ../../lm/COPYING.LESSER; sourceTree = "<group>"; };
1EBA448714B97E22003CC0EA /* enumerate_vocab.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = enumerate_vocab.hh; path = ../../lm/enumerate_vocab.hh; sourceTree = "<group>"; };
1EBA448814B97E22003CC0EA /* facade.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = facade.hh; path = ../../lm/facade.hh; sourceTree = "<group>"; };
1EBA448914B97E22003CC0EA /* Jamfile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.jam; name = Jamfile; path = ../../lm/Jamfile; sourceTree = "<group>"; };
1EBA448A14B97E22003CC0EA /* left_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = left_test.cc; path = ../../lm/left_test.cc; sourceTree = "<group>"; };
1EBA448B14B97E22003CC0EA /* left.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = left.hh; path = ../../lm/left.hh; sourceTree = "<group>"; };
1EBA448C14B97E22003CC0EA /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = LICENSE; path = ../../lm/LICENSE; sourceTree = "<group>"; };
1EBA448D14B97E22003CC0EA /* lm_exception.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = lm_exception.cc; path = ../../lm/lm_exception.cc; sourceTree = "<group>"; };
1EBA448E14B97E22003CC0EA /* lm_exception.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = lm_exception.hh; path = ../../lm/lm_exception.hh; sourceTree = "<group>"; };
1EBA448F14B97E22003CC0EA /* max_order.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = max_order.hh; path = ../../lm/max_order.hh; sourceTree = "<group>"; };
1EBA449014B97E22003CC0EA /* model_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = model_test.cc; path = ../../lm/model_test.cc; sourceTree = "<group>"; };
1EBA449114B97E22003CC0EA /* model_type.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = model_type.hh; path = ../../lm/model_type.hh; sourceTree = "<group>"; };
1EBA449214B97E22003CC0EA /* model.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = model.cc; path = ../../lm/model.cc; sourceTree = "<group>"; };
1EBA449314B97E22003CC0EA /* model.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = model.hh; path = ../../lm/model.hh; sourceTree = "<group>"; };
1EBA449414B97E22003CC0EA /* ngram_query.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ngram_query.cc; path = ../../lm/ngram_query.cc; sourceTree = "<group>"; };
1EBA449514B97E22003CC0EA /* ngram_query.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = ngram_query.hh; path = ../../lm/ngram_query.hh; sourceTree = "<group>"; };
1EBA449614B97E22003CC0EA /* quantize.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = quantize.cc; path = ../../lm/quantize.cc; sourceTree = "<group>"; };
1EBA449714B97E22003CC0EA /* quantize.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = quantize.hh; path = ../../lm/quantize.hh; sourceTree = "<group>"; };
1EBA449814B97E22003CC0EA /* read_arpa.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = read_arpa.cc; path = ../../lm/read_arpa.cc; sourceTree = "<group>"; };
1EBA449914B97E22003CC0EA /* read_arpa.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = read_arpa.hh; path = ../../lm/read_arpa.hh; sourceTree = "<group>"; };
1EBA449A14B97E22003CC0EA /* README */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = README; path = ../../lm/README; sourceTree = "<group>"; };
1EBA449B14B97E22003CC0EA /* return.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = return.hh; path = ../../lm/return.hh; sourceTree = "<group>"; };
1EBA449C14B97E22003CC0EA /* search_hashed.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = search_hashed.cc; path = ../../lm/search_hashed.cc; sourceTree = "<group>"; };
1EBA449D14B97E22003CC0EA /* search_hashed.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = search_hashed.hh; path = ../../lm/search_hashed.hh; sourceTree = "<group>"; };
1EBA449E14B97E22003CC0EA /* search_trie.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = search_trie.cc; path = ../../lm/search_trie.cc; sourceTree = "<group>"; };
1EBA449F14B97E22003CC0EA /* search_trie.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = search_trie.hh; path = ../../lm/search_trie.hh; sourceTree = "<group>"; };
1EBA44A014B97E22003CC0EA /* test_nounk.arpa */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = test_nounk.arpa; path = ../../lm/test_nounk.arpa; sourceTree = "<group>"; };
1EBA44A114B97E22003CC0EA /* test.arpa */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = test.arpa; path = ../../lm/test.arpa; sourceTree = "<group>"; };
1EBA44A214B97E22003CC0EA /* test.sh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.script.sh; name = test.sh; path = ../../lm/test.sh; sourceTree = "<group>"; };
1EBA44A314B97E22003CC0EA /* trie_sort.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = trie_sort.cc; path = ../../lm/trie_sort.cc; sourceTree = "<group>"; };
1EBA44A414B97E22003CC0EA /* trie_sort.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = trie_sort.hh; path = ../../lm/trie_sort.hh; sourceTree = "<group>"; };
1EBA44A514B97E22003CC0EA /* trie.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = trie.cc; path = ../../lm/trie.cc; sourceTree = "<group>"; };
1EBA44A614B97E22003CC0EA /* trie.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = trie.hh; path = ../../lm/trie.hh; sourceTree = "<group>"; };
1EBA44A714B97E22003CC0EA /* virtual_interface.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = virtual_interface.cc; path = ../../lm/virtual_interface.cc; sourceTree = "<group>"; };
1EBA44A814B97E22003CC0EA /* virtual_interface.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = virtual_interface.hh; path = ../../lm/virtual_interface.hh; sourceTree = "<group>"; };
1EBA44A914B97E22003CC0EA /* vocab.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = vocab.cc; path = ../../lm/vocab.cc; sourceTree = "<group>"; };
1EBA44AA14B97E22003CC0EA /* vocab.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = vocab.hh; path = ../../lm/vocab.hh; sourceTree = "<group>"; };
1EBA44AB14B97E22003CC0EA /* weights.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = weights.hh; path = ../../lm/weights.hh; sourceTree = "<group>"; };
1EBA44AC14B97E22003CC0EA /* word_index.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = word_index.hh; path = ../../lm/word_index.hh; sourceTree = "<group>"; };
1EBA453614B97E92003CC0EA /* bit_packing_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bit_packing_test.cc; path = ../../util/bit_packing_test.cc; sourceTree = "<group>"; };
1EBA453714B97E92003CC0EA /* bit_packing.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = bit_packing.cc; path = ../../util/bit_packing.cc; sourceTree = "<group>"; };
1EBA453814B97E92003CC0EA /* bit_packing.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = bit_packing.hh; path = ../../util/bit_packing.hh; sourceTree = "<group>"; };
1EBA453914B97E92003CC0EA /* check.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = check.hh; path = ../../util/check.hh; sourceTree = "<group>"; };
1EBA453A14B97E92003CC0EA /* COPYING */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = COPYING; path = ../../util/COPYING; sourceTree = "<group>"; };
1EBA453B14B97E92003CC0EA /* COPYING.LESSER */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = COPYING.LESSER; path = ../../util/COPYING.LESSER; sourceTree = "<group>"; };
1EBA453C14B97E92003CC0EA /* ersatz_progress.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ersatz_progress.cc; path = ../../util/ersatz_progress.cc; sourceTree = "<group>"; };
1EBA453D14B97E92003CC0EA /* ersatz_progress.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = ersatz_progress.hh; path = ../../util/ersatz_progress.hh; sourceTree = "<group>"; };
1EBA453E14B97E92003CC0EA /* exception.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = exception.cc; path = ../../util/exception.cc; sourceTree = "<group>"; };
1EBA453F14B97E92003CC0EA /* exception.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = exception.hh; path = ../../util/exception.hh; sourceTree = "<group>"; };
1EBA454014B97E92003CC0EA /* file_piece_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = file_piece_test.cc; path = ../../util/file_piece_test.cc; sourceTree = "<group>"; };
1EBA454114B97E92003CC0EA /* file_piece.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = file_piece.cc; path = ../../util/file_piece.cc; sourceTree = "<group>"; };
1EBA454214B97E92003CC0EA /* file_piece.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = file_piece.hh; path = ../../util/file_piece.hh; sourceTree = "<group>"; };
1EBA454314B97E92003CC0EA /* file.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = file.cc; path = ../../util/file.cc; sourceTree = "<group>"; };
1EBA454414B97E92003CC0EA /* file.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = file.hh; path = ../../util/file.hh; sourceTree = "<group>"; };
1EBA454514B97E92003CC0EA /* getopt.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = getopt.c; path = ../../util/getopt.c; sourceTree = "<group>"; };
1EBA454614B97E92003CC0EA /* getopt.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = getopt.hh; path = ../../util/getopt.hh; sourceTree = "<group>"; };
1EBA454714B97E92003CC0EA /* have.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = have.hh; path = ../../util/have.hh; sourceTree = "<group>"; };
1EBA454814B97E92003CC0EA /* Jamfile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.jam; name = Jamfile; path = ../../util/Jamfile; sourceTree = "<group>"; };
1EBA454914B97E92003CC0EA /* joint_sort_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = joint_sort_test.cc; path = ../../util/joint_sort_test.cc; sourceTree = "<group>"; };
1EBA454A14B97E92003CC0EA /* joint_sort.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = joint_sort.hh; path = ../../util/joint_sort.hh; sourceTree = "<group>"; };
1EBA454B14B97E92003CC0EA /* key_value_packing_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = key_value_packing_test.cc; path = ../../util/key_value_packing_test.cc; sourceTree = "<group>"; };
1EBA454C14B97E92003CC0EA /* key_value_packing.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = key_value_packing.hh; path = ../../util/key_value_packing.hh; sourceTree = "<group>"; };
1EBA454D14B97E92003CC0EA /* LICENSE */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; name = LICENSE; path = ../../util/LICENSE; sourceTree = "<group>"; };
1EBA454E14B97E92003CC0EA /* mmap.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mmap.cc; path = ../../util/mmap.cc; sourceTree = "<group>"; };
1EBA454F14B97E92003CC0EA /* mmap.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = mmap.hh; path = ../../util/mmap.hh; sourceTree = "<group>"; };
1EBA455014B97E92003CC0EA /* murmur_hash.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = murmur_hash.cc; path = ../../util/murmur_hash.cc; sourceTree = "<group>"; };
1EBA455114B97E92003CC0EA /* murmur_hash.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = murmur_hash.hh; path = ../../util/murmur_hash.hh; sourceTree = "<group>"; };
1EBA455214B97E92003CC0EA /* probing_hash_table_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = probing_hash_table_test.cc; path = ../../util/probing_hash_table_test.cc; sourceTree = "<group>"; };
1EBA455314B97E92003CC0EA /* probing_hash_table.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = probing_hash_table.hh; path = ../../util/probing_hash_table.hh; sourceTree = "<group>"; };
1EBA455414B97E92003CC0EA /* proxy_iterator.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = proxy_iterator.hh; path = ../../util/proxy_iterator.hh; sourceTree = "<group>"; };
1EBA455514B97E92003CC0EA /* scoped.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = scoped.hh; path = ../../util/scoped.hh; sourceTree = "<group>"; };
1EBA455614B97E92003CC0EA /* sized_iterator.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sized_iterator.hh; path = ../../util/sized_iterator.hh; sourceTree = "<group>"; };
1EBA455714B97E92003CC0EA /* sorted_uniform_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = sorted_uniform_test.cc; path = ../../util/sorted_uniform_test.cc; sourceTree = "<group>"; };
1EBA455814B97E92003CC0EA /* sorted_uniform.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = sorted_uniform.hh; path = ../../util/sorted_uniform.hh; sourceTree = "<group>"; };
1EBA455914B97E92003CC0EA /* string_piece.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = string_piece.hh; path = ../../util/string_piece.hh; sourceTree = "<group>"; };
1EBA455A14B97E92003CC0EA /* tokenize_piece_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = tokenize_piece_test.cc; path = ../../util/tokenize_piece_test.cc; sourceTree = "<group>"; };
1EBA455B14B97E92003CC0EA /* tokenize_piece.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; name = tokenize_piece.hh; path = ../../util/tokenize_piece.hh; sourceTree = "<group>"; };
1EBA455C14B97E92003CC0EA /* util.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = util.xcodeproj; path = ../../util/util.xcodeproj; sourceTree = "<group>"; };
1EE8C2E91476A48E002496F2 /* liblm.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblm.a; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
1EE8C2E61476A48E002496F2 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
1EBA44FB14B97E6A003CC0EA /* lm */ = {
isa = PBXGroup;
children = (
1EBA442B14B97E22003CC0EA /* bhiksha.cc */,
1EBA442C14B97E22003CC0EA /* bhiksha.hh */,
1EBA447D14B97E22003CC0EA /* binary_format.cc */,
1EBA447E14B97E22003CC0EA /* binary_format.hh */,
1EBA447F14B97E22003CC0EA /* blank.hh */,
1EBA448114B97E22003CC0EA /* clean.sh */,
1EBA448214B97E22003CC0EA /* compile.sh */,
1EBA448314B97E22003CC0EA /* config.cc */,
1EBA448414B97E22003CC0EA /* config.hh */,
1EBA448514B97E22003CC0EA /* COPYING */,
1EBA448614B97E22003CC0EA /* COPYING.LESSER */,
1EBA448714B97E22003CC0EA /* enumerate_vocab.hh */,
1EBA448814B97E22003CC0EA /* facade.hh */,
1EBA448914B97E22003CC0EA /* Jamfile */,
1EBA448A14B97E22003CC0EA /* left_test.cc */,
1EBA448B14B97E22003CC0EA /* left.hh */,
1EBA448C14B97E22003CC0EA /* LICENSE */,
1EBA448D14B97E22003CC0EA /* lm_exception.cc */,
1EBA448E14B97E22003CC0EA /* lm_exception.hh */,
1EBA448F14B97E22003CC0EA /* max_order.hh */,
1EBA449014B97E22003CC0EA /* model_test.cc */,
1EBA449114B97E22003CC0EA /* model_type.hh */,
1EBA449214B97E22003CC0EA /* model.cc */,
1EBA449314B97E22003CC0EA /* model.hh */,
1EBA449414B97E22003CC0EA /* ngram_query.cc */,
1EBA449514B97E22003CC0EA /* ngram_query.hh */,
1EBA449614B97E22003CC0EA /* quantize.cc */,
1EBA449714B97E22003CC0EA /* quantize.hh */,
1EBA449814B97E22003CC0EA /* read_arpa.cc */,
1EBA449914B97E22003CC0EA /* read_arpa.hh */,
1EBA449A14B97E22003CC0EA /* README */,
1EBA449B14B97E22003CC0EA /* return.hh */,
1EBA449C14B97E22003CC0EA /* search_hashed.cc */,
1EBA449D14B97E22003CC0EA /* search_hashed.hh */,
1EBA449E14B97E22003CC0EA /* search_trie.cc */,
1EBA449F14B97E22003CC0EA /* search_trie.hh */,
1EBA44A014B97E22003CC0EA /* test_nounk.arpa */,
1EBA44A114B97E22003CC0EA /* test.arpa */,
1EBA44A214B97E22003CC0EA /* test.sh */,
1EBA44A314B97E22003CC0EA /* trie_sort.cc */,
1EBA44A414B97E22003CC0EA /* trie_sort.hh */,
1EBA44A514B97E22003CC0EA /* trie.cc */,
1EBA44A614B97E22003CC0EA /* trie.hh */,
1EBA44A714B97E22003CC0EA /* virtual_interface.cc */,
1EBA44A814B97E22003CC0EA /* virtual_interface.hh */,
1EBA44A914B97E22003CC0EA /* vocab.cc */,
1EBA44AA14B97E22003CC0EA /* vocab.hh */,
1EBA44AB14B97E22003CC0EA /* weights.hh */,
1EBA44AC14B97E22003CC0EA /* word_index.hh */,
);
name = lm;
sourceTree = "<group>";
};
1EBA44FC14B97E81003CC0EA /* util */ = {
isa = PBXGroup;
children = (
1EBA453614B97E92003CC0EA /* bit_packing_test.cc */,
1EBA453714B97E92003CC0EA /* bit_packing.cc */,
1EBA453814B97E92003CC0EA /* bit_packing.hh */,
1EBA453914B97E92003CC0EA /* check.hh */,
1EBA453A14B97E92003CC0EA /* COPYING */,
1EBA453B14B97E92003CC0EA /* COPYING.LESSER */,
1EBA453C14B97E92003CC0EA /* ersatz_progress.cc */,
1EBA453D14B97E92003CC0EA /* ersatz_progress.hh */,
1EBA453E14B97E92003CC0EA /* exception.cc */,
1EBA453F14B97E92003CC0EA /* exception.hh */,
1EBA454014B97E92003CC0EA /* file_piece_test.cc */,
1EBA454114B97E92003CC0EA /* file_piece.cc */,
1EBA454214B97E92003CC0EA /* file_piece.hh */,
1EBA454314B97E92003CC0EA /* file.cc */,
1EBA454414B97E92003CC0EA /* file.hh */,
1EBA454514B97E92003CC0EA /* getopt.c */,
1EBA454614B97E92003CC0EA /* getopt.hh */,
1EBA454714B97E92003CC0EA /* have.hh */,
1EBA454814B97E92003CC0EA /* Jamfile */,
1EBA454914B97E92003CC0EA /* joint_sort_test.cc */,
1EBA454A14B97E92003CC0EA /* joint_sort.hh */,
1EBA454B14B97E92003CC0EA /* key_value_packing_test.cc */,
1EBA454C14B97E92003CC0EA /* key_value_packing.hh */,
1EBA454D14B97E92003CC0EA /* LICENSE */,
1EBA454E14B97E92003CC0EA /* mmap.cc */,
1EBA454F14B97E92003CC0EA /* mmap.hh */,
1EBA455014B97E92003CC0EA /* murmur_hash.cc */,
1EBA455114B97E92003CC0EA /* murmur_hash.hh */,
1EBA455214B97E92003CC0EA /* probing_hash_table_test.cc */,
1EBA455314B97E92003CC0EA /* probing_hash_table.hh */,
1EBA455414B97E92003CC0EA /* proxy_iterator.hh */,
1EBA455514B97E92003CC0EA /* scoped.hh */,
1EBA455614B97E92003CC0EA /* sized_iterator.hh */,
1EBA455714B97E92003CC0EA /* sorted_uniform_test.cc */,
1EBA455814B97E92003CC0EA /* sorted_uniform.hh */,
1EBA455914B97E92003CC0EA /* string_piece.hh */,
1EBA455A14B97E92003CC0EA /* tokenize_piece_test.cc */,
1EBA455B14B97E92003CC0EA /* tokenize_piece.hh */,
1EBA455C14B97E92003CC0EA /* util.xcodeproj */,
);
name = util;
sourceTree = "<group>";
};
1EBA455D14B97E92003CC0EA /* Products */ = {
isa = PBXGroup;
children = (
1EBA45A414B97E93003CC0EA /* libutil.a */,
);
name = Products;
sourceTree = "<group>";
};
1EE8C2DE1476A48E002496F2 = {
isa = PBXGroup;
children = (
1EBA44FC14B97E81003CC0EA /* util */,
1EBA44FB14B97E6A003CC0EA /* lm */,
1EE8C2EA1476A48E002496F2 /* Products */,
);
sourceTree = "<group>";
};
1EE8C2EA1476A48E002496F2 /* Products */ = {
isa = PBXGroup;
children = (
1EE8C2E91476A48E002496F2 /* liblm.a */,
);
name = Products;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
1EE8C2E71476A48E002496F2 /* Headers */ = {
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
1EBA44AE14B97E22003CC0EA /* bhiksha.hh in Headers */,
1EBA44D514B97E22003CC0EA /* binary_format.hh in Headers */,
1EBA44D614B97E22003CC0EA /* blank.hh in Headers */,
1EBA44D914B97E22003CC0EA /* config.hh in Headers */,
1EBA44DA14B97E22003CC0EA /* enumerate_vocab.hh in Headers */,
1EBA44DB14B97E22003CC0EA /* facade.hh in Headers */,
1EBA44DE14B97E22003CC0EA /* left.hh in Headers */,
1EBA44E014B97E22003CC0EA /* lm_exception.hh in Headers */,
1EBA44E114B97E22003CC0EA /* max_order.hh in Headers */,
1EBA44E314B97E22003CC0EA /* model_type.hh in Headers */,
1EBA44E514B97E22003CC0EA /* model.hh in Headers */,
1EBA44E714B97E22003CC0EA /* ngram_query.hh in Headers */,
1EBA44E914B97E22003CC0EA /* quantize.hh in Headers */,
1EBA44EB14B97E22003CC0EA /* read_arpa.hh in Headers */,
1EBA44EC14B97E22003CC0EA /* return.hh in Headers */,
1EBA44EE14B97E22003CC0EA /* search_hashed.hh in Headers */,
1EBA44F014B97E22003CC0EA /* search_trie.hh in Headers */,
1EBA44F214B97E22003CC0EA /* trie_sort.hh in Headers */,
1EBA44F414B97E22003CC0EA /* trie.hh in Headers */,
1EBA44F614B97E22003CC0EA /* virtual_interface.hh in Headers */,
1EBA44F814B97E22003CC0EA /* vocab.hh in Headers */,
1EBA44F914B97E22003CC0EA /* weights.hh in Headers */,
1EBA44FA14B97E22003CC0EA /* word_index.hh in Headers */,
1EBA458114B97E92003CC0EA /* bit_packing.hh in Headers */,
1EBA458214B97E92003CC0EA /* check.hh in Headers */,
1EBA458414B97E92003CC0EA /* ersatz_progress.hh in Headers */,
1EBA458614B97E92003CC0EA /* exception.hh in Headers */,
1EBA458914B97E92003CC0EA /* file_piece.hh in Headers */,
1EBA458B14B97E92003CC0EA /* file.hh in Headers */,
1EBA458D14B97E92003CC0EA /* getopt.hh in Headers */,
1EBA458E14B97E92003CC0EA /* have.hh in Headers */,
1EBA459114B97E92003CC0EA /* joint_sort.hh in Headers */,
1EBA459314B97E92003CC0EA /* key_value_packing.hh in Headers */,
1EBA459514B97E92003CC0EA /* mmap.hh in Headers */,
1EBA459714B97E92003CC0EA /* murmur_hash.hh in Headers */,
1EBA459914B97E92003CC0EA /* probing_hash_table.hh in Headers */,
1EBA459A14B97E92003CC0EA /* proxy_iterator.hh in Headers */,
1EBA459B14B97E92003CC0EA /* scoped.hh in Headers */,
1EBA459C14B97E92003CC0EA /* sized_iterator.hh in Headers */,
1EBA459E14B97E92003CC0EA /* sorted_uniform.hh in Headers */,
1EBA459F14B97E92003CC0EA /* string_piece.hh in Headers */,
1EBA45A114B97E92003CC0EA /* tokenize_piece.hh in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXHeadersBuildPhase section */
/* Begin PBXNativeTarget section */
1EE8C2E81476A48E002496F2 /* lm */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1EE8C2ED1476A48E002496F2 /* Build configuration list for PBXNativeTarget "lm" */;
buildPhases = (
1EE8C2E51476A48E002496F2 /* Sources */,
1EE8C2E61476A48E002496F2 /* Frameworks */,
1EE8C2E71476A48E002496F2 /* Headers */,
);
buildRules = (
);
dependencies = (
);
name = lm;
productName = lm;
productReference = 1EE8C2E91476A48E002496F2 /* liblm.a */;
productType = "com.apple.product-type.library.static";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
1EE8C2E01476A48E002496F2 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1EE8C2E31476A48E002496F2 /* Build configuration list for PBXProject "lm" */;
compatibilityVersion = "Xcode 3.2";
developmentRegion = English;
hasScannedForEncodings = 0;
knownRegions = (
en,
);
mainGroup = 1EE8C2DE1476A48E002496F2;
productRefGroup = 1EE8C2EA1476A48E002496F2 /* Products */;
projectDirPath = "";
projectReferences = (
{
ProductGroup = 1EBA455D14B97E92003CC0EA /* Products */;
ProjectRef = 1EBA455C14B97E92003CC0EA /* util.xcodeproj */;
},
);
projectRoot = "";
targets = (
1EE8C2E81476A48E002496F2 /* lm */,
);
};
/* End PBXProject section */
/* Begin PBXReferenceProxy section */
1EBA45A414B97E93003CC0EA /* libutil.a */ = {
isa = PBXReferenceProxy;
fileType = archive.ar;
path = libutil.a;
remoteRef = 1EBA45A314B97E93003CC0EA /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR;
};
/* End PBXReferenceProxy section */
/* Begin PBXSourcesBuildPhase section */
1EE8C2E51476A48E002496F2 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1EBA44AD14B97E22003CC0EA /* bhiksha.cc in Sources */,
1EBA44D414B97E22003CC0EA /* binary_format.cc in Sources */,
1EBA44D814B97E22003CC0EA /* config.cc in Sources */,
1EBA44DC14B97E22003CC0EA /* Jamfile in Sources */,
1EBA44DD14B97E22003CC0EA /* left_test.cc in Sources */,
1EBA44DF14B97E22003CC0EA /* lm_exception.cc in Sources */,
1EBA44E214B97E22003CC0EA /* model_test.cc in Sources */,
1EBA44E414B97E22003CC0EA /* model.cc in Sources */,
1EBA44E614B97E22003CC0EA /* ngram_query.cc in Sources */,
1EBA44E814B97E22003CC0EA /* quantize.cc in Sources */,
1EBA44EA14B97E22003CC0EA /* read_arpa.cc in Sources */,
1EBA44ED14B97E22003CC0EA /* search_hashed.cc in Sources */,
1EBA44EF14B97E22003CC0EA /* search_trie.cc in Sources */,
1EBA44F114B97E22003CC0EA /* trie_sort.cc in Sources */,
1EBA44F314B97E22003CC0EA /* trie.cc in Sources */,
1EBA44F514B97E22003CC0EA /* virtual_interface.cc in Sources */,
1EBA44F714B97E22003CC0EA /* vocab.cc in Sources */,
1EBA457F14B97E92003CC0EA /* bit_packing_test.cc in Sources */,
1EBA458014B97E92003CC0EA /* bit_packing.cc in Sources */,
1EBA458314B97E92003CC0EA /* ersatz_progress.cc in Sources */,
1EBA458514B97E92003CC0EA /* exception.cc in Sources */,
1EBA458714B97E92003CC0EA /* file_piece_test.cc in Sources */,
1EBA458814B97E92003CC0EA /* file_piece.cc in Sources */,
1EBA458A14B97E92003CC0EA /* file.cc in Sources */,
1EBA458C14B97E92003CC0EA /* getopt.c in Sources */,
1EBA458F14B97E92003CC0EA /* Jamfile in Sources */,
1EBA459014B97E92003CC0EA /* joint_sort_test.cc in Sources */,
1EBA459214B97E92003CC0EA /* key_value_packing_test.cc in Sources */,
1EBA459414B97E92003CC0EA /* mmap.cc in Sources */,
1EBA459614B97E92003CC0EA /* murmur_hash.cc in Sources */,
1EBA459814B97E92003CC0EA /* probing_hash_table_test.cc in Sources */,
1EBA459D14B97E92003CC0EA /* sorted_uniform_test.cc in Sources */,
1EBA45A014B97E92003CC0EA /* tokenize_piece_test.cc in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
1EE8C2EB1476A48E002496F2 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = NO;
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_DYNAMIC_NO_PIC = NO;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
../..,
/opt/local/include,
);
MACOSX_DEPLOYMENT_TARGET = 10.7;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = macosx;
};
name = Debug;
};
1EE8C2EC1476A48E002496F2 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = YES;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
../..,
/opt/local/include,
);
MACOSX_DEPLOYMENT_TARGET = 10.7;
SDKROOT = macosx;
};
name = Release;
};
1EE8C2EE1476A48E002496F2 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
EXECUTABLE_PREFIX = lib;
LIBRARY_SEARCH_PATHS = (
"$(inherited)",
"\"$(SRCROOT)/../../lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
"\"$(SRCROOT)/../../lm/bin/darwin-4.2.1/release/link-static/threading-multi\"",
"\"$(SRCROOT)/../../lm/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
"\"$(SRCROOT)/../../util/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
"\"$(SRCROOT)/../../util/bin/darwin-4.2.1/release/link-static/threading-multi\"",
"\"$(SRCROOT)/../../util/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
);
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Debug;
};
1EE8C2EF1476A48E002496F2 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
EXECUTABLE_PREFIX = lib;
LIBRARY_SEARCH_PATHS = (
"$(inherited)",
"\"$(SRCROOT)/../../lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
"\"$(SRCROOT)/../../lm/bin/darwin-4.2.1/release/link-static/threading-multi\"",
"\"$(SRCROOT)/../../lm/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
"\"$(SRCROOT)/../../util/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
"\"$(SRCROOT)/../../util/bin/darwin-4.2.1/release/link-static/threading-multi\"",
"\"$(SRCROOT)/../../util/bin/gcc-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
);
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1EE8C2E31476A48E002496F2 /* Build configuration list for PBXProject "lm" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1EE8C2EB1476A48E002496F2 /* Debug */,
1EE8C2EC1476A48E002496F2 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1EE8C2ED1476A48E002496F2 /* Build configuration list for PBXNativeTarget "lm" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1EE8C2EE1476A48E002496F2 /* Debug */,
1EE8C2EF1476A48E002496F2 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 1EE8C2E01476A48E002496F2 /* Project object */;
}

View File

@ -7,58 +7,58 @@
objects = { objects = {
/* Begin PBXBuildFile section */ /* Begin PBXBuildFile section */
1E87F09311BDCD2E0033951C /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E87F08D11BDCD1B0033951C /* libmoses.a */; }; 1EAF9DC614B9F8CD005E8EBD /* liblm.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EAF9DC314B9F8BA005E8EBD /* liblm.a */; };
1E9DA31511BDC84A00F4DBD1 /* IOWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DA30A11BDC84A00F4DBD1 /* IOWrapper.cpp */; }; 1EAF9DC714B9F8CD005E8EBD /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EAF9DAD14B9F8AD005E8EBD /* libmoses.a */; };
1E9DA31611BDC84A00F4DBD1 /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DA30C11BDC84A00F4DBD1 /* Main.cpp */; }; 1EAF9DC814B9F8CD005E8EBD /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EAF9DB614B9F8B1005E8EBD /* libOnDiskPt.a */; };
1E9DA31811BDC84A00F4DBD1 /* mbr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DA31111BDC84A00F4DBD1 /* mbr.cpp */; }; 1EAF9DCA14B9F8CD005E8EBD /* libutil.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EAF9DC914B9F8CD005E8EBD /* libutil.a */; };
1E9DA31911BDC84A00F4DBD1 /* TranslationAnalysis.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E9DA31311BDC84A00F4DBD1 /* TranslationAnalysis.cpp */; }; 1EF0719F14B9F1D40052152A /* IOWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF0718A14B9F1D40052152A /* IOWrapper.cpp */; };
1E9DA35011BDC97100F4DBD1 /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E9DA34F11BDC96A00F4DBD1 /* libOnDiskPt.a */; }; 1EF071A214B9F1D40052152A /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF0718E14B9F1D40052152A /* Main.cpp */; };
1EE8C40B1476ABEC002496F2 /* liblm.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EE8C3EC1476AB9B002496F2 /* liblm.a */; }; 1EF071A414B9F1D40052152A /* mbr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF0719114B9F1D40052152A /* mbr.cpp */; };
1EE8C40C1476ABEC002496F2 /* libutil.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EE8C3F31476ABAF002496F2 /* libutil.a */; }; 1EF071A614B9F1D40052152A /* TranslationAnalysis.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF0719414B9F1D40052152A /* TranslationAnalysis.cpp */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */ /* Begin PBXContainerItemProxy section */
1E87F08C11BDCD1B0033951C /* PBXContainerItemProxy */ = { 1EAF9DAC14B9F8AD005E8EBD /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 1E9DA33311BDC8BB00F4DBD1 /* moses.xcodeproj */; containerPortal = 1EAF9DA514B9F8AD005E8EBD /* moses.xcodeproj */;
proxyType = 2; proxyType = 2;
remoteGlobalIDString = D2AAC046055464E500DB518D; remoteGlobalIDString = D2AAC046055464E500DB518D;
remoteInfo = moses; remoteInfo = moses;
}; };
1E87F09411BDCD390033951C /* PBXContainerItemProxy */ = { 1EAF9DB514B9F8B1005E8EBD /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 1E9DA33311BDC8BB00F4DBD1 /* moses.xcodeproj */; containerPortal = 1EAF9DAE14B9F8B1005E8EBD /* OnDiskPt.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = moses;
};
1E9DA34E11BDC96A00F4DBD1 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1E9DA34A11BDC96A00F4DBD1 /* OnDiskPt.xcodeproj */;
proxyType = 2; proxyType = 2;
remoteGlobalIDString = D2AAC046055464E500DB518D; remoteGlobalIDString = D2AAC046055464E500DB518D;
remoteInfo = OnDiskPt; remoteInfo = OnDiskPt;
}; };
1E9DA36311BDC9B200F4DBD1 /* PBXContainerItemProxy */ = { 1EAF9DC214B9F8BA005E8EBD /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 1E9DA34A11BDC96A00F4DBD1 /* OnDiskPt.xcodeproj */; containerPortal = 1EAF9DB714B9F8B9005E8EBD /* lm.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = OnDiskPt;
};
1EE8C3EB1476AB9B002496F2 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EE8C3E71476AB9B002496F2 /* lm.xcodeproj */;
proxyType = 2; proxyType = 2;
remoteGlobalIDString = 1EE8C2E91476A48E002496F2; remoteGlobalIDString = 1EE8C2E91476A48E002496F2;
remoteInfo = lm; remoteInfo = lm;
}; };
1EE8C3F21476ABAF002496F2 /* PBXContainerItemProxy */ = { 1EAF9DCB14B9F8D6005E8EBD /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 1EE8C3EE1476ABAE002496F2 /* util.xcodeproj */; containerPortal = 1EAF9DAE14B9F8B1005E8EBD /* OnDiskPt.xcodeproj */;
proxyType = 2; proxyType = 1;
remoteGlobalIDString = 1EE8C2711476A262002496F2; remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = util; remoteInfo = OnDiskPt;
};
1EAF9DCD14B9F8D6005E8EBD /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EAF9DA514B9F8AD005E8EBD /* moses.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = moses;
};
1EAF9DCF14B9F8D6005E8EBD /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EAF9DB714B9F8B9005E8EBD /* lm.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = 1EE8C2E81476A48E002496F2;
remoteInfo = lm;
}; };
/* End PBXContainerItemProxy section */ /* End PBXContainerItemProxy section */
@ -75,18 +75,18 @@
/* End PBXCopyFilesBuildPhase section */ /* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
1E9DA30A11BDC84A00F4DBD1 /* IOWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IOWrapper.cpp; path = src/IOWrapper.cpp; sourceTree = "<group>"; }; 1EAF9DA514B9F8AD005E8EBD /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = moses.xcodeproj; sourceTree = "<group>"; };
1E9DA30B11BDC84A00F4DBD1 /* IOWrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = IOWrapper.h; path = src/IOWrapper.h; sourceTree = "<group>"; }; 1EAF9DAE14B9F8B1005E8EBD /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = OnDiskPt.xcodeproj; sourceTree = "<group>"; };
1E9DA30C11BDC84A00F4DBD1 /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Main.cpp; path = src/Main.cpp; sourceTree = "<group>"; }; 1EAF9DB714B9F8B9005E8EBD /* lm.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = lm.xcodeproj; sourceTree = "<group>"; };
1E9DA30D11BDC84A00F4DBD1 /* Main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Main.h; path = src/Main.h; sourceTree = "<group>"; }; 1EAF9DC914B9F8CD005E8EBD /* libutil.a */ = {isa = PBXFileReference; lastKnownFileType = file; name = libutil.a; path = ../../util/build/Release/libutil.a; sourceTree = "<group>"; };
1E9DA31111BDC84A00F4DBD1 /* mbr.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mbr.cpp; path = src/mbr.cpp; sourceTree = "<group>"; }; 1EF0718A14B9F1D40052152A /* IOWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IOWrapper.cpp; path = "../../moses-chart-cmd/src/IOWrapper.cpp"; sourceTree = "<group>"; };
1E9DA31211BDC84A00F4DBD1 /* mbr.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mbr.h; path = src/mbr.h; sourceTree = "<group>"; }; 1EF0718B14B9F1D40052152A /* IOWrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = IOWrapper.h; path = "../../moses-chart-cmd/src/IOWrapper.h"; sourceTree = "<group>"; };
1E9DA31311BDC84A00F4DBD1 /* TranslationAnalysis.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TranslationAnalysis.cpp; path = src/TranslationAnalysis.cpp; sourceTree = "<group>"; }; 1EF0718E14B9F1D40052152A /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Main.cpp; path = "../../moses-chart-cmd/src/Main.cpp"; sourceTree = "<group>"; };
1E9DA31411BDC84A00F4DBD1 /* TranslationAnalysis.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TranslationAnalysis.h; path = src/TranslationAnalysis.h; sourceTree = "<group>"; }; 1EF0718F14B9F1D40052152A /* Main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Main.h; path = "../../moses-chart-cmd/src/Main.h"; sourceTree = "<group>"; };
1E9DA33311BDC8BB00F4DBD1 /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = moses.xcodeproj; path = ../moses/moses.xcodeproj; sourceTree = SOURCE_ROOT; }; 1EF0719114B9F1D40052152A /* mbr.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mbr.cpp; path = "../../moses-chart-cmd/src/mbr.cpp"; sourceTree = "<group>"; };
1E9DA34A11BDC96A00F4DBD1 /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = OnDiskPt.xcodeproj; path = ../OnDiskPt/OnDiskPt.xcodeproj; sourceTree = SOURCE_ROOT; }; 1EF0719214B9F1D40052152A /* mbr.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mbr.h; path = "../../moses-chart-cmd/src/mbr.h"; sourceTree = "<group>"; };
1EE8C3E71476AB9B002496F2 /* lm.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = lm.xcodeproj; path = ../lm/lm.xcodeproj; sourceTree = "<group>"; }; 1EF0719414B9F1D40052152A /* TranslationAnalysis.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TranslationAnalysis.cpp; path = "../../moses-chart-cmd/src/TranslationAnalysis.cpp"; sourceTree = "<group>"; };
1EE8C3EE1476ABAE002496F2 /* util.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = util.xcodeproj; path = ../util/util.xcodeproj; sourceTree = "<group>"; }; 1EF0719514B9F1D40052152A /* TranslationAnalysis.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TranslationAnalysis.h; path = "../../moses-chart-cmd/src/TranslationAnalysis.h"; sourceTree = "<group>"; };
8DD76F6C0486A84900D96B5E /* moses-chart-cmd */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "moses-chart-cmd"; sourceTree = BUILT_PRODUCTS_DIR; }; 8DD76F6C0486A84900D96B5E /* moses-chart-cmd */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "moses-chart-cmd"; sourceTree = BUILT_PRODUCTS_DIR; };
/* End PBXFileReference section */ /* End PBXFileReference section */
@ -95,10 +95,10 @@
isa = PBXFrameworksBuildPhase; isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
1EE8C40B1476ABEC002496F2 /* liblm.a in Frameworks */, 1EAF9DCA14B9F8CD005E8EBD /* libutil.a in Frameworks */,
1EE8C40C1476ABEC002496F2 /* libutil.a in Frameworks */, 1EAF9DC614B9F8CD005E8EBD /* liblm.a in Frameworks */,
1E87F09311BDCD2E0033951C /* libmoses.a in Frameworks */, 1EAF9DC714B9F8CD005E8EBD /* libmoses.a in Frameworks */,
1E9DA35011BDC97100F4DBD1 /* libOnDiskPt.a in Frameworks */, 1EAF9DC814B9F8CD005E8EBD /* libOnDiskPt.a in Frameworks */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
@ -108,13 +108,13 @@
08FB7794FE84155DC02AAC07 /* moses-chart-cmd */ = { 08FB7794FE84155DC02AAC07 /* moses-chart-cmd */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1EE8C3EE1476ABAE002496F2 /* util.xcodeproj */, 1EAF9DC914B9F8CD005E8EBD /* libutil.a */,
1EE8C3E71476AB9B002496F2 /* lm.xcodeproj */,
1E9DA34A11BDC96A00F4DBD1 /* OnDiskPt.xcodeproj */,
1E9DA33311BDC8BB00F4DBD1 /* moses.xcodeproj */,
08FB7795FE84155DC02AAC07 /* Source */, 08FB7795FE84155DC02AAC07 /* Source */,
C6859E8C029090F304C91782 /* Documentation */, C6859E8C029090F304C91782 /* Documentation */,
1AB674ADFE9D54B511CA2CBB /* Products */, 1AB674ADFE9D54B511CA2CBB /* Products */,
1EAF9DB714B9F8B9005E8EBD /* lm.xcodeproj */,
1EAF9DA514B9F8AD005E8EBD /* moses.xcodeproj */,
1EAF9DAE14B9F8B1005E8EBD /* OnDiskPt.xcodeproj */,
); );
name = "moses-chart-cmd"; name = "moses-chart-cmd";
sourceTree = "<group>"; sourceTree = "<group>";
@ -122,14 +122,14 @@
08FB7795FE84155DC02AAC07 /* Source */ = { 08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1E9DA30A11BDC84A00F4DBD1 /* IOWrapper.cpp */, 1EF0718A14B9F1D40052152A /* IOWrapper.cpp */,
1E9DA30B11BDC84A00F4DBD1 /* IOWrapper.h */, 1EF0718B14B9F1D40052152A /* IOWrapper.h */,
1E9DA30C11BDC84A00F4DBD1 /* Main.cpp */, 1EF0718E14B9F1D40052152A /* Main.cpp */,
1E9DA30D11BDC84A00F4DBD1 /* Main.h */, 1EF0718F14B9F1D40052152A /* Main.h */,
1E9DA31111BDC84A00F4DBD1 /* mbr.cpp */, 1EF0719114B9F1D40052152A /* mbr.cpp */,
1E9DA31211BDC84A00F4DBD1 /* mbr.h */, 1EF0719214B9F1D40052152A /* mbr.h */,
1E9DA31311BDC84A00F4DBD1 /* TranslationAnalysis.cpp */, 1EF0719414B9F1D40052152A /* TranslationAnalysis.cpp */,
1E9DA31411BDC84A00F4DBD1 /* TranslationAnalysis.h */, 1EF0719514B9F1D40052152A /* TranslationAnalysis.h */,
); );
name = Source; name = Source;
sourceTree = "<group>"; sourceTree = "<group>";
@ -142,34 +142,26 @@
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
1E9DA33411BDC8BB00F4DBD1 /* Products */ = { 1EAF9DA614B9F8AD005E8EBD /* Products */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1E87F08D11BDCD1B0033951C /* libmoses.a */, 1EAF9DAD14B9F8AD005E8EBD /* libmoses.a */,
); );
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
1E9DA34B11BDC96A00F4DBD1 /* Products */ = { 1EAF9DAF14B9F8B1005E8EBD /* Products */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1E9DA34F11BDC96A00F4DBD1 /* libOnDiskPt.a */, 1EAF9DB614B9F8B1005E8EBD /* libOnDiskPt.a */,
); );
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
1EE8C3E81476AB9B002496F2 /* Products */ = { 1EAF9DB814B9F8B9005E8EBD /* Products */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1EE8C3EC1476AB9B002496F2 /* liblm.a */, 1EAF9DC314B9F8BA005E8EBD /* liblm.a */,
);
name = Products;
sourceTree = "<group>";
};
1EE8C3EF1476ABAE002496F2 /* Products */ = {
isa = PBXGroup;
children = (
1EE8C3F31476ABAF002496F2 /* libutil.a */,
); );
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
@ -195,8 +187,9 @@
buildRules = ( buildRules = (
); );
dependencies = ( dependencies = (
1E9DA36411BDC9B200F4DBD1 /* PBXTargetDependency */, 1EAF9DCC14B9F8D6005E8EBD /* PBXTargetDependency */,
1E87F09511BDCD390033951C /* PBXTargetDependency */, 1EAF9DCE14B9F8D6005E8EBD /* PBXTargetDependency */,
1EAF9DD014B9F8D6005E8EBD /* PBXTargetDependency */,
); );
name = "moses-chart-cmd"; name = "moses-chart-cmd";
productInstallPath = "$(HOME)/bin"; productInstallPath = "$(HOME)/bin";
@ -223,20 +216,16 @@
projectDirPath = ""; projectDirPath = "";
projectReferences = ( projectReferences = (
{ {
ProductGroup = 1EE8C3E81476AB9B002496F2 /* Products */; ProductGroup = 1EAF9DB814B9F8B9005E8EBD /* Products */;
ProjectRef = 1EE8C3E71476AB9B002496F2 /* lm.xcodeproj */; ProjectRef = 1EAF9DB714B9F8B9005E8EBD /* lm.xcodeproj */;
}, },
{ {
ProductGroup = 1E9DA33411BDC8BB00F4DBD1 /* Products */; ProductGroup = 1EAF9DA614B9F8AD005E8EBD /* Products */;
ProjectRef = 1E9DA33311BDC8BB00F4DBD1 /* moses.xcodeproj */; ProjectRef = 1EAF9DA514B9F8AD005E8EBD /* moses.xcodeproj */;
}, },
{ {
ProductGroup = 1E9DA34B11BDC96A00F4DBD1 /* Products */; ProductGroup = 1EAF9DAF14B9F8B1005E8EBD /* Products */;
ProjectRef = 1E9DA34A11BDC96A00F4DBD1 /* OnDiskPt.xcodeproj */; ProjectRef = 1EAF9DAE14B9F8B1005E8EBD /* OnDiskPt.xcodeproj */;
},
{
ProductGroup = 1EE8C3EF1476ABAE002496F2 /* Products */;
ProjectRef = 1EE8C3EE1476ABAE002496F2 /* util.xcodeproj */;
}, },
); );
projectRoot = ""; projectRoot = "";
@ -247,32 +236,25 @@
/* End PBXProject section */ /* End PBXProject section */
/* Begin PBXReferenceProxy section */ /* Begin PBXReferenceProxy section */
1E87F08D11BDCD1B0033951C /* libmoses.a */ = { 1EAF9DAD14B9F8AD005E8EBD /* libmoses.a */ = {
isa = PBXReferenceProxy; isa = PBXReferenceProxy;
fileType = archive.ar; fileType = archive.ar;
path = libmoses.a; path = libmoses.a;
remoteRef = 1E87F08C11BDCD1B0033951C /* PBXContainerItemProxy */; remoteRef = 1EAF9DAC14B9F8AD005E8EBD /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR; sourceTree = BUILT_PRODUCTS_DIR;
}; };
1E9DA34F11BDC96A00F4DBD1 /* libOnDiskPt.a */ = { 1EAF9DB614B9F8B1005E8EBD /* libOnDiskPt.a */ = {
isa = PBXReferenceProxy; isa = PBXReferenceProxy;
fileType = archive.ar; fileType = archive.ar;
path = libOnDiskPt.a; path = libOnDiskPt.a;
remoteRef = 1E9DA34E11BDC96A00F4DBD1 /* PBXContainerItemProxy */; remoteRef = 1EAF9DB514B9F8B1005E8EBD /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR; sourceTree = BUILT_PRODUCTS_DIR;
}; };
1EE8C3EC1476AB9B002496F2 /* liblm.a */ = { 1EAF9DC314B9F8BA005E8EBD /* liblm.a */ = {
isa = PBXReferenceProxy; isa = PBXReferenceProxy;
fileType = archive.ar; fileType = archive.ar;
path = liblm.a; path = liblm.a;
remoteRef = 1EE8C3EB1476AB9B002496F2 /* PBXContainerItemProxy */; remoteRef = 1EAF9DC214B9F8BA005E8EBD /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR;
};
1EE8C3F31476ABAF002496F2 /* libutil.a */ = {
isa = PBXReferenceProxy;
fileType = archive.ar;
path = libutil.a;
remoteRef = 1EE8C3F21476ABAF002496F2 /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR; sourceTree = BUILT_PRODUCTS_DIR;
}; };
/* End PBXReferenceProxy section */ /* End PBXReferenceProxy section */
@ -282,25 +264,30 @@
isa = PBXSourcesBuildPhase; isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
1E9DA31511BDC84A00F4DBD1 /* IOWrapper.cpp in Sources */, 1EF0719F14B9F1D40052152A /* IOWrapper.cpp in Sources */,
1E9DA31611BDC84A00F4DBD1 /* Main.cpp in Sources */, 1EF071A214B9F1D40052152A /* Main.cpp in Sources */,
1E9DA31811BDC84A00F4DBD1 /* mbr.cpp in Sources */, 1EF071A414B9F1D40052152A /* mbr.cpp in Sources */,
1E9DA31911BDC84A00F4DBD1 /* TranslationAnalysis.cpp in Sources */, 1EF071A614B9F1D40052152A /* TranslationAnalysis.cpp in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
/* End PBXSourcesBuildPhase section */ /* End PBXSourcesBuildPhase section */
/* Begin PBXTargetDependency section */ /* Begin PBXTargetDependency section */
1E87F09511BDCD390033951C /* PBXTargetDependency */ = { 1EAF9DCC14B9F8D6005E8EBD /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = moses;
targetProxy = 1E87F09411BDCD390033951C /* PBXContainerItemProxy */;
};
1E9DA36411BDC9B200F4DBD1 /* PBXTargetDependency */ = {
isa = PBXTargetDependency; isa = PBXTargetDependency;
name = OnDiskPt; name = OnDiskPt;
targetProxy = 1E9DA36311BDC9B200F4DBD1 /* PBXContainerItemProxy */; targetProxy = 1EAF9DCB14B9F8D6005E8EBD /* PBXContainerItemProxy */;
};
1EAF9DCE14B9F8D6005E8EBD /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = moses;
targetProxy = 1EAF9DCD14B9F8D6005E8EBD /* PBXContainerItemProxy */;
};
1EAF9DD014B9F8D6005E8EBD /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = lm;
targetProxy = 1EAF9DCF14B9F8D6005E8EBD /* PBXContainerItemProxy */;
}; };
/* End PBXTargetDependency section */ /* End PBXTargetDependency section */
@ -319,17 +306,11 @@
_LARGE_FILES, _LARGE_FILES,
"_FILE_OFFSET_BITS=64", "_FILE_OFFSET_BITS=64",
); );
HEADER_SEARCH_PATHS = ( HEADER_SEARCH_PATHS = /opt/local/include;
..,
../moses/src,
../kenlm,
/opt/local/include,
);
INSTALL_PATH = /usr/local/bin; INSTALL_PATH = /usr/local/bin;
LIBRARY_SEARCH_PATHS = ( LIBRARY_SEARCH_PATHS = (
../irstlm/lib, ../../irstlm/lib,
../srilm/lib/macosx, ../../srilm/lib/macosx,
../kenlm,
); );
OTHER_LDFLAGS = ( OTHER_LDFLAGS = (
"-lz", "-lz",
@ -341,6 +322,7 @@
"-llattice", "-llattice",
); );
PRODUCT_NAME = "moses-chart-cmd"; PRODUCT_NAME = "moses-chart-cmd";
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
}; };
name = Debug; name = Debug;
}; };
@ -355,17 +337,11 @@
_LARGE_FILES, _LARGE_FILES,
"_FILE_OFFSET_BITS=64", "_FILE_OFFSET_BITS=64",
); );
HEADER_SEARCH_PATHS = ( HEADER_SEARCH_PATHS = /opt/local/include;
..,
../moses/src,
../kenlm,
/opt/local/include,
);
INSTALL_PATH = /usr/local/bin; INSTALL_PATH = /usr/local/bin;
LIBRARY_SEARCH_PATHS = ( LIBRARY_SEARCH_PATHS = (
../irstlm/lib, ../../irstlm/lib,
../srilm/lib/macosx, ../../srilm/lib/macosx,
../kenlm,
); );
OTHER_LDFLAGS = ( OTHER_LDFLAGS = (
"-lz", "-lz",
@ -377,6 +353,7 @@
"-llattice", "-llattice",
); );
PRODUCT_NAME = "moses-chart-cmd"; PRODUCT_NAME = "moses-chart-cmd";
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
}; };
name = Release; name = Release;
}; };
@ -390,8 +367,9 @@
GCC_WARN_ABOUT_RETURN_TYPE = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES; GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = ( HEADER_SEARCH_PATHS = (
../moses/src, ../../moses/src,
../, ../..,
"/Users/hieuhoang/workspace/github/moses-smt/moses/src/**",
); );
ONLY_ACTIVE_ARCH = YES; ONLY_ACTIVE_ARCH = YES;
PREBINDING = NO; PREBINDING = NO;
@ -408,8 +386,9 @@
GCC_WARN_ABOUT_RETURN_TYPE = YES; GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES; GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = ( HEADER_SEARCH_PATHS = (
../moses/src, ../../moses/src,
../, ../..,
"/Users/hieuhoang/workspace/github/moses-smt/moses/src/**",
); );
ONLY_ACTIVE_ARCH = YES; ONLY_ACTIVE_ARCH = YES;
PREBINDING = NO; PREBINDING = NO;

View File

@ -7,63 +7,60 @@
objects = { objects = {
/* Begin PBXBuildFile section */ /* Begin PBXBuildFile section */
03306D820C0B249A00CA1311 /* mbr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 03306D800C0B249A00CA1311 /* mbr.cpp */; }; 1EAF9D7A14B9F566005E8EBD /* IOWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EAF9D5B14B9F566005E8EBD /* IOWrapper.cpp */; };
03306D830C0B249A00CA1311 /* mbr.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 03306D810C0B249A00CA1311 /* mbr.h */; }; 1EAF9D7C14B9F566005E8EBD /* Jamfile in Sources */ = {isa = PBXBuildFile; fileRef = 1EAF9D5E14B9F566005E8EBD /* Jamfile */; };
1C8CFF4D0AD68D3600FA22E2 /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFF450AD68D3600FA22E2 /* Main.cpp */; }; 1EAF9D7D14B9F566005E8EBD /* LatticeMBR.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EAF9D5F14B9F566005E8EBD /* LatticeMBR.cpp */; };
1C8CFF4E0AD68D3600FA22E2 /* Main.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 1C8CFF460AD68D3600FA22E2 /* Main.h */; }; 1EAF9D8114B9F566005E8EBD /* Main.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EAF9D6414B9F566005E8EBD /* Main.cpp */; };
1C8CFF4F0AD68D3600FA22E2 /* TranslationAnalysis.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1C8CFF470AD68D3600FA22E2 /* TranslationAnalysis.cpp */; }; 1EAF9D8314B9F566005E8EBD /* mbr.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EAF9D6714B9F566005E8EBD /* mbr.cpp */; };
1C8CFF500AD68D3600FA22E2 /* TranslationAnalysis.h in CopyFiles */ = {isa = PBXBuildFile; fileRef = 1C8CFF480AD68D3600FA22E2 /* TranslationAnalysis.h */; }; 1EAF9D8514B9F566005E8EBD /* TranslationAnalysis.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EAF9D6A14B9F566005E8EBD /* TranslationAnalysis.cpp */; };
1CE646E411679F6900EC77CC /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1CE646E311679F5F00EC77CC /* libOnDiskPt.a */; }; 1EAF9D9D14B9F7B6005E8EBD /* libOnDiskPt.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EAF9D9C14B9F7A6005E8EBD /* libOnDiskPt.a */; };
1EE8C2DD1476A3F2002496F2 /* libutil.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EE8C2DC1476A34A002496F2 /* libutil.a */; }; 1EF070A914B9F0380052152A /* liblm.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EF0708A14B9EF070052152A /* liblm.a */; };
1EE8C3C91476AB64002496F2 /* liblm.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EE8C3C81476AB3C002496F2 /* liblm.a */; }; 1EF070AA14B9F0380052152A /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EF0709014B9EF0E0052152A /* libmoses.a */; };
B219B8540E93812700EAB407 /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 03306D670C0B240B00CA1311 /* libmoses.a */; };
B219B8580E9381AC00EAB407 /* IOWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = B219B8560E9381AC00EAB407 /* IOWrapper.cpp */; };
B28B1ED3110F52BB00AAD188 /* LatticeMBR.cpp in Sources */ = {isa = PBXBuildFile; fileRef = B28B1ED2110F52BB00AAD188 /* LatticeMBR.cpp */; };
/* End PBXBuildFile section */ /* End PBXBuildFile section */
/* Begin PBXContainerItemProxy section */ /* Begin PBXContainerItemProxy section */
03306D660C0B240B00CA1311 /* PBXContainerItemProxy */ = { 1EAF9D9B14B9F7A6005E8EBD /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 03306D5F0C0B240B00CA1311 /* moses.xcodeproj */; containerPortal = 1EAF9D9414B9F7A6005E8EBD /* OnDiskPt.xcodeproj */;
proxyType = 2;
remoteGlobalIDString = D2AAC046055464E500DB518D;
remoteInfo = moses;
};
03306D770C0B244800CA1311 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 03306D5F0C0B240B00CA1311 /* moses.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = moses;
};
1CE646E211679F5F00EC77CC /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1CE646DB11679F5F00EC77CC /* OnDiskPt.xcodeproj */;
proxyType = 2; proxyType = 2;
remoteGlobalIDString = D2AAC046055464E500DB518D; remoteGlobalIDString = D2AAC046055464E500DB518D;
remoteInfo = OnDiskPt; remoteInfo = OnDiskPt;
}; };
1CE6472D1167A11600EC77CC /* PBXContainerItemProxy */ = { 1EAF9D9E14B9F7BD005E8EBD /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 1CE646DB11679F5F00EC77CC /* OnDiskPt.xcodeproj */; containerPortal = 1EAF9D9414B9F7A6005E8EBD /* OnDiskPt.xcodeproj */;
proxyType = 1; proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D; remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = OnDiskPt; remoteInfo = OnDiskPt;
}; };
1EE8C2DB1476A34A002496F2 /* PBXContainerItemProxy */ = { 1EF0708914B9EF070052152A /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy; isa = PBXContainerItemProxy;
containerPortal = 1EE8C2D41476A34A002496F2 /* util.xcodeproj */; containerPortal = 1EF0708514B9EF070052152A /* lm.xcodeproj */;
proxyType = 2;
remoteGlobalIDString = 1EE8C2711476A262002496F2;
remoteInfo = util;
};
1EE8C3C71476AB3C002496F2 /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EE8C3C01476AB3C002496F2 /* lm.xcodeproj */;
proxyType = 2; proxyType = 2;
remoteGlobalIDString = 1EE8C2E91476A48E002496F2; remoteGlobalIDString = 1EE8C2E91476A48E002496F2;
remoteInfo = lm; remoteInfo = lm;
}; };
1EF0708F14B9EF0E0052152A /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EF0708B14B9EF0D0052152A /* moses.xcodeproj */;
proxyType = 2;
remoteGlobalIDString = D2AAC046055464E500DB518D;
remoteInfo = moses;
};
1EF070AD14B9F03F0052152A /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EF0708B14B9EF0D0052152A /* moses.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = D2AAC045055464E500DB518D;
remoteInfo = moses;
};
1EF070AF14B9F0430052152A /* PBXContainerItemProxy */ = {
isa = PBXContainerItemProxy;
containerPortal = 1EF0708514B9EF070052152A /* lm.xcodeproj */;
proxyType = 1;
remoteGlobalIDString = 1EE8C2E81476A48E002496F2;
remoteInfo = lm;
};
/* End PBXContainerItemProxy section */ /* End PBXContainerItemProxy section */
/* Begin PBXCopyFilesBuildPhase section */ /* Begin PBXCopyFilesBuildPhase section */
@ -73,30 +70,27 @@
dstPath = /usr/share/man/man1/; dstPath = /usr/share/man/man1/;
dstSubfolderSpec = 0; dstSubfolderSpec = 0;
files = ( files = (
1C8CFF4E0AD68D3600FA22E2 /* Main.h in CopyFiles */,
1C8CFF500AD68D3600FA22E2 /* TranslationAnalysis.h in CopyFiles */,
03306D830C0B249A00CA1311 /* mbr.h in CopyFiles */,
); );
runOnlyForDeploymentPostprocessing = 1; runOnlyForDeploymentPostprocessing = 1;
}; };
/* End PBXCopyFilesBuildPhase section */ /* End PBXCopyFilesBuildPhase section */
/* Begin PBXFileReference section */ /* Begin PBXFileReference section */
03306D5F0C0B240B00CA1311 /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = moses.xcodeproj; path = ../moses/moses.xcodeproj; sourceTree = SOURCE_ROOT; }; 1EAF9D5B14B9F566005E8EBD /* IOWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IOWrapper.cpp; path = "../../moses-cmd/src/IOWrapper.cpp"; sourceTree = "<group>"; };
03306D800C0B249A00CA1311 /* mbr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = mbr.cpp; path = src/mbr.cpp; sourceTree = "<group>"; }; 1EAF9D5C14B9F566005E8EBD /* IOWrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = IOWrapper.h; path = "../../moses-cmd/src/IOWrapper.h"; sourceTree = "<group>"; };
03306D810C0B249A00CA1311 /* mbr.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = mbr.h; path = src/mbr.h; sourceTree = "<group>"; }; 1EAF9D5E14B9F566005E8EBD /* Jamfile */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.jam; name = Jamfile; path = "../../moses-cmd/src/Jamfile"; sourceTree = "<group>"; };
1C8CFF450AD68D3600FA22E2 /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = Main.cpp; path = src/Main.cpp; sourceTree = "<group>"; }; 1EAF9D5F14B9F566005E8EBD /* LatticeMBR.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LatticeMBR.cpp; path = "../../moses-cmd/src/LatticeMBR.cpp"; sourceTree = "<group>"; };
1C8CFF460AD68D3600FA22E2 /* Main.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = Main.h; path = src/Main.h; sourceTree = "<group>"; }; 1EAF9D6014B9F566005E8EBD /* LatticeMBR.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LatticeMBR.h; path = "../../moses-cmd/src/LatticeMBR.h"; sourceTree = "<group>"; };
1C8CFF470AD68D3600FA22E2 /* TranslationAnalysis.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = TranslationAnalysis.cpp; path = src/TranslationAnalysis.cpp; sourceTree = "<group>"; }; 1EAF9D6414B9F566005E8EBD /* Main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Main.cpp; path = "../../moses-cmd/src/Main.cpp"; sourceTree = "<group>"; };
1C8CFF480AD68D3600FA22E2 /* TranslationAnalysis.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = TranslationAnalysis.h; path = src/TranslationAnalysis.h; sourceTree = "<group>"; }; 1EAF9D6514B9F566005E8EBD /* Main.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Main.h; path = "../../moses-cmd/src/Main.h"; sourceTree = "<group>"; };
1CE646DB11679F5F00EC77CC /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = OnDiskPt.xcodeproj; path = ../OnDiskPt/OnDiskPt.xcodeproj; sourceTree = SOURCE_ROOT; }; 1EAF9D6714B9F566005E8EBD /* mbr.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mbr.cpp; path = "../../moses-cmd/src/mbr.cpp"; sourceTree = "<group>"; };
1EE8C2D41476A34A002496F2 /* util.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = util.xcodeproj; path = ../util/util.xcodeproj; sourceTree = "<group>"; }; 1EAF9D6814B9F566005E8EBD /* mbr.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = mbr.h; path = "../../moses-cmd/src/mbr.h"; sourceTree = "<group>"; };
1EE8C3C01476AB3C002496F2 /* lm.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; name = lm.xcodeproj; path = ../lm/lm.xcodeproj; sourceTree = "<group>"; }; 1EAF9D6A14B9F566005E8EBD /* TranslationAnalysis.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = TranslationAnalysis.cpp; path = "../../moses-cmd/src/TranslationAnalysis.cpp"; sourceTree = "<group>"; };
1EAF9D6B14B9F566005E8EBD /* TranslationAnalysis.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TranslationAnalysis.h; path = "../../moses-cmd/src/TranslationAnalysis.h"; sourceTree = "<group>"; };
1EAF9D9414B9F7A6005E8EBD /* OnDiskPt.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = OnDiskPt.xcodeproj; sourceTree = "<group>"; };
1EF0708514B9EF070052152A /* lm.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = lm.xcodeproj; sourceTree = "<group>"; };
1EF0708B14B9EF0D0052152A /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = moses.xcodeproj; sourceTree = "<group>"; };
8DD76F6C0486A84900D96B5E /* moses-cmd */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "moses-cmd"; sourceTree = BUILT_PRODUCTS_DIR; }; 8DD76F6C0486A84900D96B5E /* moses-cmd */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "moses-cmd"; sourceTree = BUILT_PRODUCTS_DIR; };
B219B8560E9381AC00EAB407 /* IOWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = IOWrapper.cpp; path = src/IOWrapper.cpp; sourceTree = "<group>"; };
B219B8570E9381AC00EAB407 /* IOWrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = IOWrapper.h; path = src/IOWrapper.h; sourceTree = "<group>"; };
B28B1ED2110F52BB00AAD188 /* LatticeMBR.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LatticeMBR.cpp; path = src/LatticeMBR.cpp; sourceTree = "<group>"; };
B28B1ED4110F52C600AAD188 /* LatticeMBR.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LatticeMBR.h; path = src/LatticeMBR.h; sourceTree = "<group>"; };
/* End PBXFileReference section */ /* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */ /* Begin PBXFrameworksBuildPhase section */
@ -104,34 +98,24 @@
isa = PBXFrameworksBuildPhase; isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
1EE8C3C91476AB64002496F2 /* liblm.a in Frameworks */, 1EAF9D9D14B9F7B6005E8EBD /* libOnDiskPt.a in Frameworks */,
1EE8C2DD1476A3F2002496F2 /* libutil.a in Frameworks */, 1EF070A914B9F0380052152A /* liblm.a in Frameworks */,
1CE646E411679F6900EC77CC /* libOnDiskPt.a in Frameworks */, 1EF070AA14B9F0380052152A /* libmoses.a in Frameworks */,
B219B8540E93812700EAB407 /* libmoses.a in Frameworks */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
/* End PBXFrameworksBuildPhase section */ /* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */ /* Begin PBXGroup section */
03306D600C0B240B00CA1311 /* Products */ = {
isa = PBXGroup;
children = (
03306D670C0B240B00CA1311 /* libmoses.a */,
);
name = Products;
sourceTree = "<group>";
};
08FB7794FE84155DC02AAC07 /* moses-cmd */ = { 08FB7794FE84155DC02AAC07 /* moses-cmd */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1EE8C3C01476AB3C002496F2 /* lm.xcodeproj */,
1EE8C2D41476A34A002496F2 /* util.xcodeproj */,
1CE646DB11679F5F00EC77CC /* OnDiskPt.xcodeproj */,
03306D5F0C0B240B00CA1311 /* moses.xcodeproj */,
08FB7795FE84155DC02AAC07 /* Source */, 08FB7795FE84155DC02AAC07 /* Source */,
C6859E8C029090F304C91782 /* Documentation */, C6859E8C029090F304C91782 /* Documentation */,
1AB674ADFE9D54B511CA2CBB /* Products */, 1AB674ADFE9D54B511CA2CBB /* Products */,
1EAF9D9414B9F7A6005E8EBD /* OnDiskPt.xcodeproj */,
1EF0708514B9EF070052152A /* lm.xcodeproj */,
1EF0708B14B9EF0D0052152A /* moses.xcodeproj */,
); );
name = "moses-cmd"; name = "moses-cmd";
sourceTree = "<group>"; sourceTree = "<group>";
@ -139,16 +123,17 @@
08FB7795FE84155DC02AAC07 /* Source */ = { 08FB7795FE84155DC02AAC07 /* Source */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
B28B1ED4110F52C600AAD188 /* LatticeMBR.h */, 1EAF9D5B14B9F566005E8EBD /* IOWrapper.cpp */,
B28B1ED2110F52BB00AAD188 /* LatticeMBR.cpp */, 1EAF9D5C14B9F566005E8EBD /* IOWrapper.h */,
B219B8560E9381AC00EAB407 /* IOWrapper.cpp */, 1EAF9D5E14B9F566005E8EBD /* Jamfile */,
B219B8570E9381AC00EAB407 /* IOWrapper.h */, 1EAF9D5F14B9F566005E8EBD /* LatticeMBR.cpp */,
03306D800C0B249A00CA1311 /* mbr.cpp */, 1EAF9D6014B9F566005E8EBD /* LatticeMBR.h */,
03306D810C0B249A00CA1311 /* mbr.h */, 1EAF9D6414B9F566005E8EBD /* Main.cpp */,
1C8CFF450AD68D3600FA22E2 /* Main.cpp */, 1EAF9D6514B9F566005E8EBD /* Main.h */,
1C8CFF460AD68D3600FA22E2 /* Main.h */, 1EAF9D6714B9F566005E8EBD /* mbr.cpp */,
1C8CFF470AD68D3600FA22E2 /* TranslationAnalysis.cpp */, 1EAF9D6814B9F566005E8EBD /* mbr.h */,
1C8CFF480AD68D3600FA22E2 /* TranslationAnalysis.h */, 1EAF9D6A14B9F566005E8EBD /* TranslationAnalysis.cpp */,
1EAF9D6B14B9F566005E8EBD /* TranslationAnalysis.h */,
); );
name = Source; name = Source;
sourceTree = "<group>"; sourceTree = "<group>";
@ -161,26 +146,26 @@
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
1CE646DC11679F5F00EC77CC /* Products */ = { 1EAF9D9514B9F7A6005E8EBD /* Products */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1CE646E311679F5F00EC77CC /* libOnDiskPt.a */, 1EAF9D9C14B9F7A6005E8EBD /* libOnDiskPt.a */,
); );
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
1EE8C2D51476A34A002496F2 /* Products */ = { 1EF0708614B9EF070052152A /* Products */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1EE8C2DC1476A34A002496F2 /* libutil.a */, 1EF0708A14B9EF070052152A /* liblm.a */,
); );
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
}; };
1EE8C3C11476AB3C002496F2 /* Products */ = { 1EF0708C14B9EF0D0052152A /* Products */ = {
isa = PBXGroup; isa = PBXGroup;
children = ( children = (
1EE8C3C81476AB3C002496F2 /* liblm.a */, 1EF0709014B9EF0E0052152A /* libmoses.a */,
); );
name = Products; name = Products;
sourceTree = "<group>"; sourceTree = "<group>";
@ -206,8 +191,9 @@
buildRules = ( buildRules = (
); );
dependencies = ( dependencies = (
03306D780C0B244800CA1311 /* PBXTargetDependency */, 1EAF9D9F14B9F7BD005E8EBD /* PBXTargetDependency */,
1CE6472E1167A11600EC77CC /* PBXTargetDependency */, 1EF070B014B9F0430052152A /* PBXTargetDependency */,
1EF070AE14B9F03F0052152A /* PBXTargetDependency */,
); );
name = "moses-cmd"; name = "moses-cmd";
productInstallPath = "$(HOME)/bin"; productInstallPath = "$(HOME)/bin";
@ -234,20 +220,16 @@
projectDirPath = ""; projectDirPath = "";
projectReferences = ( projectReferences = (
{ {
ProductGroup = 1EE8C3C11476AB3C002496F2 /* Products */; ProductGroup = 1EF0708614B9EF070052152A /* Products */;
ProjectRef = 1EE8C3C01476AB3C002496F2 /* lm.xcodeproj */; ProjectRef = 1EF0708514B9EF070052152A /* lm.xcodeproj */;
}, },
{ {
ProductGroup = 03306D600C0B240B00CA1311 /* Products */; ProductGroup = 1EF0708C14B9EF0D0052152A /* Products */;
ProjectRef = 03306D5F0C0B240B00CA1311 /* moses.xcodeproj */; ProjectRef = 1EF0708B14B9EF0D0052152A /* moses.xcodeproj */;
}, },
{ {
ProductGroup = 1CE646DC11679F5F00EC77CC /* Products */; ProductGroup = 1EAF9D9514B9F7A6005E8EBD /* Products */;
ProjectRef = 1CE646DB11679F5F00EC77CC /* OnDiskPt.xcodeproj */; ProjectRef = 1EAF9D9414B9F7A6005E8EBD /* OnDiskPt.xcodeproj */;
},
{
ProductGroup = 1EE8C2D51476A34A002496F2 /* Products */;
ProjectRef = 1EE8C2D41476A34A002496F2 /* util.xcodeproj */;
}, },
); );
projectRoot = ""; projectRoot = "";
@ -258,32 +240,25 @@
/* End PBXProject section */ /* End PBXProject section */
/* Begin PBXReferenceProxy section */ /* Begin PBXReferenceProxy section */
03306D670C0B240B00CA1311 /* libmoses.a */ = { 1EAF9D9C14B9F7A6005E8EBD /* libOnDiskPt.a */ = {
isa = PBXReferenceProxy;
fileType = archive.ar;
path = libmoses.a;
remoteRef = 03306D660C0B240B00CA1311 /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR;
};
1CE646E311679F5F00EC77CC /* libOnDiskPt.a */ = {
isa = PBXReferenceProxy; isa = PBXReferenceProxy;
fileType = archive.ar; fileType = archive.ar;
path = libOnDiskPt.a; path = libOnDiskPt.a;
remoteRef = 1CE646E211679F5F00EC77CC /* PBXContainerItemProxy */; remoteRef = 1EAF9D9B14B9F7A6005E8EBD /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR; sourceTree = BUILT_PRODUCTS_DIR;
}; };
1EE8C2DC1476A34A002496F2 /* libutil.a */ = { 1EF0708A14B9EF070052152A /* liblm.a */ = {
isa = PBXReferenceProxy;
fileType = archive.ar;
path = libutil.a;
remoteRef = 1EE8C2DB1476A34A002496F2 /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR;
};
1EE8C3C81476AB3C002496F2 /* liblm.a */ = {
isa = PBXReferenceProxy; isa = PBXReferenceProxy;
fileType = archive.ar; fileType = archive.ar;
path = liblm.a; path = liblm.a;
remoteRef = 1EE8C3C71476AB3C002496F2 /* PBXContainerItemProxy */; remoteRef = 1EF0708914B9EF070052152A /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR;
};
1EF0709014B9EF0E0052152A /* libmoses.a */ = {
isa = PBXReferenceProxy;
fileType = archive.ar;
path = libmoses.a;
remoteRef = 1EF0708F14B9EF0E0052152A /* PBXContainerItemProxy */;
sourceTree = BUILT_PRODUCTS_DIR; sourceTree = BUILT_PRODUCTS_DIR;
}; };
/* End PBXReferenceProxy section */ /* End PBXReferenceProxy section */
@ -293,26 +268,32 @@
isa = PBXSourcesBuildPhase; isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647; buildActionMask = 2147483647;
files = ( files = (
1C8CFF4D0AD68D3600FA22E2 /* Main.cpp in Sources */, 1EAF9D7A14B9F566005E8EBD /* IOWrapper.cpp in Sources */,
1C8CFF4F0AD68D3600FA22E2 /* TranslationAnalysis.cpp in Sources */, 1EAF9D7C14B9F566005E8EBD /* Jamfile in Sources */,
03306D820C0B249A00CA1311 /* mbr.cpp in Sources */, 1EAF9D7D14B9F566005E8EBD /* LatticeMBR.cpp in Sources */,
B219B8580E9381AC00EAB407 /* IOWrapper.cpp in Sources */, 1EAF9D8114B9F566005E8EBD /* Main.cpp in Sources */,
B28B1ED3110F52BB00AAD188 /* LatticeMBR.cpp in Sources */, 1EAF9D8314B9F566005E8EBD /* mbr.cpp in Sources */,
1EAF9D8514B9F566005E8EBD /* TranslationAnalysis.cpp in Sources */,
); );
runOnlyForDeploymentPostprocessing = 0; runOnlyForDeploymentPostprocessing = 0;
}; };
/* End PBXSourcesBuildPhase section */ /* End PBXSourcesBuildPhase section */
/* Begin PBXTargetDependency section */ /* Begin PBXTargetDependency section */
03306D780C0B244800CA1311 /* PBXTargetDependency */ = { 1EAF9D9F14B9F7BD005E8EBD /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = moses;
targetProxy = 03306D770C0B244800CA1311 /* PBXContainerItemProxy */;
};
1CE6472E1167A11600EC77CC /* PBXTargetDependency */ = {
isa = PBXTargetDependency; isa = PBXTargetDependency;
name = OnDiskPt; name = OnDiskPt;
targetProxy = 1CE6472D1167A11600EC77CC /* PBXContainerItemProxy */; targetProxy = 1EAF9D9E14B9F7BD005E8EBD /* PBXContainerItemProxy */;
};
1EF070AE14B9F03F0052152A /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = moses;
targetProxy = 1EF070AD14B9F03F0052152A /* PBXContainerItemProxy */;
};
1EF070B014B9F0430052152A /* PBXTargetDependency */ = {
isa = PBXTargetDependency;
name = lm;
targetProxy = 1EF070AF14B9F0430052152A /* PBXContainerItemProxy */;
}; };
/* End PBXTargetDependency section */ /* End PBXTargetDependency section */
@ -341,9 +322,8 @@
); );
INSTALL_PATH = "$(HOME)/bin"; INSTALL_PATH = "$(HOME)/bin";
LIBRARY_SEARCH_PATHS = ( LIBRARY_SEARCH_PATHS = (
../irstlm/lib, ../../irstlm/lib,
../srilm/lib/macosx, ../../srilm/lib/macosx,
../kenlm,
); );
OTHER_LDFLAGS = ( OTHER_LDFLAGS = (
"-lflm", "-lflm",
@ -382,9 +362,8 @@
); );
INSTALL_PATH = "$(HOME)/bin"; INSTALL_PATH = "$(HOME)/bin";
LIBRARY_SEARCH_PATHS = ( LIBRARY_SEARCH_PATHS = (
../irstlm/lib, ../../irstlm/lib,
../srilm/lib/macosx, ../../srilm/lib/macosx,
../kenlm,
); );
OTHER_LDFLAGS = ( OTHER_LDFLAGS = (
"-lflm", "-lflm",
@ -415,9 +394,8 @@
); );
INSTALL_PATH = "$(HOME)/bin"; INSTALL_PATH = "$(HOME)/bin";
LIBRARY_SEARCH_PATHS = ( LIBRARY_SEARCH_PATHS = (
../irstlm/lib, ../../irstlm/lib,
../srilm/lib/macosx, ../../srilm/lib/macosx,
../kenlm,
); );
OTHER_LDFLAGS = ( OTHER_LDFLAGS = (
"-lflm", "-lflm",
@ -436,9 +414,9 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
buildSettings = { buildSettings = {
ARCHS = "$(ONLY_ACTIVE_ARCH_PRE_XCODE_3_1)"; ARCHS = "$(ONLY_ACTIVE_ARCH_PRE_XCODE_3_1)";
HEADER_SEARCH_PATHS = ../kenlm;
ONLY_ACTIVE_ARCH_PRE_XCODE_3_1 = "$(NATIVE_ARCH_ACTUAL)"; ONLY_ACTIVE_ARCH_PRE_XCODE_3_1 = "$(NATIVE_ARCH_ACTUAL)";
SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.6.sdk"; SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.6.sdk";
USER_HEADER_SEARCH_PATHS = "../../moses/src ../../";
VALID_ARCHS = "i386 ppc ppc64 ppc7400 ppc970 x86_64"; VALID_ARCHS = "i386 ppc ppc64 ppc7400 ppc970 x86_64";
}; };
name = Debug; name = Debug;
@ -447,9 +425,9 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
buildSettings = { buildSettings = {
ARCHS = "$(ONLY_ACTIVE_ARCH_PRE_XCODE_3_1)"; ARCHS = "$(ONLY_ACTIVE_ARCH_PRE_XCODE_3_1)";
HEADER_SEARCH_PATHS = ../kenlm;
ONLY_ACTIVE_ARCH_PRE_XCODE_3_1 = "$(NATIVE_ARCH_ACTUAL)"; ONLY_ACTIVE_ARCH_PRE_XCODE_3_1 = "$(NATIVE_ARCH_ACTUAL)";
SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.6.sdk"; SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.6.sdk";
USER_HEADER_SEARCH_PATHS = "../../moses/src ../../";
VALID_ARCHS = "i386 ppc ppc64 ppc7400 ppc970 x86_64"; VALID_ARCHS = "i386 ppc ppc64 ppc7400 ppc970 x86_64";
}; };
name = Release; name = Release;
@ -458,9 +436,9 @@
isa = XCBuildConfiguration; isa = XCBuildConfiguration;
buildSettings = { buildSettings = {
ARCHS = "$(ONLY_ACTIVE_ARCH_PRE_XCODE_3_1)"; ARCHS = "$(ONLY_ACTIVE_ARCH_PRE_XCODE_3_1)";
HEADER_SEARCH_PATHS = ../kenlm;
ONLY_ACTIVE_ARCH_PRE_XCODE_3_1 = "$(NATIVE_ARCH_ACTUAL)"; ONLY_ACTIVE_ARCH_PRE_XCODE_3_1 = "$(NATIVE_ARCH_ACTUAL)";
SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.6.sdk"; SDKROOT = "$(DEVELOPER_SDK_DIR)/MacOSX10.6.sdk";
USER_HEADER_SEARCH_PATHS = "../../moses/src ../../";
VALID_ARCHS = "i386 ppc ppc64 ppc7400 ppc970 x86_64"; VALID_ARCHS = "i386 ppc ppc64 ppc7400 ppc970 x86_64";
}; };
name = Default; name = Default;

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,46 @@
//
// Java Sample client for mosesserver (Created by Marwen AZOUZI)
// The XML-RPC libraries are available at Apache (http://ws.apache.org/xmlrpc/)
//
import java.util.HashMap;
import java.net.URL;
import org.apache.xmlrpc.client.XmlRpcClient;
import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
/**
 * Sample XML-RPC client for mosesserver.
 *
 * Sends one sentence to the "translate" method of the server at
 * http://localhost:8080/RPC2, asking for phrase alignments and all factors,
 * then prints the input, the translation and (when present) the alignments.
 */
public class SampleClient {
    public static void main(String[] args) {
        try {
            // Create an instance of XmlRpcClient pointed at the server endpoint.
            XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
            config.setServerURL(new URL("http://localhost:8080/RPC2"));
            XmlRpcClient client = new XmlRpcClient();
            client.setConfig(config);

            // The XML-RPC data type used by mosesserver is <struct>.
            // In Java, this data type can be represented using a HashMap.
            HashMap<String, String> mosesParams = new HashMap<String, String>();
            // A string literal is already a String; no defensive copy is needed.
            String textToTranslate = "some text to translate .";
            mosesParams.put("text", textToTranslate);
            mosesParams.put("align", "true");
            mosesParams.put("report-all-factors", "true");

            // XmlRpcClient.execute does not accept a HashMap as its parameter
            // list; it takes either Object[] or List, so wrap the struct.
            Object[] params = new Object[] { mosesParams };

            // Invoke the remote method "translate". The result is an Object;
            // a wildcard-typed cast avoids raw-type warnings.
            HashMap<?, ?> result = (HashMap<?, ?>) client.execute("translate", params);

            // Print the returned results.
            String textTranslation = (String) result.get("text");
            System.out.println("Input : " + textToTranslate);
            System.out.println("Translation : " + textTranslation);

            // "align" is only read when the response contains it.
            Object alignValue = result.get("align");
            if (alignValue != null) {
                Object[] aligns = (Object[]) alignValue;
                System.out.println("Phrase alignments : [Source Start:Source End][Target Start]");
                for (Object element : aligns) {
                    HashMap<?, ?> align = (HashMap<?, ?>) element;
                    System.out.println("[" + align.get("src-start") + ":" + align.get("src-end") + "][" + align.get("tgt-start") + "]");
                }
            }
        } catch (Exception e) {
            // Sample code: report any connection, protocol or cast failure.
            e.printStackTrace();
        }
    }
}

View File

@ -169,52 +169,6 @@ raw-corpus = $toy-data/nc-5k.$output-extension
# #
#lm = #lm =
#################################################################
# INTERPOLATING LANGUAGE MODELS
[INTERPOLATED-LM]
# if multiple language models are used, these may be combined
# by optimizing perplexity on a tuning set
# see, for instance [Koehn and Schwenk, IJCNLP 2008]
### script to interpolate language models
# if commented out, no interpolation is performed
#
# script = $moses-script-dir/ems/support/interpolate-lm.perl
### tuning set
# you may use the same set that is used for mert tuning (reference set)
#
#tuning-sgm =
#raw-tuning =
#tokenized-tuning =
#factored-tuning =
#lowercased-tuning =
#split-tuning =
### script to use for binary table format for irstlm or kenlm
# (default: no binarization)
# irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm
# kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/kenlm/build_binary
#type = 8
### script to create quantized language model format (irstlm)
# (default: no quantization)
#
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm
### script to use for converting into randomized table format
# (default: no randomization)
#
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8"
#################################################################
# TRANSLATION MODEL TRAINING
[TRAINING] [TRAINING]

View File

@ -1,354 +0,0 @@
// !$*UTF8*$!
{
archiveVersion = 1;
classes = {
};
objectVersion = 46;
objects = {
/* Begin PBXBuildFile section */
1EE8C3981476A73C002496F2 /* bhiksha.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3651476A73C002496F2 /* bhiksha.cc */; };
1EE8C3991476A73C002496F2 /* bhiksha.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3661476A73C002496F2 /* bhiksha.hh */; };
1EE8C39A1476A73C002496F2 /* binary_format.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3671476A73C002496F2 /* binary_format.cc */; };
1EE8C39B1476A73C002496F2 /* binary_format.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3681476A73C002496F2 /* binary_format.hh */; };
1EE8C39C1476A73C002496F2 /* blank.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3691476A73C002496F2 /* blank.hh */; };
1EE8C39D1476A73C002496F2 /* build_binary.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C36A1476A73C002496F2 /* build_binary.cc */; };
1EE8C39E1476A73C002496F2 /* config.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C36D1476A73C002496F2 /* config.cc */; };
1EE8C39F1476A73C002496F2 /* config.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C36E1476A73C002496F2 /* config.hh */; };
1EE8C3A01476A73C002496F2 /* enumerate_vocab.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3711476A73C002496F2 /* enumerate_vocab.hh */; };
1EE8C3A11476A73C002496F2 /* facade.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3721476A73C002496F2 /* facade.hh */; };
1EE8C3A21476A73C002496F2 /* left_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3731476A73C002496F2 /* left_test.cc */; };
1EE8C3A31476A73C002496F2 /* left.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3741476A73C002496F2 /* left.hh */; };
1EE8C3A41476A73C002496F2 /* lm_exception.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3761476A73C002496F2 /* lm_exception.cc */; };
1EE8C3A51476A73C002496F2 /* lm_exception.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3771476A73C002496F2 /* lm_exception.hh */; };
1EE8C3A71476A73C002496F2 /* max_order.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C37B1476A73C002496F2 /* max_order.hh */; };
1EE8C3A81476A73C002496F2 /* model_test.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C37C1476A73C002496F2 /* model_test.cc */; };
1EE8C3A91476A73C002496F2 /* model_type.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C37D1476A73C002496F2 /* model_type.hh */; };
1EE8C3AA1476A73C002496F2 /* model.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C37E1476A73C002496F2 /* model.cc */; };
1EE8C3AB1476A73C002496F2 /* model.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C37F1476A73C002496F2 /* model.hh */; };
1EE8C3AC1476A73C002496F2 /* ngram_query.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3801476A73C002496F2 /* ngram_query.cc */; };
1EE8C3AD1476A73C002496F2 /* quantize.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3811476A73C002496F2 /* quantize.cc */; };
1EE8C3AE1476A73C002496F2 /* quantize.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3821476A73C002496F2 /* quantize.hh */; };
1EE8C3AF1476A73C002496F2 /* read_arpa.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3831476A73C002496F2 /* read_arpa.cc */; };
1EE8C3B01476A73C002496F2 /* read_arpa.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3841476A73C002496F2 /* read_arpa.hh */; };
1EE8C3B11476A73C002496F2 /* return.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3861476A73C002496F2 /* return.hh */; };
1EE8C3B21476A73C002496F2 /* search_hashed.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3871476A73C002496F2 /* search_hashed.cc */; };
1EE8C3B31476A73C002496F2 /* search_hashed.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3881476A73C002496F2 /* search_hashed.hh */; };
1EE8C3B41476A73C002496F2 /* search_trie.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3891476A73C002496F2 /* search_trie.cc */; };
1EE8C3B51476A73C002496F2 /* search_trie.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C38A1476A73C002496F2 /* search_trie.hh */; };
1EE8C3B61476A73C002496F2 /* trie_sort.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C38E1476A73C002496F2 /* trie_sort.cc */; };
1EE8C3B71476A73C002496F2 /* trie_sort.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C38F1476A73C002496F2 /* trie_sort.hh */; };
1EE8C3B81476A73C002496F2 /* trie.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3901476A73C002496F2 /* trie.cc */; };
1EE8C3B91476A73C002496F2 /* trie.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3911476A73C002496F2 /* trie.hh */; };
1EE8C3BA1476A73C002496F2 /* virtual_interface.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3921476A73C002496F2 /* virtual_interface.cc */; };
1EE8C3BB1476A73C002496F2 /* virtual_interface.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3931476A73C002496F2 /* virtual_interface.hh */; };
1EE8C3BC1476A73C002496F2 /* vocab.cc in Sources */ = {isa = PBXBuildFile; fileRef = 1EE8C3941476A73C002496F2 /* vocab.cc */; };
1EE8C3BD1476A73C002496F2 /* vocab.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3951476A73C002496F2 /* vocab.hh */; };
1EE8C3BE1476A73C002496F2 /* weights.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3961476A73C002496F2 /* weights.hh */; };
1EE8C3BF1476A73C002496F2 /* word_index.hh in Headers */ = {isa = PBXBuildFile; fileRef = 1EE8C3971476A73C002496F2 /* word_index.hh */; };
/* End PBXBuildFile section */
/* Begin PBXFileReference section */
1EE8C2E91476A48E002496F2 /* liblm.a */ = {isa = PBXFileReference; explicitFileType = archive.ar; includeInIndex = 0; path = liblm.a; sourceTree = BUILT_PRODUCTS_DIR; };
1EE8C3651476A73C002496F2 /* bhiksha.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = bhiksha.cc; sourceTree = "<group>"; };
1EE8C3661476A73C002496F2 /* bhiksha.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = bhiksha.hh; sourceTree = "<group>"; };
1EE8C3671476A73C002496F2 /* binary_format.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = binary_format.cc; sourceTree = "<group>"; };
1EE8C3681476A73C002496F2 /* binary_format.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = binary_format.hh; sourceTree = "<group>"; };
1EE8C3691476A73C002496F2 /* blank.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = blank.hh; sourceTree = "<group>"; };
1EE8C36A1476A73C002496F2 /* build_binary.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = build_binary.cc; sourceTree = "<group>"; };
1EE8C36D1476A73C002496F2 /* config.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = config.cc; sourceTree = "<group>"; };
1EE8C36E1476A73C002496F2 /* config.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = config.hh; sourceTree = "<group>"; };
1EE8C3711476A73C002496F2 /* enumerate_vocab.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = enumerate_vocab.hh; sourceTree = "<group>"; };
1EE8C3721476A73C002496F2 /* facade.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = facade.hh; sourceTree = "<group>"; };
1EE8C3731476A73C002496F2 /* left_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = left_test.cc; sourceTree = "<group>"; };
1EE8C3741476A73C002496F2 /* left.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = left.hh; sourceTree = "<group>"; };
1EE8C3761476A73C002496F2 /* lm_exception.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = lm_exception.cc; sourceTree = "<group>"; };
1EE8C3771476A73C002496F2 /* lm_exception.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = lm_exception.hh; sourceTree = "<group>"; };
1EE8C37B1476A73C002496F2 /* max_order.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = max_order.hh; sourceTree = "<group>"; };
1EE8C37C1476A73C002496F2 /* model_test.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = model_test.cc; sourceTree = "<group>"; };
1EE8C37D1476A73C002496F2 /* model_type.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = model_type.hh; sourceTree = "<group>"; };
1EE8C37E1476A73C002496F2 /* model.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = model.cc; sourceTree = "<group>"; };
1EE8C37F1476A73C002496F2 /* model.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = model.hh; sourceTree = "<group>"; };
1EE8C3801476A73C002496F2 /* ngram_query.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ngram_query.cc; sourceTree = "<group>"; };
1EE8C3811476A73C002496F2 /* quantize.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = quantize.cc; sourceTree = "<group>"; };
1EE8C3821476A73C002496F2 /* quantize.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = quantize.hh; sourceTree = "<group>"; };
1EE8C3831476A73C002496F2 /* read_arpa.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = read_arpa.cc; sourceTree = "<group>"; };
1EE8C3841476A73C002496F2 /* read_arpa.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = read_arpa.hh; sourceTree = "<group>"; };
1EE8C3861476A73C002496F2 /* return.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = return.hh; sourceTree = "<group>"; };
1EE8C3871476A73C002496F2 /* search_hashed.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = search_hashed.cc; sourceTree = "<group>"; };
1EE8C3881476A73C002496F2 /* search_hashed.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = search_hashed.hh; sourceTree = "<group>"; };
1EE8C3891476A73C002496F2 /* search_trie.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = search_trie.cc; sourceTree = "<group>"; };
1EE8C38A1476A73C002496F2 /* search_trie.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = search_trie.hh; sourceTree = "<group>"; };
1EE8C38E1476A73C002496F2 /* trie_sort.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trie_sort.cc; sourceTree = "<group>"; };
1EE8C38F1476A73C002496F2 /* trie_sort.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = trie_sort.hh; sourceTree = "<group>"; };
1EE8C3901476A73C002496F2 /* trie.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = trie.cc; sourceTree = "<group>"; };
1EE8C3911476A73C002496F2 /* trie.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = trie.hh; sourceTree = "<group>"; };
1EE8C3921476A73C002496F2 /* virtual_interface.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = virtual_interface.cc; sourceTree = "<group>"; };
1EE8C3931476A73C002496F2 /* virtual_interface.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = virtual_interface.hh; sourceTree = "<group>"; };
1EE8C3941476A73C002496F2 /* vocab.cc */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = vocab.cc; sourceTree = "<group>"; };
1EE8C3951476A73C002496F2 /* vocab.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = vocab.hh; sourceTree = "<group>"; };
1EE8C3961476A73C002496F2 /* weights.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = weights.hh; sourceTree = "<group>"; };
1EE8C3971476A73C002496F2 /* word_index.hh */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.h; path = word_index.hh; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
1EE8C2E61476A48E002496F2 /* Frameworks */ = {
isa = PBXFrameworksBuildPhase;
buildActionMask = 2147483647;
files = (
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXFrameworksBuildPhase section */
/* Begin PBXGroup section */
1EE8C2DE1476A48E002496F2 = {
isa = PBXGroup;
children = (
1EE8C3651476A73C002496F2 /* bhiksha.cc */,
1EE8C3661476A73C002496F2 /* bhiksha.hh */,
1EE8C3671476A73C002496F2 /* binary_format.cc */,
1EE8C3681476A73C002496F2 /* binary_format.hh */,
1EE8C3691476A73C002496F2 /* blank.hh */,
1EE8C36A1476A73C002496F2 /* build_binary.cc */,
1EE8C36D1476A73C002496F2 /* config.cc */,
1EE8C36E1476A73C002496F2 /* config.hh */,
1EE8C3711476A73C002496F2 /* enumerate_vocab.hh */,
1EE8C3721476A73C002496F2 /* facade.hh */,
1EE8C3731476A73C002496F2 /* left_test.cc */,
1EE8C3741476A73C002496F2 /* left.hh */,
1EE8C3761476A73C002496F2 /* lm_exception.cc */,
1EE8C3771476A73C002496F2 /* lm_exception.hh */,
1EE8C37B1476A73C002496F2 /* max_order.hh */,
1EE8C37C1476A73C002496F2 /* model_test.cc */,
1EE8C37D1476A73C002496F2 /* model_type.hh */,
1EE8C37E1476A73C002496F2 /* model.cc */,
1EE8C37F1476A73C002496F2 /* model.hh */,
1EE8C3801476A73C002496F2 /* ngram_query.cc */,
1EE8C3811476A73C002496F2 /* quantize.cc */,
1EE8C3821476A73C002496F2 /* quantize.hh */,
1EE8C3831476A73C002496F2 /* read_arpa.cc */,
1EE8C3841476A73C002496F2 /* read_arpa.hh */,
1EE8C3861476A73C002496F2 /* return.hh */,
1EE8C3871476A73C002496F2 /* search_hashed.cc */,
1EE8C3881476A73C002496F2 /* search_hashed.hh */,
1EE8C3891476A73C002496F2 /* search_trie.cc */,
1EE8C38A1476A73C002496F2 /* search_trie.hh */,
1EE8C38E1476A73C002496F2 /* trie_sort.cc */,
1EE8C38F1476A73C002496F2 /* trie_sort.hh */,
1EE8C3901476A73C002496F2 /* trie.cc */,
1EE8C3911476A73C002496F2 /* trie.hh */,
1EE8C3921476A73C002496F2 /* virtual_interface.cc */,
1EE8C3931476A73C002496F2 /* virtual_interface.hh */,
1EE8C3941476A73C002496F2 /* vocab.cc */,
1EE8C3951476A73C002496F2 /* vocab.hh */,
1EE8C3961476A73C002496F2 /* weights.hh */,
1EE8C3971476A73C002496F2 /* word_index.hh */,
1EE8C2EA1476A48E002496F2 /* Products */,
);
sourceTree = "<group>";
};
1EE8C2EA1476A48E002496F2 /* Products */ = {
isa = PBXGroup;
children = (
1EE8C2E91476A48E002496F2 /* liblm.a */,
);
name = Products;
sourceTree = "<group>";
};
/* End PBXGroup section */
/* Begin PBXHeadersBuildPhase section */
1EE8C2E71476A48E002496F2 /* Headers */ = {
isa = PBXHeadersBuildPhase;
buildActionMask = 2147483647;
files = (
1EE8C3991476A73C002496F2 /* bhiksha.hh in Headers */,
1EE8C39B1476A73C002496F2 /* binary_format.hh in Headers */,
1EE8C39C1476A73C002496F2 /* blank.hh in Headers */,
1EE8C39F1476A73C002496F2 /* config.hh in Headers */,
1EE8C3A01476A73C002496F2 /* enumerate_vocab.hh in Headers */,
1EE8C3A11476A73C002496F2 /* facade.hh in Headers */,
1EE8C3A31476A73C002496F2 /* left.hh in Headers */,
1EE8C3A51476A73C002496F2 /* lm_exception.hh in Headers */,
1EE8C3A71476A73C002496F2 /* max_order.hh in Headers */,
1EE8C3A91476A73C002496F2 /* model_type.hh in Headers */,
1EE8C3AB1476A73C002496F2 /* model.hh in Headers */,
1EE8C3AE1476A73C002496F2 /* quantize.hh in Headers */,
1EE8C3B01476A73C002496F2 /* read_arpa.hh in Headers */,
1EE8C3B11476A73C002496F2 /* return.hh in Headers */,
1EE8C3B31476A73C002496F2 /* search_hashed.hh in Headers */,
1EE8C3B51476A73C002496F2 /* search_trie.hh in Headers */,
1EE8C3B71476A73C002496F2 /* trie_sort.hh in Headers */,
1EE8C3B91476A73C002496F2 /* trie.hh in Headers */,
1EE8C3BB1476A73C002496F2 /* virtual_interface.hh in Headers */,
1EE8C3BD1476A73C002496F2 /* vocab.hh in Headers */,
1EE8C3BE1476A73C002496F2 /* weights.hh in Headers */,
1EE8C3BF1476A73C002496F2 /* word_index.hh in Headers */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXHeadersBuildPhase section */
/* Begin PBXNativeTarget section */
1EE8C2E81476A48E002496F2 /* lm */ = {
isa = PBXNativeTarget;
buildConfigurationList = 1EE8C2ED1476A48E002496F2 /* Build configuration list for PBXNativeTarget "lm" */;
buildPhases = (
1EE8C2E51476A48E002496F2 /* Sources */,
1EE8C2E61476A48E002496F2 /* Frameworks */,
1EE8C2E71476A48E002496F2 /* Headers */,
);
buildRules = (
);
dependencies = (
);
name = lm;
productName = lm;
productReference = 1EE8C2E91476A48E002496F2 /* liblm.a */;
productType = "com.apple.product-type.library.static";
};
/* End PBXNativeTarget section */
/* Begin PBXProject section */
1EE8C2E01476A48E002496F2 /* Project object */ = {
isa = PBXProject;
buildConfigurationList = 1EE8C2E31476A48E002496F2 /* Build configuration list for PBXProject "lm" */;
compatibilityVersion = "Xcode 3.2";
developmentRegion = English;
hasScannedForEncodings = 0;
knownRegions = (
en,
);
mainGroup = 1EE8C2DE1476A48E002496F2;
productRefGroup = 1EE8C2EA1476A48E002496F2 /* Products */;
projectDirPath = "";
projectRoot = "";
targets = (
1EE8C2E81476A48E002496F2 /* lm */,
);
};
/* End PBXProject section */
/* Begin PBXSourcesBuildPhase section */
1EE8C2E51476A48E002496F2 /* Sources */ = {
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
1EE8C3981476A73C002496F2 /* bhiksha.cc in Sources */,
1EE8C39A1476A73C002496F2 /* binary_format.cc in Sources */,
1EE8C39D1476A73C002496F2 /* build_binary.cc in Sources */,
1EE8C39E1476A73C002496F2 /* config.cc in Sources */,
1EE8C3A21476A73C002496F2 /* left_test.cc in Sources */,
1EE8C3A41476A73C002496F2 /* lm_exception.cc in Sources */,
1EE8C3A81476A73C002496F2 /* model_test.cc in Sources */,
1EE8C3AA1476A73C002496F2 /* model.cc in Sources */,
1EE8C3AC1476A73C002496F2 /* ngram_query.cc in Sources */,
1EE8C3AD1476A73C002496F2 /* quantize.cc in Sources */,
1EE8C3AF1476A73C002496F2 /* read_arpa.cc in Sources */,
1EE8C3B21476A73C002496F2 /* search_hashed.cc in Sources */,
1EE8C3B41476A73C002496F2 /* search_trie.cc in Sources */,
1EE8C3B61476A73C002496F2 /* trie_sort.cc in Sources */,
1EE8C3B81476A73C002496F2 /* trie.cc in Sources */,
1EE8C3BA1476A73C002496F2 /* virtual_interface.cc in Sources */,
1EE8C3BC1476A73C002496F2 /* vocab.cc in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
/* End PBXSourcesBuildPhase section */
/* Begin XCBuildConfiguration section */
1EE8C2EB1476A48E002496F2 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = NO;
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_DYNAMIC_NO_PIC = NO;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
GCC_OPTIMIZATION_LEVEL = 0;
GCC_PREPROCESSOR_DEFINITIONS = (
"DEBUG=1",
"$(inherited)",
);
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
../,
/opt/local/include,
);
MACOSX_DEPLOYMENT_TARGET = 10.7;
ONLY_ACTIVE_ARCH = YES;
SDKROOT = macosx;
};
name = Debug;
};
1EE8C2EC1476A48E002496F2 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
ALWAYS_SEARCH_USER_PATHS = NO;
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
COPY_PHASE_STRIP = YES;
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
GCC_C_LANGUAGE_STANDARD = gnu99;
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
GCC_WARN_ABOUT_RETURN_TYPE = YES;
GCC_WARN_UNUSED_VARIABLE = YES;
HEADER_SEARCH_PATHS = (
../,
/opt/local/include,
);
MACOSX_DEPLOYMENT_TARGET = 10.7;
SDKROOT = macosx;
};
name = Release;
};
1EE8C2EE1476A48E002496F2 /* Debug */ = {
isa = XCBuildConfiguration;
buildSettings = {
EXECUTABLE_PREFIX = lib;
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Debug;
};
1EE8C2EF1476A48E002496F2 /* Release */ = {
isa = XCBuildConfiguration;
buildSettings = {
EXECUTABLE_PREFIX = lib;
PRODUCT_NAME = "$(TARGET_NAME)";
};
name = Release;
};
/* End XCBuildConfiguration section */
/* Begin XCConfigurationList section */
1EE8C2E31476A48E002496F2 /* Build configuration list for PBXProject "lm" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1EE8C2EB1476A48E002496F2 /* Debug */,
1EE8C2EC1476A48E002496F2 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
1EE8C2ED1476A48E002496F2 /* Build configuration list for PBXNativeTarget "lm" */ = {
isa = XCConfigurationList;
buildConfigurations = (
1EE8C2EE1476A48E002496F2 /* Debug */,
1EE8C2EF1476A48E002496F2 /* Release */,
);
defaultConfigurationIsVisible = 0;
defaultConfigurationName = Release;
};
/* End XCConfigurationList section */
};
rootObject = 1EE8C2E01476A48E002496F2 /* Project object */;
}

View File

@ -1,90 +1,9 @@
#include "lm/enumerate_vocab.hh" #include "lm/ngram_query.hh"
#include "lm/model.hh"
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <ctype.h>
#if !defined(_WIN32) && !defined(_WIN64)
#include <sys/resource.h>
#include <sys/time.h>
#endif
#if !defined(_WIN32) && !defined(_WIN64)
// Converts a timeval into seconds as a float.
// tv_usec counts microseconds, so it is scaled by 1e6 (the previous 1e9
// divisor understated the fractional part by a factor of 1000).
float FloatSec(const struct timeval &tv) {
  return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000.0);
}
#endif
// Writes `message` followed by the user and system CPU time of this process
// to stderr, then the VmRSS line of /proc/self/status if one is found.
// On Windows builds the body is compiled out and nothing is printed.
// If getrusage fails, the error is reported via perror and nothing else is
// written (not even `message`).
void PrintUsage(const char *message) {
#if !defined(_WIN32) && !defined(_WIN64)
  struct rusage usage;
  if (getrusage(RUSAGE_SELF, &usage)) {
    perror("getrusage");
    return;
  }
  std::cerr << message;
  std::cerr << "user\t" << FloatSec(usage.ru_utime) << "\nsys\t" << FloatSec(usage.ru_stime) << '\n';

  // Linux doesn't set memory usage :-(.
  std::ifstream status("/proc/self/status", std::ios::in);
  std::string line;
  while (getline(status, line)) {
    // Prefix test done with std::string::compare so the block does not
    // depend on strncmp, whose header (<cstring>) is not included here.
    if (line.compare(0, 7, "VmRSS:\t") == 0) {
      std::cerr << "rss " << (line.c_str() + 7) << '\n';
      break;
    }
  }
#endif
}
// Scores whitespace-separated words read from std::cin with `model`, one
// sentence per input line, and writes per-word results to std::cout.
//
// For every word it prints "word=vocab_id ngram_length prob" followed by a
// tab; each sentence ends with "Total: <sum of probs> OOV: <count>".
// When sentence_context is true the sentence starts from the model's
// begin-of-sentence state and an end-of-sentence token is scored as well;
// otherwise scoring starts from the null-context state.
// Resource usage is reported through PrintUsage before and after the loop.
template <class Model> void Query(const Model &model, bool sentence_context) {
  PrintUsage("Loading statistics:\n");
  typename Model::State state, out;
  lm::FullScoreReturn ret;
  std::string word;

  while (std::cin) {
    state = sentence_context ? model.BeginSentenceState() : model.NullContextState();
    float total = 0.0;
    bool got = false;
    unsigned int oov = 0;
    while (std::cin >> word) {
      got = true;
      lm::WordIndex vocab = model.GetVocabulary().Index(word);
      // Index 0 is counted as out-of-vocabulary.
      if (vocab == 0) ++oov;
      ret = model.FullScore(state, vocab, out);
      total += ret.prob;
      std::cout << word << '=' << vocab << ' ' << static_cast<unsigned int>(ret.ngram_length) << ' ' << ret.prob << '\t';
      state = out;
      // Consume trailing whitespace up to the newline so the end of the
      // sentence is detected; a non-space character is pushed back.
      char c;
      while (true) {
        c = std::cin.get();
        if (!std::cin) break;
        if (c == '\n') break;
        // isspace requires a value representable as unsigned char; a plain
        // char can be negative for non-ASCII bytes, which is undefined.
        if (!isspace(static_cast<unsigned char>(c))) {
          std::cin.unget();
          break;
        }
      }
      if (c == '\n') break;
    }
    // Nothing was read and the stream is exhausted: stop without printing
    // an empty sentence.
    if (!got && !std::cin) break;
    if (sentence_context) {
      ret = model.FullScore(state, model.GetVocabulary().EndSentence(), out);
      total += ret.prob;
      std::cout << "</s>=" << model.GetVocabulary().EndSentence() << ' ' << static_cast<unsigned int>(ret.ngram_length) << ' ' << ret.prob << '\t';
    }
    std::cout << "Total: " << total << " OOV: " << oov << '\n';
  }
  PrintUsage("After queries:\n");
}
template <class Model> void Query(const char *name) { template <class Model> void Query(const char *name) {
lm::ngram::Config config; lm::ngram::Config config;
Model model(name, config); Model model(name, config, std::cin, std::cout);
Query(model); Query(model);
} }
@ -100,19 +19,19 @@ int main(int argc, char *argv[]) {
if (lm::ngram::RecognizeBinary(argv[1], model_type)) { if (lm::ngram::RecognizeBinary(argv[1], model_type)) {
switch(model_type) { switch(model_type) {
case lm::ngram::HASH_PROBING: case lm::ngram::HASH_PROBING:
Query<lm::ngram::ProbingModel>(argv[1], sentence_context); Query<lm::ngram::ProbingModel>(argv[1], sentence_context, std::cin, std::cout);
break; break;
case lm::ngram::TRIE_SORTED: case lm::ngram::TRIE_SORTED:
Query<lm::ngram::TrieModel>(argv[1], sentence_context); Query<lm::ngram::TrieModel>(argv[1], sentence_context, std::cin, std::cout);
break; break;
case lm::ngram::QUANT_TRIE_SORTED: case lm::ngram::QUANT_TRIE_SORTED:
Query<lm::ngram::QuantTrieModel>(argv[1], sentence_context); Query<lm::ngram::QuantTrieModel>(argv[1], sentence_context, std::cin, std::cout);
break; break;
case lm::ngram::ARRAY_TRIE_SORTED: case lm::ngram::ARRAY_TRIE_SORTED:
Query<lm::ngram::ArrayTrieModel>(argv[1], sentence_context); Query<lm::ngram::ArrayTrieModel>(argv[1], sentence_context, std::cin, std::cout);
break; break;
case lm::ngram::QUANT_ARRAY_TRIE_SORTED: case lm::ngram::QUANT_ARRAY_TRIE_SORTED:
Query<lm::ngram::QuantArrayTrieModel>(argv[1], sentence_context); Query<lm::ngram::QuantArrayTrieModel>(argv[1], sentence_context, std::cin, std::cout);
break; break;
case lm::ngram::HASH_SORTED: case lm::ngram::HASH_SORTED:
default: default:
@ -120,7 +39,7 @@ int main(int argc, char *argv[]) {
abort(); abort();
} }
} else { } else {
Query<lm::ngram::ProbingModel>(argv[1], sentence_context); Query<lm::ngram::ProbingModel>(argv[1], sentence_context, std::cin, std::cout);
} }
PrintUsage("Total time including destruction:\n"); PrintUsage("Total time including destruction:\n");

91
lm/ngram_query.hh Normal file
View File

@ -0,0 +1,91 @@
#ifndef LM_NGRAM_QUERY__
#define LM_NGRAM_QUERY__
#include "lm/enumerate_vocab.hh"
#include "lm/model.hh"
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <ctype.h>
#if !defined(_WIN32) && !defined(_WIN64)
#include <sys/resource.h>
#include <sys/time.h>
#endif
#if !defined(_WIN32) && !defined(_WIN64)
// Converts a timeval into seconds as a float.
// tv_usec counts microseconds, so it is scaled by 1e6 (the previous 1e9
// divisor understated the fractional part by a factor of 1000).
// Declared inline because this definition lives in a header and would
// otherwise be defined once per including translation unit.
inline float FloatSec(const struct timeval &tv) {
  return static_cast<float>(tv.tv_sec) + (static_cast<float>(tv.tv_usec) / 1000000.0);
}
#endif
// Writes `message` followed by the user and system CPU time of this process
// to stderr, then the VmRSS line of /proc/self/status if one is found.
// On Windows builds the body is compiled out and nothing is printed.
// If getrusage fails, the error is reported via perror and nothing else is
// written (not even `message`).
// Declared inline because this definition lives in a header and would
// otherwise be defined once per including translation unit.
inline void PrintUsage(const char *message) {
#if !defined(_WIN32) && !defined(_WIN64)
  struct rusage usage;
  if (getrusage(RUSAGE_SELF, &usage)) {
    perror("getrusage");
    return;
  }
  std::cerr << message;
  std::cerr << "user\t" << FloatSec(usage.ru_utime) << "\nsys\t" << FloatSec(usage.ru_stime) << '\n';

  // Linux doesn't set memory usage :-(.
  std::ifstream status("/proc/self/status", std::ios::in);
  std::string line;
  while (getline(status, line)) {
    // Prefix test done with std::string::compare so the header does not
    // depend on strncmp, whose header (<cstring>) is not included here.
    if (line.compare(0, 7, "VmRSS:\t") == 0) {
      std::cerr << "rss " << (line.c_str() + 7) << '\n';
      break;
    }
  }
#endif
}
// Score whitespace-separated words read from inStream with the given model
// and write per-word and per-sentence results to outStream.
//
// model:            language model; must provide State, BeginSentenceState(),
//                   NullContextState(), GetVocabulary() and FullScore().
// sentence_context: when true, each sentence starts from the begin-of-sentence
//                   state and an end-of-sentence token is scored and printed
//                   after its last word; when false, each sentence starts from
//                   the null-context state and no end token is added.
// inStream:         input text; a newline following a word ends the sentence.
// outStream:        receives, for every word, "word=index ngram_length prob"
//                   followed by a tab, and for every sentence a final
//                   "Total: <sum of probs> OOV: <count>" line.  Words whose
//                   vocabulary index is 0 are counted as OOV.
//
// Resource usage is printed (via PrintUsage) before and after the queries.
template <class Model> void Query(const Model &model, bool sentence_context, std::istream &inStream, std::ostream &outStream) {
  PrintUsage("Loading statistics:\n");
  typename Model::State state, out;
  lm::FullScoreReturn ret;
  std::string word;

  while (inStream) {
    // Every sentence starts from a fresh context.
    state = sentence_context ? model.BeginSentenceState() : model.NullContextState();
    float total = 0.0;
    bool got = false;
    unsigned int oov = 0;
    while (inStream >> word) {
      got = true;
      lm::WordIndex vocab = model.GetVocabulary().Index(word);
      if (vocab == 0) ++oov;
      ret = model.FullScore(state, vocab, out);
      total += ret.prob;
      outStream << word << '=' << vocab << ' ' << static_cast<unsigned int>(ret.ngram_length) << ' ' << ret.prob << '\t';
      state = out;

      // Consume the whitespace after the word by hand so that a newline can
      // be recognised as the end of the sentence (operator>> would skip it).
      char c;
      while (true) {
        c = inStream.get();
        if (!inStream) break;
        if (c == '\n') break;
        // Cast first: handing isspace a negative char (any non-ASCII byte
        // where char is signed) is undefined behaviour.
        if (!isspace(static_cast<unsigned char>(c))) {
          // Start of the next word: put it back for operator>>.
          inStream.unget();
          break;
        }
      }
      if (c == '\n') break;
    }
    // Nothing was read and the stream is exhausted: no sentence to report.
    if (!got && !inStream) break;
    if (sentence_context) {
      ret = model.FullScore(state, model.GetVocabulary().EndSentence(), out);
      total += ret.prob;
      outStream << "</s>=" << model.GetVocabulary().EndSentence() << ' ' << static_cast<unsigned int>(ret.ngram_length) << ' ' << ret.prob << '\t';
    }
    outStream << "Total: " << total << " OOV: " << oov << '\n';
  }
  PrintUsage("After queries:\n");
}
#endif // LM_NGRAM_QUERY__

View File

@ -84,7 +84,7 @@ void TerScorer::prepareStats ( size_t sid, const string& text, ScoreStats& entry
} else if ( result.scoreAv() > tmp_result.scoreAv() ) { } else if ( result.scoreAv() > tmp_result.scoreAv() ) {
result = tmp_result; result = tmp_result;
} }
delete evaluation;
} }
ostringstream stats; ostringstream stats;
// multiplication by 100 in order to keep the average precision // multiplication by 100 in order to keep the average precision

View File

@ -37,6 +37,7 @@ POSSIBILITY OF SUCH DAMAGE.
//#include <vld.h> //#include <vld.h>
#endif #endif
#include <exception>
#include <fstream> #include <fstream>
#include "Main.h" #include "Main.h"
#include "FactorCollection.h" #include "FactorCollection.h"
@ -202,6 +203,7 @@ static void ShowWeights()
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
try {
IFVERBOSE(1) { IFVERBOSE(1) {
TRACE_ERR("command: "); TRACE_ERR("command: ");
for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" "); for(int i=0; i<argc; ++i) TRACE_ERR(argv[i]<<" ");
@ -278,6 +280,11 @@ int main(int argc, char* argv[])
IFVERBOSE(1) IFVERBOSE(1)
PrintUserTime("End."); PrintUserTime("End.");
} catch (const std::exception &e) {
std::cerr << "Exception: " << e.what() << std::endl;
return EXIT_FAILURE;
}
#ifdef HACK_EXIT #ifdef HACK_EXIT
//This avoids that detructors are called (it can take a long time) //This avoids that detructors are called (it can take a long time)
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);

View File

@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
* Moses main, for single-threaded and multi-threaded. * Moses main, for single-threaded and multi-threaded.
**/ **/
#include <exception>
#include <fstream> #include <fstream>
#include <sstream> #include <sstream>
#include <vector> #include <vector>
@ -323,6 +324,7 @@ static void ShowWeights()
/** main function of the command line version of the decoder **/ /** main function of the command line version of the decoder **/
int main(int argc, char** argv) int main(int argc, char** argv)
{ {
try {
#ifdef HAVE_PROTOBUF #ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION; GOOGLE_PROTOBUF_VERIFY_VERSION;
@ -496,6 +498,11 @@ int main(int argc, char** argv)
pool.Stop(true); //flush remaining jobs pool.Stop(true); //flush remaining jobs
#endif #endif
} catch (const std::exception &e) {
std::cerr << "Exception: " << e.what() << std::endl;
return EXIT_FAILURE;
}
#ifndef EXIT_RETURN #ifndef EXIT_RETURN
//This avoids that destructors are called (it can take a long time) //This avoids that destructors are called (it can take a long time)
exit(EXIT_SUCCESS); exit(EXIT_SUCCESS);

View File

@ -33,6 +33,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "Phrase.h" #include "Phrase.h"
#include "InputFileStream.h" #include "InputFileStream.h"
#include "StaticData.h" #include "StaticData.h"
#include "UserMessage.h"
using namespace std; using namespace std;
@ -62,6 +63,13 @@ bool LanguageModelIRST::Load(const std::string &filePath,
{ {
cerr << "In LanguageModelIRST::Load: nGramOrder = " << nGramOrder << "\n"; cerr << "In LanguageModelIRST::Load: nGramOrder = " << nGramOrder << "\n";
const StaticData &staticData = StaticData::Instance();
if (staticData.ThreadCount() != 1)
{
UserMessage::Add("IRST LM not-threadsafe");
return false;
}
FactorCollection &factorCollection = FactorCollection::Instance(); FactorCollection &factorCollection = FactorCollection::Instance();
m_factorType = factorType; m_factorType = factorType;

View File

@ -71,7 +71,7 @@ template <class Model> class LanguageModelKen : public LanguageModel {
std::string GetScoreProducerDescription(unsigned) const { std::string GetScoreProducerDescription(unsigned) const {
std::ostringstream oss; std::ostringstream oss;
oss << "LM_" << m_ngram->Order() << "gram"; oss << "LM_" << (unsigned)m_ngram->Order() << "gram";
return oss.str(); return oss.str();
} }
@ -101,11 +101,11 @@ template <class Model> class LanguageModelKen : public LanguageModel {
lm::WordIndex *end = indices + m_ngram->Order() - 1; lm::WordIndex *end = indices + m_ngram->Order() - 1;
int position = hypo.GetCurrTargetWordsRange().GetEndPos(); int position = hypo.GetCurrTargetWordsRange().GetEndPos();
for (; ; ++index, --position) { for (; ; ++index, --position) {
if (index == end) return index;
if (position == -1) { if (position == -1) {
*index = m_ngram->GetVocabulary().BeginSentence(); *index = m_ngram->GetVocabulary().BeginSentence();
return index + 1; return index + 1;
} }
if (index == end) return index;
*index = TranslateID(hypo.GetWord(position)); *index = TranslateID(hypo.GetWord(position));
} }
} }

View File

@ -197,9 +197,11 @@ public:
std::string binFname=filePath+".binphr.idx"; std::string binFname=filePath+".binphr.idx";
if(!FileExists(binFname.c_str())) { if(!FileExists(binFname.c_str())) {
TRACE_ERR( "bin ttable does not exist -> create it\n"); UserMessage::Add( "bin ttable does not exist\n");
InputFileStream in(filePath); abort();
m_dict->Create(in,filePath); //TRACE_ERR( "bin ttable does not exist -> create it\n");
//InputFileStream in(filePath);
//m_dict->Create(in,filePath);
} }
TRACE_ERR( "reading bin ttable\n"); TRACE_ERR( "reading bin ttable\n");
// m_dict->Read(filePath); // m_dict->Read(filePath);

View File

@ -23,7 +23,6 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#define moses_Util_h #define moses_Util_h
#include <iostream> #include <iostream>
#include "util/check.hh"
#include <fstream> #include <fstream>
#include <sstream> #include <sstream>
#include <string> #include <string>
@ -31,9 +30,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cmath> #include <cmath>
#include <limits> #include <limits>
#include <map> #include <map>
#include "TypeDef.h"
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include "util/check.hh"
#include "TypeDef.h"
namespace Moses namespace Moses
{ {

View File

@ -18,25 +18,34 @@ pair-extension = fr-en
# moses # moses
moses-src-dir = /home/pkoehn/moses moses-src-dir = /home/pkoehn/moses
# #
# moses binaries
moses-bin-dir = $moses-src-dir/dist/bin
#
# moses scripts # moses scripts
moses-script-dir = /home/pkoehn/moses/scripts moses-script-dir = $moses-src-dir/scripts
# #
# srilm # srilm
srilm-dir = $moses-src-dir/srilm/bin/i686 srilm-dir = $moses-src-dir/srilm/bin/i686
# #
# irstlm
irstlm-dir = $moses-src-dir/irstlm/bin
#
# randlm
randlm-dir = $moses-src-dir/randlm/bin
#
# data # data
wmt10-data = $working-dir/data wmt12-data = $working-dir/data
### basic tools ### basic tools
# #
# moses decoder # moses decoder
decoder = $moses-src-dir/dist/bin/moses decoder = $moses-bin-dir/moses
# conversion of phrase table into binary on-disk format # conversion of phrase table into binary on-disk format
ttable-binarizer = $moses-src-dir/dist/bin/processPhraseTable ttable-binarizer = $moses-bin-dir/processPhraseTable
# conversion of rule table into binary on-disk format # conversion of rule table into binary on-disk format
#ttable-binarizer = "$moses-src-dir/dist/bin/CreateOnDiskPt 1 1 5 100 2" #ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 5 100 2"
# tokenizers - comment out if all your data is already tokenized # tokenizers - comment out if all your data is already tokenized
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension" input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
@ -95,7 +104,7 @@ max-sentence-length = 80
### raw corpus files (untokenized, but sentence aligned) ### raw corpus files (untokenized, but sentence aligned)
# #
raw-stem = $wmt10-data/training/europarl-v5.$pair-extension raw-stem = $wmt12-data/training/europarl-v7.$pair-extension
### tokenized corpus files (may contain long sentences) ### tokenized corpus files (may contain long sentences)
# #
@ -112,10 +121,10 @@ raw-stem = $wmt10-data/training/europarl-v5.$pair-extension
#lowercased-stem = #lowercased-stem =
[CORPUS:nc] [CORPUS:nc]
raw-stem = $wmt10-data/training/news-commentary10.$pair-extension raw-stem = $wmt12-data/training/news-commentary-v7.$pair-extension
[CORPUS:un] IGNORE [CORPUS:un] IGNORE
raw-stem = $wmt10-data/training/undoc.2000.$pair-extension raw-stem = $wmt12-data/training/undoc.2000.$pair-extension
################################################################# #################################################################
# LANGUAGE MODEL TRAINING # LANGUAGE MODEL TRAINING
@ -123,10 +132,15 @@ raw-stem = $wmt10-data/training/undoc.2000.$pair-extension
[LM] [LM]
### tool to be used for language model training ### tool to be used for language model training
# for instance: ngram-count (SRILM), train-lm-on-disk.perl (Edinburgh) # srilm
#
lm-training = $srilm-dir/ngram-count lm-training = $srilm-dir/ngram-count
settings = "-interpolate -kndiscount -unk" settings = "-interpolate -kndiscount -unk"
# irstlm
#lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/lm"
#settings = ""
# order of the language model
order = 5 order = 5
### tool to be used for training randomized language model from scratch ### tool to be used for training randomized language model from scratch
@ -138,27 +152,21 @@ order = 5
# (default: no binarization) # (default: no binarization)
# irstlm # irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm #lm-binarizer = $irstlm-dir/compile-lm
# kenlm, also set type to 8 # kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/dist/bin/build_binary lm-binarizer = $moses-bin-dir/build_binary
#type = 8 type = 8
#
# if binarized, set type (default srilm; if binarized: irstlm)
#
# set to 8 when using kenlm
#type = 8
### script to create quantized language model format (irstlm) ### script to create quantized language model format (irstlm)
# (default: no quantization) # (default: no quantization)
# #
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm #lm-quantizer = $irstlm-dir/quantize-lm
### script to use for converting into randomized table format ### script to use for converting into randomized table format
# (default: no randomization) # (default: no randomization)
# #
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
### each language model to be used has its own section here ### each language model to be used has its own section here
@ -170,7 +178,7 @@ order = 5
### raw corpus (untokenized) ### raw corpus (untokenized)
# #
raw-corpus = $wmt10-data/training/europarl-v5.$output-extension raw-corpus = $wmt12-data/training/europarl-v7.$output-extension
### tokenized corpus files (may contain long sentences) ### tokenized corpus files (may contain long sentences)
# #
@ -182,13 +190,13 @@ raw-corpus = $wmt10-data/training/europarl-v5.$output-extension
#lm = #lm =
[LM:nc] [LM:nc]
raw-corpus = $wmt10-data/training/news-commentary10.$pair-extension.$output-extension raw-corpus = $wmt12-data/training/news-commentary-v7.$pair-extension.$output-extension
[LM:un] IGNORE [LM:un] IGNORE
raw-corpus = $wmt10-data/training/undoc.2000.$pair-extension.$output-extension raw-corpus = $wmt12-data/training/undoc.2000.$pair-extension.$output-extension
[LM:news] IGNORE [LM:news] IGNORE
raw-corpus = $wmt10-data/training/news.$output-extension.shuffled raw-corpus = $wmt12-data/training/news.$output-extension.shuffled
################################################################# #################################################################
@ -208,32 +216,36 @@ script = $moses-script-dir/ems/support/interpolate-lm.perl
### tuning set ### tuning set
# you may use the same set that is used for mert tuning (reference set) # you may use the same set that is used for mert tuning (reference set)
# #
tuning-sgm = $wmt10-data/dev/news-test2008-ref.$output-extension.sgm tuning-sgm = $wmt12-data/dev/newstest2010-ref.$output-extension.sgm
#raw-tuning = #raw-tuning =
#tokenized-tuning = #tokenized-tuning =
#factored-tuning = #factored-tuning =
#lowercased-tuning = #lowercased-tuning =
#split-tuning = #split-tuning =
### group language models for hierarchical interpolation
# (flat interpolation is limited to 10 language models)
#group = "first,second fourth,fifth"
### script to use for binary table format for irstlm or kenlm ### script to use for binary table format for irstlm or kenlm
# (default: no binarization) # (default: no binarization)
# irstlm # irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm #lm-binarizer = $irstlm-dir/compile-lm
# kenlm, also set type to 8 # kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/dist/bin/build_binary lm-binarizer = $moses-bin-dir/build_binary
#type = 8 type = 8
### script to create quantized language model format (irstlm) ### script to create quantized language model format (irstlm)
# (default: no quantization) # (default: no quantization)
# #
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm #lm-quantizer = $irstlm-dir/quantize-lm
### script to use for converting into randomized table format ### script to use for converting into randomized table format
# (default: no randomization) # (default: no randomization)
# #
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
################################################################# #################################################################
# TRANSLATION MODEL TRAINING # TRANSLATION MODEL TRAINING
@ -261,12 +273,18 @@ script = $moses-script-dir/training/train-model.perl
#generation-factors = "word -> pos" #generation-factors = "word -> pos"
#decoding-steps = "t0, g0" #decoding-steps = "t0, g0"
### parallelization of data preparation step
# the two directions of the data preparation can be run in parallel
# comment out if not needed
#
parallel = yes
### pre-computation for giza++ ### pre-computation for giza++
# giza++ has a more efficient data structure that needs to be # giza++ has a more efficient data structure that needs to be
# initialized with snt2cooc. if run in parallel, this may reduce # initialized with snt2cooc. if run in parallel, this may reduce
# memory requirements. set here the number of parts # memory requirements. set here the number of parts
# #
run-giza-in-parts = 5 #run-giza-in-parts = 5
### symmetrization method to obtain word alignments from giza output ### symmetrization method to obtain word alignments from giza output
# (commonly used: grow-diag-final-and) # (commonly used: grow-diag-final-and)
@ -355,18 +373,18 @@ score-settings = "--GoodTuring"
### tuning script to be used ### tuning script to be used
# #
tuning-script = $moses-script-dir/training/mert-moses.pl tuning-script = $moses-script-dir/training/mert-moses.pl
tuning-settings = "-mertdir $moses-src-dir/mert" tuning-settings = "-mertdir $moses-bin-dir"
### specify the corpus used for tuning ### specify the corpus used for tuning
# it should contain 1000s of sentences # it should contain 1000s of sentences
# #
input-sgm = $wmt10-data/dev/news-test2008-src.$input-extension.sgm input-sgm = $wmt12-data/dev/newstest2010-src.$input-extension.sgm
#raw-input = #raw-input =
#tokenized-input = #tokenized-input =
#factorized-input = #factorized-input =
#input = #input =
# #
reference-sgm = $wmt10-data/dev/news-test2008-ref.$output-extension.sgm reference-sgm = $wmt12-data/dev/newstest2010-ref.$output-extension.sgm
#raw-reference = #raw-reference =
#tokenized-reference = #tokenized-reference =
#factorized-reference = #factorized-reference =
@ -394,14 +412,14 @@ decoder-settings = ""
# and also point to a configuration file that contains # and also point to a configuration file that contains
# pointers to all relevant model files # pointers to all relevant model files
# #
#config = #config-with-reused-weights =
######################################################### #########################################################
## RECASER: restore case, this part only trains the model ## RECASER: restore case, this part only trains the model
[RECASING] [RECASING]
#decoder = $moses-src-dir/moses-cmd/src/moses.1521.srilm #decoder = $moses-bin-dir/moses
### training data ### training data
# raw input needs to be still tokenized, # raw input needs to be still tokenized,
@ -448,6 +466,11 @@ trainer = $moses-script-dir/recaser/train-truecaser.perl
### additional decoder settings ### additional decoder settings
# switches for the Moses decoder # switches for the Moses decoder
# common choices:
# "-threads N" for multi-threading
# "-mbr" for MBR decoding
# "-drop-unknown" for dropping unknown source words
# "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000" for cube pruning
# #
decoder-settings = "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000" decoder-settings = "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000"
@ -470,8 +493,8 @@ wrapping-script = "$moses-script-dir/ems/support/wrap-xml.perl $output-extension
### BLEU ### BLEU
# #
nist-bleu = $moses-script-dir/generic/mteval-v12.pl nist-bleu = $moses-script-dir/generic/mteval-v13a.pl
nist-bleu-c = "$moses-script-dir/generic/mteval-v12.pl -c" nist-bleu-c = "$moses-script-dir/generic/mteval-v13a.pl -c"
#multi-bleu = $moses-script-dir/generic/multi-bleu.perl #multi-bleu = $moses-script-dir/generic/multi-bleu.perl
#ibm-bleu = #ibm-bleu =
@ -481,9 +504,13 @@ nist-bleu-c = "$moses-script-dir/generic/mteval-v12.pl -c"
# ter = # ter =
### METEOR: gives credit to stem / WordNet synonym matches ### METEOR: gives credit to stem / WordNet synonym matches
# not yet integrated ## recently integrated - use with care
# ## only for supported languages, needs to be installed separately
# meteor = ##
## uncomment following 3 lines, modify first one adding the location of meteor installation on your disk
# meteor-script = "/project/software/meteor-1.3/meteor-1.3.jar"
# meteor = "java -Xmx2G -jar $meteor-script"
# meteor-params = " -l $output-extension -norm"
### Analysis: carry out various forms of analysis on the output ### Analysis: carry out various forms of analysis on the output
# #
@ -502,11 +529,11 @@ report-segmentation = yes
# further precision breakdown by factor # further precision breakdown by factor
#precision-by-coverage-factor = pos #precision-by-coverage-factor = pos
[EVALUATION:newstest2009] [EVALUATION:newstest2011]
### input data ### input data
# #
input-sgm = $wmt10-data/dev/newstest2009-src.$input-extension.sgm input-sgm = $wmt12-data/dev/newstest2011-src.$input-extension.sgm
# raw-input = # raw-input =
# tokenized-input = # tokenized-input =
# factorized-input = # factorized-input =
@ -514,7 +541,7 @@ input-sgm = $wmt10-data/dev/newstest2009-src.$input-extension.sgm
### reference data ### reference data
# #
reference-sgm = $wmt10-data/dev/newstest2009-ref.$output-extension.sgm reference-sgm = $wmt12-data/dev/newstest2011-ref.$output-extension.sgm
# raw-reference = # raw-reference =
# tokenized-reference = # tokenized-reference =
# reference = # reference =

View File

@ -18,25 +18,34 @@ pair-extension = fr-en
# moses # moses
moses-src-dir = /home/pkoehn/moses moses-src-dir = /home/pkoehn/moses
# #
# moses binaries
moses-bin-dir = $moses-src-dir/dist/bin
#
# moses scripts # moses scripts
moses-script-dir = /home/pkoehn/moses/scripts moses-script-dir = $moses-src-dir/scripts
# #
# srilm # srilm
srilm-dir = $moses-src-dir/srilm/bin/i686 srilm-dir = $moses-src-dir/srilm/bin/i686
# #
# irstlm
irstlm-dir = $moses-src-dir/irstlm/bin
#
# randlm
randlm-dir = $moses-src-dir/randlm/bin
#
# data # data
wmt10-data = $working-dir/data wmt12-data = $working-dir/data
### basic tools ### basic tools
# #
# moses decoder # moses decoder
decoder = $moses-src-dir/dist/bin/moses decoder = $moses-bin-dir/moses
# conversion of phrase table into binary on-disk format # conversion of phrase table into binary on-disk format
ttable-binarizer = $moses-src-dir/misc/processPhraseTable ttable-binarizer = $moses-bin-dir/processPhraseTable
# conversion of rule table into binary on-disk format # conversion of rule table into binary on-disk format
#ttable-binarizer = "$moses-src-dir/dist/bin/CreateOnDiskPt 1 1 5 100 2" #ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 5 100 2"
# tokenizers - comment out if all your data is already tokenized # tokenizers - comment out if all your data is already tokenized
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension" input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
@ -95,7 +104,7 @@ max-sentence-length = 80
### raw corpus files (untokenized, but sentence aligned) ### raw corpus files (untokenized, but sentence aligned)
# #
raw-stem = $wmt10-data/training/europarl-v5.$pair-extension raw-stem = $wmt12-data/training/europarl-v7.$pair-extension
### tokenized corpus files (may contain long sentences) ### tokenized corpus files (may contain long sentences)
# #
@ -112,10 +121,10 @@ raw-stem = $wmt10-data/training/europarl-v5.$pair-extension
#lowercased-stem = #lowercased-stem =
[CORPUS:nc] [CORPUS:nc]
raw-stem = $wmt10-data/training/news-commentary10.$pair-extension raw-stem = $wmt12-data/training/news-commentary-v7.$pair-extension
[CORPUS:un] IGNORE [CORPUS:un] IGNORE
raw-stem = $wmt10-data/training/undoc.2000.$pair-extension raw-stem = $wmt12-data/training/undoc.2000.$pair-extension
################################################################# #################################################################
# LANGUAGE MODEL TRAINING # LANGUAGE MODEL TRAINING
@ -123,36 +132,41 @@ raw-stem = $wmt10-data/training/undoc.2000.$pair-extension
[LM] [LM]
### tool to be used for language model training ### tool to be used for language model training
# for instance: ngram-count (SRILM), train-lm-on-disk.perl (Edinburgh) # srilm
#
lm-training = $srilm-dir/ngram-count lm-training = $srilm-dir/ngram-count
settings = "-interpolate -kndiscount -unk" settings = "-interpolate -kndiscount -unk"
# irstlm
#lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/lm"
#settings = ""
# order of the language model
order = 5 order = 5
### tool to be used for training randomized language model from scratch ### tool to be used for training randomized language model from scratch
# (more commonly, a SRILM is trained) # (more commonly, a SRILM is trained)
# #
#rlm-training = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #rlm-training = "$randlm-dir/buildlm -falsepos 8 -values 8"
### script to use for binary table format for irstlm or kenlm ### script to use for binary table format for irstlm or kenlm
# (default: no binarization) # (default: no binarization)
# irstlm # irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm #lm-binarizer = $irstlm-dir/compile-lm
# kenlm, also set type to 8 # kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/dist/bin/build_binary #lm-binarizer = $moses-bin-dir/build_binary
#type = 8 #type = 8
### script to create quantized language model format (irstlm) ### script to create quantized language model format (irstlm)
# (default: no quantization) # (default: no quantization)
# #
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm #lm-quantizer = $irstlm-dir/quantize-lm
### script to use for converting into randomized table format ### script to use for converting into randomized table format
# (default: no randomization) # (default: no randomization)
# #
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
### each language model to be used has its own section here ### each language model to be used has its own section here
@ -164,7 +178,7 @@ order = 5
### raw corpus (untokenized) ### raw corpus (untokenized)
# #
raw-corpus = $wmt10-data/training/europarl-v5.$output-extension raw-corpus = $wmt12-data/training/europarl-v7.$output-extension
### tokenized corpus files (may contain long sentences) ### tokenized corpus files (may contain long sentences)
# #
@ -176,19 +190,19 @@ raw-corpus = $wmt10-data/training/europarl-v5.$output-extension
#lm = #lm =
[LM:nc] [LM:nc]
raw-corpus = $wmt10-data/training/news-commentary10.$pair-extension.$output-extension raw-corpus = $wmt12-data/training/news-commentary-v7.$pair-extension.$output-extension
[LM:un] IGNORE [LM:un] IGNORE
raw-corpus = $wmt10-data/training/undoc.2000.$pair-extension.$output-extension raw-corpus = $wmt12-data/training/undoc.2000.$pair-extension.$output-extension
[LM:news] IGNORE [LM:news] IGNORE
raw-corpus = $wmt10-data/training/news.$output-extension.shuffled raw-corpus = $wmt12-data/training/news.$output-extension.shuffled
[LM:nc=pos] [LM:nc=pos]
factors = "pos" factors = "pos"
order = 7 order = 7
settings = "-interpolate -unk" settings = "-interpolate -unk"
raw-corpus = $wmt10-data/training/news-commentary10.$pair-extension.$output-extension raw-corpus = $wmt12-data/training/news-commentary-v7.$pair-extension.$output-extension
################################################################# #################################################################
# INTERPOLATING LANGUAGE MODELS # INTERPOLATING LANGUAGE MODELS
@ -207,32 +221,36 @@ script = $moses-script-dir/ems/support/interpolate-lm.perl
### tuning set ### tuning set
# you may use the same set that is used for mert tuning (reference set) # you may use the same set that is used for mert tuning (reference set)
# #
tuning-sgm = $wmt10-data/dev/news-test2008-ref.$output-extension.sgm tuning-sgm = $wmt12-data/dev/newstest2010-ref.$output-extension.sgm
#raw-tuning = #raw-tuning =
#tokenized-tuning = #tokenized-tuning =
#factored-tuning = #factored-tuning =
#lowercased-tuning = #lowercased-tuning =
#split-tuning = #split-tuning =
### group language models for hierarchical interpolation
# (flat interpolation is limited to 10 language models)
#group = "first,second fourth,fifth"
### script to use for binary table format for irstlm or kenlm ### script to use for binary table format for irstlm or kenlm
# (default: no binarization) # (default: no binarization)
# irstlm # irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm #lm-binarizer = $irstlm-dir/compile-lm
# kenlm, also set type to 8 # kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/dist/bin/build_binary #lm-binarizer = $moses-bin-dir/build_binary
#type = 8 #type = 8
### script to create quantized language model format (irstlm) ### script to create quantized language model format (irstlm)
# (default: no quantization) # (default: no quantization)
# #
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm #lm-quantizer = $irstlm-dir/quantize-lm
### script to use for converting into randomized table format ### script to use for converting into randomized table format
# (default: no randomization) # (default: no randomization)
# #
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
################################################################# #################################################################
# FACTOR DEFINITION # FACTOR DEFINITION
@ -275,12 +293,18 @@ reordering-factors = "word -> word"
#generation-factors = #generation-factors =
decoding-steps = "t0" decoding-steps = "t0"
### parallelization of data preparation step
# the two directions of the data preparation can be run in parallel
# comment out if not needed
#
parallel = yes
### pre-computation for giza++ ### pre-computation for giza++
# giza++ has a more efficient data structure that needs to be # giza++ has a more efficient data structure that needs to be
# initialized with snt2cooc. if run in parallel, this may reduce # initialized with snt2cooc. if run in parallel, this may reduce
# memory requirements. set here the number of parts # memory requirements. set here the number of parts
# #
run-giza-in-parts = 5 #run-giza-in-parts = 5
### symmetrization method to obtain word alignments from giza output ### symmetrization method to obtain word alignments from giza output
# (commonly used: grow-diag-final-and) # (commonly used: grow-diag-final-and)
@ -354,7 +378,7 @@ score-settings = "--GoodTuring"
# point to a configuration file that contains # point to a configuration file that contains
# pointers to all relevant model files # pointers to all relevant model files
# #
#config = #config-with-reused-weights =
##################################################### #####################################################
### TUNING: finding good weights for model components ### TUNING: finding good weights for model components
@ -369,18 +393,18 @@ score-settings = "--GoodTuring"
### tuning script to be used ### tuning script to be used
# #
tuning-script = $moses-script-dir/training/mert-moses.pl tuning-script = $moses-script-dir/training/mert-moses.pl
tuning-settings = "-mertdir $moses-src-dir/mert" tuning-settings = "-mertdir $moses-bin-dir"
### specify the corpus used for tuning ### specify the corpus used for tuning
# it should contain 1000s of sentences # it should contain 1000s of sentences
# #
input-sgm = $wmt10-data/dev/news-test2008-src.$input-extension.sgm input-sgm = $wmt12-data/dev/newstest2010-src.$input-extension.sgm
#raw-input = #raw-input =
#tokenized-input = #tokenized-input =
#factorized-input = #factorized-input =
#input = #input =
# #
reference-sgm = $wmt10-data/dev/news-test2008-ref.$output-extension.sgm reference-sgm = $wmt12-data/dev/newstest2010-ref.$output-extension.sgm
#raw-reference = #raw-reference =
#tokenized-reference = #tokenized-reference =
#factorized-reference = #factorized-reference =
@ -415,7 +439,7 @@ decoder-settings = ""
[RECASING] [RECASING]
#decoder = $moses-src-dir/moses-cmd/src/moses.1521.srilm #decoder = $moses-bin-dir/moses
### training data ### training data
# raw input needs to be still tokenized, # raw input needs to be still tokenized,
@ -462,6 +486,11 @@ trainer = $moses-script-dir/recaser/train-truecaser.perl
### additional decoder settings ### additional decoder settings
# switches for the Moses decoder # switches for the Moses decoder
# common choices:
# "-threads N" for multi-threading
# "-mbr" for MBR decoding
# "-drop-unknown" for dropping unknown source words
# "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000" for cube pruning
# #
decoder-settings = "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000" decoder-settings = "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000"
@ -484,8 +513,8 @@ wrapping-script = "$moses-script-dir/ems/support/wrap-xml.perl $output-extension
### BLEU ### BLEU
# #
nist-bleu = $moses-script-dir/generic/mteval-v12.pl nist-bleu = $moses-script-dir/generic/mteval-v13a.pl
nist-bleu-c = "$moses-script-dir/generic/mteval-v12.pl -c" nist-bleu-c = "$moses-script-dir/generic/mteval-v13a.pl -c"
#multi-bleu = $moses-script-dir/generic/multi-bleu.perl #multi-bleu = $moses-script-dir/generic/multi-bleu.perl
#ibm-bleu = #ibm-bleu =
@ -516,11 +545,11 @@ report-segmentation = yes
# further precision breakdown by factor # further precision breakdown by factor
#precision-by-coverage-factor = pos #precision-by-coverage-factor = pos
[EVALUATION:newstest2009] [EVALUATION:newstest2011]
### input data ### input data
# #
input-sgm = $wmt10-data/dev/newstest2009-src.$input-extension.sgm input-sgm = $wmt12-data/dev/newstest2011-src.$input-extension.sgm
# raw-input = # raw-input =
# tokenized-input = # tokenized-input =
# factorized-input = # factorized-input =
@ -528,7 +557,7 @@ input-sgm = $wmt10-data/dev/newstest2009-src.$input-extension.sgm
### reference data ### reference data
# #
reference-sgm = $wmt10-data/dev/newstest2009-ref.$output-extension.sgm reference-sgm = $wmt12-data/dev/newstest2011-ref.$output-extension.sgm
# raw-reference = # raw-reference =
# tokenized-reference = # tokenized-reference =
# reference = # reference =

View File

@ -18,25 +18,34 @@ pair-extension = fr-en
# moses # moses
moses-src-dir = /home/pkoehn/moses moses-src-dir = /home/pkoehn/moses
# #
# moses binaries
moses-bin-dir = $moses-src-dir/dist/bin
#
# moses scripts # moses scripts
moses-script-dir = /home/pkoehn/moses/scripts moses-script-dir = $moses-src-dir/scripts
# #
# srilm # srilm
srilm-dir = $moses-src-dir/srilm/bin/i686 srilm-dir = $moses-src-dir/srilm/bin/i686
# #
# irstlm
irstlm-dir = $moses-src-dir/irstlm/bin
#
# randlm
randlm-dir = $moses-src-dir/randlm/bin
#
# data # data
wmt10-data = $working-dir/data wmt12-data = $working-dir/data
### basic tools ### basic tools
# #
# moses decoder # moses decoder
decoder = $moses-src-dir/dist/bin/moses_chart decoder = $moses-bin-dir/moses_chart
# conversion of phrase table into binary on-disk format # conversion of phrase table into binary on-disk format
#ttable-binarizer = $moses-src-dir/dist/bin/processPhraseTable #ttable-binarizer = $moses-bin-dir/processPhraseTable
# conversion of rule table into binary on-disk format # conversion of rule table into binary on-disk format
ttable-binarizer = "$moses-src-dir/dist/bin/CreateOnDiskPt 1 1 5 100 2" ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 5 100 2"
# tokenizers - comment out if all your data is already tokenized # tokenizers - comment out if all your data is already tokenized
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension" input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
@ -95,7 +104,7 @@ max-sentence-length = 80
### raw corpus files (untokenized, but sentence aligned) ### raw corpus files (untokenized, but sentence aligned)
# #
raw-stem = $wmt10-data/training/europarl-v5.$pair-extension raw-stem = $wmt12-data/training/europarl-v7.$pair-extension
### tokenized corpus files (may contain long sentences) ### tokenized corpus files (may contain long sentences)
# #
@ -112,10 +121,10 @@ raw-stem = $wmt10-data/training/europarl-v5.$pair-extension
#lowercased-stem = #lowercased-stem =
[CORPUS:nc] [CORPUS:nc]
raw-stem = $wmt10-data/training/news-commentary10.$pair-extension raw-stem = $wmt12-data/training/news-commentary-v7.$pair-extension
[CORPUS:un] IGNORE [CORPUS:un] IGNORE
raw-stem = $wmt10-data/training/undoc.2000.$pair-extension raw-stem = $wmt12-data/training/undoc.2000.$pair-extension
################################################################# #################################################################
# LANGUAGE MODEL TRAINING # LANGUAGE MODEL TRAINING
@ -123,36 +132,41 @@ raw-stem = $wmt10-data/training/undoc.2000.$pair-extension
[LM] [LM]
### tool to be used for language model training ### tool to be used for language model training
# for instance: ngram-count (SRILM), train-lm-on-disk.perl (Edinburgh) # srilm
#
lm-training = $srilm-dir/ngram-count lm-training = $srilm-dir/ngram-count
settings = "-interpolate -kndiscount -unk" settings = "-interpolate -kndiscount -unk"
# irstlm
#lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/lm"
#settings = ""
# order of the language model
order = 5 order = 5
### tool to be used for training randomized language model from scratch ### tool to be used for training randomized language model from scratch
# (more commonly, a SRILM is trained) # (more commonly, a SRILM is trained)
# #
#rlm-training = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #rlm-training = "$randlm-dir/buildlm -falsepos 8 -values 8"
### script to use for binary table format for irstlm or kenlm ### script to use for binary table format for irstlm or kenlm
# (default: no binarization) # (default: no binarization)
# irstlm # irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm #lm-binarizer = $irstlm-dir/compile-lm
# kenlm, also set type to 8 # kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/dist/bin/build_binary lm-binarizer = $moses-bin-dir/build_binary
#type = 8 type = 8
### script to create quantized language model format (irstlm) ### script to create quantized language model format (irstlm)
# (default: no quantization) # (default: no quantization)
# #
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm #lm-quantizer = $irstlm-dir/quantize-lm
### script to use for converting into randomized table format ### script to use for converting into randomized table format
# (default: no randomization) # (default: no randomization)
# #
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
### each language model to be used has its own section here ### each language model to be used has its own section here
@ -164,7 +178,7 @@ order = 5
### raw corpus (untokenized) ### raw corpus (untokenized)
# #
raw-corpus = $wmt10-data/training/europarl-v5.$output-extension raw-corpus = $wmt12-data/training/europarl-v7.$output-extension
### tokenized corpus files (may contain long sentences) ### tokenized corpus files (may contain long sentences)
# #
@ -176,13 +190,13 @@ raw-corpus = $wmt10-data/training/europarl-v5.$output-extension
#lm = #lm =
[LM:nc] [LM:nc]
raw-corpus = $wmt10-data/training/news-commentary10.$pair-extension.$output-extension raw-corpus = $wmt12-data/training/news-commentary-v7.$pair-extension.$output-extension
[LM:un] IGNORE [LM:un] IGNORE
raw-corpus = $wmt10-data/training/undoc.2000.$pair-extension.$output-extension raw-corpus = $wmt12-data/training/undoc.2000.$pair-extension.$output-extension
[LM:news] IGNORE [LM:news] IGNORE
raw-corpus = $wmt10-data/training/news.$output-extension.shuffled raw-corpus = $wmt12-data/training/news.$output-extension.shuffled
################################################################# #################################################################
@ -202,32 +216,36 @@ script = $moses-script-dir/ems/support/interpolate-lm.perl
### tuning set ### tuning set
# you may use the same set that is used for mert tuning (reference set) # you may use the same set that is used for mert tuning (reference set)
# #
tuning-sgm = $wmt10-data/dev/news-test2008-ref.$output-extension.sgm tuning-sgm = $wmt12-data/dev/newstest2010-ref.$output-extension.sgm
#raw-tuning = #raw-tuning =
#tokenized-tuning = #tokenized-tuning =
#factored-tuning = #factored-tuning =
#lowercased-tuning = #lowercased-tuning =
#split-tuning = #split-tuning =
### group language models for hierarchical interpolation
# (flat interpolation is limited to 10 language models)
#group = "first,second fourth,fifth"
### script to use for binary table format for irstlm or kenlm ### script to use for binary table format for irstlm or kenlm
# (default: no binarization) # (default: no binarization)
# irstlm # irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm #lm-binarizer = $irstlm-dir/compile-lm
# kenlm, also set type to 8 # kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/dist/bin/build_binary lm-binarizer = $moses-bin-dir/build_binary
#type = 8 type = 8
### script to create quantized language model format (irstlm) ### script to create quantized language model format (irstlm)
# (default: no quantization) # (default: no quantization)
# #
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm #lm-quantizer = $irstlm-dir/quantize-lm
### script to use for converting into randomized table format ### script to use for converting into randomized table format
# (default: no randomization) # (default: no randomization)
# #
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
################################################################# #################################################################
# TRANSLATION MODEL TRAINING # TRANSLATION MODEL TRAINING
@ -255,12 +273,18 @@ script = $moses-script-dir/training/train-model.perl
#generation-factors = "word -> pos" #generation-factors = "word -> pos"
#decoding-steps = "t0, g0" #decoding-steps = "t0, g0"
### parallelization of data preparation step
# the two directions of the data preparation can be run in parallel
# comment out if not needed
#
parallel = yes
### pre-computation for giza++ ### pre-computation for giza++
# giza++ has a more efficient data structure that needs to be # giza++ has a more efficient data structure that needs to be
# initialized with snt2cooc. if run in parallel, this may reduce # initialized with snt2cooc. if run in parallel, this may reduce
# memory requirements. set here the number of parts # memory requirements. set here the number of parts
# #
run-giza-in-parts = 5 #run-giza-in-parts = 5
### symmetrization method to obtain word alignments from giza output ### symmetrization method to obtain word alignments from giza output
# (commonly used: grow-diag-final-and) # (commonly used: grow-diag-final-and)
@ -334,7 +358,7 @@ score-settings = "--GoodTuring"
# point to a configuration file that contains # point to a configuration file that contains
# pointers to all relevant model files # pointers to all relevant model files
# #
#config = #config-with-reused-weights =
##################################################### #####################################################
### TUNING: finding good weights for model components ### TUNING: finding good weights for model components
@ -349,18 +373,18 @@ score-settings = "--GoodTuring"
### tuning script to be used ### tuning script to be used
# #
tuning-script = $moses-script-dir/training/mert-moses.pl tuning-script = $moses-script-dir/training/mert-moses.pl
tuning-settings = "-mertdir $moses-src-dir/mert" tuning-settings = "-mertdir $moses-bin-dir"
### specify the corpus used for tuning ### specify the corpus used for tuning
# it should contain 1000s of sentences # it should contain 1000s of sentences
# #
input-sgm = $wmt10-data/dev/news-test2008-src.$input-extension.sgm input-sgm = $wmt12-data/dev/newstest2010-src.$input-extension.sgm
#raw-input = #raw-input =
#tokenized-input = #tokenized-input =
#factorized-input = #factorized-input =
#input = #input =
# #
reference-sgm = $wmt10-data/dev/news-test2008-ref.$output-extension.sgm reference-sgm = $wmt12-data/dev/newstest2010-ref.$output-extension.sgm
#raw-reference = #raw-reference =
#tokenized-reference = #tokenized-reference =
#factorized-reference = #factorized-reference =
@ -395,7 +419,7 @@ decoder-settings = ""
[RECASING] [RECASING]
#decoder = $moses-src-dir/moses-cmd/src/moses.1521.srilm #decoder = $moses-bin-dir/moses
### training data ### training data
# raw input needs to be still tokenized, # raw input needs to be still tokenized,
@ -442,6 +466,11 @@ trainer = $moses-script-dir/recaser/train-truecaser.perl
### additional decoder settings ### additional decoder settings
# switches for the Moses decoder # switches for the Moses decoder
# common choices:
# "-threads N" for multi-threading
# "-mbr" for MBR decoding
# "-drop-unknown" for dropping unknown source words
# "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000" for cube pruning
# #
#decoder-settings = "" #decoder-settings = ""
@ -464,8 +493,8 @@ wrapping-script = "$moses-script-dir/ems/support/wrap-xml.perl $output-extension
### BLEU ### BLEU
# #
nist-bleu = $moses-script-dir/generic/mteval-v12.pl nist-bleu = $moses-script-dir/generic/mteval-v13a.pl
nist-bleu-c = "$moses-script-dir/generic/mteval-v12.pl -c" nist-bleu-c = "$moses-script-dir/generic/mteval-v13a.pl -c"
#multi-bleu = $moses-script-dir/generic/multi-bleu.perl #multi-bleu = $moses-script-dir/generic/multi-bleu.perl
#ibm-bleu = #ibm-bleu =
@ -496,11 +525,11 @@ report-segmentation = yes
# further precision breakdown by factor # further precision breakdown by factor
#precision-by-coverage-factor = pos #precision-by-coverage-factor = pos
[EVALUATION:newstest2009] [EVALUATION:newstest2011]
### input data ### input data
# #
input-sgm = $wmt10-data/dev/newstest2009-src.$input-extension.sgm input-sgm = $wmt12-data/dev/newstest2011-src.$input-extension.sgm
# raw-input = # raw-input =
# tokenized-input = # tokenized-input =
# factorized-input = # factorized-input =
@ -508,7 +537,7 @@ input-sgm = $wmt10-data/dev/newstest2009-src.$input-extension.sgm
### reference data ### reference data
# #
reference-sgm = $wmt10-data/dev/newstest2009-ref.$output-extension.sgm reference-sgm = $wmt12-data/dev/newstest2011-ref.$output-extension.sgm
# raw-reference = # raw-reference =
# tokenized-reference = # tokenized-reference =
# reference = # reference =

View File

@ -18,25 +18,34 @@ pair-extension = fr-en
# moses # moses
moses-src-dir = /home/pkoehn/moses moses-src-dir = /home/pkoehn/moses
# #
# moses binaries
moses-bin-dir = $moses-src-dir/dist/bin
#
# moses scripts # moses scripts
moses-script-dir = /home/pkoehn/moses/scripts moses-script-dir = $moses-src-dir/scripts
# #
# srilm # srilm
srilm-dir = $moses-src-dir/srilm/bin/i686 srilm-dir = $moses-src-dir/srilm/bin/i686
# #
# irstlm
irstlm-dir = $moses-src-dir/irstlm/bin
#
# randlm
randlm-dir = $moses-src-dir/randlm/bin
#
# data # data
wmt10-data = $working-dir/data wmt12-data = $working-dir/data
### basic tools ### basic tools
# #
# moses decoder # moses decoder
decoder = $moses-src-dir/dist/bin/moses_chart decoder = $moses-bin-dir/moses_chart
# conversion of phrase table into binary on-disk format # conversion of phrase table into binary on-disk format
#ttable-binarizer = $moses-src-dir/dist/bin/processPhraseTable #ttable-binarizer = $moses-bin-dir/processPhraseTable
# conversion of rule table into binary on-disk format # conversion of rule table into binary on-disk format
ttable-binarizer = "$moses-src-dir/dist/bin/CreateOnDiskPt 1 1 5 100 2" ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 5 100 2"
# tokenizers - comment out if all your data is already tokenized # tokenizers - comment out if all your data is already tokenized
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension" input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
@ -99,7 +108,7 @@ max-sentence-length = 80
### raw corpus files (untokenized, but sentence aligned) ### raw corpus files (untokenized, but sentence aligned)
# #
raw-stem = $wmt10-data/training/europarl-v5.$pair-extension raw-stem = $wmt12-data/training/europarl-v7.$pair-extension
### tokenized corpus files (may contain long sentences) ### tokenized corpus files (may contain long sentences)
# #
@ -116,10 +125,10 @@ raw-stem = $wmt10-data/training/europarl-v5.$pair-extension
#lowercased-stem = #lowercased-stem =
[CORPUS:nc] [CORPUS:nc]
raw-stem = $wmt10-data/training/news-commentary10.$pair-extension raw-stem = $wmt12-data/training/news-commentary-v7.$pair-extension
[CORPUS:un] IGNORE [CORPUS:un] IGNORE
raw-stem = $wmt10-data/training/undoc.2000.$pair-extension raw-stem = $wmt12-data/training/undoc.2000.$pair-extension
################################################################# #################################################################
# LANGUAGE MODEL TRAINING # LANGUAGE MODEL TRAINING
@ -127,36 +136,41 @@ raw-stem = $wmt10-data/training/undoc.2000.$pair-extension
[LM] [LM]
### tool to be used for language model training ### tool to be used for language model training
# for instance: ngram-count (SRILM), train-lm-on-disk.perl (Edinburgh) # srilm
#
lm-training = $srilm-dir/ngram-count lm-training = $srilm-dir/ngram-count
settings = "-interpolate -kndiscount -unk" settings = "-interpolate -kndiscount -unk"
# irstlm
#lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/lm"
#settings = ""
# order of the language model
order = 5 order = 5
### tool to be used for training randomized language model from scratch ### tool to be used for training randomized language model from scratch
# (more commonly, a SRILM is trained) # (more commonly, a SRILM is trained)
# #
#rlm-training = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #rlm-training = "$randlm-dir/buildlm -falsepos 8 -values 8"
### script to use for binary table format for irstlm or kenlm ### script to use for binary table format for irstlm or kenlm
# (default: no binarization) # (default: no binarization)
# irstlm # irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm #lm-binarizer = $irstlm-dir/compile-lm
# kenlm, also set type to 8 # kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/dist/bin/build_binary lm-binarizer = $moses-bin-dir/build_binary
#type = 8 type = 8
### script to create quantized language model format (irstlm) ### script to create quantized language model format (irstlm)
# (default: no quantization) # (default: no quantization)
# #
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm #lm-quantizer = $irstlm-dir/quantize-lm
### script to use for converting into randomized table format ### script to use for converting into randomized table format
# (default: no randomization) # (default: no randomization)
# #
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
### each language model to be used has its own section here ### each language model to be used has its own section here
@ -168,7 +182,7 @@ order = 5
### raw corpus (untokenized) ### raw corpus (untokenized)
# #
raw-corpus = $wmt10-data/training/europarl-v5.$output-extension raw-corpus = $wmt12-data/training/europarl-v7.$output-extension
### tokenized corpus files (may contain long sentences) ### tokenized corpus files (may contain long sentences)
# #
@ -180,13 +194,13 @@ raw-corpus = $wmt10-data/training/europarl-v5.$output-extension
#lm = #lm =
[LM:nc] [LM:nc]
raw-corpus = $wmt10-data/training/news-commentary10.$pair-extension.$output-extension raw-corpus = $wmt12-data/training/news-commentary-v7.$pair-extension.$output-extension
[LM:un] IGNORE [LM:un] IGNORE
raw-corpus = $wmt10-data/training/undoc.2000.$pair-extension.$output-extension raw-corpus = $wmt12-data/training/undoc.2000.$pair-extension.$output-extension
[LM:news] IGNORE [LM:news] IGNORE
raw-corpus = $wmt10-data/training/news.$output-extension.shuffled raw-corpus = $wmt12-data/training/news.$output-extension.shuffled
################################################################# #################################################################
@ -206,32 +220,36 @@ script = $moses-script-dir/ems/support/interpolate-lm.perl
### tuning set ### tuning set
# you may use the same set that is used for mert tuning (reference set) # you may use the same set that is used for mert tuning (reference set)
# #
tuning-sgm = $wmt10-data/dev/news-test2008-ref.$output-extension.sgm tuning-sgm = $wmt12-data/dev/newstest2010-ref.$output-extension.sgm
#raw-tuning = #raw-tuning =
#tokenized-tuning = #tokenized-tuning =
#factored-tuning = #factored-tuning =
#lowercased-tuning = #lowercased-tuning =
#split-tuning = #split-tuning =
### group language models for hierarchical interpolation
# (flat interpolation is limited to 10 language models)
#group = "first,second fourth,fifth"
### script to use for binary table format for irstlm or kenlm ### script to use for binary table format for irstlm or kenlm
# (default: no binarization) # (default: no binarization)
# irstlm # irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm #lm-binarizer = $irstlm-dir/compile-lm
# kenlm, also set type to 8 # kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/dist/bin/build_binary lm-binarizer = $moses-bin-dir/build_binary
#type = 8 type = 8
### script to create quantized language model format (irstlm) ### script to create quantized language model format (irstlm)
# (default: no quantization) # (default: no quantization)
# #
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm #lm-quantizer = $irstlm-dir/quantize-lm
### script to use for converting into randomized table format ### script to use for converting into randomized table format
# (default: no randomization) # (default: no randomization)
# #
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
################################################################# #################################################################
# TRANSLATION MODEL TRAINING # TRANSLATION MODEL TRAINING
@ -259,12 +277,18 @@ script = $moses-script-dir/training/train-model.perl
#generation-factors = "word -> pos" #generation-factors = "word -> pos"
#decoding-steps = "t0, g0" #decoding-steps = "t0, g0"
### parallelization of data preparation step
# the two directions of the data preparation can be run in parallel
# comment out if not needed
#
parallel = yes
### pre-computation for giza++ ### pre-computation for giza++
# giza++ has a more efficient data structure that needs to be # giza++ has a more efficient data structure that needs to be
# initialized with snt2cooc. if run in parallel, this may reduce # initialized with snt2cooc. if run in parallel, this may reduce
# memory requirements. set here the number of parts # memory requirements. set here the number of parts
# #
run-giza-in-parts = 5 #run-giza-in-parts = 5
### symmetrization method to obtain word alignments from giza output ### symmetrization method to obtain word alignments from giza output
# (commonly used: grow-diag-final-and) # (commonly used: grow-diag-final-and)
@ -338,7 +362,7 @@ score-settings = "--GoodTuring"
# point to a configuration file that contains # point to a configuration file that contains
# pointers to all relevant model files # pointers to all relevant model files
# #
#config = #config-with-reused-weights =
##################################################### #####################################################
### TUNING: finding good weights for model components ### TUNING: finding good weights for model components
@ -353,18 +377,18 @@ score-settings = "--GoodTuring"
### tuning script to be used ### tuning script to be used
# #
tuning-script = $moses-script-dir/training/mert-moses.pl tuning-script = $moses-script-dir/training/mert-moses.pl
tuning-settings = "-mertdir $moses-src-dir/mert" tuning-settings = "-mertdir $moses-bin-dir"
### specify the corpus used for tuning ### specify the corpus used for tuning
# it should contain 1000s of sentences # it should contain 1000s of sentences
# #
input-sgm = $wmt10-data/dev/news-test2008-src.$input-extension.sgm input-sgm = $wmt12-data/dev/newstest2010-src.$input-extension.sgm
#raw-input = #raw-input =
#tokenized-input = #tokenized-input =
#factorized-input = #factorized-input =
#input = #input =
# #
reference-sgm = $wmt10-data/dev/news-test2008-ref.$output-extension.sgm reference-sgm = $wmt12-data/dev/newstest2010-ref.$output-extension.sgm
#raw-reference = #raw-reference =
#tokenized-reference = #tokenized-reference =
#factorized-reference = #factorized-reference =
@ -399,7 +423,7 @@ decoder-settings = ""
[RECASING] [RECASING]
#decoder = $moses-src-dir/moses-cmd/src/moses.1521.srilm #decoder = $moses-bin-dir/moses
### training data ### training data
# raw input needs to be still tokenized, # raw input needs to be still tokenized,
@ -446,6 +470,11 @@ trainer = $moses-script-dir/recaser/train-truecaser.perl
### additional decoder settings ### additional decoder settings
# switches for the Moses decoder # switches for the Moses decoder
# common choices:
# "-threads N" for multi-threading
# "-mbr" for MBR decoding
# "-drop-unknown" for dropping unknown source words
# "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000" for cube pruning
# #
#decoder-settings = "" #decoder-settings = ""
@ -468,8 +497,8 @@ wrapping-script = "$moses-script-dir/ems/support/wrap-xml.perl $output-extension
### BLEU ### BLEU
# #
nist-bleu = $moses-script-dir/generic/mteval-v12.pl nist-bleu = $moses-script-dir/generic/mteval-v13a.pl
nist-bleu-c = "$moses-script-dir/generic/mteval-v12.pl -c" nist-bleu-c = "$moses-script-dir/generic/mteval-v13a.pl -c"
#multi-bleu = $moses-script-dir/generic/multi-bleu.perl #multi-bleu = $moses-script-dir/generic/multi-bleu.perl
#ibm-bleu = #ibm-bleu =
@ -500,11 +529,11 @@ report-segmentation = yes
# further precision breakdown by factor # further precision breakdown by factor
#precision-by-coverage-factor = pos #precision-by-coverage-factor = pos
[EVALUATION:newstest2009] [EVALUATION:newstest2011]
### input data ### input data
# #
input-sgm = $wmt10-data/dev/newstest2009-src.$input-extension.sgm input-sgm = $wmt12-data/dev/newstest2011-src.$input-extension.sgm
# raw-input = # raw-input =
# tokenized-input = # tokenized-input =
# factorized-input = # factorized-input =
@ -512,7 +541,7 @@ input-sgm = $wmt10-data/dev/newstest2009-src.$input-extension.sgm
### reference data ### reference data
# #
reference-sgm = $wmt10-data/dev/newstest2009-ref.$output-extension.sgm reference-sgm = $wmt12-data/dev/newstest2011-ref.$output-extension.sgm
# raw-reference = # raw-reference =
# tokenized-reference = # tokenized-reference =
# reference = # reference =

View File

@ -18,25 +18,34 @@ pair-extension = fr-en
# moses # moses
moses-src-dir = /home/pkoehn/moses moses-src-dir = /home/pkoehn/moses
# #
# moses binaries
moses-bin-dir = $moses-src-dir/dist/bin
#
# moses scripts # moses scripts
moses-script-dir = /home/pkoehn/moses/scripts moses-script-dir = $moses-src-dir/scripts
# #
# srilm # srilm
srilm-dir = $moses-src-dir/srilm/bin/i686 srilm-dir = $moses-src-dir/srilm/bin/i686
# #
# irstlm
irstlm-dir = $moses-src-dir/irstlm/bin
#
# randlm
randlm-dir = $moses-src-dir/randlm/bin
#
# data # data
toy-data = $moses-script-dir/ems/example/data toy-data = $moses-script-dir/ems/example/data
### basic tools ### basic tools
# #
# moses decoder # moses decoder
decoder = $moses-src-dir/dist/bin/moses decoder = $moses-bin-dir/moses
# conversion of phrase table into binary on-disk format # conversion of phrase table into binary on-disk format
ttable-binarizer = $moses-src-dir/dist/bin/processPhraseTable ttable-binarizer = $moses-bin-dir/processPhraseTable
# conversion of rule table into binary on-disk format # conversion of rule table into binary on-disk format
#ttable-binarizer = "$moses-src-dir/dist/bin/CreateOnDiskPt 1 1 5 100 2" #ttable-binarizer = "$moses-bin-dir/CreateOnDiskPt 1 1 5 100 2"
# tokenizers - comment out if all your data is already tokenized # tokenizers - comment out if all your data is already tokenized
input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension" input-tokenizer = "$moses-script-dir/tokenizer/tokenizer.perl -a -l $input-extension"
@ -117,36 +126,41 @@ raw-stem = $toy-data/nc-5k
[LM] [LM]
### tool to be used for language model training ### tool to be used for language model training
# for instance: ngram-count (SRILM), train-lm-on-disk.perl (Edinburgh) # srilm
#
lm-training = $srilm-dir/ngram-count lm-training = $srilm-dir/ngram-count
settings = "-interpolate -kndiscount -unk" settings = "-interpolate -kndiscount -unk"
# irstlm
#lm-training = "$moses-script-dir/generic/trainlm-irst.perl -cores $cores -irst-dir $irstlm-dir -temp-dir $working-dir/lm"
#settings = ""
# order of the language model
order = 5 order = 5
### tool to be used for training randomized language model from scratch ### tool to be used for training randomized language model from scratch
# (more commonly, a SRILM is trained) # (more commonly, a SRILM is trained)
# #
#rlm-training = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #rlm-training = "$randlm-dir/buildlm -falsepos 8 -values 8"
### script to use for binary table format for irstlm or kenlm ### script to use for binary table format for irstlm or kenlm
# (default: no binarization) # (default: no binarization)
# irstlm # irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm #lm-binarizer = $irstlm-dir/compile-lm
# kenlm, also set type to 8 # kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/dist/bin/build_binary lm-binarizer = $moses-bin-dir/build_binary
#type = 8 type = 8
### script to create quantized language model format (irstlm) ### script to create quantized language model format (irstlm)
# (default: no quantization) # (default: no quantization)
# #
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm #lm-quantizer = $irstlm-dir/quantize-lm
### script to use for converting into randomized table format ### script to use for converting into randomized table format
# (default: no randomization) # (default: no randomization)
# #
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
### each language model to be used has its own section here ### each language model to be used has its own section here
@ -193,25 +207,29 @@ raw-corpus = $toy-data/nc-5k.$output-extension
#lowercased-tuning = #lowercased-tuning =
#split-tuning = #split-tuning =
### group language models for hierarchical interpolation
# (flat interpolation is limited to 10 language models)
#group = "first,second fourth,fifth"
### script to use for binary table format for irstlm or kenlm ### script to use for binary table format for irstlm or kenlm
# (default: no binarization) # (default: no binarization)
# irstlm # irstlm
#lm-binarizer = $moses-src-dir/irstlm/bin/compile-lm #lm-binarizer = $irstlm-dir/compile-lm
# kenlm, also set type to 8 # kenlm, also set type to 8
#lm-binarizer = $moses-src-dir/dist/bin/build_binary lm-binarizer = $moses-bin-dir/build_binary
#type = 8 type = 8
### script to create quantized language model format (irstlm) ### script to create quantized language model format (irstlm)
# (default: no quantization) # (default: no quantization)
# #
#lm-quantizer = $moses-src-dir/irstlm/bin/quantize-lm #lm-quantizer = $irstlm-dir/quantize-lm
### script to use for converting into randomized table format ### script to use for converting into randomized table format
# (default: no randomization) # (default: no randomization)
# #
#lm-randomizer = "$moses-src-dir/randlm/bin/buildlm -falsepos 8 -values 8" #lm-randomizer = "$randlm-dir/buildlm -falsepos 8 -values 8"
################################################################# #################################################################
# TRANSLATION MODEL TRAINING # TRANSLATION MODEL TRAINING
@ -239,12 +257,18 @@ script = $moses-script-dir/training/train-model.perl
#generation-factors = "word -> pos" #generation-factors = "word -> pos"
#decoding-steps = "t0, g0" #decoding-steps = "t0, g0"
### parallelization of data preparation step
# the two directions of the data preparation can be run in parallel
# comment out if not needed
#
parallel = yes
### pre-computation for giza++ ### pre-computation for giza++
# giza++ has a more efficient data structure that needs to be # giza++ has a more efficient data structure that needs to be
# initialized with snt2cooc. if run in parallel, this may reduce # initialized with snt2cooc. if run in parallel, this may reduce
# memory requirements. set here the number of parts # memory requirements. set here the number of parts
# #
run-giza-in-parts = 5 #run-giza-in-parts = 5
### symmetrization method to obtain word alignments from giza output ### symmetrization method to obtain word alignments from giza output
# (commonly used: grow-diag-final-and) # (commonly used: grow-diag-final-and)
@ -318,7 +342,7 @@ score-settings = "--GoodTuring"
# point to a configuration file that contains # point to a configuration file that contains
# pointers to all relevant model files # pointers to all relevant model files
# #
#config = #config-with-reused-weights =
##################################################### #####################################################
### TUNING: finding good weights for model components ### TUNING: finding good weights for model components
@ -333,7 +357,7 @@ weight-config = $toy-data/weight.ini
### tuning script to be used ### tuning script to be used
# #
tuning-script = $moses-script-dir/training/mert-moses.pl tuning-script = $moses-script-dir/training/mert-moses.pl
tuning-settings = "-mertdir $moses-src-dir/mert" tuning-settings = "-mertdir $moses-bin-dir"
### specify the corpus used for tuning ### specify the corpus used for tuning
# it should contain 1000s of sentences # it should contain 1000s of sentences
@ -379,7 +403,7 @@ decoder-settings = ""
[RECASING] [RECASING]
#decoder = $moses-src-dir/moses-cmd/src/moses.1521.srilm #decoder = $moses-bin-dir/moses
### training data ### training data
# raw input needs to be still tokenized, # raw input needs to be still tokenized,
@ -422,6 +446,11 @@ trainer = $moses-script-dir/recaser/train-truecaser.perl
### additional decoder settings ### additional decoder settings
# switches for the Moses decoder # switches for the Moses decoder
# common choices:
# "-threads N" for multi-threading
# "-mbr" for MBR decoding
# "-drop-unknown" for dropping unknown source words
# "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000" for cube pruning
# #
decoder-settings = "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000" decoder-settings = "-search-algorithm 1 -cube-pruning-pop-limit 5000 -s 5000"
@ -444,8 +473,8 @@ wrapping-script = "$moses-script-dir/ems/support/wrap-xml.perl $output-extension
### BLEU ### BLEU
# #
nist-bleu = $moses-script-dir/generic/mteval-v12.pl nist-bleu = $moses-script-dir/generic/mteval-v13a.pl
nist-bleu-c = "$moses-script-dir/generic/mteval-v12.pl -c" nist-bleu-c = "$moses-script-dir/generic/mteval-v13a.pl -c"
#multi-bleu = $moses-script-dir/generic/multi-bleu.perl #multi-bleu = $moses-script-dir/generic/multi-bleu.perl
#ibm-bleu = #ibm-bleu =

View File

@ -207,6 +207,7 @@ binarize
rerun-on-change: lm rerun-on-change: lm
default-name: lm/binlm default-name: lm/binlm
template: $lm-binarizer IN OUT template: $lm-binarizer IN OUT
error: set kMaxOrder to at least this value
[INTERPOLATED-LM] single [INTERPOLATED-LM] single
tuning-from-sgm tuning-from-sgm
@ -253,27 +254,27 @@ split-tuning
template: $output-splitter -model IN1.$output-extension < IN > OUT template: $output-splitter -model IN1.$output-extension < IN > OUT
interpolate interpolate
in: script split-tuning LM:lm in: script split-tuning LM:lm
rerun-on-change: srilm-dir rerun-on-change: srilm-dir group
out: lm out: lm
default-name: lm/interpolated-lm default-name: lm/interpolated-lm
randomize randomize
in: lm in: lm
out: rlm out: rlm
pass-unless: lm-randomizer pass-unless: lm-randomizer
default-name: lm/rlm default-name: lm/interpolated-rlm
quantize quantize
in: rlm in: rlm
out: qlm out: qlm
pass-unless: lm-quantizer pass-unless: lm-quantizer
default-name: lm/interpolated-qlm default-name: lm/interpolated-qlm
template: $lm-quantizer IN OUT
binarize binarize
in: qlm in: qlm
out: binlm out: binlm
pass-unless: lm-binarizer pass-unless: lm-binarizer
ignore-unless: script
rerun-on-change: lm rerun-on-change: lm
default-name: lm/interpolated-binlm default-name: lm/interpolated-binlm
template: $lm-binarizer IN OUT error: set kMaxOrder to at least this value
[TRAINING] single [TRAINING] single
consolidate consolidate
@ -372,17 +373,9 @@ build-generation-custom
ignore-unless: AND generation-factors generation-corpus ignore-unless: AND generation-factors generation-corpus
default-name: model/generation-table default-name: model/generation-table
create-config create-config
in: reordering-table phrase-translation-table generation-table LM:binlm in: reordering-table phrase-translation-table generation-table INTERPOLATED-LM:binlm LM:binlm
out: config
ignore-if: use-hiero INTERPOLATED-LM:script
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings
default-name: model/moses.ini
error: Unknown option
create-config-interpolated-lm
in: reordering-table phrase-translation-table generation-table INTERPOLATED-LM:binlm
out: config out: config
ignore-if: use-hiero ignore-if: use-hiero
ignore-unless: INTERPOLATED-LM:script
rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings
default-name: model/moses.ini default-name: model/moses.ini
error: Unknown option error: Unknown option
@ -754,11 +747,12 @@ wer
rerun-on-change: wer rerun-on-change: wer
template: $wer IN IN1 > OUT template: $wer IN IN1 > OUT
meteor meteor
in: wrapped-output reference-sgm in: cleaned-output reference
out: meteor-score out: meteor-score
default-name: evaluation/detokenized.sgm.METEOR default-name: evaluation/meteor
ignore-unless: meteor ignore-unless: meteor
rerun-on-change: meteor rerun-on-change: meteor
template: $meteor IN IN1 $meteor-params > OUT
analysis analysis
in: recased-output reference input in: recased-output reference input
out: analysis out: analysis

View File

@ -934,7 +934,12 @@ sub define_step {
&define_training_create_config($i); &define_training_create_config($i);
} }
elsif ($DO_STEP[$i] eq 'INTERPOLATED-LM:interpolate') { elsif ($DO_STEP[$i] eq 'INTERPOLATED-LM:interpolate') {
&define_training_interpolated_lm_interpolate($i); &define_interpolated_lm_interpolate($i);
}
elsif ($DO_STEP[$i] eq 'INTERPOLATED-LM:binarize' ||
$DO_STEP[$i] eq 'INTERPOLATED-LM:quantize' ||
$DO_STEP[$i] eq 'INTERPOLATED-LM:randomize') {
&define_interpolated_lm_process($i);
} }
elsif ($DO_STEP[$i] eq 'TUNING:factorize-input') { elsif ($DO_STEP[$i] eq 'TUNING:factorize-input') {
&define_tuningevaluation_factorize($i); &define_tuningevaluation_factorize($i);
@ -991,6 +996,9 @@ sub execute_steps {
while(1) { while(1) {
# find steps to be done # find steps to be done
my $repeat_if_passed = 1;
while($repeat_if_passed) {
$repeat_if_passed = 0;
for(my $i=0;$i<=$#DO_STEP;$i++) { for(my $i=0;$i<=$#DO_STEP;$i++) {
next if (defined($DONE{$i})); next if (defined($DONE{$i}));
next if (defined($DO{$i})); next if (defined($DO{$i}));
@ -1000,10 +1008,19 @@ sub execute_steps {
foreach my $prev_step (@{$DEPENDENCY[$i]}) { foreach my $prev_step (@{$DEPENDENCY[$i]}) {
$doable = 0 if !defined($DONE{$prev_step}); $doable = 0 if !defined($DONE{$prev_step});
} }
$DO{$i} = 1 if $doable; next unless $doable;
$DO{$i} = 1;
# immediately label pass steps as done
next unless defined($PASS{$i});
$DONE{$i} = 1;
delete($DO{$i});
$repeat_if_passed = 1;
}
} }
print "number of steps doable or running: ".(scalar keys %DO)."\n"; print "number of steps doable or running: ".(scalar keys %DO)."\n";
foreach my $step (keys %DO) { print "\t".($DO{$step}==2?"running: ":"doable: ").$DO_STEP[$step]."\n"; }
return unless scalar keys %DO; return unless scalar keys %DO;
# execute new step # execute new step
@ -1033,7 +1050,7 @@ sub execute_steps {
elsif ($CLUSTER || $active < $MAX_ACTIVE) { elsif ($CLUSTER || $active < $MAX_ACTIVE) {
$active++; $active++;
$DO{$i}++; $DO{$i}++;
print "sh ($active)\n"; print "sh ($active active)\n";
sleep(5); sleep(5);
if (!fork) { if (!fork) {
`sh $step >$step.STDOUT 2> $step.STDERR`; `sh $step >$step.STDOUT 2> $step.STDERR`;
@ -1275,7 +1292,8 @@ sub check_if_crashed {
foreach my $pattern (@{$ERROR{&defined_step_id($i)}}, foreach my $pattern (@{$ERROR{&defined_step_id($i)}},
'error','killed','core dumped','can\'t read', 'error','killed','core dumped','can\'t read',
'no such file or directory','unknown option', 'no such file or directory','unknown option',
'died at','exit code','permission denied') { 'died at','exit code','permission denied',
"Can't locate") {
if (/$pattern/i) { if (/$pattern/i) {
my $not_error = 0; my $not_error = 0;
if (defined($NOT_ERROR{&defined_step_id($i)})) { if (defined($NOT_ERROR{&defined_step_id($i)})) {
@ -1769,11 +1787,11 @@ sub define_training_create_config {
# find out which language model files have been built # find out which language model files have been built
my @LM_SETS = &get_sets("LM"); my @LM_SETS = &get_sets("LM");
my %INTERPOLATED_AWAY;
my %OUTPUT_FACTORS; my %OUTPUT_FACTORS;
%OUTPUT_FACTORS = &get_factor_id("output") if &backoff_and_get("TRAINING:output-factors"); %OUTPUT_FACTORS = &get_factor_id("output") if &backoff_and_get("TRAINING:output-factors");
my $interpolated = &get("INTERPOLATED-LM:script"); # flag if (&get("INTERPOLATED-LM:script")) {
if ($interpolated) {
my $type = 0; my $type = 0;
# binarizing the lm? # binarizing the lm?
$type = 1 if (&get("INTERPOLATED-LM:binlm") || $type = 1 if (&get("INTERPOLATED-LM:binlm") ||
@ -1783,23 +1801,32 @@ sub define_training_create_config {
&backoff_and_get("INTERPOLATED-LM:lm-randomizer")); &backoff_and_get("INTERPOLATED-LM:lm-randomizer"));
# manually set type # manually set type
$type = &get("INTERPOLATED-LM:type") if (&get("INTERPOLATED-LM:type")); $type = &get("INTERPOLATED-LM:type") if &get("INTERPOLATED-LM:type");
# order and factor inherited from individual LMs # go through each interpolated language model
my $set = shift @LM_SETS; my ($icount,$ILM_SETS) = &get_interpolated_lm_sets();
my $order = &check_backoff_and_get("LM:$set:order"); my $FACTOR = &backoff_and_get_array("TRAINING:output-factors");
my $factor = 0; foreach my $factor (keys %{$ILM_SETS}) {
if (&backoff_and_get("TRAINING:output-factors") && foreach my $order (keys %{$$ILM_SETS{$factor}}) {
&backoff_and_get("LM:$set:factors")) { next unless scalar(@{$$ILM_SETS{$factor}{$order}}) > 1;
$factor = $OUTPUT_FACTORS{&backoff_and_get("LM:$set:factors")}; my $suffix = "";
$suffix = ".$$FACTOR[$factor]" if $icount > 1 && defined($FACTOR);
$suffix .= ".order$order" if $icount > 1;
$cmd .= "-lm $factor:$order:$LM[0]$suffix:$type ";
foreach my $id_set (@{$$ILM_SETS{$factor}{$order}}) {
my ($id,$set) = split(/ /,$id_set,2);
$INTERPOLATED_AWAY{$set} = 1;
} }
$cmd .= "-lm $factor:$order:$LM[0]:$type ";
} }
else { }
}
shift @LM; # remove interpolated lm
die("ERROR: number of defined LM sets (".(scalar @LM_SETS).":".join(",",@LM_SETS).") and LM files (".(scalar @LM).":".join(",",@LM).") does not match") die("ERROR: number of defined LM sets (".(scalar @LM_SETS).":".join(",",@LM_SETS).") and LM files (".(scalar @LM).":".join(",",@LM).") does not match")
unless scalar @LM == scalar @LM_SETS; unless scalar @LM == scalar @LM_SETS;
foreach my $lm (@LM) { foreach my $lm (@LM) {
my $set = shift @LM_SETS; my $set = shift @LM_SETS;
next if defined($INTERPOLATED_AWAY{$set});
my $order = &check_backoff_and_get("LM:$set:order"); my $order = &check_backoff_and_get("LM:$set:order");
my $lm_file = "$lm"; my $lm_file = "$lm";
my $type = 0; # default: SRILM my $type = 0; # default: SRILM
@ -1825,53 +1852,142 @@ sub define_training_create_config {
$cmd .= "-lm $factor:$order:$lm_file:$type "; $cmd .= "-lm $factor:$order:$lm_file:$type ";
} }
}
&create_step($step_id,$cmd); &create_step($step_id,$cmd);
} }
sub define_training_interpolated_lm_interpolate { sub define_interpolated_lm_interpolate {
my ($step_id) = @_; my ($step_id) = @_;
my ($interpolated_lm, my ($interpolated_lm,
$interpolation_script, $tuning, @LM) $interpolation_script, $tuning, @LM) = &get_output_and_input($step_id);
= &get_output_and_input($step_id);
my $srilm_dir = &check_backoff_and_get("INTERPOLATED-LM:srilm-dir"); my $srilm_dir = &check_backoff_and_get("INTERPOLATED-LM:srilm-dir");
my $group = &get("INTERPOLATED-LM:group");
my $cmd = "";
# go through language models by factor and order
my ($icount,$ILM_SETS) = &get_interpolated_lm_sets();
foreach my $factor (keys %{$ILM_SETS}) {
foreach my $order (keys %{$$ILM_SETS{$factor}}) {
next unless scalar(@{$$ILM_SETS{$factor}{$order}}) > 1;
# get list of language model files
my $lm_list = ""; my $lm_list = "";
foreach (@LM) { foreach my $id_set (@{$$ILM_SETS{$factor}{$order}}) {
$lm_list .= $_.","; my ($id,$set) = split(/ /,$id_set,2);
$lm_list .= $LM[$id].",";
} }
chop($lm_list); chop($lm_list);
# sanity checks on order and factors # if grouping, identify position in list
my @LM_SETS = &get_sets("LM"); my $numbered_string = "";
my %OUTPUT_FACTORS; if (defined($group)) {
%OUTPUT_FACTORS = &get_factor_id("output") my %POSITION;
if &backoff_and_get("TRAINING:output-factors"); foreach my $id_set (@{$$ILM_SETS{$factor}{$order}}) {
my ($factor,$order); my ($id,$set) = split(/ /,$id_set,2);
foreach my $set (@LM_SETS) { $POSITION{$set} = scalar keys %POSITION;
my $set_order = &check_backoff_and_get("LM:$set:order"); }
if (defined($order) && $order != $set_order) { my $group_string = $group;
die("ERROR: language models have mismatching order - no interpolation possible!"); $group_string =~ s/\s+/ /g;
$group_string =~ s/ *, */,/g;
$group_string =~ s/^ //;
$group_string =~ s/ $//;
$group_string .= " ";
while($group_string =~ /^([^ ,]+)([ ,]+)(.*)$/) {
die("ERROR: unknown set $1 in INTERPOLATED-LM:group definition")
if ! defined($POSITION{$1});
$numbered_string .= $POSITION{$1}.$2;
$group_string = $3;
}
chop($numbered_string);
} }
$order = $set_order;
if (&backoff_and_get("TRAINING:output-factors") && my $FACTOR = &backoff_and_get_array("TRAINING:output-factors");
&backoff_and_get("LM:$set:factors")) { my $name = $interpolated_lm;
my $set_factor = $OUTPUT_FACTORS{&backoff_and_get("LM:$set:factors")}; if ($icount > 1) {
if (defined($factor) && $factor != $set_factor) { $name .= ".$$FACTOR[$factor]" if defined($FACTOR);
die("ERROR: language models have mismatching factors - no interpolation possible!"); $name .= ".order$order";
} }
$factor = $set_factor; $cmd .= "$interpolation_script --tuning $tuning --name $name --srilm $srilm_dir --lm $lm_list";
$cmd .= " --group \"$numbered_string\"" if defined($group);
$cmd .= "\n";
} }
} }
my $cmd = "$interpolation_script --tuning $tuning --name $interpolated_lm --srilm $srilm_dir --lm $lm_list"; die("ERROR: Nothing to interpolate, remove interpolation step!") if $cmd eq "";
&create_step($step_id,$cmd);
}
# Define the step that post-processes (binarizes, quantizes or randomizes)
# the interpolated language models.
#   $step_id - index of the step in @DO_STEP
# The tool is looked up under INTERPOLATED-LM:lm-<stepname>r, so the step
# "binarize" uses "lm-binarizer", "quantize" uses "lm-quantizer", etc.
# One command line is generated for every (factor, order) group that
# contains more than one language model.
sub define_interpolated_lm_process {
    my ($step_id) = @_;

    my ($processed_lm, $interpolated_lm) = get_output_and_input($step_id);
    my ($module, $set, $stepname) = deconstruct_name($DO_STEP[$step_id]);
    my $tool = check_backoff_and_get("INTERPOLATED-LM:lm-${stepname}r");
    my $FACTOR = backoff_and_get_array("TRAINING:output-factors");

    # language models grouped by factor and order
    my ($icount, $ILM_SETS) = get_interpolated_lm_sets();

    my @commands;
    foreach my $factor (keys %{$ILM_SETS}) {
        my $by_order = $ILM_SETS->{$factor};
        foreach my $order (keys %{$by_order}) {
            # a group with a single language model is not interpolated
            next if scalar(@{ $by_order->{$order} }) <= 1;

            # file names only carry a suffix if there are several groups
            my $suffix = "";
            if ($icount > 1) {
                $suffix .= "." . $FACTOR->[$factor] if defined($FACTOR);
                $suffix .= ".order$order";
            }
            push @commands, "$tool $interpolated_lm$suffix $processed_lm$suffix\n";
        }
    }

    create_step($step_id, join("", @commands));
}
# Return the list of file names of the processed language models, one per
# (factor, order) group reported by get_interpolated_lm_sets().
#   $processed_lm - base file name of the processed language model
# Naming:
#   - group with more than one language model (interpolated): the base name,
#     extended by ".<factor>" (if output factors are defined) and
#     ".order<N>" only when there is more than one interpolated group
#   - group with a single language model: the base name, always extended by
#     ".<factor>" (if output factors are defined) and ".order<N>"
sub get_interpolated_lm_processed_names {
    my ($processed_lm) = @_;
    my @ILM_NAME;
    my ($icount,$ILM_SETS) = &get_interpolated_lm_sets();
    my $FACTOR = &backoff_and_get_array("TRAINING:output-factors");
    foreach my $factor (keys %{$ILM_SETS}) {
        foreach my $order (keys %{$$ILM_SETS{$factor}}) {
            # factor part of the name, empty if the setup is not factored
            my $factor_suffix = defined($FACTOR) ? ".$$FACTOR[$factor]" : "";
            if (scalar(@{$$ILM_SETS{$factor}{$order}}) > 1) {
                my $suffix = "";
                $suffix = $factor_suffix if $icount > 1;
                $suffix .= ".order$order" if $icount > 1;
                push @ILM_NAME,"$processed_lm$suffix";
            }
            else {
                # the condition used to be inverted here (factor name added
                # only when no factors were defined) and a stray "." produced
                # names such as "lm..order3"
                push @ILM_NAME,$processed_lm.$factor_suffix.".order$order";
            }
        }
    }
    return @ILM_NAME;
}
# Group the configured LM sets by output factor and n-gram order.
# Returns ($icount, \%ILM_SETS):
#   $icount   - number of (factor, order) groups holding at least two
#               language models
#   %ILM_SETS - $ILM_SETS{$factor}{$order} is a list of strings
#               "<position> <set>", where <position> is the index of the
#               set in the list returned by get_sets("LM")
sub get_interpolated_lm_sets {
    my %ILM_SETS;
    my $icount = 0;

    my @LM_SETS = get_sets("LM");
    my %OUTPUT_FACTORS;
    if (backoff_and_get("TRAINING:output-factors")) {
        %OUTPUT_FACTORS = get_factor_id("output");
    }

    for my $position (0 .. $#LM_SETS) {
        my $set = $LM_SETS[$position];
        my $order = check_backoff_and_get("LM:$set:order");

        # factor 0 unless both the training and this LM specify factors
        my $factor = 0;
        if (backoff_and_get("TRAINING:output-factors") &&
            backoff_and_get("LM:$set:factors")) {
            $factor = $OUTPUT_FACTORS{ backoff_and_get("LM:$set:factors") };
        }

        my $members = $ILM_SETS{$factor}{$order} ||= [];
        push @{$members}, $position." ".$set;

        # count the group once, at the moment it gets its second member
        $icount++ if scalar(@{$members}) == 2;
    }

    return ($icount, \%ILM_SETS);
}
sub get_training_setting { sub get_training_setting {
my ($step) = @_; my ($step) = @_;
my $dir = &check_and_get("GENERAL:working-dir"); my $dir = &check_and_get("GENERAL:working-dir");
@ -1888,6 +2004,7 @@ sub get_training_setting {
my $source_syntax = &get("GENERAL:input-parser"); my $source_syntax = &get("GENERAL:input-parser");
my $target_syntax = &get("GENERAL:output-parser"); my $target_syntax = &get("GENERAL:output-parser");
my $score_settings = &get("TRAINING:score-settings"); my $score_settings = &get("TRAINING:score-settings");
my $parallel = &get("TRAINING:parallel");
my $xml = $source_syntax || $target_syntax; my $xml = $source_syntax || $target_syntax;
@ -1909,6 +2026,7 @@ sub get_training_setting {
$cmd .= "-source-syntax " if $source_syntax; $cmd .= "-source-syntax " if $source_syntax;
$cmd .= "-glue-grammar " if $hierarchical; $cmd .= "-glue-grammar " if $hierarchical;
$cmd .= "-score-options '".$score_settings."' " if $score_settings; $cmd .= "-score-options '".$score_settings."' " if $score_settings;
$cmd .= "-parallel " if $parallel;
# factored training # factored training
if (&backoff_and_get("TRAINING:input-factors")) { if (&backoff_and_get("TRAINING:input-factors")) {
@ -2267,6 +2385,7 @@ sub get_output_and_input {
my $output = &get_default_file(&deconstruct_name($step)); my $output = &get_default_file(&deconstruct_name($step));
my @INPUT; my @INPUT;
if (defined($USES_INPUT{$step_id})) {
for(my $i=0; $i<scalar @{$USES_INPUT{$step_id}}; $i++) { for(my $i=0; $i<scalar @{$USES_INPUT{$step_id}}; $i++) {
# get name of input file needed # get name of input file needed
my $in_file = $USES_INPUT{$step_id}[$i]; my $in_file = $USES_INPUT{$step_id}[$i];
@ -2298,6 +2417,7 @@ sub get_output_and_input {
push @INPUT,&get_specified_or_default_file(&deconstruct_name($in_file), push @INPUT,&get_specified_or_default_file(&deconstruct_name($in_file),
&deconstruct_name($prev_step)); &deconstruct_name($prev_step));
} }
}
return ($output,@INPUT); return ($output,@INPUT);
} }
@ -2397,6 +2517,9 @@ sub define_template {
} }
# input is defined as IN or IN0, IN1, IN2 # input is defined as IN or IN0, IN1, IN2
else { else {
if ($cmd =~ /([^ANS])IN/ && scalar(@INPUT) == 0) {
die("ERROR: Step $step requires input from prior steps, but none defined.");
}
$cmd =~ s/([^ANS])IN(\d+)/$1$INPUT[$2]/g; # a bit trickier to $cmd =~ s/([^ANS])IN(\d+)/$1$INPUT[$2]/g; # a bit trickier to
$cmd =~ s/([^ANS])IN/$1$INPUT[0]/g; # avoid matching TRAINING, RECASING $cmd =~ s/([^ANS])IN/$1$INPUT[0]/g; # avoid matching TRAINING, RECASING
$cmd =~ s/^IN(\d+)/$INPUT[$2]/g; $cmd =~ s/^IN(\d+)/$INPUT[$2]/g;

View File

@ -12,13 +12,14 @@ binmode(STDERR, ":utf8");
my $SRILM = "/home/pkoehn/moses/srilm/bin/i686-m64"; my $SRILM = "/home/pkoehn/moses/srilm/bin/i686-m64";
my $TEMPDIR = "/tmp"; my $TEMPDIR = "/tmp";
my ($TUNING,$LM,$NAME); my ($TUNING,$LM,$NAME,$GROUP);
die("interpolate-lm.perl --tuning set --name out-lm --lm lm1,lm2,lm3 [--srilm srtilm-dir --tempdir tempdir]") die("interpolate-lm.perl --tuning set --name out-lm --lm lm0,lm1,lm2,lm3 [--srilm srilm-dir --tempdir tempdir --group \"0,1 2,3\"]")
unless &GetOptions('tuning=s' => => \$TUNING, unless &GetOptions('tuning=s' => => \$TUNING,
'name=s' => \$NAME, 'name=s' => \$NAME,
'srilm=s' => \$SRILM, 'srilm=s' => \$SRILM,
'tempdir=s' => \$TEMPDIR, 'tempdir=s' => \$TEMPDIR,
'group=s' => \$GROUP,
'lm=s' => \$LM); 'lm=s' => \$LM);
# check and set default to unset parameters # check and set default to unset parameters
@ -52,6 +53,66 @@ foreach my $lm (@LM) {
} }
print STDERR "language models have order $order.\n"; print STDERR "language models have order $order.\n";
# With more than 10 language models and no explicit --group, build groups
# of consecutive language model ids, e.g. "0,1,2,3,4,5 6,7,8,9,10"
# (ids within a group separated by commas, groups separated by spaces).
if (!defined($GROUP) && scalar(@LM) > 10) {
  print STDERR "more than 10, automatically grouping language models.\n";
  my $lm_count = scalar(@LM);
  my $num_groups = int($lm_count/10 + 0.99);
  my $size_groups = int($lm_count/$num_groups + 0.99);
  my @group_specs;
  foreach my $group_i (0 .. $num_groups-1) {
    my $first = $group_i * $size_groups;
    my @members = grep { $_ < $lm_count } ($first .. $first+$size_groups-1);
    push @group_specs, join(",", @members);
  }
  $GROUP = join(" ", @group_specs);
  print STDERR "groups: $GROUP\n";
}

# no grouping: interpolate all language models in a single pass
unless (defined($GROUP)) {
  interpolate($NAME, @LM);
  exit;
}

# first interpolate each group into its own sub language model ...
my %ALREADY;
my $g = 0;
my @SUB_NAME;
foreach my $subgroup (split(/ /,$GROUP)) {
  my @member_ids = split(/,/,$subgroup);
  foreach my $lm_i (@member_ids) {
    die("ERROR: LM id $lm_i in group definition out of range") if $lm_i >= scalar(@LM);
    $ALREADY{$lm_i} = 1;
  }
  my @SUB_LM = @LM[@member_ids];
  #if (scalar @SUB_NAME == 0 && scalar keys %ALREADY == scalar @LM) {
  #  print STDERR "WARNING: grouped all language models into one, perform normal interpolation\n";
  #  &interpolate($NAME,@LM);
  #  exit;
  #}

  # sub language models are named <name>.group-a, <name>.group-b, ...
  my $name = $NAME.".group-".chr(ord("a")+$g);
  $g++;
  push @SUB_NAME, $name;
  print STDERR "\n=== BUILDING SUB LM $name from\n\t".join("\n\t",@SUB_LM)."\n===\n\n";
  interpolate($name, @SUB_LM);
}

# ... then interpolate the sub language models together with all language
# models that were not assigned to any group
push @SUB_NAME, map { $LM[$_] } grep { !defined($ALREADY{$_}) } (0 .. $#LM);
print STDERR "\n=== BUILDING FINAL LM ===\n\n";
interpolate($NAME, @SUB_NAME);
# main interpolation function
sub interpolate {
my ($name,@LM) = @_;
die("cannot interpolate more than 10 language models at once.")
if scalar(@LM) > 10;
my $tmp = tempdir(DIR=>$TEMPDIR); my $tmp = tempdir(DIR=>$TEMPDIR);
# compute perplexity # compute perplexity
@ -76,10 +137,10 @@ my $mix = $mixout;
$mix =~ /best lambda \(([\d\. ]+)\)/ || die("ERROR: computing lambdas failed: $mix"); $mix =~ /best lambda \(([\d\. ]+)\)/ || die("ERROR: computing lambdas failed: $mix");
my @LAMBDA = split(/ /,$1); my @LAMBDA = split(/ /,$1);
# create new language models # create new language model
print STDERR "creating new language model...\n"; print STDERR "creating new language model...\n";
$i = 0; $i = 0;
$cmd = "$SRILM/ngram -unk -order $order -write-lm $NAME"; $cmd = "$SRILM/ngram -unk -order $order -write-lm $name";
foreach my $lm (@LM) { foreach my $lm (@LM) {
$cmd .= " -lm " if $i==0; $cmd .= " -lm " if $i==0;
$cmd .= " -mix-lm " if $i==1; $cmd .= " -mix-lm " if $i==1;
@ -94,7 +155,7 @@ safesystem($cmd) or die "Failed.";
rmtree($tmp); # remove the temp dir rmtree($tmp); # remove the temp dir
print STDERR "done.\n"; print STDERR "done.\n";
}
sub safesystem { sub safesystem {
print STDERR "Executing: @_\n"; print STDERR "Executing: @_\n";

View File

@ -13,6 +13,7 @@ $TYPE{"nist-bleu-c"} = "BLEU-c";
$TYPE{"multi-bleu-c"}= "BLEU-c"; $TYPE{"multi-bleu-c"}= "BLEU-c";
$TYPE{"ibm-bleu"} = "IBM"; $TYPE{"ibm-bleu"} = "IBM";
$TYPE{"ibm-bleu-c"} = "IBM-c"; $TYPE{"ibm-bleu-c"} = "IBM-c";
$TYPE{"meteor"} = "METEOR";
my %SCORE; my %SCORE;
my %AVERAGE; my %AVERAGE;
@ -56,6 +57,9 @@ sub process {
elsif ($type eq 'multi-bleu' || $type eq 'multi-bleu-c') { elsif ($type eq 'multi-bleu' || $type eq 'multi-bleu-c') {
$SCORE{$set} .= &extract_multi_bleu($file,$type)." "; $SCORE{$set} .= &extract_multi_bleu($file,$type)." ";
} }
elsif ($type eq 'meteor') {
$SCORE{$set} .= &extract_meteor($file,$type)." ";
}
} }
sub extract_nist_bleu { sub extract_nist_bleu {
@ -110,3 +114,18 @@ sub extract_multi_bleu {
return $output.$TYPE{$type}; return $output.$TYPE{$type};
} }
# Extract the METEOR score (and the precision, if reported) from a METEOR
# output file and format it for the report.
#   $file - path of the METEOR output file
#   $type - key into %TYPE providing the label appended to the score
# Adds the score (scaled by 100) to $AVERAGE{"meteor"} and returns a string
# such as "25.00 (0.500) METEOR".
sub extract_meteor {
    my ($file,$type) = @_;
    my ($meteor, $precision);

    # read the file directly instead of through `cat $file`, so that paths
    # containing spaces or shell metacharacters are handled correctly
    if (open(my $fh, '<', $file)) {
        while (my $line = <$fh>) {
            $meteor = $1*100 if $line =~ /Final score:\s*(\S+)/;
            $precision = $1 if $line =~ /Precision:\s*(\S+)/;
        }
        close($fh);
    }
    else {
        # best effort, as before: an unreadable file is reported as 0.00
        warn "WARNING: cannot read METEOR output $file: $!\n";
    }

    # no "Final score" line found: report 0 instead of formatting undef
    $meteor = 0 unless defined($meteor);

    my $output = sprintf("%.02f ",$meteor);
    $output .= sprintf("(%.03f) ",$precision) if $precision;
    $AVERAGE{"meteor"} += $meteor;
    return $output.$TYPE{$type};
}

View File

@ -16,7 +16,7 @@ while(<WEIGHT>) {
if (/^\[weight\-(\S+)\]/) { if (/^\[weight\-(\S+)\]/) {
$current_weight = $1; $current_weight = $1;
} }
elsif ($current_weight && /^([\-\d\.]+)([Ee][+-]?[\d]+)?$/) { elsif ($current_weight && /^(([\-\d\.]+)([Ee][+-]?[\d]+)?)$/) {
push @{$WEIGHT{$current_weight}},$1; push @{$WEIGHT{$current_weight}},$1;
} }
elsif (/^\[/) { elsif (/^\[/) {

View File

@ -282,9 +282,8 @@ function output_score($id,$info) {
$each_score = explode(" ; ",$score); $each_score = explode(" ; ",$score);
for($i=0;$i<count($each_score);$i++) { for($i=0;$i<count($each_score);$i++) {
if (preg_match('/([\d\(\)\.\s]+) (BLEU[\-c]*)/',$each_score[$i],$match) || if (preg_match('/([\d\(\)\.\s]+) (\S*)/',$each_score[$i],$match)) {
preg_match('/([\d\(\)\.\s]+) (IBM[\-c]*)/',$each_score[$i],$match)) { //if ($i>0) { print "&nbsp;"; }
if ($i>0) { print "<BR>"; }
$opened_a_tag = 0; $opened_a_tag = 0;
if ($set != "avg") { if ($set != "avg") {
if (file_exists("$dir/evaluation/$set.cleaned.$id")) { if (file_exists("$dir/evaluation/$set.cleaned.$id")) {
@ -297,7 +296,7 @@ function output_score($id,$info) {
} }
} }
if ($set == "avg" && count($each_score)>1) { print $match[2].": "; } if ($set == "avg" && count($each_score)>1) { print $match[2].": "; }
print $match[1]; print "<div title=". $match[2] ." class=". $match[2] .">".$match[1]."</div>";
if ($opened_a_tag) { print "</a>"; } if ($opened_a_tag) { print "</a>"; }
} }
else { else {

View File

@ -553,6 +553,7 @@ sub bleu_score {
my $score = 0; my $score = 0;
my $iscore = 0; my $iscore = 0;
my $len_score = min (0, 1-$shortest_ref_length/$tst_ngrams->[1]); my $len_score = min (0, 1-$shortest_ref_length/$tst_ngrams->[1]);
print "length ratio: ".($tst_ngrams->[1]/$shortest_ref_length)." ($tst_ngrams->[1]/$shortest_ref_length), penalty (log): $len_score\n";
for (my $j=1; $j<=$max_Ngram; $j++) { for (my $j=1; $j<=$max_Ngram; $j++) {
if ($matching_ngrams->[$j] == 0) { if ($matching_ngrams->[$j] == 0) {

1168
scripts/generic/mteval-v13a.pl Executable file

File diff suppressed because it is too large Load Diff

View File

@ -3,9 +3,15 @@
# $Id$ # $Id$
use strict; use strict;
my $lowercase = 0;
if ($ARGV[0] eq "-lc") {
$lowercase = 1;
shift;
}
my $stem = $ARGV[0]; my $stem = $ARGV[0];
if (!defined $stem) { if (!defined $stem) {
print STDERR "usage: multi-bleu.pl reference < hypothesis\n"; print STDERR "usage: multi-bleu.pl [-lc] reference < hypothesis\n";
print STDERR "Reads the references from reference or reference0, reference1, ...\n"; print STDERR "Reads the references from reference or reference0, reference1, ...\n";
exit(1); exit(1);
} }
@ -35,12 +41,14 @@ my(@CORRECT,@TOTAL,$length_translation,$length_reference);
my $s=0; my $s=0;
while(<STDIN>) { while(<STDIN>) {
chop; chop;
$_ = lc if $lowercase;
my @WORD = split; my @WORD = split;
my %REF_NGRAM = (); my %REF_NGRAM = ();
my $length_translation_this_sentence = scalar(@WORD); my $length_translation_this_sentence = scalar(@WORD);
my ($closest_diff,$closest_length) = (9999,9999); my ($closest_diff,$closest_length) = (9999,9999);
foreach my $reference (@{$REF[$s]}) { foreach my $reference (@{$REF[$s]}) {
# print "$s $_ <=> $reference\n"; # print "$s $_ <=> $reference\n";
$reference = lc($reference) if $lowercase;
my @WORD = split(/ /,$reference); my @WORD = split(/ /,$reference);
my $length = scalar(@WORD); my $length = scalar(@WORD);
my $diff = abs($length_translation_this_sentence-$length); my $diff = abs($length_translation_this_sentence-$length);

View File

@ -17,34 +17,42 @@ use Getopt::Long;
my $order; my $order;
my $corpusPath; my $corpusPath;
my $lmPath; my $lmPath;
my $cores; my $cores = 2;
my $irstPath; my $irstPath;
my $tempPath = "tmp";
GetOptions("order=s" => \$order, GetOptions("order=s" => \$order,
"text=s" => \$corpusPath, "text=s" => \$corpusPath,
"lm=s" => \$lmPath, "lm=s" => \$lmPath,
"cores=s" => \$cores, "cores=s" => \$cores,
"irst-dir=s" => \$irstPath, "irst-dir=s" => \$irstPath,
"temp-dir=s" => \$tempPath
) or exit 1; ) or exit 1;
die("ERROR: please set order") unless defined($order);
die("ERROR: please set text") unless defined($corpusPath);
die("ERROR: please set lm") unless defined($lmPath);
die("ERROR: please set irst-dir") unless defined($irstPath);
my $ext = ($corpusPath =~ m/([^.]+)$/)[0]; my $ext = ($corpusPath =~ m/([^.]+)$/)[0];
print "extension is $ext\n"; print "extension is $ext\n";
mkdir 'temp'; $tempPath .= "/irstlm-build-tmp.$$";
`mkdir -p $tempPath`;
my $cmd; my $cmd;
if ($ext eq "gz") if ($ext eq "gz")
{ {
$cmd = "zcat $corpusPath | $irstPath/bin/add-start-end.sh | gzip -c > temp/monolingual.setagged.gz"; $cmd = "zcat $corpusPath | $irstPath/add-start-end.sh | gzip -c > $tempPath/monolingual.setagged.gz";
} }
else else
{ {
$cmd = "cat $corpusPath | $irstPath/bin/add-start-end.sh | gzip -c > temp/monolingual.setagged.gz"; $cmd = "cat $corpusPath | $irstPath/add-start-end.sh | gzip -c > $tempPath/monolingual.setagged.gz";
} }
print STDERR "EXECUTING $cmd\n"; print STDERR "EXECUTING $cmd\n";
`$cmd`; `$cmd`;
$cmd = "IRSTLM=$irstPath $irstPath/bin/build-lm.sh -t stat4 -i \"gunzip -c temp/monolingual.setagged.gz\" -n $order -p -o temp/iarpa.gz -k $cores"; $cmd = "IRSTLM=$irstPath/.. $irstPath/build-lm.sh -t $tempPath/stat4 -i \"gunzip -c $tempPath/monolingual.setagged.gz\" -n $order -p -o $tempPath/iarpa.gz -k $cores";
print STDERR "EXECUTING $cmd\n"; print STDERR "EXECUTING $cmd\n";
`$cmd`; `$cmd`;
@ -53,17 +61,17 @@ print "extension is $ext\n";
if ($ext eq "gz") if ($ext eq "gz")
{ {
$cmd = "$irstPath/bin/compile-lm temp/iarpa.gz --text yes /dev/stdout | gzip -c > $lmPath"; $cmd = "$irstPath/compile-lm $tempPath/iarpa.gz --text yes /dev/stdout | gzip -c > $lmPath";
} }
else else
{ {
$cmd = "$irstPath/bin/compile-lm temp/iarpa.gz --text yes $lmPath"; $cmd = "$irstPath/compile-lm $tempPath/iarpa.gz --text yes $lmPath";
} }
print STDERR "EXECUTING $cmd\n"; print STDERR "EXECUTING $cmd\n";
`$cmd`; `$cmd`;
$cmd = "rm -rf temp stat4"; $cmd = "rm -rf $tempPath";
print STDERR "EXECUTING $cmd\n"; print STDERR "EXECUTING $cmd\n";
`$cmd`; `$cmd`;

View File

@ -425,7 +425,7 @@ Model* Model::createModel(ModelScore* modelscore, const string& config, const st
void Model::createSmoothing(double w) void Model::createSmoothing(double w)
{ {
scorer->createSmoothing(modelscore->get_scores_fe_prev(), w, smoothing_prev); scorer->createSmoothing(modelscore->get_scores_fe_prev(), w, smoothing_prev);
scorer->createSmoothing(modelscore->get_scores_fe_prev(), w, smoothing_next); scorer->createSmoothing(modelscore->get_scores_fe_next(), w, smoothing_next);
} }
void Model::createConstSmoothing(double w) void Model::createConstSmoothing(double w)

View File

@ -10,6 +10,7 @@
# Excerpts from revision history # Excerpts from revision history
# Dec 2011 update the script for the mert-moses.pl compatibility
# Sept 2011 multi-threaded mert (Barry Haddow) # Sept 2011 multi-threaded mert (Barry Haddow)
# 3 Aug 2011 Added random directions, historic best, pairwise ranked (PK) # 3 Aug 2011 Added random directions, historic best, pairwise ranked (PK)
# Jul 2011 simplifications (Ondrej Bojar) # Jul 2011 simplifications (Ondrej Bojar)
@ -47,9 +48,13 @@
# 13 Oct 2004 Use alternative decoders (DWC) # 13 Oct 2004 Use alternative decoders (DWC)
# Original version by Philipp Koehn # Original version by Philipp Koehn
use strict;
use FindBin qw($Bin); use FindBin qw($Bin);
use File::Basename; use File::Basename;
use File::Path; use File::Path;
use File::Spec;
use Cwd;
my $SCRIPTS_ROOTDIR = $Bin; my $SCRIPTS_ROOTDIR = $Bin;
$SCRIPTS_ROOTDIR =~ s/\/training$//; $SCRIPTS_ROOTDIR =~ s/\/training$//;
$SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"}); $SCRIPTS_ROOTDIR = $ENV{"SCRIPTS_ROOTDIR"} if defined($ENV{"SCRIPTS_ROOTDIR"});
@ -82,12 +87,16 @@ my $minimum_required_change_in_weights = 0.00001;
my $verbose = 0; my $verbose = 0;
my $usage = 0; # request for --help my $usage = 0; # request for --help
my $___WORKING_DIR = "mert-work";
# If you don't specify a working directory,
# the default is set to `pwd`/mert-work
my $___WORKING_DIR = File::Spec->catfile(Cwd::getcwd(), "mert-work");
my $___DEV_F = undef; # required, input text to decode my $___DEV_F = undef; # required, input text to decode
my $___DEV_E = undef; # required, basename of files with references my $___DEV_E = undef; # required, basename of files with references
my $___DECODER = undef; # required, pathname to the decoder executable my $___DECODER = undef; # required, pathname to the decoder executable
my $___CONFIG = undef; # required, pathname to startup ini file my $___CONFIG = undef; # required, pathname to startup ini file
my $___N_BEST_LIST_SIZE = 100; my $___N_BEST_LIST_SIZE = 100;
my $___LATTICE_SAMPLES = 0;
my $queue_flags = "-hard"; # extra parameters for parallelizer my $queue_flags = "-hard"; # extra parameters for parallelizer
# the -l ws0ssmt was relevant only to JHU 2006 workshop # the -l ws0ssmt was relevant only to JHU 2006 workshop
my $___JOBS = undef; # if parallel, number of jobs to use (undef or 0 -> serial) my $___JOBS = undef; # if parallel, number of jobs to use (undef or 0 -> serial)
@ -133,7 +142,6 @@ my $filtercmd = undef; # path to filter-model-given-input.pl
my $filterfile = undef; my $filterfile = undef;
my $qsubwrapper = undef; my $qsubwrapper = undef;
my $moses_parallel_cmd = undef; my $moses_parallel_cmd = undef;
my $scorer_config = "BLEU:1";
my $old_sge = 0; # assume sge<6.0 my $old_sge = 0; # assume sge<6.0
my $___CONFIG_ORIG = undef; # pathname to startup ini file before filtering my $___CONFIG_ORIG = undef; # pathname to startup ini file before filtering
my $___ACTIVATE_FEATURES = undef; # comma-separated (or blank-separated) list of features to work on my $___ACTIVATE_FEATURES = undef; # comma-separated (or blank-separated) list of features to work on
@ -146,8 +154,8 @@ my $prev_aggregate_nbl_size = -1; # number of previous step to consider when loa
# 1 means 1 previous data , i.e. from the actual iteration and from the previous one # 1 means 1 previous data , i.e. from the actual iteration and from the previous one
# and so on # and so on
my $maximum_iterations = 25; my $maximum_iterations = 25;
my $scorer_config = undef ;
use strict;
use Getopt::Long; use Getopt::Long;
GetOptions( GetOptions(
"working-dir=s" => \$___WORKING_DIR, "working-dir=s" => \$___WORKING_DIR,
@ -157,6 +165,7 @@ GetOptions(
"decoder=s" => \$___DECODER, "decoder=s" => \$___DECODER,
"config=s" => \$___CONFIG, "config=s" => \$___CONFIG,
"nbest=i" => \$___N_BEST_LIST_SIZE, "nbest=i" => \$___N_BEST_LIST_SIZE,
"lattice-samples=i" => \$___LATTICE_SAMPLES,
"queue-flags=s" => \$queue_flags, "queue-flags=s" => \$queue_flags,
"jobs=i" => \$___JOBS, "jobs=i" => \$___JOBS,
"decoder-flags=s" => \$___DECODER_FLAGS, "decoder-flags=s" => \$___DECODER_FLAGS,
@ -191,8 +200,8 @@ GetOptions(
"pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER, "pairwise-ranked" => \$___PAIRWISE_RANKED_OPTIMIZER,
"pro-starting-point" => \$___PRO_STARTING_POINT, "pro-starting-point" => \$___PRO_STARTING_POINT,
"historic-interpolation=f" => \$___HISTORIC_INTERPOLATION, "historic-interpolation=f" => \$___HISTORIC_INTERPOLATION,
"threads=i" => \$__THREADS, "sc-config=s" => \$scorer_config,
"sc-config=s" => \$scorer_config "threads=i" => \$__THREADS
) or exit(1); ) or exit(1);
# the 4 required parameters can be supplied on the command line directly # the 4 required parameters can be supplied on the command line directly
@ -210,6 +219,7 @@ if ($usage || !defined $___DEV_F || !defined $___DEV_E || !defined $___DECODER |
Options: Options:
--working-dir=mert-dir ... where all the files are created --working-dir=mert-dir ... where all the files are created
--nbest=100 ... how big nbestlist to generate --nbest=100 ... how big nbestlist to generate
--lattice-samples ... how many lattice samples (Chatterjee & Cancedda, emnlp 2010)
--jobs=N ... set this to anything to run moses in parallel --jobs=N ... set this to anything to run moses in parallel
--mosesparallelcmd=STR ... use a different script instead of moses-parallel --mosesparallelcmd=STR ... use a different script instead of moses-parallel
--queue-flags=STRING ... anything you with to pass to qsub, eg. --queue-flags=STRING ... anything you with to pass to qsub, eg.
@ -276,7 +286,7 @@ Options:
--threads=NUMBER ... Use multi-threaded mert (must be compiled in). --threads=NUMBER ... Use multi-threaded mert (must be compiled in).
--historic-interpolation ... Interpolate optimized weights with prior iterations' weight --historic-interpolation ... Interpolate optimized weights with prior iterations' weight
(parameter sets factor [0;1] given to current weights) (parameter sets factor [0;1] given to current weights)
--sc-config=STRING ... extra option to specify multiscoring. --sc-config=\"METRIC1:WEIGHT1,METRIC2:WEIGHT2\" ... extra option to specify tuning with multiple metrics.
"; ";
exit 1; exit 1;
} }
@ -284,7 +294,6 @@ Options:
# Check validity of input parameters and set defaults if needed # Check validity of input parameters and set defaults if needed
print STDERR "Using WORKING_DIR: $___WORKING_DIR\n";
print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n"; print STDERR "Using SCRIPTS_ROOTDIR: $SCRIPTS_ROOTDIR\n";
# path of script for filtering phrase tables and running the decoder # path of script for filtering phrase tables and running the decoder
@ -308,9 +317,11 @@ if (!defined $mertdir) {
my $mert_extract_cmd = "$mertdir/extractor"; my $mert_extract_cmd = "$mertdir/extractor";
my $mert_mert_cmd = "$mertdir/mert"; my $mert_mert_cmd = "$mertdir/mert";
my $mert_pro_cmd = "$mertdir/pro";
die "Not executable: $mert_extract_cmd" if ! -x $mert_extract_cmd; die "Not executable: $mert_extract_cmd" if ! -x $mert_extract_cmd;
die "Not executable: $mert_mert_cmd" if ! -x $mert_mert_cmd; die "Not executable: $mert_mert_cmd" if ! -x $mert_mert_cmd;
die "Not executable: $mert_pro_cmd" if ! -x $mert_pro_cmd;
my $pro_optimizer = "$mertdir/megam_i686.opt"; # or set to your installation my $pro_optimizer = "$mertdir/megam_i686.opt"; # or set to your installation
if (($___PAIRWISE_RANKED_OPTIMIZER || $___PRO_STARTING_POINT) && ! -x $pro_optimizer) { if (($___PAIRWISE_RANKED_OPTIMIZER || $___PRO_STARTING_POINT) && ! -x $pro_optimizer) {
@ -610,6 +621,8 @@ my $oldallsorted = undef;
my $allsorted = undef; my $allsorted = undef;
my $nbest_file=undef; my $nbest_file=undef;
my $lsamp_file=undef; #Lattice samples
my $orig_nbest_file=undef; # replaced if lattice sampling
while(1) { while(1) {
$run++; $run++;
@ -629,8 +642,20 @@ while(1) {
# skip running the decoder if the user wanted # skip running the decoder if the user wanted
if (!$skip_decoder) { if (!$skip_decoder) {
print "($run) run decoder to produce n-best lists\n"; print "($run) run decoder to produce n-best lists\n";
$nbest_file = run_decoder($featlist, $run, $need_to_normalize); ($nbest_file,$lsamp_file) = run_decoder($featlist, $run, $need_to_normalize);
$need_to_normalize = 0; $need_to_normalize = 0;
if ($___LATTICE_SAMPLES) {
    # When lattice sampling is enabled the decoder produced two files: the
    # n-best list and the lattice samples. Merge them into one file, ordered
    # numerically on the first field (presumably the sentence id -- confirm
    # against the n-best format), and continue with the merged file.
    my $combined_file = "$nbest_file.comb";
    safesystem("sort -k1,1n $nbest_file $lsamp_file > $combined_file") or
        die("failed to merge nbest and lattice samples");
    # '&&' rather than ';' so that a failure of the first gzip is not hidden
    # behind the exit status of the second one.
    safesystem("gzip -f $nbest_file && gzip -f $lsamp_file") or
        die "Failed to gzip nbests and lattice samples";
    # Record the compressed names exactly once: appending ".gz" a second time
    # would leave $lsamp_file pointing at a non-existent "*.gz.gz" file.
    $orig_nbest_file = "$nbest_file.gz";
    $lsamp_file = "$lsamp_file.gz";
    # From here on the combined file plays the role of the n-best list.
    $nbest_file = "$combined_file";
}
safesystem("gzip -f $nbest_file") or die "Failed to gzip run*out"; safesystem("gzip -f $nbest_file") or die "Failed to gzip run*out";
$nbest_file = $nbest_file.".gz"; $nbest_file = $nbest_file.".gz";
} }
@ -648,9 +673,12 @@ while(1) {
my $base_score_file = "scores.dat"; my $base_score_file = "scores.dat";
my $feature_file = "run$run.${base_feature_file}"; my $feature_file = "run$run.${base_feature_file}";
my $score_file = "run$run.${base_score_file}"; my $score_file = "run$run.${base_score_file}";
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
my $cmd = ""; my $cmd = "";
if (defined($scorer_config))
{
# process the multiple-metric way
print STDERR "-- process the mulitple metric way --\n";
my $scorer_name; my $scorer_name;
my $scorer_weight; my $scorer_weight;
$scorer_config=~s/ //g; $scorer_config=~s/ //g;
@ -659,108 +687,153 @@ while(1) {
my $scorer_config_spec; my $scorer_config_spec;
foreach $scorer_config_spec(@lists_scorer_config) foreach $scorer_config_spec(@lists_scorer_config)
{ {
# print STDERR $scorer_config_spec."\n";
my @lists_scorer_config_spec=split(":",$scorer_config_spec); my @lists_scorer_config_spec=split(":",$scorer_config_spec);
$scorer_name=$lists_scorer_config_spec[0]; $scorer_name=$lists_scorer_config_spec[0];
$scorer_weight=$lists_scorer_config_spec[1]; $scorer_weight=$lists_scorer_config_spec[1];
# print STDERR $scorer_name."\n";
# print STDERR $scorer_weight."\n";
$cmd = "$mert_extract_cmd $mert_extract_args --scfile $score_file.$scorer_name --ffile $feature_file.$scorer_name --sctype $scorer_name -r ".join(",", @references)." -n $nbest_file"; $cmd = "$mert_extract_cmd $mert_extract_args --scfile $score_file.$scorer_name --ffile $feature_file.$scorer_name --sctype $scorer_name -r ".join(",", @references)." -n $nbest_file";
# print STDERR "LANCEMENT $scorer_name ********************************************\n";
&submit_or_exec($cmd,"extract.out.$scorer_name","extract.err.$scorer_name"); &submit_or_exec($cmd,"extract.out.$scorer_name","extract.err.$scorer_name");
# print STDERR "FIN $scorer_name ************************************************** \n";
# print STDERR "executing $cmd\n";
# print STDERR "\n";
# safesystem("date");
# print STDERR "\n";
# if (defined $___JOBS) {
# safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=extract.out.$scorer_name -stderr=extract.err.$scorer_name" )
# or die "$scorer_name Failed to submit extraction to queue (via $qsubwrapper)";
# } else {
# safesystem("$cmd > extract.out.$scorer_name 2> extract.err.$scorer_name") or die "$scorer_name Failed to do extraction of statistics.";
# }
# print FILE "$scorer_name $scorer_weight $score_file.$scorer_name $feature_file.$scorer_name\n";
} }
# print STDERR "CREATION INI\n";
my @scorer_content; my @scorer_content;
my $fileIncrement=0; my $fileIncrement=0;
open(FILE,">merge.init") || die ("File creation ERROR : merge.init"); open(FILE,">merge.init") || die ("File creation ERROR : merge.init");
my $minFileName="";
my $minFileSize;
my %scoreFileContent;
my %featureFileContent;
my $firstContent;
foreach $scorer_config_spec(@lists_scorer_config) foreach $scorer_config_spec(@lists_scorer_config)
{ {
my @lists_scorer_config_spec=split(":",$scorer_config_spec); my @lists_scorer_config_spec=split(":",$scorer_config_spec);
$scorer_name=$lists_scorer_config_spec[0]; $scorer_name=$lists_scorer_config_spec[0];
$scorer_weight=$lists_scorer_config_spec[1]; $scorer_weight=$lists_scorer_config_spec[1];
print FILE "$scorer_name $scorer_weight $score_file.$scorer_name $feature_file.$scorer_name\n"; print FILE "$scorer_name $scorer_weight $score_file.$scorer_name $feature_file.$scorer_name\n";
my @tmp_content=`/bin/cat $score_file.$scorer_name`; my @tmp_scoreContent=`/bin/cat $score_file.$scorer_name`;
$scorer_content[$fileIncrement] = [ @tmp_content ]; my @tmp_featContent=`/bin/cat $feature_file.$scorer_name`;
my $localIncrementFileContent=0;
my $fileContentInfo=0;
my $localIncrementInfo=0;
for ($localIncrementFileContent=0; $localIncrementFileContent<scalar(@tmp_scoreContent); $localIncrementFileContent++)
{
if (rindex($tmp_scoreContent[$localIncrementFileContent],"BEGIN")>-1)
{
my @split_local=split(" ",$tmp_scoreContent[$localIncrementFileContent]);
$fileContentInfo=$split_local[1];
$localIncrementInfo=0;
}
chomp($tmp_scoreContent[$localIncrementFileContent]);
chomp($tmp_featContent[$localIncrementFileContent]);
$scoreFileContent{$fileIncrement}{$fileContentInfo}{$localIncrementInfo}=$tmp_scoreContent[$localIncrementFileContent];
$featureFileContent{$fileIncrement}{$fileContentInfo}{$localIncrementInfo}=$tmp_featContent[$localIncrementFileContent];
$localIncrementInfo++;
}
if ($fileIncrement==0) if ($fileIncrement==0)
{ {
`/bin/cp $feature_file.$scorer_name $feature_file`; $minFileSize=$localIncrementFileContent;
$minFileName=$scorer_name;
}
else
{
if ($minFileSize>$localIncrementFileContent)
{
$minFileSize=$localIncrementFileContent;
$minFileName=$scorer_name;
}
} }
$fileIncrement++; $fileIncrement++;
} }
close(FILE); close(FILE);
# print STDERR "\n";
# safesystem("date");
# print STDERR "\n";
# print STDERR "ON VA RASSEMBLER dans $score_file\n";
open(SCOREFILE,">$score_file") || die ("File creation ERROR : $score_file"); open(SCOREFILE,">$score_file") || die ("File creation ERROR : $score_file");
open(FEATUREFILE,">$feature_file") || die ("File creation ERROR : $feature_file");
my $newFileIncrement=0; my $newFileIncrement=0;
my $contentIncrement=0; my $contentIncrement=0;
my $contentSize=scalar(@{$scorer_content[0]}); my @nbestSize;
# print STDERR "TAILLE : ".$contentSize."|".$fileIncrement."\n"; my $contentSize;
while ($contentIncrement< $contentSize) my $lineScore="";
my $lineFeature="";
my $minSize;
my $localContentIncrement=0;
my @localContentSizeSize;
my $scoreFileName;
my $notFinished=1;
my $scoreName=$minFileName;
my $minInfoSize=-1;
$fileIncrement=0;
while (defined($scoreFileContent{$fileIncrement}{$contentIncrement}))
{ {
my $line=""; if ($localContentIncrement==0)
$newFileIncrement=0;
while($newFileIncrement< $fileIncrement)
{ {
if (rindex($scorer_content[$newFileIncrement][$contentIncrement],"BEGIN")<0) foreach $fileIncrement(sort keys %scoreFileContent)
{ {
$line=$line." ".$scorer_content[$newFileIncrement][$contentIncrement]; # process the score file
chomp($line); my @tmp_split=split(" ",$scoreFileContent{$fileIncrement}{$contentIncrement}{$localContentIncrement});
if ($minInfoSize==-1)
{
$minInfoSize=$tmp_split[2];
}
elsif ($minInfoSize>$tmp_split[2])
{
$minInfoSize=$tmp_split[2];
}
my @split_line=split(" ",$lineScore);
if (scalar(@split_line)>0)
{
$tmp_split[3]=$split_line[3]+$tmp_split[3];
}
$lineScore=$tmp_split[0]." ".$contentIncrement." ".$minInfoSize." ".$tmp_split[3]." MERGE";
# process the feature file
@tmp_split=split(" ",$featureFileContent{$fileIncrement}{$contentIncrement}{$localContentIncrement});
$lineFeature=$tmp_split[0]." ".$contentIncrement." ".$minInfoSize." ".$tmp_split[3]." MERGE";
}
$localContentIncrement++;
} }
else else
{ {
my @split_line_input=split(" ",$scorer_content[$newFileIncrement][$contentIncrement]); LOOP_CONTENT: foreach $scoreName(sort keys %scoreFileContent)
my @split_line=split(" ",$line);
if (scalar(@split_line)>0)
{ {
$split_line_input[3]=$split_line[3]+$split_line_input[3]; if ((rindex($scoreFileContent{$fileIncrement}{$contentIncrement}{$localContentIncrement},"END")>-1) || ($minInfoSize < $localContentIncrement))
} {
$line=$split_line_input[0]." ".$split_line_input[1]." ".$split_line_input[2]." ".$split_line_input[3]." MERGE"; $lineScore="SCORES_TXT_END_0";
} $lineFeature="FEATURES_TXT_END_0";
$newFileIncrement++; $localContentIncrement=0;
}
$line=~s/^[ ]+//g;
$line=~s/[ ]+$//g;
$line=~s/[ ]+/ /g;
# print STDERR $line."\n";
print SCOREFILE $line."\n";
$contentIncrement++; $contentIncrement++;
$minInfoSize=-1;
last LOOP_CONTENT;
}
else
{
$lineScore=$lineScore." ".$scoreFileContent{$fileIncrement}{$contentIncrement}{$localContentIncrement};
$lineFeature=$featureFileContent{$fileIncrement}{$contentIncrement}{$localContentIncrement};
}
}
if ($localContentIncrement!=0)
{
$localContentIncrement++;
}
}
$lineScore=~s/^[ ]+//g;
$lineScore=~s/[ ]+$//g;
$lineScore=~s/[ ]+/ /g;
$lineFeature=~s/^[ ]+//g;
$lineFeature=~s/[ ]+$//g;
$lineFeature=~s/[ ]+/ /g;
print SCOREFILE $lineScore."\n";
print FEATUREFILE $lineFeature."\n";
$lineScore="";
$lineFeature="";
} }
close(SCOREFILE); close(SCOREFILE);
# `/bin/cp ` close(FEATUREFILE);
}
# $cmd="$mertdir/mergeWeights -c merge.init -s $score_file -f $feature_file"; else
# print STDERR "executing : $cmd\n"; {
# continue with the classical way
# if (defined $___JOBS) { $cmd = "$mert_extract_cmd $mert_extract_args --scfile $score_file --ffile $feature_file -r ".join(",", @references)." -n $nbest_file";
# safesystem("$qsubwrapper $pass_old_sge -command='$cmd' -queue-parameter=\"$queue_flags\" -stdout=mergeWeight.out.MERGE -stderr=mergeWeight.err.MERGE" ) $cmd = create_extractor_script($cmd, $___WORKING_DIR);
# or die "MERGE Failed to submit extraction to queue (via $qsubwrapper)"; &submit_or_exec($cmd,"extract.out","extract.err");
# } else { }
# safesystem("$cmd > mergeWeight.out.MERGE 2> mergeWeight.err.MERGE") or die "MERGE Failed to do extraction of statistics.";
# }
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
# my $cmd = "$mert_extract_cmd $mert_extract_args --scfile $score_file --ffile $feature_file -r ".join(",", @references)." -n $nbest_file";
# &submit_or_exec($cmd,"extract.out","extract.err");
# Create the initial weights file for mert: init.opt # Create the initial weights file for mert: init.opt
my @MIN = @{$featlist->{"mins"}}; my @MIN = @{$featlist->{"mins"}};
@ -785,10 +858,12 @@ while(1) {
$cmd = "$mert_mert_cmd -d $DIM $mert_mert_args"; $cmd = "$mert_mert_cmd -d $DIM $mert_mert_args";
my $mert_settings = " -n $___RANDOM_RESTARTS"; my $mert_settings = " -n $___RANDOM_RESTARTS";
my $seed_settings = "";
if ($___PREDICTABLE_SEEDS) { if ($___PREDICTABLE_SEEDS) {
my $seed = $run * 1000; my $seed = $run * 1000;
$mert_settings .= " -r $seed"; $seed_settings .= " -r $seed";
} }
$mert_settings .= $seed_settings;
if ($___RANDOM_DIRECTIONS) { if ($___RANDOM_DIRECTIONS) {
if ($___NUM_RANDOM_DIRECTIONS == 0) { if ($___NUM_RANDOM_DIRECTIONS == 0) {
$mert_settings .= " -m 50"; $mert_settings .= " -m 50";
@ -802,19 +877,25 @@ while(1) {
$mert_settings .= " --threads $__THREADS"; $mert_settings .= " --threads $__THREADS";
} }
my $file_settings = ""; my $ffiles = "";
my $scfiles = "";
if (defined $prev_feature_file) { if (defined $prev_feature_file) {
$file_settings .= " --ffile $prev_feature_file,$feature_file"; $ffiles = "$prev_feature_file,$feature_file";
} }
else{ else{
$file_settings .= " --ffile $feature_file"; $ffiles = "$feature_file";
} }
if (defined $prev_score_file) { if (defined $prev_score_file) {
$file_settings .= " --scfile $prev_score_file,$score_file"; $scfiles = "$prev_score_file,$score_file";
} }
else{ else{
$file_settings .= " --scfile $score_file"; $scfiles = "$score_file";
} }
my $file_settings = " --ffile $ffiles --scfile $scfiles";
my $pro_file_settings = "--ffile " . join( " --ffile ", split(/,/, $ffiles)) .
" --scfile " . join( " --scfile ", split(/,/, $scfiles));
if ($___START_WITH_HISTORIC_BESTS && defined $prev_init_file) { if ($___START_WITH_HISTORIC_BESTS && defined $prev_init_file) {
$file_settings .= " --ifile $prev_init_file,run$run.$weights_in_file"; $file_settings .= " --ifile $prev_init_file,run$run.$weights_in_file";
} }
@ -826,13 +907,13 @@ while(1) {
# pro optimization # pro optimization
if ($___PAIRWISE_RANKED_OPTIMIZER) { if ($___PAIRWISE_RANKED_OPTIMIZER) {
$cmd .= " --pro run$run.pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer -fvals -maxi 30 -nobias binary run$run.pro.data"; $cmd = "$mert_pro_cmd $seed_settings $pro_file_settings -o run$run.pro.data ; echo 'not used' > $weights_out_file; $pro_optimizer -fvals -maxi 30 -nobias binary run$run.pro.data";
&submit_or_exec($cmd,$mert_outfile,$mert_logfile); &submit_or_exec($cmd,$mert_outfile,$mert_logfile);
} }
# first pro, then mert # first pro, then mert
elsif ($___PRO_STARTING_POINT) { elsif ($___PRO_STARTING_POINT) {
# run pro... # run pro...
my $pro_cmd = $cmd." --pro run$run.pro.data ; $pro_optimizer -fvals -maxi 30 -nobias binary run$run.pro.data"; my $pro_cmd = "$mert_pro_cmd $seed_settings $pro_file_settings -o run$run.pro.data ; $pro_optimizer -fvals -maxi 30 -nobias binary run$run.pro.data";
&submit_or_exec($pro_cmd,"run$run.pro.out","run$run.pro.err"); &submit_or_exec($pro_cmd,"run$run.pro.out","run$run.pro.err");
# ... get results ... # ... get results ...
my %dummy; my %dummy;
@ -858,7 +939,6 @@ while(1) {
chomp $extractFiles; chomp $extractFiles;
safesystem ("\\cp -f $extractFiles run$run.$extractFiles") or die; safesystem ("\\cp -f $extractFiles run$run.$extractFiles") or die;
} }
# safesystem ("\\cp -f extract.err run$run.extract.err") or die; # safesystem ("\\cp -f extract.err run$run.extract.err") or die;
# safesystem ("\\cp -f extract.out run$run.extract.out") or die; # safesystem ("\\cp -f extract.out run$run.extract.out") or die;
safesystem ("\\cp -f $mert_outfile run$run.$mert_outfile") or die; safesystem ("\\cp -f $mert_outfile run$run.$mert_outfile") or die;
@ -985,7 +1065,7 @@ if (defined $allsorted){ safesystem ("\\rm -f $allsorted") or die; };
safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die; safesystem("\\cp -f $weights_in_file run$run.$weights_in_file") or die;
safesystem("\\cp -f $mert_logfile run$run.$mert_logfile") or die; safesystem("\\cp -f $mert_logfile run$run.$mert_logfile") or die;
create_config($___CONFIG_ORIG, "./moses.ini", $featlist, $run, $devbleu); create_config($___CONFIG_ORIG, "./moses.ini", $featlist, $run, $devbleu, $sparse_weights_file);
# just to be sure that we have the really last finished step marked # just to be sure that we have the really last finished step marked
open F, "> finished_step.txt" or die "Can't mark finished step"; open F, "> finished_step.txt" or die "Can't mark finished step";
@ -1040,6 +1120,11 @@ sub run_decoder {
my ($featlist, $run, $need_to_normalize) = @_; my ($featlist, $run, $need_to_normalize) = @_;
my $filename_template = "run%d.best$___N_BEST_LIST_SIZE.out"; my $filename_template = "run%d.best$___N_BEST_LIST_SIZE.out";
my $filename = sprintf($filename_template, $run); my $filename = sprintf($filename_template, $run);
my $lsamp_filename = undef;
if ($___LATTICE_SAMPLES) {
my $lsamp_filename_template = "run%d.lsamp$___LATTICE_SAMPLES.out";
$lsamp_filename = sprintf($lsamp_filename_template, $run);
}
# user-supplied parameters # user-supplied parameters
print "params = $___DECODER_FLAGS\n"; print "params = $___DECODER_FLAGS\n";
@ -1060,23 +1145,28 @@ sub run_decoder {
$model_weights{$name} .= sprintf " %.6f", $vals[$i]; $model_weights{$name} .= sprintf " %.6f", $vals[$i];
} }
my $decoder_config = join(" ", values %model_weights); my $decoder_config = join(" ", values %model_weights);
$decoder_config .= " -weight-file run$run.sparse-weights" if -e "run$run.sparse-weights";
print STDERR "DECODER_CFG = $decoder_config\n"; print STDERR "DECODER_CFG = $decoder_config\n";
print "decoder_config = $decoder_config\n"; print "decoder_config = $decoder_config\n";
# run the decoder # run the decoder
my $nBest_cmd = "-n-best-size $___N_BEST_LIST_SIZE";
my $decoder_cmd; my $decoder_cmd;
my $lsamp_cmd = "";
if ($___LATTICE_SAMPLES) {
$lsamp_cmd = " -lattice-samples $lsamp_filename $___LATTICE_SAMPLES ";
}
if (defined $___JOBS && $___JOBS > 0) { if (defined $___JOBS && $___JOBS > 0) {
$decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$___DECODER_FLAGS $decoder_config\" -n-best-list \"$filename $___N_BEST_LIST_SIZE\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out"; $decoder_cmd = "$moses_parallel_cmd $pass_old_sge -config $___CONFIG -inputtype $___INPUTTYPE -qsub-prefix mert$run -queue-parameters \"$queue_flags\" -decoder-parameters \"$___DECODER_FLAGS $decoder_config\" $lsamp_cmd -n-best-list \"$filename $___N_BEST_LIST_SIZE\" -input-file $___DEV_F -jobs $___JOBS -decoder $___DECODER > run$run.out";
} else { } else {
$decoder_cmd = "$___DECODER $___DECODER_FLAGS -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config -n-best-list $filename $___N_BEST_LIST_SIZE -input-file $___DEV_F > run$run.out"; $decoder_cmd = "$___DECODER $___DECODER_FLAGS -config $___CONFIG -inputtype $___INPUTTYPE $decoder_config $lsamp_cmd -n-best-list $filename $___N_BEST_LIST_SIZE -input-file $___DEV_F > run$run.out";
} }
safesystem($decoder_cmd) or die "The decoder died. CONFIG WAS $decoder_config \n"; safesystem($decoder_cmd) or die "The decoder died. CONFIG WAS $decoder_config \n";
sanity_check_order_of_lambdas($featlist, $filename); sanity_check_order_of_lambdas($featlist, $filename);
return $filename; return ($filename, $lsamp_filename);
} }
@ -1374,3 +1464,20 @@ sub submit_or_exec {
safesystem("$cmd > $stdout 2> $stderr") or die "ERROR: Failed to run '$cmd'."; safesystem("$cmd > $stdout 2> $stderr") or die "ERROR: Failed to run '$cmd'.";
} }
} }
# Write a small bash wrapper script, "extractor.sh", into $outdir.
# The script changes into $outdir and then runs $cmd.
#
# Parameters:
#   $cmd    - the command line to place in the script (written verbatim)
#   $outdir - directory in which the script is created and into which it cd's
#
# Returns the path of the generated script.
# Dies if the script cannot be written or cannot be made executable.
sub create_extractor_script
{
    my ($cmd, $outdir) = @_;
    my $script_path = File::Spec->catfile($outdir, "extractor.sh");

    # Single-quote the directory for the shell so that paths containing
    # spaces or metacharacters survive; embedded quotes become '\''.
    (my $quoted_outdir = $outdir) =~ s/'/'\\''/g;

    open my $out, '>', $script_path
        or die "Couldn't open $script_path for writing: $!\n";
    print $out "#!/bin/bash\n";
    # Stop right away if the directory is unreachable instead of running
    # the command from the wrong place.
    print $out "cd '$quoted_outdir' || exit 1\n";
    print $out "$cmd\n";
    # Buffered write errors only surface at close, so check it.
    close($out)
        or die "Couldn't finish writing $script_path: $!\n";

    # Equivalent of "chmod +x" (execute bits added subject to the umask),
    # done with the builtin so no shell is involved and failure is detected.
    my @st = stat($script_path)
        or die "Couldn't stat $script_path: $!\n";
    my $mode = $st[2] & 07777;
    chmod($mode | (0111 & ~umask()), $script_path)
        or die "Couldn't make $script_path executable: $!\n";

    return $script_path;
}

View File

@ -64,7 +64,7 @@ int main(int argc, char* argv[])
<< toksAlign.size() << " " << lineAlign << endl; << toksAlign.size() << " " << lineAlign << endl;
*/ */
extractSingleton.Process(toksTarget, toksSource, toksAlign); extractSingleton.Process(toksTarget, toksSource, toksAlign, lineCount);
++lineCount; ++lineCount;
} }
@ -86,7 +86,7 @@ const std::string *Vocab::GetOrAdd(const std::string &word)
return ret; return ret;
} }
void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource, vector<string> &toksAlign) void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource, vector<string> &toksAlign, size_t lineCount)
{ {
std::vector<bool> m_sourceAligned(toksSource.size(), false) std::vector<bool> m_sourceAligned(toksSource.size(), false)
, m_targetAligned(toksTarget.size(), false); , m_targetAligned(toksTarget.size(), false);
@ -99,6 +99,18 @@ void ExtractLex::Process(vector<string> &toksTarget, vector<string> &toksSource,
vector<size_t> alignPos; vector<size_t> alignPos;
Tokenize(alignPos, alignTok, "-"); Tokenize(alignPos, alignTok, "-");
assert(alignPos.size() == 2); assert(alignPos.size() == 2);
if (alignPos[0] >= toksSource.size())
{
cerr << "ERROR: alignment over source length. Alignment " << alignPos[0] << " at line " << lineCount << endl;
continue;
}
if (alignPos[1] >= toksTarget.size())
{
cerr << "ERROR: alignment over target length. Alignment " << alignPos[1] << " at line " << lineCount << endl;
continue;
}
assert(alignPos[0] < toksSource.size()); assert(alignPos[0] < toksSource.size());
assert(alignPos[1] < toksTarget.size()); assert(alignPos[1] < toksTarget.size());

View File

@ -110,7 +110,7 @@ class ExtractLex
void Output(const std::map<const std::string*, WordCount> &coll, std::ofstream &outStream); void Output(const std::map<const std::string*, WordCount> &coll, std::ofstream &outStream);
public: public:
void Process(std::vector<std::string> &toksTarget, std::vector<std::string> &toksSource, std::vector<std::string> &toksAlign); void Process(std::vector<std::string> &toksTarget, std::vector<std::string> &toksSource, std::vector<std::string> &toksAlign, size_t lineCount);
void Output(std::ofstream &streamLexS2T, std::ofstream &streamLexT2S); void Output(std::ofstream &streamLexS2T, std::ofstream &streamLexT2S);
}; };

View File

@ -58,8 +58,8 @@ vector<string> tokenize( const char [] );
void writeCountOfCounts( const char* fileNameCountOfCounts ); void writeCountOfCounts( const char* fileNameCountOfCounts );
void processPhrasePairs( vector< PhraseAlignment > & , ostream &phraseTableFile); void processPhrasePairs( vector< PhraseAlignment > & , ostream &phraseTableFile);
PhraseAlignment* findBestAlignment( vector< PhraseAlignment* > & ); PhraseAlignment* findBestAlignment(const PhraseAlignmentCollection &phrasePair );
void outputPhrasePair( vector< PhraseAlignment * > &, float, int, ostream &phraseTableFile ); void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float, int, ostream &phraseTableFile );
double computeLexicalTranslation( const PHRASE &, const PHRASE &, PhraseAlignment * ); double computeLexicalTranslation( const PHRASE &, const PHRASE &, PhraseAlignment * );
double computeUnalignedPenalty( const PHRASE &, const PHRASE &, PhraseAlignment * ); double computeUnalignedPenalty( const PHRASE &, const PHRASE &, PhraseAlignment * );
set<string> functionWordList; set<string> functionWordList;
@ -267,7 +267,7 @@ void writeCountOfCounts( const char* fileNameCountOfCounts )
} }
// Kneser-Ney needs the total number of phrase pairs // Kneser-Ney needs the total number of phrase pairs
countOfCountsFile << totalDistinct; countOfCountsFile << totalDistinct << endl;
// write out counts // write out counts
for(int i=1; i<=COC_MAX; i++) { for(int i=1; i<=COC_MAX; i++) {
@ -282,40 +282,48 @@ void processPhrasePairs( vector< PhraseAlignment > &phrasePair, ostream &phraseT
// group phrase pairs based on alignments that matter // group phrase pairs based on alignments that matter
// (i.e. that re-arrange non-terminals) // (i.e. that re-arrange non-terminals)
vector< vector< PhraseAlignment * > > phrasePairGroup; PhrasePairGroup phrasePairGroup;
float totalSource = 0; float totalSource = 0;
//cerr << "phrasePair.size() = " << phrasePair.size() << endl;
// loop through phrase pairs // loop through phrase pairs
for(size_t i=0; i<phrasePair.size(); i++) { for(size_t i=0; i<phrasePair.size(); i++) {
// add to total count // add to total count
PhraseAlignment &currPhrasePair = phrasePair[i];
totalSource += phrasePair[i].count; totalSource += phrasePair[i].count;
// check for matches // check for matches
bool matched = false; //cerr << "phrasePairGroup.size() = " << phrasePairGroup.size() << endl;
for(size_t g=0; g<phrasePairGroup.size(); g++) {
vector< PhraseAlignment* > &group = phrasePairGroup[g]; PhraseAlignmentCollection phraseAlignColl;
// matched? place into same group phraseAlignColl.push_back(&currPhrasePair);
if ( group[0]->match( phrasePair[i] )) { pair<PhrasePairGroup::iterator, bool> retInsert;
group.push_back( &phrasePair[i] ); retInsert = phrasePairGroup.insert(phraseAlignColl);
matched = true; if (!retInsert.second)
} { // already exist. Add to that collection instead
} PhraseAlignmentCollection &existingColl = const_cast<PhraseAlignmentCollection&>(*retInsert.first);
// not matched? create new group existingColl.push_back(&currPhrasePair);
if (! matched) {
vector< PhraseAlignment* > newGroup;
newGroup.push_back( &phrasePair[i] );
phrasePairGroup.push_back( newGroup );
} }
} }
// output the distinct phrase pairs, one at a time // output the distinct phrase pairs, one at a time
for(size_t g=0; g<phrasePairGroup.size(); g++) { const PhrasePairGroup::SortedColl &sortedColl = phrasePairGroup.GetSortedColl();
vector< PhraseAlignment* > &group = phrasePairGroup[g]; PhrasePairGroup::SortedColl::const_iterator iter;
outputPhrasePair( group, totalSource, phrasePairGroup.size(), phraseTableFile );
} for(iter = sortedColl.begin(); iter != sortedColl.end(); ++iter)
{
const PhraseAlignmentCollection &group = **iter;
outputPhrasePair( group, totalSource, phrasePairGroup.GetSize(), phraseTableFile );
} }
PhraseAlignment* findBestAlignment( vector< PhraseAlignment* > &phrasePair ) }
PhraseAlignment* findBestAlignment(const PhraseAlignmentCollection &phrasePair )
{ {
float bestAlignmentCount = -1; float bestAlignmentCount = -1;
PhraseAlignment* bestAlignment; PhraseAlignment* bestAlignment;
@ -330,6 +338,7 @@ PhraseAlignment* findBestAlignment( vector< PhraseAlignment* > &phrasePair )
return bestAlignment; return bestAlignment;
} }
void calcNTLengthProb(const map<size_t, map<size_t, size_t> > &lengths void calcNTLengthProb(const map<size_t, map<size_t, size_t> > &lengths
, size_t total , size_t total
, map<size_t, map<size_t, float> > &probs) , map<size_t, map<size_t, float> > &probs)
@ -417,7 +426,7 @@ void outputNTLengthProbs(ostream &phraseTableFile, const map<size_t, map<size_t,
} }
void outputPhrasePair( vector< PhraseAlignment* > &phrasePair, float totalCount, int distinctCount, ostream &phraseTableFile ) void outputPhrasePair(const PhraseAlignmentCollection &phrasePair, float totalCount, int distinctCount, ostream &phraseTableFile )
{ {
if (phrasePair.size() == 0) return; if (phrasePair.size() == 0) return;
@ -658,3 +667,18 @@ void LexicalTable::load( char *fileName )
} }
cerr << endl; cerr << endl;
} }
std::pair<PhrasePairGroup::Coll::iterator,bool> PhrasePairGroup::insert ( const PhraseAlignmentCollection& obj )
{
std::pair<iterator,bool> ret = m_coll.insert(obj);
if (ret.second)
{ // obj inserted. Also add to sorted vector
const PhraseAlignmentCollection &insertedObj = *ret.first;
m_sortedColl.push_back(&insertedObj);
}
return ret;
}

View File

@ -50,6 +50,8 @@ public:
const SortedColl &GetSortedColl() const const SortedColl &GetSortedColl() const
{ return m_sortedColl; } { return m_sortedColl; }
// Returns the number of elements currently held in m_coll.
size_t GetSize() const
{ return m_coll.size(); }
private: private:
SortedColl m_sortedColl; SortedColl m_sortedColl;

View File

@ -40,7 +40,7 @@ my($_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_DIR, $_TEMP_DIR, $_
my $debug = 0; # debug this script, do not delete any files in debug mode my $debug = 0; # debug this script, do not delete any files in debug mode
# the following line is set at installation time by 'make release'. BEWARE! # the following line is set at installation time by 'make release'. BEWARE!
my $BINDIR="/home/pkoehn/statmt/bin"; my $BINDIR="/home/azouzi/SMT-Engine/Tools/Giza";
$_HELP = 1 $_HELP = 1
unless &GetOptions('root-dir=s' => \$_ROOT_DIR, unless &GetOptions('root-dir=s' => \$_ROOT_DIR,
@ -179,12 +179,23 @@ foreach my $step (@step_conf) {
# supporting binaries from other packages # supporting binaries from other packages
my $MGIZA_MERGE_ALIGN = "$BINDIR/merge_alignment.py"; my $MGIZA_MERGE_ALIGN = "$BINDIR/merge_alignment.py";
my $GIZA; my $GIZA;
my $SNT2COOC;
if(!defined $_MGIZA ){ if(!defined $_MGIZA ){
$GIZA = "$BINDIR/GIZA++"; $GIZA = "$BINDIR/GIZA++";
print STDERR "Using single-thread GIZA\n"; if (-x "$BINDIR/snt2cooc.out") {
$SNT2COOC = "$BINDIR/snt2cooc.out";
} elsif (-x "$BINDIR/snt2cooc") { # Since "snt2cooc.out" and "snt2cooc" work the same
$SNT2COOC = "$BINDIR/snt2cooc";
}
print STDERR "Using single-thread GIZA\n";
} else {
$GIZA = "$BINDIR/mgiza";
if (-x "$BINDIR/snt2cooc") {
$SNT2COOC = "$BINDIR/snt2cooc";
} elsif (-x "$BINDIR/snt2cooc.out") { # Important for users that use MGIZA and copy only the "mgiza" file to $BINDIR
$SNT2COOC = "$BINDIR/snt2cooc.out";
} }
else {
$GIZA = "$BINDIR/mgizapp";
print STDERR "Using multi-thread GIZA\n"; print STDERR "Using multi-thread GIZA\n";
if (!defined($_MGIZA_CPUS)) { if (!defined($_MGIZA_CPUS)) {
$_MGIZA_CPUS=4; $_MGIZA_CPUS=4;
@ -192,7 +203,6 @@ else {
die("ERROR: Cannot find $MGIZA_MERGE_ALIGN") unless (-x $MGIZA_MERGE_ALIGN); die("ERROR: Cannot find $MGIZA_MERGE_ALIGN") unless (-x $MGIZA_MERGE_ALIGN);
} }
my $SNT2COOC = "$BINDIR/snt2cooc.out";
my $MKCLS = "$BINDIR/mkcls"; my $MKCLS = "$BINDIR/mkcls";
# supporting scripts/binaries from this package # supporting scripts/binaries from this package
@ -219,8 +229,8 @@ my $BZCAT = "bzcat";
# do a sanity check to make sure we can find the necessary binaries since # do a sanity check to make sure we can find the necessary binaries since
# these are not installed by default # these are not installed by default
# not needed if we start after step 2 # not needed if we start after step 2
die("ERROR: Cannot find mkcls, GIZA++, & snt2cooc.out in $BINDIR.\nDid you install this script using 'make release'?") unless ((!$STEPS[2]) || die("ERROR: Cannot find mkcls, GIZA++/mgiza, & snt2cooc.out/snt2cooc in $BINDIR.\nDid you install this script using 'make release'?") unless ((!$STEPS[2]) ||
(-x $GIZA && -x $SNT2COOC && -x $MKCLS)); (-x $GIZA && defined($SNT2COOC) && -x $MKCLS));
# set variables to defaults or from options # set variables to defaults or from options
my $___ROOT_DIR = "."; my $___ROOT_DIR = ".";
@ -304,6 +314,8 @@ my $___CONTINUE = 0;
$___CONTINUE = $_CONTINUE if $_CONTINUE; $___CONTINUE = $_CONTINUE if $_CONTINUE;
my $___MAX_PHRASE_LENGTH = "7"; my $___MAX_PHRASE_LENGTH = "7";
$___MAX_PHRASE_LENGTH = "10" if $_HIERARCHICAL;
my $___LEXICAL_WEIGHTING = 1; my $___LEXICAL_WEIGHTING = 1;
my $___LEXICAL_FILE = $___MODEL_DIR."/lex"; my $___LEXICAL_FILE = $___MODEL_DIR."/lex";
$___MAX_PHRASE_LENGTH = $_MAX_PHRASE_LENGTH if $_MAX_PHRASE_LENGTH; $___MAX_PHRASE_LENGTH = $_MAX_PHRASE_LENGTH if $_MAX_PHRASE_LENGTH;
@ -1040,8 +1052,13 @@ sub run_single_snt2cooc {
my($dir,$e,$f,$vcb_e,$vcb_f,$train) = @_; my($dir,$e,$f,$vcb_e,$vcb_f,$train) = @_;
print STDERR "(2.1a) running snt2cooc $f-$e @ ".`date`."\n"; print STDERR "(2.1a) running snt2cooc $f-$e @ ".`date`."\n";
safesystem("mkdir -p $dir") or die("ERROR"); safesystem("mkdir -p $dir") or die("ERROR");
if ($SNT2COOC eq "$BINDIR/snt2cooc.out") {
print "$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc\n"; print "$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc\n";
safesystem("$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc") or die("ERROR"); safesystem("$SNT2COOC $vcb_e $vcb_f $train > $dir/$f-$e.cooc") or die("ERROR");
} else {
print "$SNT2COOC $dir/$f-$e.cooc $vcb_e $vcb_f $train\n";
safesystem("$SNT2COOC $dir/$f-$e.cooc $vcb_e $vcb_f $train") or die("ERROR");
}
} }
### (3) CREATE WORD ALIGNMENT FROM GIZA ALIGNMENTS ### (3) CREATE WORD ALIGNMENT FROM GIZA ALIGNMENTS
@ -1308,6 +1325,8 @@ sub extract_phrase {
my $cmd; my $cmd;
if ($_HIERARCHICAL) if ($_HIERARCHICAL)
{ {
my $max_length = &get_max_phrase_length($table_number);
$cmd = "$RULE_EXTRACT $alignment_file_e $alignment_file_f $alignment_file_a $extract_file"; $cmd = "$RULE_EXTRACT $alignment_file_e $alignment_file_f $alignment_file_a $extract_file";
$cmd .= " --GlueGrammar $___GLUE_GRAMMAR_FILE" if $_GLUE_GRAMMAR; $cmd .= " --GlueGrammar $___GLUE_GRAMMAR_FILE" if $_GLUE_GRAMMAR;
$cmd .= " --UnknownWordLabel $_UNKNOWN_WORD_LABEL_FILE" if $_TARGET_SYNTAX && defined($_UNKNOWN_WORD_LABEL_FILE); $cmd .= " --UnknownWordLabel $_UNKNOWN_WORD_LABEL_FILE" if $_TARGET_SYNTAX && defined($_UNKNOWN_WORD_LABEL_FILE);
@ -1315,6 +1334,7 @@ sub extract_phrase {
$cmd .= " --SourceSyntax" if $_SOURCE_SYNTAX; $cmd .= " --SourceSyntax" if $_SOURCE_SYNTAX;
$cmd .= " --TargetSyntax" if $_TARGET_SYNTAX; $cmd .= " --TargetSyntax" if $_TARGET_SYNTAX;
} }
$cmd .= " --MaxSpan $max_length";
$cmd .= " ".$_EXTRACT_OPTIONS if defined($_EXTRACT_OPTIONS); $cmd .= " ".$_EXTRACT_OPTIONS if defined($_EXTRACT_OPTIONS);
} }
else else