mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-08-16 15:00:33 +03:00
merged master into this branch
This commit is contained in:
commit
8f861a7f9e
15
Jamroot
15
Jamroot
@ -114,10 +114,24 @@ requirements += [ option.get "with-mm" : : <define>PT_UG ] ;
|
||||
requirements += [ option.get "with-mm" : : <define>MAX_NUM_FACTORS=4 ] ;
|
||||
requirements += [ option.get "unlabelled-source" : : <define>UNLABELLED_SOURCE ] ;
|
||||
|
||||
if [ option.get "with-lbllm" ] {
|
||||
external-lib boost_serialization ;
|
||||
external-lib gomp ;
|
||||
requirements += <library>boost_serialization ;
|
||||
requirements += <library>gomp ;
|
||||
}
|
||||
|
||||
if [ option.get "with-cmph" ] {
|
||||
requirements += <define>HAVE_CMPH ;
|
||||
}
|
||||
|
||||
if [ option.get "with-probing-pt" : : "yes" ]
|
||||
{
|
||||
external-lib boost_serialization ;
|
||||
requirements += <define>HAVE_PROBINGPT ;
|
||||
requirements += <library>boost_serialization ;
|
||||
}
|
||||
|
||||
project : default-build
|
||||
<threading>multi
|
||||
<warnings>on
|
||||
@ -145,6 +159,7 @@ build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses
|
||||
if [ option.get "with-mm" : : "yes" ]
|
||||
{
|
||||
alias mm :
|
||||
moses/TranslationModel/UG//ptable-lookup
|
||||
moses/TranslationModel/UG/mm//mtt-build
|
||||
moses/TranslationModel/UG/mm//mtt-dump
|
||||
moses/TranslationModel/UG/mm//symal2mam
|
||||
|
@ -66,10 +66,9 @@ int main (int argc, char * const argv[])
|
||||
|
||||
PhraseNode &rootNode = onDiskWrapper.GetRootSourceNode();
|
||||
size_t lineNum = 0;
|
||||
char line[100000];
|
||||
string line;
|
||||
|
||||
//while(getline(inStream, line))
|
||||
while(inStream.getline(line, 100000)) {
|
||||
while(getline(inStream, line)) {
|
||||
lineNum++;
|
||||
if (lineNum%1000 == 0) cerr << "." << flush;
|
||||
if (lineNum%10000 == 0) cerr << ":" << flush;
|
||||
@ -107,8 +106,13 @@ bool Flush(const OnDiskPt::SourcePhrase *prevSourcePhrase, const OnDiskPt::Sourc
|
||||
return ret;
|
||||
}
|
||||
|
||||
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
|
||||
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, const std::string &lineStr, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
|
||||
{
|
||||
char line[lineStr.size() + 1];
|
||||
strcpy(line, lineStr.c_str());
|
||||
|
||||
stringstream sparseFeatures, property;
|
||||
|
||||
size_t scoreInd = 0;
|
||||
|
||||
// MAIN LOOP
|
||||
@ -118,6 +122,7 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
|
||||
2 = scores
|
||||
3 = align
|
||||
4 = count
|
||||
7 = properties
|
||||
*/
|
||||
char *tok = strtok (line," ");
|
||||
OnDiskPt::PhrasePtr out(new Phrase());
|
||||
@ -148,29 +153,20 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
|
||||
targetPhrase.CreateAlignFromString(tok);
|
||||
break;
|
||||
}
|
||||
case 4:
|
||||
++stage;
|
||||
break;
|
||||
/* case 5: {
|
||||
// count info. Only store the 2nd one
|
||||
float val = Moses::Scan<float>(tok);
|
||||
misc[0] = val;
|
||||
++stage;
|
||||
break;
|
||||
}*/
|
||||
case 4: {
|
||||
// store only the 3rd one (rule count)
|
||||
float val = Moses::Scan<float>(tok);
|
||||
misc[0] = val;
|
||||
break;
|
||||
}
|
||||
case 5: {
|
||||
// count info. Only store the 2nd one
|
||||
//float val = Moses::Scan<float>(tok);
|
||||
//misc[0] = val;
|
||||
++stage;
|
||||
// sparse features
|
||||
sparseFeatures << tok << " ";
|
||||
break;
|
||||
}
|
||||
case 6: {
|
||||
// store only the 3rd one (rule count)
|
||||
float val = Moses::Scan<float>(tok);
|
||||
misc[0] = val;
|
||||
++stage;
|
||||
break;
|
||||
property << tok << " ";
|
||||
break;
|
||||
}
|
||||
default:
|
||||
cerr << "ERROR in line " << line << endl;
|
||||
@ -183,6 +179,8 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
|
||||
} // while (tok != NULL)
|
||||
|
||||
assert(scoreInd == numScores);
|
||||
targetPhrase.SetSparseFeatures(Moses::Trim(sparseFeatures.str()));
|
||||
targetPhrase.SetProperty(Moses::Trim(property.str()));
|
||||
targetPhrase.SortAlign();
|
||||
return out;
|
||||
} // Tokenize()
|
||||
|
@ -29,7 +29,7 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
|
||||
, const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
|
||||
, OnDiskPt::OnDiskWrapper &onDiskWrapper, int retSourceTarget);
|
||||
OnDiskPt::PhrasePtr Tokenize(OnDiskPt::SourcePhrase &sourcePhrase, OnDiskPt::TargetPhrase &targetPhrase
|
||||
, char *line, OnDiskPt::OnDiskWrapper &onDiskWrapper
|
||||
, const std::string &lineStr, OnDiskPt::OnDiskWrapper &onDiskWrapper
|
||||
, int numScores
|
||||
, std::vector<float> &misc);
|
||||
|
||||
|
@ -31,7 +31,7 @@ using namespace std;
|
||||
namespace OnDiskPt
|
||||
{
|
||||
|
||||
int OnDiskWrapper::VERSION_NUM = 5;
|
||||
int OnDiskWrapper::VERSION_NUM = 7;
|
||||
|
||||
OnDiskWrapper::OnDiskWrapper()
|
||||
{
|
||||
|
@ -162,10 +162,14 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
|
||||
// allocate mem
|
||||
size_t numScores = onDiskWrapper.GetNumScores()
|
||||
,numAlign = GetAlign().size();
|
||||
size_t sparseFeatureSize = m_sparseFeatures.size();
|
||||
size_t propSize = m_property.size();
|
||||
|
||||
size_t memNeeded = sizeof(UINT64); // file pos (phrase id)
|
||||
memNeeded += sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign; // align
|
||||
memNeeded += sizeof(float) * numScores; // scores
|
||||
size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
|
||||
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
|
||||
+ sizeof(float) * numScores // scores
|
||||
+ sizeof(UINT64) + sparseFeatureSize // sparse features string
|
||||
+ sizeof(UINT64) + propSize; // property string
|
||||
|
||||
char *mem = (char*) malloc(memNeeded);
|
||||
//memset(mem, 0, memNeeded);
|
||||
@ -183,11 +187,33 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
|
||||
// scores
|
||||
memUsed += WriteScoresToMemory(mem + memUsed);
|
||||
|
||||
// sparse features
|
||||
memUsed += WriteStringToMemory(mem + memUsed, m_sparseFeatures);
|
||||
|
||||
// property string
|
||||
memUsed += WriteStringToMemory(mem + memUsed, m_property);
|
||||
|
||||
//DebugMem(mem, memNeeded);
|
||||
assert(memNeeded == memUsed);
|
||||
return mem;
|
||||
}
|
||||
|
||||
size_t TargetPhrase::WriteStringToMemory(char *mem, const std::string &str) const
|
||||
{
|
||||
size_t memUsed = 0;
|
||||
UINT64 *memTmp = (UINT64*) mem;
|
||||
|
||||
size_t strSize = str.size();
|
||||
memTmp[0] = strSize;
|
||||
memUsed += sizeof(UINT64);
|
||||
|
||||
const char *charStr = str.c_str();
|
||||
memcpy(mem + memUsed, charStr, strSize);
|
||||
memUsed += strSize;
|
||||
|
||||
return memUsed;
|
||||
}
|
||||
|
||||
size_t TargetPhrase::WriteAlignToMemory(char *mem) const
|
||||
{
|
||||
size_t memUsed = 0;
|
||||
@ -279,6 +305,13 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
|
||||
|
||||
// scores
|
||||
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
|
||||
|
||||
// sparse features
|
||||
ret->GetScoreBreakdown().Assign(&phraseDict, m_sparseFeatures);
|
||||
|
||||
// property
|
||||
ret->SetProperties(m_property);
|
||||
|
||||
ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply());
|
||||
|
||||
return ret;
|
||||
@ -299,9 +332,36 @@ UINT64 TargetPhrase::ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPC
|
||||
memUsed += ReadScoresFromFile(fileTPColl);
|
||||
assert((memUsed + filePos) == (UINT64)fileTPColl.tellg());
|
||||
|
||||
// sparse features
|
||||
memUsed += ReadStringFromFile(fileTPColl, m_sparseFeatures);
|
||||
|
||||
// properties
|
||||
memUsed += ReadStringFromFile(fileTPColl, m_property);
|
||||
|
||||
return memUsed;
|
||||
}
|
||||
|
||||
// Read a string stored as a UINT64 byte count followed by that many raw bytes.
//
// fileTPColl  stream positioned at the length prefix
// outStr      receives the string; it is emptied when the stored length is 0
// Returns the number of bytes consumed (length prefix plus payload).
UINT64 TargetPhrase::ReadStringFromFile(std::fstream &fileTPColl, std::string &outStr)
{
  // Initialised so that a failed read yields an empty string rather than an
  // indeterminate length.
  UINT64 strSize = 0;
  fileTPColl.read(reinterpret_cast<char*>(&strSize), sizeof(UINT64));
  UINT64 bytesRead = sizeof(UINT64);

  // Read straight into the string's own buffer: no temporary allocation, and
  // the full stored length is kept even if the payload contains NUL bytes.
  outStr.resize(static_cast<size_t>(strSize));
  if (strSize) {
    fileTPColl.read(&outStr[0], static_cast<std::streamsize>(strSize));
    bytesRead += strSize;
  }

  return bytesRead;
}
|
||||
|
||||
UINT64 TargetPhrase::ReadFromFile(std::fstream &fileTP)
|
||||
{
|
||||
UINT64 bytesRead = 0;
|
||||
|
@ -50,15 +50,18 @@ class TargetPhrase: public Phrase
|
||||
protected:
|
||||
AlignType m_align;
|
||||
PhrasePtr m_sourcePhrase;
|
||||
std::string m_sparseFeatures, m_property;
|
||||
|
||||
std::vector<float> m_scores;
|
||||
UINT64 m_filePos;
|
||||
|
||||
size_t WriteAlignToMemory(char *mem) const;
|
||||
size_t WriteScoresToMemory(char *mem) const;
|
||||
size_t WriteStringToMemory(char *mem, const std::string &str) const;
|
||||
|
||||
UINT64 ReadAlignFromFile(std::fstream &fileTPColl);
|
||||
UINT64 ReadScoresFromFile(std::fstream &fileTPColl);
|
||||
UINT64 ReadStringFromFile(std::fstream &fileTPColl, std::string &outStr);
|
||||
|
||||
public:
|
||||
TargetPhrase() {
|
||||
@ -110,6 +113,15 @@ public:
|
||||
|
||||
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
|
||||
|
||||
void SetProperty(const std::string &value)
|
||||
{
|
||||
m_property = value;
|
||||
}
|
||||
|
||||
void SetSparseFeatures(const std::string &value)
|
||||
{
|
||||
m_sparseFeatures = value;
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -104,14 +104,20 @@ void Word::ConvertToMoses(
|
||||
Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
|
||||
overwrite = Moses::Word(m_isNonTerminal);
|
||||
|
||||
// TODO: this conversion should have been done at load time.
|
||||
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
|
||||
|
||||
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
|
||||
UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
|
||||
overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
|
||||
if (m_isNonTerminal) {
|
||||
const std::string &tok = vocab.GetString(m_vocabId);
|
||||
overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
|
||||
}
|
||||
else {
|
||||
// TODO: this conversion should have been done at load time.
|
||||
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
|
||||
|
||||
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
|
||||
UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
|
||||
overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
|
||||
}
|
||||
UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
|
||||
}
|
||||
UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
|
||||
}
|
||||
|
||||
int Word::Compare(const Word &compare) const
|
||||
|
122
contrib/moses-speedtest/README.md
Normal file
122
contrib/moses-speedtest/README.md
Normal file
@ -0,0 +1,122 @@
|
||||
# Moses speedtesting framework
|
||||
|
||||
### Description
|
||||
|
||||
This is an automatic test framework that is designed to test the day to day performance changes in Moses.
|
||||
|
||||
### Set up
|
||||
|
||||
#### Set up a Moses repo
|
||||
Set up a Moses repo and build it with the desired configuration.
|
||||
```bash
|
||||
git clone https://github.com/moses-smt/mosesdecoder.git
|
||||
cd mosesdecoder
|
||||
./bjam -j10 --with-cmph=/usr/include/
|
||||
```
|
||||
You need to build Moses first, so that the testsuite knows what command you want it to use when rebuilding against newer revisions.
|
||||
|
||||
#### Create a parent directory.
|
||||
Create a parent directory where the **runtests.py** and related scripts and configuration file should reside.
|
||||
This should also be the location of the TEST_DIR and TEST_LOG_DIR as explained in the next section.
|
||||
|
||||
#### Set up a global configuration file.
|
||||
You need a configuration file for the testsuite. A sample configuration file is provided in **testsuite\_config**
|
||||
<pre>
|
||||
MOSES_REPO_PATH: /home/moses-speedtest/moses-standard/mosesdecoder
|
||||
DROP_CACHES_COMM: sys_drop_caches 3
|
||||
TEST_DIR: /home/moses-speedtest/phrase_tables/tests
|
||||
TEST_LOG_DIR: /home/moses-speedtest/phrase_tables/testlogs
|
||||
BASEBRANCH: RELEASE-2.1.1
|
||||
</pre>
|
||||
|
||||
The _MOSES\_REPO\_PATH_ is the place where you have set up and built moses.
|
||||
The _DROP\_CACHES\_COMM_ is the command that will be used to drop caches. It should run without needing root access.
|
||||
_TEST\_DIR_ is the directory where all the tests will reside.
|
||||
_TEST\_LOG\_DIR_ is the directory where the performance logs will be gathered. It should be created before running the testsuite for the first time.
|
||||
_BASEBRANCH_ is the branch against which all new tests will be compared. It should normally be set to be the latest Moses stable release.
|
||||
|
||||
### Creating tests
|
||||
|
||||
In order to create a test one should go into the TEST_DIR and create a new folder. That folder will be used for the name of the test.
|
||||
Inside that folder one should place a configuration file named **config**. The naming is mandatory.
|
||||
An example of such a configuration file is **test\_config**
|
||||
|
||||
<pre>
|
||||
Command: moses -f ... -i fff #Looks for the command in the /bin directory of the repo specified in the testsuite_config
|
||||
LDPRE: ldpreloads #Comma separated LD_LIBRARY_PATH:/,
|
||||
Variants: vanilla, cached, ldpre #Can't have cached without ldpre or vanilla
|
||||
</pre>
|
||||
|
||||
The _Command:_ line specifies the executable (which is looked up in the /bin directory of the repo.) and any arguments necessary. Before running the test, the script cds to the current test directory so you can use relative paths.
|
||||
The _LDPRE:_ specifies if tests should be run with any LD\_PRELOAD flags.
|
||||
The _Variants:_ line specifies which types of tests should be run. This particular line will run the following tests:
|
||||
1. A Vanilla test meaning just the command after _Command_ will be issued.
|
||||
2. A vanilla cached test meaning that after the vanilla test, the test will be run again without dropping caches in order to benchmark performance on cached filesystem.
|
||||
3. A test in which the _Command_ is run with LD\_PRELOAD set, once for each of the comma-separated libraries listed under _LDPRE_.
|
||||
4. A cached version of all LD_PRELOAD tests.
|
||||
|
||||
### Running tests.
|
||||
Running the tests is done through the **runtests.py** script.
|
||||
|
||||
#### Running all tests.
|
||||
To run all tests with the base branch and the latest revision (and generate new basebranch test data if it is missing), run:
|
||||
```bash
|
||||
python3 runtests.py -c testsuite_config
|
||||
```
|
||||
|
||||
#### Running specific tests.
|
||||
The script allows the user to manually run a particular test or to test against a specific branch or revision:
|
||||
<pre>
|
||||
moses-speedtest@crom:~/phrase_tables$ python3 runtests.py --help
|
||||
usage: runtests.py [-h] -c CONFIGFILE [-s SINGLETESTDIR] [-r REVISION]
|
||||
[-b BRANCH]
|
||||
|
||||
A python based speedtest suite for moses.
|
||||
|
||||
optional arguments:
|
||||
-h, --help show this help message and exit
|
||||
-c CONFIGFILE, --configfile CONFIGFILE
|
||||
Specify test config file
|
||||
-s SINGLETESTDIR, --singletest SINGLETESTDIR
|
||||
Single test name directory. Specify directory name,
|
||||
not full path!
|
||||
-r REVISION, --revision REVISION
|
||||
Specify a specific revison for the test.
|
||||
-b BRANCH, --branch BRANCH
|
||||
Specify a branch for the test.
|
||||
</pre>
|
||||
|
||||
### Generating HTML report.
|
||||
To generate a summary of the test results use the **html\_gen.py** script. It places a file named *index.html* in the current script directory.
|
||||
```bash
|
||||
python3 html_gen.py testsuite_config
|
||||
```
|
||||
You should use the generated file with the **style.css** file provided in the html directory.
|
||||
|
||||
### Command line regression testing.
|
||||
Alternatively you could check for regressions from the command line using the **check\_for\_regression.py** script:
|
||||
```bash
|
||||
python3 check_for_regression.py TESTLOGS_DIRECTORY
|
||||
```
|
||||
|
||||
Alternatively, the results of all tests are logged inside the specified TESTLOGS directory, so you can manually check them for additional information such as date, time, revision, branch, etc.
|
||||
|
||||
### Create a cron job:
|
||||
Create a cron job to run the tests daily and generate an html report. An example *cronjob* is available.
|
||||
```bash
|
||||
#!/bin/sh
|
||||
cd /home/moses-speedtest/phrase_tables
|
||||
|
||||
python3 runtests.py -c testsuite_config #Run the tests.
|
||||
python3 html_gen.py testsuite_config #Generate html
|
||||
|
||||
cp index.html /fs/thor4/html/www/speed-test/ #Update the html
|
||||
```
|
||||
|
||||
Place the script in _/etc/cron.daily_ for daily testing.
|
||||
|
||||
###### Author
|
||||
Nikolay Bogoychev, 2014
|
||||
|
||||
###### License
|
||||
This software is licensed under the LGPL.
|
63
contrib/moses-speedtest/check_for_regression.py
Normal file
63
contrib/moses-speedtest/check_for_regression.py
Normal file
@ -0,0 +1,63 @@
|
||||
"""Checks if any of the latests tests has performed considerably different than
|
||||
the previous ones. Takes the log directory as an argument."""
|
||||
import os
|
||||
import sys
|
||||
from testsuite_common import Result, processLogLine, bcolors, getLastTwoLines
|
||||
|
||||
# First command-line argument: the directory holding the per-test log files.
LOGDIR = sys.argv[1]
# Threshold that a test's change is compared against. Defaults to 5; when
# exactly two arguments are given, the second one overrides it.
PERCENTAGE = float(sys.argv[2]) if len(sys.argv) == 3 else 5
|
||||
|
||||
def printResults(regressed, better, unchanged, firsttime):
    """Print one coloured line per test, grouped by outcome.

    regressed, better and unchanged hold result objects (testname, previous,
    current, percentage, revision); firsttime holds parsed log lines
    (testname, real, revision). The groups are printed in the order
    regressed, unchanged, better, firsttime; a blank separator follows the
    regressed and the unchanged groups when they are non-empty.
    """
    def changed(colour, label, res):
        # Shared layout of the REGRESSION and IMPROVEMENT lines.
        return colour + label + res.testname + " Was: " + str(res.previous) \
            + " Is: " + str(res.current) + " Change: " \
            + str(abs(res.percentage)) + "%. Revision: " + res.revision \
            + bcolors.ENDC

    if regressed != []:
        for res in regressed:
            print(changed(bcolors.RED, "REGRESSION! ", res))
        print('\n')

    if unchanged != []:
        for res in unchanged:
            print(bcolors.BLUE + "UNCHANGED: " + res.testname + " Revision: "
                  + res.revision + bcolors.ENDC)
        print('\n')

    if better != []:
        for res in better:
            print(changed(bcolors.GREEN, "IMPROVEMENT! ", res))

    if firsttime != []:
        for entry in firsttime:
            print(bcolors.PURPLE + "First time test! " + entry.testname
                  + " Took: " + str(entry.real) + " seconds. Revision: "
                  + entry.revision + bcolors.ENDC)
|
||||
|
||||
|
||||
all_files = os.listdir(LOGDIR)
regressed = []
better = []
unchanged = []
firsttime = []

#Go through all log files and sort every test by how its latest run compares
#with the run before it.
for logfile in all_files:
    (line1, line2) = getLastTwoLines(logfile, LOGDIR)
    log1 = processLogLine(line1)
    if line2 == '\n': # Empty line, only one test ever run
        firsttime.append(log1)
        continue
    log2 = processLogLine(line2)
    res = Result(log1.testname, log1.real, log2.real, log2.revision,
                 log2.branch, log1.revision, log1.branch)
    #Both directions are judged on the same quantity, the relative change that
    #printResults reports. The improvement branch used to test res.change.
    #NOTE(review): confirm in testsuite_common that Result.percentage uses the
    #same scale as PERCENTAGE.
    if res.percentage < -PERCENTAGE:
        regressed.append(res)
    elif res.percentage > PERCENTAGE:
        better.append(res)
    else:
        unchanged.append(res)

printResults(regressed, better, unchanged, firsttime)
|
7
contrib/moses-speedtest/cronjob
Normal file
7
contrib/moses-speedtest/cronjob
Normal file
@ -0,0 +1,7 @@
|
||||
#!/bin/sh
# Daily speed-test job: run the tests, regenerate the HTML report, publish it.

# Stop if the working directory is missing; otherwise the commands below
# would run from the wrong directory.
cd /home/moses-speedtest/phrase_tables || exit 1

python3 runtests.py -c testsuite_config #Run the tests.
python3 html_gen.py testsuite_config #Generate html

cp index.html /fs/thor4/html/www/speed-test/ #Update the html
|
5
contrib/moses-speedtest/helpers/README.md
Normal file
5
contrib/moses-speedtest/helpers/README.md
Normal file
@ -0,0 +1,5 @@
|
||||
### Helpers
|
||||
|
||||
This is a python script that basically gives you the equivalent of:
|
||||
```echo 3 > /proc/sys/vm/drop_caches```
|
||||
You need to set it up so it is executed with root access without needing a password so that the tests can be automated.
|
22
contrib/moses-speedtest/helpers/sys_drop_caches.py
Normal file
22
contrib/moses-speedtest/helpers/sys_drop_caches.py
Normal file
@ -0,0 +1,22 @@
|
||||
#!/usr/bin/spython
"""Write a flush level ("1", "2" or "3") to /proc/sys/vm/drop_caches.

Usage: sys_drop_caches <1|2|3>

Exits with status 0 on success and 1 on any failure. The exception syntax
used here is accepted by both Python 2.6+ and Python 3.
"""
from sys import argv, stderr, exit
from os import linesep as ls

procfile = "/proc/sys/vm/drop_caches"
options = ["1", "2", "3"]
flush_type = None
try:
    # Only the first character of the argument is looked at. A missing
    # argument raises IndexError, which is reported like an invalid one.
    flush_type = argv[1][0:1]
    if flush_type not in options:
        raise IndexError("not in options")
    with open(procfile, "w") as f:
        f.write("%s%s" % (flush_type, ls))
    exit(0)
except IndexError:
    stderr.write("Argument %s required.%s" % (options, ls))
except IOError:
    stderr.write("Error writing to file.%s" % ls)
except Exception:
    # SystemExit raised by exit(0) is not an Exception and passes through.
    stderr.write("Unknown Error.%s" % ls)

exit(1)
|
||||
|
5
contrib/moses-speedtest/html/README.md
Normal file
5
contrib/moses-speedtest/html/README.md
Normal file
@ -0,0 +1,5 @@
|
||||
### HTML files.
|
||||
|
||||
_index.html_ is a sample generated file by this testsuite.
|
||||
|
||||
_style.css_ should be placed in the html directory in which _index.html_ will be placed in order to visualize the test results in a browser.
|
32
contrib/moses-speedtest/html/index.html
Normal file
32
contrib/moses-speedtest/html/index.html
Normal file
File diff suppressed because one or more lines are too long
21
contrib/moses-speedtest/html/style.css
Normal file
21
contrib/moses-speedtest/html/style.css
Normal file
@ -0,0 +1,21 @@
|
||||
table,th,td
|
||||
{
|
||||
border:1px solid black;
|
||||
border-collapse:collapse
|
||||
}
|
||||
|
||||
tr:nth-child(odd) {
|
||||
background-color: Gainsboro;
|
||||
}
|
||||
|
||||
.better {
|
||||
color: Green;
|
||||
}
|
||||
|
||||
.worse {
|
||||
color: Red;
|
||||
}
|
||||
|
||||
.unchanged {
|
||||
color: SkyBlue;
|
||||
}
|
192
contrib/moses-speedtest/html_gen.py
Normal file
192
contrib/moses-speedtest/html_gen.py
Normal file
@ -0,0 +1,192 @@
|
||||
"""Generates HTML page containing the testresults"""
|
||||
from testsuite_common import Result, processLogLine, getLastTwoLines
|
||||
from runtests import parse_testconfig
|
||||
import os
|
||||
import sys
|
||||
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
HTML_HEADING = """<html>
|
||||
<head>
|
||||
<title>Moses speed testing</title>
|
||||
<link rel="stylesheet" type="text/css" href="style.css"></head><body>"""
|
||||
HTML_ENDING = "</table></body></html>\n"
|
||||
|
||||
TABLE_HEADING = """<table><tr class="heading">
|
||||
<th>Date</th>
|
||||
<th>Time</th>
|
||||
<th>Testname</th>
|
||||
<th>Revision</th>
|
||||
<th>Branch</th>
|
||||
<th>Time</th>
|
||||
<th>Prevtime</th>
|
||||
<th>Prevrev</th>
|
||||
<th>Change (%)</th>
|
||||
<th>Time (Basebranch)</th>
|
||||
<th>Change (%, Basebranch)</th>
|
||||
<th>Time (Days -2)</th>
|
||||
<th>Change (%, Days -2)</th>
|
||||
<th>Time (Days -3)</th>
|
||||
<th>Change (%, Days -3)</th>
|
||||
<th>Time (Days -4)</th>
|
||||
<th>Change (%, Days -4)</th>
|
||||
<th>Time (Days -5)</th>
|
||||
<th>Change (%, Days -5)</th>
|
||||
<th>Time (Days -6)</th>
|
||||
<th>Change (%, Days -6)</th>
|
||||
<th>Time (Days -7)</th>
|
||||
<th>Change (%, Days -7)</th>
|
||||
<th>Time (Days -14)</th>
|
||||
<th>Change (%, Days -14)</th>
|
||||
<th>Time (Years -1)</th>
|
||||
<th>Change (%, Years -1)</th>
|
||||
</tr>"""
|
||||
|
||||
def get_prev_days(date, numdays):
    """Return the day that lies numdays days before date.

    Both the argument and the result are strings in DD.MM.YYYY form, which is
    the form used to look dates up in the log files.
    """
    day_format = '%d.%m.%Y'
    earlier = datetime.strptime(date, day_format) - timedelta(days=numdays)
    return earlier.strftime(day_format)
|
||||
|
||||
def gather_necessary_lines(logfile, date):
    """Collect the log entries recorded 2-7, 14 and 365 days before date.

    logfile is the path of one test's log file and date a DD.MM.YYYY string.
    Returns a dict keyed by the past date string; each value is a tuple
    (offset_label, parsed_line) where offset_label is e.g. '-2' or '-365' and
    parsed_line is the processLogLine() result of the last line in the file
    that starts with that date, or None when the file has no such line.
    """
    dates = {}
    for offset in (2, 3, 4, 5, 6, 7, 14, 365):
        dates[get_prev_days(date, offset)] = ('-' + str(offset), None)

    # The with-block closes the file even if parsing a line raises.
    with open(logfile, 'r') as openfile:
        for line in openfile:
            fields = line.split()
            if not fields:
                continue  # blank line, nothing to match
            logdate = fields[0]
            if logdate in dates:
                dates[logdate] = (dates[logdate][0], processLogLine(line))
    return dates
|
||||
|
||||
def append_date_to_table(resline):
    """Return the two table cells for a comparison against a past date.

    The first cell holds resline.previous, the second holds
    resline.percentage and carries a CSS class chosen from its value.
    """
    change = resline.percentage
    # Thresholds are +/-0.05, which the original comments describe as 5%.
    if change > 0.05:
        css_class = 'better'
    elif change < -0.05:
        css_class = 'worse'
    else:
        css_class = 'unchanged'
    return ''.join(['<td>', str(resline.previous), '</td>',
                    '<td class="', css_class, '">', str(change), '</td>'])
|
||||
|
||||
def compare_rev(filename, rev1, rev2, branch1=False, branch2=False):
    """Compare the test results recorded for two entries of one log file.

    rev1 is the base version and rev2 the later one. Each is matched against
    the revision column of a log line unless the corresponding branch flag is
    True, in which case it is matched against the branch column.

    Returns a Result built from the two matching lines. Raises ValueError
    when either entry cannot be found in the file.
    """
    #In the log file the index of the revision is 2 but the index of
    #the branch is 12.
    firstidx = 12 if branch1 == True else 2
    secondidx = 12 if branch2 == True else 2

    rev1line = ''
    rev2line = ''
    # The with-block closes the file on every exit path.
    with open(filename, 'r') as resfile:
        for line in resfile:
            fields = line.split()
            # Blank or truncated lines lack the wanted column and cannot match.
            if len(fields) > firstidx and rev1 == fields[firstidx]:
                rev1line = line
            elif len(fields) > secondidx and rev2 == fields[secondidx]:
                rev2line = line
            if rev1line != '' and rev2line != '':
                break

    if rev1line == '':
        raise ValueError('Revision ' + rev1 + " was not found!")
    if rev2line == '':
        raise ValueError('Revision ' + rev2 + " was not found!")

    logLine1 = processLogLine(rev1line)
    logLine2 = processLogLine(rev2line)
    return Result(logLine1.testname, logLine1.real, logLine2.real,
                  logLine2.revision, logLine2.branch, logLine1.revision,
                  logLine1.branch)
|
||||
|
||||
def _change_cell(percentage):
    """Return the <td> cell for a change value, coloured by its CSS class."""
    if percentage > 0.05: #Improvement (the original comments call this 5%)
        css_class = 'better'
    elif percentage < -0.05: #Regression
        css_class = 'worse'
    else:
        css_class = 'unchanged'
    return '<td class="' + css_class + '">' + str(percentage) + '</td>'


def produce_html(path, global_config):
    """Produces html file for the report.

    path is the file to write. global_config supplies testlogs (the log
    directory) and basebranch. One table row is produced per log file:
    the latest run against the previous run, against the base branch, and
    against the runs 2-7, 14 and 365 days earlier where they exist.
    """
    rows = []
    #Shown above the table. These defaults are only used when the log
    #directory is empty, where the old code failed on an unbound variable.
    base_branch = global_config.basebranch
    base_revision = 'N/A'
    past_dates = list(range(2, 8)) + [14, 365] # 365: the 1 year ago day

    for filenam in os.listdir(global_config.testlogs):
        #Compare the newest two lines of the log
        (ll1, ll2) = getLastTwoLines(filenam, global_config.testlogs)
        logLine1 = processLogLine(ll1)
        logLine2 = processLogLine(ll2) #This is the line from the latest revision

        res1 = Result(logLine1.testname, logLine1.real, logLine2.real,
                      logLine2.revision, logLine2.branch, logLine1.revision,
                      logLine1.branch)
        cells = ['<tr><td>' + logLine2.date + '</td><td>' + logLine2.time
                 + '</td><td>' + res1.testname + '</td><td>'
                 + res1.revision[:10] + '</td><td>' + res1.branch
                 + '</td><td>' + str(res1.current) + '</td><td>'
                 + str(res1.previous) + '</td><td>' + res1.prevrev[:10]
                 + '</td>',
                 _change_cell(res1.percentage)]

        #Get comparison against the base version
        logpath = os.path.join(global_config.testlogs, filenam)
        res2 = compare_rev(logpath, global_config.basebranch, res1.revision,
                           branch1=True)
        cells.append('<td>' + str(res2.previous) + '</td>')
        cells.append(_change_cell(res2.percentage))
        #The heading reports the base entry of the last log file processed.
        base_branch = res2.prevbranch
        base_revision = res2.prevrev

        #Add comparisons against earlier dates if they exist
        linesdict = gather_necessary_lines(logpath, logLine2.date)
        for days in past_dates:
            past_entry = linesdict[get_prev_days(logLine2.date, days)][1]
            if past_entry is not None:
                restemp = Result(past_entry.testname, past_entry.real,
                                 logLine2.real, logLine2.revision,
                                 logLine2.branch, past_entry.revision,
                                 past_entry.branch)
                cells.append(append_date_to_table(restemp))
            else:
                cells.append('<td>N/A</td><td>N/A</td>')

        cells.append('</tr>') #End row
        rows.append(''.join(cells))

    #Write out the file
    basebranch_info = '<text><b>Basebranch:</b> ' + base_branch + \
        ' <b>Revision:</b> ' + base_revision + '</text>'
    writeoutstr = HTML_HEADING + basebranch_info + TABLE_HEADING + \
        ''.join(rows) + HTML_ENDING
    with open(path, 'w') as writefile:
        writefile.write(writeoutstr)
|
||||
|
||||
if __name__ == '__main__':
    #The only argument is the testsuite configuration file.
    if len(sys.argv) < 2:
        sys.exit('usage: python3 html_gen.py TESTSUITE_CONFIG')
    CONFIG = parse_testconfig(sys.argv[1])
    #The report is written to index.html in the current directory.
    produce_html('index.html', CONFIG)
|
293
contrib/moses-speedtest/runtests.py
Normal file
293
contrib/moses-speedtest/runtests.py
Normal file
@ -0,0 +1,293 @@
|
||||
"""Given a config file, runs tests"""
|
||||
import os
|
||||
import subprocess
|
||||
import time
|
||||
from argparse import ArgumentParser
|
||||
from testsuite_common import processLogLine
|
||||
|
||||
def parse_cmd():
    """Parse the command line arguments of the speedtest runner.

    Returns the argparse namespace with the attributes ``configfile``
    (required), ``singletestdir``, ``revision`` and ``branch``; the last
    three are ``None`` when the corresponding option is not given.
    """
    parser = ArgumentParser(
        description="A python based speedtest suite for moses.")
    parser.add_argument("-c", "--configfile", action="store",
                        dest="configfile", required=True,
                        help="Specify test config file")
    # Adjacent string literals are used instead of a backslash continuation
    # inside the literal, so no source indentation leaks into the help text.
    parser.add_argument("-s", "--singletest", action="store",
                        dest="singletestdir", default=None,
                        help="Single test name directory. Specify directory "
                             "name, not full path!")
    parser.add_argument("-r", "--revision", action="store",
                        dest="revision", default=None,
                        help="Specify a specific revision for the test.")
    parser.add_argument("-b", "--branch", action="store",
                        dest="branch", default=None,
                        help="Specify a branch for the test.")
    return parser.parse_args()
|
||||
|
||||
def repoinit(testconfig):
    """Check out the requested state of the repository and return its revision.

    Changes the working directory to ``testconfig.repo``, checks out
    ``testconfig.branch`` and then, if set, ``testconfig.revision``.
    When testing plain master without a pinned revision the branch is
    updated with ``git pull`` first.

    Returns the revision string that is being tested: the pinned revision
    if one was given, otherwise the hash of the checked out HEAD.
    """
    os.chdir(testconfig.repo)

    def current_head():
        """Return the hash of the currently checked out commit as text."""
        out, _ = subprocess.Popen(['git', 'rev-parse', 'HEAD'],
                                  stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE).communicate()
        # Decode the raw output instead of editing its repr(); this also
        # gives a clean empty string when git printed nothing.
        return out.decode().strip()

    revision = ''
    on_master = testconfig.branch == 'master'

    #Checkout specific branch, else maintain main branch
    if on_master:
        subprocess.call(['git', 'checkout', 'master'])
    else:
        subprocess.call(['git', 'checkout', testconfig.branch])
        revision = current_head()

    #Check a specific revision. Else update master and use its head.
    if testconfig.revision:
        subprocess.call(['git', 'checkout', testconfig.revision])
        revision = testconfig.revision
    elif on_master:
        subprocess.call(['git', 'pull'])
        revision = current_head()

    return revision
|
||||
|
||||
class Configuration:
    """Holds the settings of one speedtest run.

    The constructor arguments are stored under the attributes ``repo``,
    ``drop_caches``, ``tests``, ``testlogs``, ``basebranch`` and
    ``baserev``. ``singletest``, ``revision`` and ``branch`` start out as
    ``None``, ``None`` and ``'master'`` and are filled in afterwards.
    """
    def __init__(self, repo, drop_caches, tests, testlogs, basebranch, baserev):
        # Defaults for the values that are supplied later on.
        self.branch = 'master'
        self.revision = None
        self.singletest = None
        # Values handed in by the caller.
        self.baserev = baserev
        self.basebranch = basebranch
        self.testlogs = testlogs
        self.tests = tests
        self.drop_caches = drop_caches
        self.repo = repo

    def additional_args(self, singletest, revision, branch):
        """Apply the optional settings; ``None`` keeps revision/branch as is.

        ``singletest`` is always overwritten, even with ``None``.
        """
        self.singletest = singletest
        self.revision = self.revision if revision is None else revision
        self.branch = self.branch if branch is None else branch

    def set_revision(self, revision):
        """Record the revision that is currently being tested."""
        self.revision = revision
|
||||
|
||||
|
||||
class Test:
    """Holds everything needed to run one test case.

    ``name``         -- name of the test, stored as given
    ``command``      -- command to execute, stored as given
    ``ldopts``       -- comma separated string; stored as a list with all
                        spaces removed and empty entries dropped
    ``permutations`` -- list of variant names, stored as given
    """
    def __init__(self, name, command, ldopts, permutations):
        self.name = name
        self.command = command
        # Not tested yet. Empty entries (empty value, trailing comma) are
        # dropped so that iterating over ldopts never yields ''.
        self.ldopts = [opt for opt in ldopts.replace(' ', '').split(',') if opt]
        self.permutations = permutations
|
||||
|
||||
def parse_configfile(conffile, testdir, moses_repo):
    """Parse the config file of a single test and return it as a Test.

    Each line has the form ``<Option>: <value>``; everything after a '#'
    is a comment. Recognized options are ``Command:`` (prefixed with
    ``moses_repo + '/bin/'``), ``LDPRE:`` and ``Variants:`` (comma
    separated). ``testdir`` is used as the name of the test.

    Raises ValueError on an unrecognized option.
    """
    command, ldopts = '', ''
    permutations = []
    # 'with' closes the file even when an unrecognized option raises.
    with open(conffile, 'r') as fileopen:
        for line in fileopen:
            # Discard comments and surrounding whitespace, so blank,
            # whitespace-only and indented comment lines are all skipped.
            line = line.split('#')[0].strip()
            if not line:
                continue
            opt, _, args = line.partition(' ')  # Get arguments
            args = args.strip()

            if opt == 'Command:':
                command = moses_repo + '/bin/' + args
            elif opt == 'LDPRE:':
                ldopts = args
            elif opt == 'Variants:':
                permutations = args.replace(' ', '').split(',')
            else:
                raise ValueError('Unrecognized option ' + opt)
    #We use the testdir as the name.
    return Test(testdir, command, ldopts, permutations)
|
||||
|
||||
def parse_testconfig(conffile):
    """Parse the config file of the whole testsuite into a Configuration.

    Each line has the form ``<OPTION>: <value>``; everything after a '#'
    is a comment. Options that do not appear keep the empty string.

    Raises ValueError on an unrecognized option.
    """
    # Recognized options and their values so far.
    settings = {
        'MOSES_REPO_PATH:': '',
        'DROP_CACHES_COMM:': '',
        'TEST_DIR:': '',
        'TEST_LOG_DIR:': '',
        'BASEBRANCH:': '',
        'BASEREV:': '',
    }
    # 'with' closes the file even when an unrecognized option raises.
    with open(conffile, 'r') as fileopen:
        for line in fileopen:
            # Discard comments and surrounding whitespace, so blank,
            # whitespace-only and indented comment lines are all skipped.
            line = line.split('#')[0].strip()
            if not line:
                continue
            opt, _, args = line.partition(' ')  # Get arguments
            if opt not in settings:
                raise ValueError('Unrecognized option ' + opt)
            settings[opt] = args.strip()
    return Configuration(settings['MOSES_REPO_PATH:'],
                         settings['DROP_CACHES_COMM:'],
                         settings['TEST_DIR:'],
                         settings['TEST_LOG_DIR:'],
                         settings['BASEBRANCH:'],
                         settings['BASEREV:'])
|
||||
|
||||
def get_config():
    """Assemble the run configuration from command line and config file.

    The testsuite config named on the command line is parsed, the optional
    command line settings are applied on top of it, the repository is set
    up accordingly and the resulting revision is stored in the returned
    configuration object.
    """
    cmdline = parse_cmd()
    config = parse_testconfig(cmdline.configfile)
    config.additional_args(cmdline.singletestdir, cmdline.revision,
                           cmdline.branch)
    # repoinit needs the merged settings and reports what it checked out.
    config.set_revision(repoinit(config))
    return config
|
||||
|
||||
def check_for_basever(testlogfile, basebranch):
    """Return True if the log file has an entry recorded on ``basebranch``.

    Note that only the branch field of each log line is compared; the
    revision is not looked at.
    """
    # 'with' closes the log on every path, including the early return.
    with open(testlogfile, 'r') as filetoopen:
        for line in filetoopen:
            if not line.strip():
                continue  # A blank line carries no entry to inspect
            if processLogLine(line).branch == basebranch:
                return True
    return False
|
||||
|
||||
def split_time(filename):
    """Read the three timings written by the time command from a file.

    We will write time to file, because many programs output to
    stderr which makes it difficult to get only the exact results we need.

    The first three lines of ``filename`` are read; the second
    whitespace-separated field of each is converted to float. They are
    returned, in file order, as the tuple (realtime, usertime, systime).
    """
    # 'with' closes the file even if a line is missing or malformed.
    with open(filename, 'r') as timefile:
        realtime, usertime, systime = [
            float(timefile.readline().split()[1]) for _ in range(3)]
    return (realtime, usertime, systime)
|
||||
|
||||
|
||||
def write_log(time_file, logname, config):
    """Append one result line for a test run to its log file.

    The timings are read from ``time_file``; the line is appended to
    ``config.testlogs + '/' + logname`` and has the form
    ``<date> <time> <revision> Testname: <logname> RealTime: <r>
    UserTime: <u> SystemTime: <s> Branch: <branch>``.
    """
    date_run = time.strftime("%d.%m.%Y %H:%M:%S") # Get the time of the test
    # Parse the timings before touching the log, so a malformed time file
    # neither leaks an open handle nor creates an empty log file.
    realtime, usertime, systime = split_time(time_file)

    writestr = date_run + " " + config.revision + " Testname: " + logname +\
    " RealTime: " + str(realtime) + " UserTime: " + str(usertime) +\
    " SystemTime: " + str(systime) + " Branch: " + config.branch +'\n'

    # Append everything to a log file.
    with open(config.testlogs + '/' + logname, 'a') as log_write:
        log_write.write(writestr)
|
||||
|
||||
|
||||
def execute_tests(testcase, cur_directory, config):
    """Run the timed variants of one test case and log their timings.

    The working directory is changed to ``config.tests/cur_directory``.
    Depending on ``testcase.permutations`` the command is run as
    'vanilla' (plus an immediate second 'cached' run) and once per entry
    of ``testcase.ldopts`` with that entry as LD_PRELOAD (again plus an
    optional 'cached' run). Caches are dropped before the vanilla run and
    before every LD_PRELOAD entry.
    """
    time_file = '/tmp/time_moses_tests'
    timed_command = 'time -p -o ' + time_file + ' ' + testcase.command

    def clear_caches():
        """Flush pending writes, then run the configured drop-caches command."""
        subprocess.call(['sync'], shell=True)
        subprocess.call([config.drop_caches], shell=True)

    def timed_run(shell_command, logname, stderr=None):
        """Run one timed command through the shell and log its timings."""
        subprocess.Popen([shell_command], stdout=None, stderr=stderr,
                         shell=True).communicate()
        write_log(time_file, logname, config)

    #Change to the current test directory
    os.chdir(config.tests + '/' + cur_directory)
    clear_caches()

    #Perform vanilla test and if a cached test exists - as well
    print(testcase.name)
    if 'vanilla' in testcase.permutations:
        print(testcase.command)
        # As before, stderr of the first run is captured and discarded.
        timed_run(timed_command, testcase.name + '_vanilla',
                  stderr=subprocess.PIPE)
        if 'cached' in testcase.permutations:
            timed_run(timed_command, testcase.name + '_vanilla_cached')

    #Now perform LD_PRELOAD tests
    if 'ldpre' in testcase.permutations:
        for opt in testcase.ldopts:
            clear_caches()
            # 'LD_PRELOAD=<value> <command>' sets the variable for that
            # command; without the '=' the shell would try to execute a
            # program called LD_PRELOAD.
            preload_command = 'LD_PRELOAD=' + opt + ' ' + timed_command
            # NOTE(review): opt becomes part of the log file name, so a
            # value containing '/' would point outside the log directory.
            timed_run(preload_command, testcase.name + '_ldpre_' + opt)
            if 'cached' in testcase.permutations:
                timed_run(preload_command,
                          testcase.name + '_ldpre_' + opt + '_cached')
|
||||
|
||||
# Go through all the test directories and executes tests
|
||||
if __name__ == '__main__':
|
||||
CONFIG = get_config()
|
||||
ALL_DIR = os.listdir(CONFIG.tests)
|
||||
|
||||
#We should first check if any of the tests is run for the first time.
|
||||
#If some of them are run for the first time we should first get their
|
||||
#time with the base version (usually the previous release)
|
||||
FIRSTTIME = []
|
||||
TESTLOGS = []
|
||||
#Strip filenames of test underscores
|
||||
for listline in os.listdir(CONFIG.testlogs):
|
||||
listline = listline.replace('_vanilla', '')
|
||||
listline = listline.replace('_cached', '')
|
||||
listline = listline.replace('_ldpre', '')
|
||||
TESTLOGS.append(listline)
|
||||
for directory in ALL_DIR:
|
||||
if directory not in TESTLOGS:
|
||||
FIRSTTIME.append(directory)
|
||||
|
||||
#Sometimes even though we have the log files, we will need to rerun them
|
||||
#Against a base version, because we require a different baseversion (for
|
||||
#example when a new version of Moses is released.) Therefore we should
|
||||
#Check if the version of Moses that we have as a base version is in all
|
||||
#of the log files.
|
||||
|
||||
for logfile in os.listdir(CONFIG.testlogs):
|
||||
logfile_name = CONFIG.testlogs + '/' + logfile
|
||||
if not check_for_basever(logfile_name, CONFIG.basebranch):
|
||||
logfile = logfile.replace('_vanilla', '')
|
||||
logfile = logfile.replace('_cached', '')
|
||||
logfile = logfile.replace('_ldpre', '')
|
||||
FIRSTTIME.append(logfile)
|
||||
FIRSTTIME = list(set(FIRSTTIME)) #Deduplicate
|
||||
|
||||
if FIRSTTIME != []:
|
||||
#Create a new configuration for base version tests:
|
||||
BASECONFIG = Configuration(CONFIG.repo, CONFIG.drop_caches,\
|
||||
CONFIG.tests, CONFIG.testlogs, CONFIG.basebranch,\
|
||||
CONFIG.baserev)
|
||||
BASECONFIG.additional_args(None, CONFIG.baserev, CONFIG.basebranch)
|
||||
#Set up the repository and get its revision:
|
||||
REVISION = repoinit(BASECONFIG)
|
||||
BASECONFIG.set_revision(REVISION)
|
||||
#Build
|
||||
os.chdir(BASECONFIG.repo)
|
||||
subprocess.call(['./previous.sh'], shell=True)
|
||||
|
||||
#Perform tests
|
||||
for directory in FIRSTTIME:
|
||||
cur_testcase = parse_configfile(BASECONFIG.tests + '/' + directory +\
|
||||
'/config', directory, BASECONFIG.repo)
|
||||
execute_tests(cur_testcase, directory, BASECONFIG)
|
||||
|
||||
#Reset back the repository to the normal configuration
|
||||
repoinit(CONFIG)
|
||||
|
||||
#Builds moses
|
||||
os.chdir(CONFIG.repo)
|
||||
subprocess.call(['./previous.sh'], shell=True)
|
||||
|
||||
if CONFIG.singletest:
|
||||
TESTCASE = parse_configfile(CONFIG.tests + '/' +\
|
||||
CONFIG.singletest + '/config', CONFIG.singletest, CONFIG.repo)
|
||||
execute_tests(TESTCASE, CONFIG.singletest, CONFIG)
|
||||
else:
|
||||
for directory in ALL_DIR:
|
||||
cur_testcase = parse_configfile(CONFIG.tests + '/' + directory +\
|
||||
'/config', directory, CONFIG.repo)
|
||||
execute_tests(cur_testcase, directory, CONFIG)
|
22
contrib/moses-speedtest/sys_drop_caches.py
Normal file
22
contrib/moses-speedtest/sys_drop_caches.py
Normal file
@ -0,0 +1,22 @@
|
||||
#!/usr/bin/spython
# Writes the flush type given as first command line argument ("1", "2" or
# "3") to /proc/sys/vm/drop_caches. Exits with status 0 on success and 1 on
# any failure. The exception syntax below is valid on Python 2.6+ and 3.
from sys import argv, stderr, exit
from os import linesep as ls
procfile = "/proc/sys/vm/drop_caches"
options = ["1","2","3"]
flush_type = None
try:
    # Only the first character of the argument is looked at; a missing
    # argument raises IndexError just like an invalid one.
    flush_type = argv[1][0:1]
    if flush_type not in options:
        raise IndexError("not in options")
    with open(procfile, "w") as f:
        f.write("%s%s" % (flush_type,ls))
    exit(0)
except IndexError:
    stderr.write("Argument %s required.%s" % (options, ls))
except IOError:
    stderr.write("Error writing to file.%s" % ls)
except Exception:
    # SystemExit raised by exit(0) is not an Exception and passes through.
    stderr.write("Unknown Error.%s" % ls)

exit(1)
|
||||
|
3
contrib/moses-speedtest/test_config
Normal file
3
contrib/moses-speedtest/test_config
Normal file
@ -0,0 +1,3 @@
|
||||
Command: moses -f ... -i fff #Looks for the command in the /bin directory of the repo specified in the testsuite_config
|
||||
LDPRE: ldpreloads #Comma separated list of LD_PRELOAD values; each entry gets its own test run
|
||||
Variants: vanilla, cached, ldpre #Can't have cached without ldpre or vanilla
|
54
contrib/moses-speedtest/testsuite_common.py
Normal file
54
contrib/moses-speedtest/testsuite_common.py
Normal file
@ -0,0 +1,54 @@
|
||||
"""Common functions of the testsuitce"""
|
||||
import os
|
||||
#Colour constants
class bcolors:
    """Escape sequences, grouped as plain names for use in string building."""
    # Sequences ordered by their numeric code.
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    PURPLE = '\033[95m'
    # Code 0.
    ENDC = '\033[0m'
|
||||
|
||||
class LogLine:
    """Plain record for the fields of one log file line.

    Every constructor argument is stored unchanged under the attribute of
    the same name.
    """
    def __init__(self, date, time, revision, testname, real, user, system, branch):
        # When the entry was written.
        self.date, self.time = date, time
        # What was tested.
        self.revision, self.branch = revision, branch
        self.testname = testname
        # The three timings.
        self.real, self.user, self.system = real, user, system
|
||||
|
||||
class Result:
    """A class to contain results of benchmarking

    Besides the constructor arguments, which are stored unchanged, it
    provides:

    ``change``     -- ``previous - current``
    ``percentage`` -- ``1 - current/previous`` rounded to four decimals,
                      i.e. a fraction (0.05 means 5%) that is positive
                      when ``current`` is smaller than ``previous``.
                      If ``previous`` is 0 the ratio is undefined: the
                      value is then 0.0 when ``current`` is 0 as well and
                      ``-inf`` otherwise.
    """
    def __init__(self, testname, previous, current, revision, branch, prevrev, prevbranch):
        self.testname = testname
        self.previous = previous
        self.current = current
        self.change = previous - current
        self.revision = revision
        self.branch = branch
        self.prevbranch = prevbranch
        self.prevrev = prevrev
        if previous:
            # float() keeps the division exact for integer inputs as well.
            ratio = 1 - float(current)/previous
        elif current:
            ratio = float('-inf')  # Anything is infinitely slower than 0
        else:
            ratio = 0.0
        #Produce a percentage with fewer digits
        self.percentage = float(format(ratio, '.4f'))
|
||||
|
||||
def processLogLine(logline):
    """Turn one line of a log file into a LogLine object.

    The line is split on whitespace. Fields 0, 1, 2, 4 and 12 are taken
    as date, time, revision, test name and branch; fields 6, 8 and 10 are
    converted to float and taken as real, user and system time.
    """
    fields = logline.split()
    date = fields[0]
    clock = fields[1]
    revision = fields[2]
    testname = fields[4]
    real = float(fields[6])
    user = float(fields[8])
    system = float(fields[10])
    branch = fields[12]
    return LogLine(date, clock, revision, testname, real, user, system, branch)
|
||||
|
||||
def getLastTwoLines(filename, logdir):
    """Return the last two lines of ``logdir/filename`` as a tuple.

    The lines are returned in file order, including their line endings.
    If the file holds fewer than two lines (a new test) the result is
    ``(first_line, '\\n')``, with ``first_line`` being '' for an empty file.
    """
    # Function-scope import; the file is read directly instead of building
    # a 'tail' shell command from the file name.
    from collections import deque
    with open(logdir + '/' + filename) as logfile:
        tail = deque(logfile, maxlen=2)  # Keeps only the last two lines
    if len(tail) == 2:
        return (tail[0], tail[1])
    #Check for new tests
    return (tail[0] if tail else '', '\n')
|
5
contrib/moses-speedtest/testsuite_config
Normal file
5
contrib/moses-speedtest/testsuite_config
Normal file
@ -0,0 +1,5 @@
|
||||
MOSES_REPO_PATH: /home/moses-speedtest/moses-standard/mosesdecoder
|
||||
DROP_CACHES_COMM: sys_drop_caches 3
|
||||
TEST_DIR: /home/moses-speedtest/phrase_tables/tests
|
||||
TEST_LOG_DIR: /home/moses-speedtest/phrase_tables/testlogs
|
||||
BASEBRANCH: RELEASE-2.1.1
|
44
contrib/other-builds/CreateOnDiskPt/.project
Normal file
44
contrib/other-builds/CreateOnDiskPt/.project
Normal file
@ -0,0 +1,44 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>CreateOnDiskPt</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
<project>lm</project>
|
||||
<project>moses</project>
|
||||
<project>OnDiskPt</project>
|
||||
<project>search</project>
|
||||
<project>util</project>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
|
||||
<triggers>clean,full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
|
||||
<triggers>full,incremental,</triggers>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.cdt.core.cnature</nature>
|
||||
<nature>org.eclipse.cdt.core.ccnature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
<name>Main.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Main.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.h</locationURI>
|
||||
</link>
|
||||
</linkedResources>
|
||||
</projectDescription>
|
@ -1,146 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings>
|
||||
<externalSetting>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/OnDiskPt"/>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/OnDiskPt/Debug"/>
|
||||
<entry flags="RESOLVED" kind="libraryFile" name="OnDiskPt" srcPrefixMapping="" srcRootPath=""/>
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.725420545" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
|
||||
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1586272140" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/OnDiskPt/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1909553559" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.30521110" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.478334849" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1328561226" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.108239817" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1825070846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.901309550" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
|
||||
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.676959181" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1484480101" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1556683035" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/opt/local/include"/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.compiler.option.preprocessor.def.1052680347" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
|
||||
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
|
||||
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
|
||||
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
|
||||
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
|
||||
<listOptionValue builtIn="false" value="WITH_THREADS"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.576529322" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.426851981" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1925590121" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.726316251" name="Main.h" rcbsApplicability="disable" resourcePath="Main.h" toolsToInvoke=""/>
|
||||
<sourceEntries>
|
||||
<entry excluding="Main.h|Main.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.701931933">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.701931933" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.701931933." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.5036266" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
|
||||
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.396818757" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/OnDiskPt/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1081186575" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.894082374" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.640159085" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1673993744" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.596082362" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.851420859" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.385722535" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.21058138" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1704184753" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.171488636" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.843129626" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1014721928" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="OnDiskPt.cdt.managedbuild.target.macosx.exe.542902806" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.701931933;cdt.managedbuild.config.macosx.exe.release.701931933.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384;cdt.managedbuild.tool.gnu.c.compiler.input.1014721928">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978;cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634;cdt.managedbuild.tool.gnu.c.compiler.input.1925590121">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978;cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511;cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.701931933;cdt.managedbuild.config.macosx.exe.release.701931933.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413;cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/OnDiskPt"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/OnDiskPt"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
</cproject>
|
@ -1,6 +1,6 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>extract-ordering</name>
|
||||
<name>consolidate</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
@ -46,19 +46,9 @@
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>SentenceAlignment.cpp</name>
|
||||
<name>consolidate-main.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>SentenceAlignment.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>extract-ordering-main.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ordering-main.cpp</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/consolidate-main.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>tables-core.cpp</name>
|
@ -1,138 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1035891586" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.242178856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract-ghkm/Debug}" id="cdt.managedbuild.builder.gnu.cross.430400318" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.251687262" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.962699619" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.230503798" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.433137197" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.971749711" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.984190691" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1374841264" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../phrase-extract""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1026620601" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1419857560" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<option id="gnu.cpp.link.option.paths.668926503" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/lib64""/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.libs.2091468346" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_thread-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="bz2"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1684298294" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.320160974" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2021657841" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1689419664" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.2000920404" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1106451881" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract-ghkm/Release}" id="cdt.managedbuild.builder.gnu.cross.727887705" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.819016498" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1057468997" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.1130475273" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.164617278" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.406333630" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.1059243022" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1068655225" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1213865062" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.764325642" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1299258961" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.896866692" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.276294580" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="extract-ghkm.cdt.managedbuild.target.gnu.cross.exe.1830080171" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641;cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.c.compiler.251687262;cdt.managedbuild.tool.gnu.c.compiler.input.433137197">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.c.compiler.819016498;cdt.managedbuild.tool.gnu.c.compiler.input.164617278">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268;cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract-ghkm"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract-ghkm"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
</cproject>
|
@ -26,49 +26,19 @@
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
<name>Alignment.cpp</name>
|
||||
<name>Hole.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.cpp</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/Hole.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Alignment.h</name>
|
||||
<name>HoleCollection.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>AlignmentGraph.cpp</name>
|
||||
<name>HoleCollection.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>AlignmentGraph.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ComposedRule.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ComposedRule.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Exception.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Exception.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ExtractGHKM.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ExtractGHKM.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>InputFileStream.cpp</name>
|
||||
@ -80,31 +50,6 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Jamfile</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Jamfile</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Main.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Main.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Node.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Node.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Options.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Options.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>OutputFileStream.cpp</name>
|
||||
<type>1</type>
|
||||
@ -116,54 +61,24 @@
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ParseTree.cpp</name>
|
||||
<name>SentenceAlignment.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.cpp</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ParseTree.h</name>
|
||||
<name>SentenceAlignment.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ScfgRule.cpp</name>
|
||||
<name>SentenceAlignmentWithSyntax.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.cpp</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ScfgRule.h</name>
|
||||
<name>SentenceAlignmentWithSyntax.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ScfgRuleWriter.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>ScfgRuleWriter.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Span.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Span.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Subgraph.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Subgraph.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>SyntaxTree.cpp</name>
|
||||
@ -186,14 +101,9 @@
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>XmlTreeParser.cpp</name>
|
||||
<name>extract-rules-main.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>XmlTreeParser.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>tables-core.cpp</name>
|
||||
|
@ -1,134 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.456080129" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.582801917" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract-mixed-syntax/Debug}" id="cdt.managedbuild.builder.gnu.cross.1220166455" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1245611568" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.2055012191" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.1768196213" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2007889843" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.855436310" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.506549229" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1497326561" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.606353571" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.740521305" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<option id="gnu.cpp.link.option.libs.1946120010" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.paths.1563475751" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/lib64""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.106010037" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.136661991" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2112208574" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.172930211" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.715007893">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.715007893." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.99436307" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.801178939" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract-mixed-syntax/Release}" id="cdt.managedbuild.builder.gnu.cross.1999547547" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2138817906" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1481537766" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.1967527847" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.442342681" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1604862038" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.1847950300" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.1130138972" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.870650754" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.158429528" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.2020667840" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1372779734" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.371006952" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1770045040" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.707592414" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="extract-mixed-syntax.cdt.managedbuild.target.gnu.cross.exe.1868010260" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.715007893;cdt.managedbuild.config.gnu.cross.exe.release.715007893.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1604862038;cdt.managedbuild.tool.gnu.cpp.compiler.input.870650754">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.715007893;cdt.managedbuild.config.gnu.cross.exe.release.715007893.;cdt.managedbuild.tool.gnu.cross.c.compiler.2138817906;cdt.managedbuild.tool.gnu.c.compiler.input.442342681">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982;cdt.managedbuild.config.gnu.cross.exe.debug.1919499982.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915;cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982;cdt.managedbuild.config.gnu.cross.exe.debug.1919499982.;cdt.managedbuild.tool.gnu.cross.c.compiler.1245611568;cdt.managedbuild.tool.gnu.c.compiler.input.2007889843">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract-mixed-syntax"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract-mixed-syntax"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
</cproject>
|
189
contrib/other-builds/extract-mixed-syntax/AlignedSentence.cpp
Normal file
189
contrib/other-builds/extract-mixed-syntax/AlignedSentence.cpp
Normal file
@ -0,0 +1,189 @@
|
||||
/*
|
||||
* AlignedSentence.cpp
|
||||
*
|
||||
* Created on: 18 Feb 2014
|
||||
* Author: s0565741
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include "moses/Util.h"
|
||||
#include "AlignedSentence.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////////
|
||||
/**
 * Builds a sentence pair from its textual form.
 *
 * @param lineNum    line number of the sentence pair, kept in m_lineNum
 * @param source     source sentence; tokenized into m_source
 * @param target     target sentence; tokenized into m_target
 * @param alignment  word alignment as "sourcePos-targetPos" tokens
 */
AlignedSentence::AlignedSentence(int lineNum,
                                 const std::string &source,
                                 const std::string &target,
                                 const std::string &alignment)
  : m_lineNum(lineNum)
{
  // Both word vectors must exist before the alignment points are attached.
  PopulateWordVec(m_source, source);
  PopulateWordVec(m_target, target);

  PopulateAlignment(alignment);
}
|
||||
|
||||
/**
 * Releases the Word objects held by both phrases.
 * NOTE(review): presumably Moses::RemoveAllInColl deletes every element and
 * clears the container - confirm against moses/Util.h.
 */
AlignedSentence::~AlignedSentence()
{
  Moses::RemoveAllInColl(m_source);
  Moses::RemoveAllInColl(m_target);
}
|
||||
|
||||
/**
 * Tokenizes a sentence and fills a phrase with newly allocated words.
 *
 * @param vec   phrase to fill; resized to the number of tokens
 * @param line  sentence text, split by Moses::Tokenize with its default
 *              delimiters
 */
void AlignedSentence::PopulateWordVec(Phrase &vec, const std::string &line)
{
  std::vector<string> tokens;
  Moses::Tokenize(tokens, line);

  vec.resize(tokens.size());

  // Each word is constructed with its position in the sentence.
  size_t pos = 0;
  while (pos < vec.size()) {
    vec[pos] = new Word(pos, tokens[pos]);
    ++pos;
  }
}
|
||||
|
||||
void AlignedSentence::PopulateAlignment(const std::string &line)
|
||||
{
|
||||
vector<string> alignStr;
|
||||
Moses::Tokenize(alignStr, line);
|
||||
|
||||
for (size_t i = 0; i < alignStr.size(); ++i) {
|
||||
vector<int> alignPair;
|
||||
Moses::Tokenize(alignPair, alignStr[i], "-");
|
||||
assert(alignPair.size() == 2);
|
||||
|
||||
int sourcePos = alignPair[0];
|
||||
int targetPos = alignPair[1];
|
||||
|
||||
if (sourcePos >= m_source.size()) {
|
||||
cerr << "ERROR1:AlignedSentence=" << Debug() << endl;
|
||||
cerr << "m_source=" << m_source.size() << endl;
|
||||
abort();
|
||||
}
|
||||
assert(sourcePos < m_source.size());
|
||||
assert(targetPos < m_target.size());
|
||||
Word *sourceWord = m_source[sourcePos];
|
||||
Word *targetWord = m_target[targetPos];
|
||||
|
||||
sourceWord->AddAlignment(targetWord);
|
||||
targetWord->AddAlignment(sourceWord);
|
||||
}
|
||||
}
|
||||
|
||||
std::string AlignedSentence::Debug() const
|
||||
{
|
||||
stringstream out;
|
||||
out << "m_lineNum:";
|
||||
out << m_lineNum;
|
||||
out << endl;
|
||||
|
||||
out << "m_source:";
|
||||
out << m_source.Debug();
|
||||
out << endl;
|
||||
|
||||
out << "m_target:";
|
||||
out << m_target.Debug();
|
||||
out << endl;
|
||||
|
||||
out << "consistent phrases:" << endl;
|
||||
out << m_consistentPhrases.Debug();
|
||||
out << endl;
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
std::vector<int> AlignedSentence::GetSourceAlignmentCount() const
|
||||
{
|
||||
vector<int> ret(m_source.size());
|
||||
|
||||
for (size_t i = 0; i < m_source.size(); ++i) {
|
||||
const Word &word = *m_source[i];
|
||||
ret[i] = word.GetAlignmentIndex().size();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void AlignedSentence::Create(const Parameter ¶ms)
|
||||
{
|
||||
CreateConsistentPhrases(params);
|
||||
m_consistentPhrases.AddHieroNonTerms(params);
|
||||
}
|
||||
|
||||
void AlignedSentence::CreateConsistentPhrases(const Parameter ¶ms)
|
||||
{
|
||||
int countT = m_target.size();
|
||||
int countS = m_source.size();
|
||||
|
||||
m_consistentPhrases.Initialize(countS);
|
||||
|
||||
// check alignments for target phrase startT...endT
|
||||
for(int lengthT=1;
|
||||
lengthT <= params.maxSpan && lengthT <= countT;
|
||||
lengthT++) {
|
||||
for(int startT=0; startT < countT-(lengthT-1); startT++) {
|
||||
|
||||
// that's nice to have
|
||||
int endT = startT + lengthT - 1;
|
||||
|
||||
// find find aligned source words
|
||||
// first: find minimum and maximum source word
|
||||
int minS = 9999;
|
||||
int maxS = -1;
|
||||
vector< int > usedS = GetSourceAlignmentCount();
|
||||
for(int ti=startT; ti<=endT; ti++) {
|
||||
const Word &word = *m_target[ti];
|
||||
const std::set<int> &alignment = word.GetAlignmentIndex();
|
||||
|
||||
std::set<int>::const_iterator iterAlign;
|
||||
for(iterAlign = alignment.begin(); iterAlign != alignment.end(); ++iterAlign) {
|
||||
int si = *iterAlign;
|
||||
if (si<minS) {
|
||||
minS = si;
|
||||
}
|
||||
if (si>maxS) {
|
||||
maxS = si;
|
||||
}
|
||||
usedS[ si ]--;
|
||||
}
|
||||
}
|
||||
|
||||
// unaligned phrases are not allowed
|
||||
if( maxS == -1 )
|
||||
continue;
|
||||
|
||||
// source phrase has to be within limits
|
||||
if( maxS-minS >= params.maxSpan )
|
||||
continue;
|
||||
|
||||
// check if source words are aligned to out of bound target words
|
||||
bool out_of_bounds = false;
|
||||
for(int si=minS; si<=maxS && !out_of_bounds; si++)
|
||||
if (usedS[si]>0) {
|
||||
out_of_bounds = true;
|
||||
}
|
||||
|
||||
// if out of bound, you gotta go
|
||||
if (out_of_bounds)
|
||||
continue;
|
||||
|
||||
// done with all the checks, lets go over all consistent phrase pairs
|
||||
// start point of source phrase may retreat over unaligned
|
||||
for(int startS=minS;
|
||||
(startS>=0 &&
|
||||
startS>maxS - params.maxSpan && // within length limit
|
||||
(startS==minS || m_source[startS]->GetAlignment().size()==0)); // unaligned
|
||||
startS--) {
|
||||
// end point of source phrase may advance over unaligned
|
||||
for(int endS=maxS;
|
||||
(endS<countS && endS<startS + params.maxSpan && // within length limit
|
||||
(endS==maxS || m_source[endS]->GetAlignment().size()==0)); // unaligned
|
||||
endS++) {
|
||||
|
||||
// take note that this is a valid phrase alignment
|
||||
m_consistentPhrases.Add(startS, endS, startT, endT, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
51
contrib/other-builds/extract-mixed-syntax/AlignedSentence.h
Normal file
51
contrib/other-builds/extract-mixed-syntax/AlignedSentence.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* AlignedSentence.h
|
||||
*
|
||||
* Created on: 18 Feb 2014
|
||||
* Author: s0565741
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "ConsistentPhrases.h"
|
||||
#include "Phrase.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
class Parameter;
|
||||
|
||||
class AlignedSentence {
|
||||
public:
|
||||
AlignedSentence(int lineNum)
|
||||
:m_lineNum(lineNum)
|
||||
{}
|
||||
|
||||
AlignedSentence(int lineNum,
|
||||
const std::string &source,
|
||||
const std::string &target,
|
||||
const std::string &alignment);
|
||||
virtual ~AlignedSentence();
|
||||
virtual void Create(const Parameter ¶ms);
|
||||
|
||||
const Phrase &GetPhrase(Moses::FactorDirection direction) const
|
||||
{ return (direction == Moses::Input) ? m_source : m_target; }
|
||||
|
||||
const ConsistentPhrases &GetConsistentPhrases() const
|
||||
{ return m_consistentPhrases; }
|
||||
|
||||
virtual std::string Debug() const;
|
||||
|
||||
int m_lineNum;
|
||||
protected:
|
||||
Phrase m_source, m_target;
|
||||
ConsistentPhrases m_consistentPhrases;
|
||||
|
||||
void CreateConsistentPhrases(const Parameter ¶ms);
|
||||
void PopulateWordVec(Phrase &vec, const std::string &line);
|
||||
|
||||
// m_source and m_target MUST be populated before calling this
|
||||
void PopulateAlignment(const std::string &line);
|
||||
std::vector<int> GetSourceAlignmentCount() const;
|
||||
};
|
||||
|
||||
|
@ -0,0 +1,183 @@
|
||||
/*
|
||||
* AlignedSentenceSyntax.cpp
|
||||
*
|
||||
* Created on: 26 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "AlignedSentenceSyntax.h"
|
||||
#include "Parameter.h"
|
||||
#include "pugixml.hpp"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Stores the raw input strings. Nothing is parsed here; parsing is done in
 * Create(), which also receives the extraction parameters.
 *
 * @param lineNum    line number of the sentence pair
 * @param source     source sentence text
 * @param target     target sentence text
 * @param alignment  word alignment string
 */
AlignedSentenceSyntax::AlignedSentenceSyntax(int lineNum,
                                             const std::string &source,
                                             const std::string &target,
                                             const std::string &alignment)
  : AlignedSentence(lineNum)
  , m_sourceStr(source)
  , m_targetStr(target)
  , m_alignmentStr(alignment)
{}
|
||||
|
||||
/** Nothing to release here. */
AlignedSentenceSyntax::~AlignedSentenceSyntax()
{
}
|
||||
|
||||
void AlignedSentenceSyntax::Populate(bool isSyntax, int mixedSyntaxType, const Parameter ¶ms,
|
||||
string line, Phrase &phrase, SyntaxTree &tree)
|
||||
{
|
||||
// parse source and target string
|
||||
if (isSyntax) {
|
||||
line = "<xml><tree label=\"X\">" + line + "</tree></xml>";
|
||||
XMLParse(phrase, tree, line, params);
|
||||
|
||||
if (mixedSyntaxType != 0) {
|
||||
// mixed syntax. Always add [X] where there isn't 1
|
||||
tree.SetHieroLabel(params.hieroNonTerm);
|
||||
if (mixedSyntaxType == 2) {
|
||||
tree.AddToAll(params.hieroNonTerm);
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
PopulateWordVec(phrase, line);
|
||||
tree.SetHieroLabel(params.hieroNonTerm);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void AlignedSentenceSyntax::Create(const Parameter ¶ms)
|
||||
{
|
||||
Populate(params.sourceSyntax, params.mixedSyntaxType, params, m_sourceStr,
|
||||
m_source, m_sourceTree);
|
||||
Populate(params.targetSyntax, params.mixedSyntaxType, params, m_targetStr,
|
||||
m_target, m_targetTree);
|
||||
|
||||
PopulateAlignment(m_alignmentStr);
|
||||
CreateConsistentPhrases(params);
|
||||
|
||||
// create labels
|
||||
CreateNonTerms();
|
||||
}
|
||||
|
||||
/**
 * Replaces characters that are special in XML or in the Moses rule format
 * with their entity / numeric character reference form, in place.
 *
 * '&' is handled first so that the ampersands introduced by the later
 * replacements are not escaped a second time.
 *
 * @param text  string to escape; overwritten with the escaped result
 */
void Escape(string &text)
{
  text = Moses::Replace(text, "&", "&amp;");
  text = Moses::Replace(text, "|", "&#124;");
  text = Moses::Replace(text, "<", "&lt;");
  text = Moses::Replace(text, ">", "&gt;");
  text = Moses::Replace(text, "'", "&apos;");
  text = Moses::Replace(text, "\"", "&quot;");
  text = Moses::Replace(text, "[", "&#91;");
  text = Moses::Replace(text, "]", "&#93;");
}
|
||||
|
||||
/**
 * Recursively walks the children of an XML node, appending words to the
 * phrase and labelled spans to the syntax tree.
 *
 * @param output      phrase that receives the words, in document order
 * @param tree        syntax tree that receives "[label]" spans
 * @param parentNode  node whose children are visited
 * @param params      extraction settings, forwarded to SyntaxTree::Add
 */
void AlignedSentenceSyntax::XMLParse(Phrase &output,
    SyntaxTree &tree,
    const pugi::xml_node &parentNode,
    const Parameter &params)
{
  for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling())
  {
    string nodeName = childNode.name();

    // span label
    string label;
    // First word position covered by this child.
    int startPos = output.size();

    if (!nodeName.empty()) {
      pugi::xml_attribute attribute = childNode.attribute("label");
      label = attribute.as_string();

      // recursively call this function. For proper recursive trees
      XMLParse(output, tree, childNode, params);
    }

    // fill phrase vector
    string text = childNode.value();
    Escape(text);

    std::vector<string> toks;
    Moses::Tokenize(toks, text);

    for (size_t i = 0; i < toks.size(); ++i) {
      const string &tok = toks[i];
      Word *word = new Word(output.size(), tok);
      output.push_back(word);
    }

    // is it a labelled span?
    int endPos = output.size() - 1;

    // fill syntax labels
    if (!label.empty()) {
      label = "[" + label + "]";
      tree.Add(startPos, endPos, label, params);
    }
  }
}
|
||||
|
||||
/**
 * Parses an XML string and feeds its <xml> root element to the node-based
 * overload.
 *
 * A parse failure is reported on stderr instead of being silently ignored.
 * Processing still continues with whatever pugixml managed to load, as
 * before.
 *
 * @param output  phrase that receives the words
 * @param tree    syntax tree that receives the labelled spans
 * @param input   XML text with an <xml> root element
 * @param params  extraction settings
 */
void AlignedSentenceSyntax::XMLParse(Phrase &output,
    SyntaxTree &tree,
    const std::string input,
    const Parameter &params)
{
  pugi::xml_document doc;
  pugi::xml_parse_result result = doc.load(input.c_str(),
      pugi::parse_default | pugi::parse_comments);

  if (!result) {
    cerr << "WARNING: XML parse error on line " << m_lineNum << ": "
         << result.description() << endl;
  }

  pugi::xml_node topNode = doc.child("xml");
  XMLParse(output, tree, topNode, params);
}
|
||||
|
||||
void AlignedSentenceSyntax::CreateNonTerms()
|
||||
{
|
||||
for (int sourceStart = 0; sourceStart < m_source.size(); ++sourceStart) {
|
||||
for (int sourceEnd = sourceStart; sourceEnd < m_source.size(); ++sourceEnd) {
|
||||
ConsistentPhrases::Coll &coll = m_consistentPhrases.GetColl(sourceStart, sourceEnd);
|
||||
const SyntaxTree::Labels &sourceLabels = m_sourceTree.Find(sourceStart, sourceEnd);
|
||||
|
||||
ConsistentPhrases::Coll::iterator iter;
|
||||
for (iter = coll.begin(); iter != coll.end(); ++iter) {
|
||||
ConsistentPhrase &cp = **iter;
|
||||
|
||||
int targetStart = cp.corners[2];
|
||||
int targetEnd = cp.corners[3];
|
||||
const SyntaxTree::Labels &targetLabels = m_targetTree.Find(targetStart, targetEnd);
|
||||
|
||||
CreateNonTerms(cp, sourceLabels, targetLabels);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
 * Adds one non-terminal to the phrase for every (source label, target label)
 * combination.
 *
 * @param cp            consistent phrase to label
 * @param sourceLabels  labels covering the phrase's source span
 * @param targetLabels  labels covering the phrase's target span
 */
void AlignedSentenceSyntax::CreateNonTerms(ConsistentPhrase &cp,
    const SyntaxTree::Labels &sourceLabels,
    const SyntaxTree::Labels &targetLabels)
{
  typedef SyntaxTree::Labels::const_iterator LabelIter;

  for (LabelIter src = sourceLabels.begin(); src != sourceLabels.end(); ++src) {
    for (LabelIter tgt = targetLabels.begin(); tgt != targetLabels.end(); ++tgt) {
      cp.AddNonTerms(*src, *tgt);
    }
  }
}
|
||||
|
||||
|
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* AlignedSentenceSyntax.h
|
||||
*
|
||||
* Created on: 26 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "AlignedSentence.h"
|
||||
#include "SyntaxTree.h"
|
||||
#include "pugixml.hpp"
|
||||
|
||||
class AlignedSentenceSyntax : public AlignedSentence
|
||||
{
|
||||
public:
|
||||
AlignedSentenceSyntax(int lineNum,
|
||||
const std::string &source,
|
||||
const std::string &target,
|
||||
const std::string &alignment);
|
||||
virtual ~AlignedSentenceSyntax();
|
||||
|
||||
void Create(const Parameter ¶ms);
|
||||
|
||||
//virtual std::string Debug() const;
|
||||
protected:
|
||||
std::string m_sourceStr, m_targetStr, m_alignmentStr;
|
||||
SyntaxTree m_sourceTree, m_targetTree;
|
||||
|
||||
void XMLParse(Phrase &output,
|
||||
SyntaxTree &tree,
|
||||
const std::string input,
|
||||
const Parameter ¶ms);
|
||||
void XMLParse(Phrase &output,
|
||||
SyntaxTree &tree,
|
||||
const pugi::xml_node &parentNode,
|
||||
const Parameter ¶ms);
|
||||
void CreateNonTerms();
|
||||
void CreateNonTerms(ConsistentPhrase &cp,
|
||||
const SyntaxTree::Labels &sourceLabels,
|
||||
const SyntaxTree::Labels &targetLabels);
|
||||
void Populate(bool isSyntax, int mixedSyntaxType, const Parameter ¶ms,
|
||||
std::string line, Phrase &phrase, SyntaxTree &tree);
|
||||
|
||||
};
|
||||
|
@ -0,0 +1,66 @@
|
||||
/*
|
||||
* ConsistentPhrase.cpp
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <sstream>
|
||||
#include "ConsistentPhrase.h"
|
||||
#include "Word.h"
|
||||
#include "NonTerm.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Creates a phrase pair covering [sourceStart, sourceEnd] on the source side
 * and [targetStart, targetEnd] on the target side (inclusive indices).
 *
 * The hiero non-terminal is built with params.hieroNonTerm as both its source
 * and its target label.
 *
 * @param params  extraction settings; only hieroNonTerm is read here
 */
ConsistentPhrase::ConsistentPhrase(
    int sourceStart, int sourceEnd,
    int targetStart, int targetEnd,
    const Parameter &params)
:corners(4)
,m_hieroNonTerm(*this, params.hieroNonTerm, params.hieroNonTerm)
{
  // Layout: [0]=source start, [1]=source end, [2]=target start, [3]=target end
  corners[0] = sourceStart;
  corners[1] = sourceEnd;
  corners[2] = targetStart;
  corners[3] = targetEnd;
}
|
||||
|
||||
/** Nothing to release; all members clean up after themselves. */
ConsistentPhrase::~ConsistentPhrase()
{
}
|
||||
|
||||
bool ConsistentPhrase::operator<(const ConsistentPhrase &other) const
|
||||
{
|
||||
return corners < other.corners;
|
||||
}
|
||||
|
||||
void ConsistentPhrase::AddNonTerms(const std::string &source,
|
||||
const std::string &target)
|
||||
{
|
||||
m_nonTerms.push_back(NonTerm(*this, source, target));
|
||||
}
|
||||
|
||||
bool ConsistentPhrase::TargetOverlap(const ConsistentPhrase &other) const
|
||||
{
|
||||
if ( other.corners[3] < corners[2] || other.corners[2] > corners[3])
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::string ConsistentPhrase::Debug() const
|
||||
{
|
||||
stringstream out;
|
||||
out << "[" << corners[0] << "-" << corners[1]
|
||||
<< "][" << corners[2] << "-" << corners[3] << "]";
|
||||
|
||||
out << "NT:";
|
||||
for (size_t i = 0; i < m_nonTerms.size(); ++i) {
|
||||
const NonTerm &nonTerm = m_nonTerms[i];
|
||||
out << nonTerm.GetLabel(Moses::Input) << ":" << nonTerm.GetLabel(Moses::Output);
|
||||
}
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
|
51
contrib/other-builds/extract-mixed-syntax/ConsistentPhrase.h
Normal file
51
contrib/other-builds/extract-mixed-syntax/ConsistentPhrase.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* ConsistentPhrase.h
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "moses/TypeDef.h"
|
||||
#include "NonTerm.h"
|
||||
|
||||
class ConsistentPhrase
|
||||
{
|
||||
public:
|
||||
typedef std::vector<NonTerm> NonTerms;
|
||||
|
||||
std::vector<int> corners;
|
||||
|
||||
ConsistentPhrase(const ConsistentPhrase ©); // do not implement
|
||||
ConsistentPhrase(int sourceStart, int sourceEnd,
|
||||
int targetStart, int targetEnd,
|
||||
const Parameter ¶ms);
|
||||
|
||||
virtual ~ConsistentPhrase();
|
||||
|
||||
int GetWidth(Moses::FactorDirection direction) const
|
||||
{ return (direction == Moses::Input) ? corners[1] - corners[0] + 1 : corners[3] - corners[2] + 1; }
|
||||
|
||||
|
||||
void AddNonTerms(const std::string &source,
|
||||
const std::string &target);
|
||||
const NonTerms &GetNonTerms() const
|
||||
{ return m_nonTerms;}
|
||||
const NonTerm &GetHieroNonTerm() const
|
||||
{ return m_hieroNonTerm;}
|
||||
|
||||
bool TargetOverlap(const ConsistentPhrase &other) const;
|
||||
|
||||
bool operator<(const ConsistentPhrase &other) const;
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
protected:
|
||||
NonTerms m_nonTerms;
|
||||
NonTerm m_hieroNonTerm;
|
||||
};
|
||||
|
103
contrib/other-builds/extract-mixed-syntax/ConsistentPhrases.cpp
Normal file
103
contrib/other-builds/extract-mixed-syntax/ConsistentPhrases.cpp
Normal file
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* ConsistentPhrases.cpp
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
#include "ConsistentPhrases.h"
|
||||
#include "NonTerm.h"
|
||||
#include "Parameter.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/** Creates an empty collection; Initialize() sizes it. */
ConsistentPhrases::ConsistentPhrases() {}
|
||||
|
||||
/**
 * Releases every stored ConsistentPhrase.
 * NOTE(review): presumably Moses::RemoveAllInColl deletes each pointer and
 * clears the set - confirm against moses/Util.h.
 */
ConsistentPhrases::~ConsistentPhrases()
{
  typedef std::vector< std::vector<Coll> >::iterator OuterIter;
  typedef std::vector<Coll>::iterator InnerIter;

  for (OuterIter byStart = m_coll.begin(); byStart != m_coll.end(); ++byStart) {
    for (InnerIter byWidth = byStart->begin(); byWidth != byStart->end(); ++byWidth) {
      Moses::RemoveAllInColl(*byWidth);
    }
  }
}
|
||||
|
||||
void ConsistentPhrases::Initialize(size_t size)
|
||||
{
|
||||
m_coll.resize(size);
|
||||
|
||||
for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
|
||||
std::vector<Coll> &allSourceStart = m_coll[sourceStart];
|
||||
allSourceStart.resize(size - sourceStart);
|
||||
}
|
||||
}
|
||||
|
||||
void ConsistentPhrases::Add(int sourceStart, int sourceEnd,
|
||||
int targetStart, int targetEnd,
|
||||
const Parameter ¶ms)
|
||||
{
|
||||
Coll &coll = m_coll[sourceStart][sourceEnd - sourceStart];
|
||||
ConsistentPhrase *cp = new ConsistentPhrase(sourceStart, sourceEnd,
|
||||
targetStart, targetEnd,
|
||||
params);
|
||||
|
||||
pair<Coll::iterator, bool> inserted = coll.insert(cp);
|
||||
assert(inserted.second);
|
||||
}
|
||||
|
||||
/**
 * Read-only access to the phrases whose source span is exactly
 * [sourceStart, sourceEnd]. No bounds checking is performed.
 */
const ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd) const
{
  const int widthIndex = sourceEnd - sourceStart;
  return m_coll[sourceStart][widthIndex];
}
|
||||
|
||||
/**
 * Mutable access to the phrases whose source span is exactly
 * [sourceStart, sourceEnd]. No bounds checking is performed.
 */
ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd)
{
  const int widthIndex = sourceEnd - sourceStart;
  return m_coll[sourceStart][widthIndex];
}
|
||||
|
||||
std::string ConsistentPhrases::Debug() const
|
||||
{
|
||||
std::stringstream out;
|
||||
for (int start = 0; start < m_coll.size(); ++start) {
|
||||
const std::vector<Coll> &allSourceStart = m_coll[start];
|
||||
|
||||
for (int size = 0; size < allSourceStart.size(); ++size) {
|
||||
const Coll &coll = allSourceStart[size];
|
||||
|
||||
Coll::const_iterator iter;
|
||||
for (iter = coll.begin(); iter != coll.end(); ++iter) {
|
||||
const ConsistentPhrase &consistentPhrase = **iter;
|
||||
out << consistentPhrase.Debug() << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void ConsistentPhrases::AddHieroNonTerms(const Parameter ¶ms)
|
||||
{
|
||||
// add [X] labels everywhere
|
||||
for (int i = 0; i < m_coll.size(); ++i) {
|
||||
vector<Coll> &inner = m_coll[i];
|
||||
for (int j = 0; j < inner.size(); ++j) {
|
||||
ConsistentPhrases::Coll &coll = inner[j];
|
||||
ConsistentPhrases::Coll::iterator iter;
|
||||
for (iter = coll.begin(); iter != coll.end(); ++iter) {
|
||||
ConsistentPhrase &cp = **iter;
|
||||
cp.AddNonTerms(params.hieroNonTerm, params.hieroNonTerm);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* ConsistentPhrases.h
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "ConsistentPhrase.h"
|
||||
|
||||
class Word;
|
||||
class Parameter;
|
||||
|
||||
class ConsistentPhrases {
|
||||
public:
|
||||
typedef std::set<ConsistentPhrase*> Coll;
|
||||
|
||||
ConsistentPhrases();
|
||||
virtual ~ConsistentPhrases();
|
||||
|
||||
void Initialize(size_t size);
|
||||
|
||||
void Add(int sourceStart, int sourceEnd,
|
||||
int targetStart, int targetEnd,
|
||||
const Parameter ¶ms);
|
||||
|
||||
void AddHieroNonTerms(const Parameter ¶ms);
|
||||
|
||||
const Coll &GetColl(int sourceStart, int sourceEnd) const;
|
||||
Coll &GetColl(int sourceStart, int sourceEnd);
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
protected:
|
||||
std::vector< std::vector<Coll> > m_coll;
|
||||
};
|
||||
|
@ -1,37 +0,0 @@
|
||||
/*
|
||||
* Global.cpp
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 01/02/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "Global.h"
|
||||
|
||||
bool g_debug = false;
|
||||
|
||||
/**
 * Sets the default extraction options.
 *
 * Initializers are listed in the order the members are declared in Global.h.
 * Members are always initialized in declaration order, so listing them that
 * way avoids -Wreorder warnings. The values themselves are unchanged.
 *
 * Options that were already disabled in the original list:
 * minHoleSize(1), minSubPhraseSize(1) (minimum size of a remaining lexical
 * phrase) and zipFiles(false).
 */
Global::Global()
  : minHoleSpanSourceDefault(2)
  , maxHoleSpanSourceDefault(7)
  , minHoleSpanSourceSyntax(1)
  , maxHoleSpanSourceSyntax(1000)
  , maxSymbols(5)
  , glueGrammarFlag(false)
  , unknownWordLabelFlag(false)
  , maxNonTerm(3)
  , maxNonTermDefault(2)
  , sourceSyntax(true)
  , targetSyntax(false)
  , mixed(true)
  , maxUnaligned(5)
  , uppermostOnly(true)
  , allowDefaultNonTermEdge(true)
  , gzOutput(false)
{}
|
@ -1,45 +0,0 @@
|
||||
#pragma once
|
||||
/*
|
||||
* Global.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 01/02/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <string>
|
||||
|
||||
/**
 * Plain bag of rule-extraction options. The defaults are set by the default
 * constructor, which is defined in Global.cpp.
 */
class Global
{
public:
  // Limits on the source span a non-terminal ("hole") may cover, for default
  // and for syntax-labelled non-terminals.
  int minHoleSpanSourceDefault;
  int maxHoleSpanSourceDefault;
  int minHoleSpanSourceSyntax;
  int maxHoleSpanSourceSyntax;

  int maxSymbols;
  bool glueGrammarFlag;
  bool unknownWordLabelFlag;
  int maxNonTerm;
  int maxNonTermDefault;
  bool sourceSyntax;
  bool targetSyntax;
  bool mixed;
  int maxUnaligned;
  bool uppermostOnly;
  bool allowDefaultNonTermEdge;
  bool gzOutput;

  Global();

  // Declared only; no definition is visible in this header.
  Global(const Global&);

};
|
||||
|
||||
extern bool g_debug;
|
||||
|
||||
#define DEBUG_OUTPUT() void DebugOutput() const;
|
||||
|
||||
|
@ -1,180 +0,0 @@
|
||||
/*
|
||||
* Lattice.cpp
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 18/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include "Lattice.h"
|
||||
#include "LatticeNode.h"
|
||||
#include "Tunnel.h"
|
||||
#include "TunnelCollection.h"
|
||||
#include "SyntaxTree.h"
|
||||
#include "SentenceAlignment.h"
|
||||
#include "tables-core.h"
|
||||
#include "Rule.h"
|
||||
#include "RuleCollection.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
/**
 * Allocates one stack more than there are source words.
 *
 * @param sourceSize  number of words in the source sentence
 */
Lattice::Lattice(size_t sourceSize)
  : m_stacks(sourceSize + 1)
{}
|
||||
|
||||
/**
 * Releases the lattice nodes held in every stack.
 * NOTE(review): presumably RemoveAllInColl deletes each node - confirm
 * against its definition.
 */
Lattice::~Lattice()
{
  for (std::vector<Stack>::iterator it = m_stacks.begin(); it != m_stacks.end(); ++it) {
    RemoveAllInColl(*it);
  }
}
|
||||
|
||||
void Lattice::CreateArcs(size_t startPos, const TunnelCollection &tunnelColl, const SentenceAlignment &sentence, const Global &global)
|
||||
{
|
||||
// term
|
||||
Stack &startStack = GetStack(startPos);
|
||||
|
||||
LatticeNode *node = new LatticeNode(startPos, &sentence);
|
||||
startStack.push_back(node);
|
||||
|
||||
// non-term
|
||||
for (size_t endPos = startPos + 1; endPos <= sentence.source.size(); ++endPos)
|
||||
{
|
||||
const TunnelList &tunnels = tunnelColl.GetTunnels(startPos, endPos - 1);
|
||||
|
||||
TunnelList::const_iterator iterHole;
|
||||
for (iterHole = tunnels.begin(); iterHole != tunnels.end(); ++iterHole)
|
||||
{
|
||||
const Tunnel &tunnel = *iterHole;
|
||||
CreateArcsUsing1Hole(tunnel, sentence, global);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Lattice::CreateArcsUsing1Hole(const Tunnel &tunnel, const SentenceAlignment &sentence, const Global &global)
|
||||
{
|
||||
size_t startPos = tunnel.GetRange(0).GetStartPos()
|
||||
, endPos = tunnel.GetRange(0).GetEndPos();
|
||||
size_t numSymbols = tunnel.GetRange(0).GetWidth();
|
||||
assert(numSymbols > 0);
|
||||
|
||||
Stack &startStack = GetStack(startPos);
|
||||
|
||||
|
||||
// non-terms. cartesian product of source & target labels
|
||||
assert(startPos == tunnel.GetRange(0).GetStartPos() && endPos == tunnel.GetRange(0).GetEndPos());
|
||||
size_t startT = tunnel.GetRange(1).GetStartPos()
|
||||
,endT = tunnel.GetRange(1).GetEndPos();
|
||||
|
||||
const SyntaxNodes &nodesS = sentence.sourceTree.GetNodes(startPos, endPos);
|
||||
const SyntaxNodes &nodesT = sentence.targetTree.GetNodes(startT, endT );
|
||||
|
||||
SyntaxNodes::const_iterator iterS, iterT;
|
||||
for (iterS = nodesS.begin(); iterS != nodesS.end(); ++iterS)
|
||||
{
|
||||
const SyntaxNode *syntaxNodeS = *iterS;
|
||||
|
||||
for (iterT = nodesT.begin(); iterT != nodesT.end(); ++iterT)
|
||||
{
|
||||
const SyntaxNode *syntaxNodeT = *iterT;
|
||||
|
||||
bool isSyntax = syntaxNodeS->IsSyntax() || syntaxNodeT->IsSyntax();
|
||||
size_t maxSourceNonTermSpan = isSyntax ? global.maxHoleSpanSourceSyntax : global.maxHoleSpanSourceDefault;
|
||||
|
||||
if (maxSourceNonTermSpan >= endPos - startPos)
|
||||
{
|
||||
LatticeNode *node = new LatticeNode(tunnel, syntaxNodeS, syntaxNodeT);
|
||||
startStack.push_back(node);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Mutable access to the stack of arcs starting at a source position.
 * The position is range-checked with assert only.
 */
Stack &Lattice::GetStack(size_t startPos)
{
  assert(startPos < m_stacks.size());

  Stack &stack = m_stacks[startPos];
  return stack;
}
|
||||
|
||||
/**
 * Read-only access to the stack of arcs starting at a source position.
 * The position is range-checked with assert only.
 */
const Stack &Lattice::GetStack(size_t startPos) const
{
  assert(startPos < m_stacks.size());

  const Stack &stack = m_stacks[startPos];
  return stack;
}
|
||||
|
||||
void Lattice::CreateRules(size_t startPos, const SentenceAlignment &sentence, const Global &global)
|
||||
{
|
||||
const Stack &startStack = GetStack(startPos);
|
||||
|
||||
Stack::const_iterator iterStack;
|
||||
for (iterStack = startStack.begin(); iterStack != startStack.end(); ++iterStack)
|
||||
{
|
||||
const LatticeNode *node = *iterStack;
|
||||
Rule *initRule = new Rule(node);
|
||||
|
||||
if (initRule->CanRecurse(global, sentence.GetTunnelCollection()))
|
||||
{ // may or maynot be valid, but can continue to build on this rule
|
||||
initRule->CreateRules(m_rules, *this, sentence, global);
|
||||
}
|
||||
|
||||
if (initRule->IsValid(global, sentence.GetTunnelCollection()))
|
||||
{ // add to rule collection
|
||||
m_rules.Add(global, initRule, sentence);
|
||||
}
|
||||
else
|
||||
{
|
||||
delete initRule;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
// Collects, from the stack at sourceRange's start position, every
// non-terminal node whose source range ends where sourceRange ends.
// The returned Stack holds the same node pointers as the lattice.
Stack Lattice::GetNonTermNode(const Range &sourceRange) const
{
  Stack matches;
  const Stack &candidates = GetStack(sourceRange.GetStartPos());

  for (size_t i = 0; i < candidates.size(); ++i) {
    LatticeNode *candidate = candidates[i];
    const Range &span = candidate->GetSourceRange();

    // every node in this stack is expected to start at the requested position
    assert(span.GetStartPos() == sourceRange.GetStartPos());

    if (candidate->IsTerminal())
      continue;
    if (span.GetEndPos() != sourceRange.GetEndPos())
      continue;

    matches.push_back(candidate);
  }

  return matches;
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const Lattice &obj)
|
||||
{
|
||||
std::vector<Stack>::const_iterator iter;
|
||||
for (iter = obj.m_stacks.begin(); iter != obj.m_stacks.end(); ++iter)
|
||||
{
|
||||
const Stack &stack = *iter;
|
||||
|
||||
Stack::const_iterator iterStack;
|
||||
for (iterStack = stack.begin(); iterStack != stack.end(); ++iterStack)
|
||||
{
|
||||
const LatticeNode &node = **iterStack;
|
||||
out << node << " ";
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -1,47 +0,0 @@
|
||||
#pragma once
|
||||
/*
|
||||
* Lattice.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 18/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "RuleCollection.h"
|
||||
|
||||
class Global;
|
||||
class LatticeNode;
|
||||
class Tunnel;
|
||||
class TunnelCollection;
|
||||
class SentenceAlignment;
|
||||
|
||||
typedef std::vector<LatticeNode*> Stack;
|
||||
|
||||
class Lattice
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const Lattice&);
|
||||
|
||||
std::vector<Stack> m_stacks;
|
||||
RuleCollection m_rules;
|
||||
|
||||
Stack &GetStack(size_t endPos);
|
||||
|
||||
void CreateArcsUsing1Hole(const Tunnel &tunnel, const SentenceAlignment &sentence, const Global &global);
|
||||
|
||||
public:
|
||||
Lattice(size_t sourceSize);
|
||||
~Lattice();
|
||||
|
||||
void CreateArcs(size_t startPos, const TunnelCollection &tunnelColl, const SentenceAlignment &sentence, const Global &global);
|
||||
void CreateRules(size_t startPos, const SentenceAlignment &sentence, const Global &global);
|
||||
|
||||
const Stack &GetStack(size_t startPos) const;
|
||||
const RuleCollection &GetRules() const
|
||||
{ return m_rules; }
|
||||
|
||||
Stack GetNonTermNode(const Range &sourceRange) const;
|
||||
|
||||
};
|
||||
|
@ -1,149 +0,0 @@
|
||||
/*
|
||||
* LatticeNode.cpp
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 18/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <sstream>
|
||||
#include "LatticeNode.h"
|
||||
#include "SyntaxTree.h"
|
||||
#include "Tunnel.h"
|
||||
#include "SentenceAlignment.h"
|
||||
#include "SymbolSequence.h"
|
||||
|
||||
size_t LatticeNode::s_count = 0;
|
||||
|
||||
using namespace std;
|
||||
|
||||
// for terms
|
||||
// Terminal node: covers the single source position pos of the given sentence.
// It has no tunnel and no syntax-tree nodes.
// Initializers are listed in the members' declaration order (m_isTerminal,
// m_sourceRange, m_tunnel, tree nodes, m_sentence); that is the order the
// compiler uses regardless, so listing them this way avoids -Wreorder.
LatticeNode::LatticeNode(size_t pos, const SentenceAlignment *sentence)
  :m_isTerminal(true)
  ,m_sourceRange(pos, pos)
  ,m_tunnel(NULL)
  ,m_sourceTreeNode(NULL)
  ,m_targetTreeNode(NULL)
  ,m_sentence(sentence)
{
  s_count++;
  //cerr << *this << endl;
}
|
||||
|
||||
// for non-terms
|
||||
// Non-terminal node: keeps a pointer to the tunnel (not a copy), the source
// and target syntax nodes that label it, and takes its source range from
// tunnel.GetRange(0). No sentence is stored.
// Initializers are listed in the members' declaration order (m_isTerminal,
// m_sourceRange, m_tunnel, tree nodes, m_sentence); that is the order the
// compiler uses regardless, so listing them this way avoids -Wreorder.
LatticeNode::LatticeNode(const Tunnel &tunnel, const SyntaxNode *sourceTreeNode, const SyntaxNode *targetTreeNode)
  :m_isTerminal(false)
  ,m_sourceRange(tunnel.GetRange(0))
  ,m_tunnel(&tunnel)
  ,m_sourceTreeNode(sourceTreeNode)
  ,m_targetTreeNode(targetTreeNode)
  ,m_sentence(NULL)
{
  s_count++;
  //cerr << *this << endl;
}
|
||||
|
||||
bool LatticeNode::IsSyntax() const
|
||||
{
|
||||
assert(!m_isTerminal);
|
||||
bool ret = m_sourceTreeNode->IsSyntax() || m_targetTreeNode->IsSyntax();
|
||||
return ret;
|
||||
}
|
||||
|
||||
size_t LatticeNode::GetNumSymbols(size_t direction) const
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
|
||||
int LatticeNode::Compare(const LatticeNode &otherNode) const
|
||||
{
|
||||
int ret = 0;
|
||||
if (m_isTerminal != otherNode.m_isTerminal)
|
||||
{
|
||||
ret = m_isTerminal ? -1 : 1;
|
||||
}
|
||||
|
||||
// both term or non-term
|
||||
else if (m_isTerminal)
|
||||
{ // term. compare source span
|
||||
if (m_sourceRange.GetStartPos() == otherNode.m_sourceRange.GetStartPos())
|
||||
ret = 0;
|
||||
else
|
||||
ret = (m_sourceRange.GetStartPos() < otherNode.m_sourceRange.GetStartPos()) ? -1 : +1;
|
||||
}
|
||||
else
|
||||
{ // non-term. compare source span and BOTH label
|
||||
assert(!m_isTerminal);
|
||||
assert(!otherNode.m_isTerminal);
|
||||
|
||||
if (m_sourceTreeNode->IsSyntax())
|
||||
{
|
||||
ret = m_tunnel->Compare(*otherNode.m_tunnel, 0);
|
||||
if (ret == 0 && m_sourceTreeNode->GetLabel() != otherNode.m_sourceTreeNode->GetLabel())
|
||||
{
|
||||
ret = (m_sourceTreeNode->GetLabel() < otherNode.m_sourceTreeNode->GetLabel()) ? -1 : +1;
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == 0 && m_targetTreeNode->IsSyntax())
|
||||
{
|
||||
ret = m_tunnel->Compare(*otherNode.m_tunnel, 1);
|
||||
if (ret == 0 && m_targetTreeNode->GetLabel() != otherNode.m_targetTreeNode->GetLabel())
|
||||
{
|
||||
ret = (m_targetTreeNode->GetLabel() < otherNode.m_targetTreeNode->GetLabel()) ? -1 : +1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void LatticeNode::CreateSymbols(size_t direction, SymbolSequence &symbols) const
|
||||
{
|
||||
if (m_isTerminal)
|
||||
{
|
||||
/*
|
||||
const std::vector<std::string> &words = (direction == 0 ? m_sentence->source : m_sentence->target);
|
||||
size_t startPos = m_tunnel.GetStart(direction)
|
||||
,endPos = m_tunnel.GetEnd(direction);
|
||||
|
||||
for (size_t pos = startPos; pos <= endPos; ++pos)
|
||||
{
|
||||
Symbol symbol(words[pos], pos);
|
||||
symbols.Add(symbol);
|
||||
}
|
||||
*/
|
||||
}
|
||||
else
|
||||
{ // output both
|
||||
|
||||
Symbol symbol(m_sourceTreeNode->GetLabel(), m_targetTreeNode->GetLabel()
|
||||
, m_tunnel->GetRange(0).GetStartPos(), m_tunnel->GetRange(0).GetEndPos()
|
||||
, m_tunnel->GetRange(1).GetStartPos(), m_tunnel->GetRange(1).GetEndPos()
|
||||
, m_sourceTreeNode->IsSyntax(), m_targetTreeNode->IsSyntax());
|
||||
|
||||
symbols.Add(symbol);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const LatticeNode &obj)
|
||||
{
|
||||
if (obj.m_isTerminal)
|
||||
{
|
||||
assert(obj.m_sourceRange.GetWidth() == 1);
|
||||
size_t pos = obj.m_sourceRange.GetStartPos();
|
||||
|
||||
const SentenceAlignment &sentence = *obj.m_sentence;
|
||||
out << obj.m_sourceRange << "=" << sentence.source[pos];
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(obj.m_tunnel);
|
||||
out << obj.GetTunnel() << "=" << obj.m_sourceTreeNode->GetLabel() << obj.m_targetTreeNode->GetLabel() << " ";
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -1,77 +0,0 @@
|
||||
#pragma once
|
||||
/*
|
||||
* LatticeNode.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 18/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include "Range.h"
|
||||
|
||||
class Tunnel;
|
||||
class SyntaxNode;
|
||||
class SentenceAlignment;
|
||||
class SymbolSequence;
|
||||
|
||||
class LatticeNode
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const LatticeNode&);
|
||||
|
||||
bool m_isTerminal;
|
||||
|
||||
// for terms & non-term
|
||||
Range m_sourceRange;
|
||||
|
||||
// non-terms. source range should be same as m_sourceRange
|
||||
const Tunnel *m_tunnel;
|
||||
|
||||
public:
|
||||
static size_t s_count;
|
||||
|
||||
|
||||
|
||||
const SyntaxNode *m_sourceTreeNode, *m_targetTreeNode;
|
||||
const SentenceAlignment *m_sentence;
|
||||
|
||||
// for terms
|
||||
LatticeNode(size_t pos, const SentenceAlignment *sentence);
|
||||
|
||||
// for non-terms
|
||||
LatticeNode(const Tunnel &tunnel, const SyntaxNode *sourceTreeNode, const SyntaxNode *targetTreeNode);
|
||||
|
||||
bool IsTerminal() const
|
||||
{ return m_isTerminal; }
|
||||
|
||||
bool IsSyntax() const;
|
||||
|
||||
size_t GetNumSymbols(size_t direction) const;
|
||||
|
||||
std::string ToString() const;
|
||||
|
||||
int Compare(const LatticeNode &otherNode) const;
|
||||
|
||||
void CreateSymbols(size_t direction, SymbolSequence &symbols) const;
|
||||
|
||||
const Tunnel &GetTunnel() const
|
||||
{
|
||||
assert(m_tunnel);
|
||||
return *m_tunnel;
|
||||
}
|
||||
|
||||
const Range &GetSourceRange() const
|
||||
{
|
||||
return m_sourceRange;
|
||||
}
|
||||
const SyntaxNode &GetSyntaxNode(size_t direction) const
|
||||
{
|
||||
const SyntaxNode *node = direction == 0 ? m_sourceTreeNode : m_targetTreeNode;
|
||||
assert(node);
|
||||
return *node;
|
||||
}
|
||||
|
||||
};
|
||||
|
174
contrib/other-builds/extract-mixed-syntax/Main.cpp
Normal file
174
contrib/other-builds/extract-mixed-syntax/Main.cpp
Normal file
@ -0,0 +1,174 @@
|
||||
#include <iostream>
|
||||
#include <cstdlib>
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
#include "Main.h"
|
||||
#include "InputFileStream.h"
|
||||
#include "OutputFileStream.h"
|
||||
#include "AlignedSentence.h"
|
||||
#include "AlignedSentenceSyntax.h"
|
||||
#include "Parameter.h"
|
||||
#include "Rules.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool g_debug = false;
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
cerr << "Starting" << endl;
|
||||
|
||||
Parameter params;
|
||||
|
||||
namespace po = boost::program_options;
|
||||
po::options_description desc("Options");
|
||||
desc.add_options()
|
||||
("help", "Print help messages")
|
||||
("MaxSpan", po::value<int>()->default_value(params.maxSpan), "Max (source) span of a rule. ie. number of words in the source")
|
||||
("GlueGrammar", po::value<string>()->default_value(params.gluePath), "Output glue grammar to here")
|
||||
("SentenceOffset", po::value<long>()->default_value(params.sentenceOffset), "Starting sentence id. Not used")
|
||||
("GZOutput", "Compress extract files")
|
||||
("MaxNonTerm", po::value<int>()->default_value(params.maxNonTerm), "Maximum number of non-terms allowed per rule")
|
||||
("MaxHieroNonTerm", po::value<int>()->default_value(params.maxHieroNonTerm), "Maximum number of Hiero non-term. Usually, --MaxNonTerm is the normal constraint")
|
||||
("MinHoleSource", po::value<int>()->default_value(params.minHoleSource), "Minimum source span for a non-term.")
|
||||
|
||||
("SourceSyntax", "Source sentence is a parse tree")
|
||||
("TargetSyntax", "Target sentence is a parse tree")
|
||||
("MixedSyntaxType", po::value<int>()->default_value(params.mixedSyntaxType), "Hieu's Mixed syntax type. 0(default)=no mixed syntax, 1=add [X] only if no syntactic label. 2=add [X] everywhere")
|
||||
("MultiLabel", po::value<int>()->default_value(params.multiLabel), "What to do with multiple labels on the same span. 0(default)=keep them all, 1=keep only top-most, 2=keep only bottom-most")
|
||||
("HieroSourceLHS", "Always use Hiero source LHS? Default = 0")
|
||||
("MaxSpanFreeNonTermSource", po::value<int>()->default_value(params.maxSpanFreeNonTermSource), "Max number of words covered by beginning/end NT. Default = 0 (no limit)")
|
||||
("NoNieceTerminal", "Don't extract rule if 1 of the non-term covers the same word as 1 of the terminals")
|
||||
("MaxScope", po::value<int>()->default_value(params.maxScope), "maximum scope (see Hopkins and Langmead (2010)). Default is HIGH")
|
||||
("SpanLength", "Property - span length of RHS each non-term")
|
||||
("NonTermContext", "Property - left and right, inside and outside words of each non-term");
|
||||
|
||||
po::variables_map vm;
|
||||
try
|
||||
{
|
||||
po::store(po::parse_command_line(argc, argv, desc),
|
||||
vm); // can throw
|
||||
|
||||
/** --help option
|
||||
*/
|
||||
if ( vm.count("help") || argc < 5 )
|
||||
{
|
||||
std::cout << argv[0] << " target source alignment [options...]" << std::endl
|
||||
<< desc << std::endl;
|
||||
return EXIT_SUCCESS;
|
||||
}
|
||||
|
||||
po::notify(vm); // throws on error, so do after help in case
|
||||
// there are any problems
|
||||
}
|
||||
catch(po::error& e)
|
||||
{
|
||||
std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
|
||||
std::cerr << desc << std::endl;
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (vm.count("MaxSpan")) params.maxSpan = vm["MaxSpan"].as<int>();
|
||||
if (vm.count("GZOutput")) params.gzOutput = true;
|
||||
if (vm.count("GlueGrammar")) params.gluePath = vm["GlueGrammar"].as<string>();
|
||||
if (vm.count("SentenceOffset")) params.sentenceOffset = vm["SentenceOffset"].as<long>();
|
||||
if (vm.count("MaxNonTerm")) params.maxNonTerm = vm["MaxNonTerm"].as<int>();
|
||||
if (vm.count("MaxHieroNonTerm")) params.maxHieroNonTerm = vm["MaxHieroNonTerm"].as<int>();
|
||||
if (vm.count("MinHoleSource")) params.minHoleSource = vm["MinHoleSource"].as<int>();
|
||||
|
||||
if (vm.count("SourceSyntax")) params.sourceSyntax = true;
|
||||
if (vm.count("TargetSyntax")) params.targetSyntax = true;
|
||||
if (vm.count("MixedSyntaxType")) params.mixedSyntaxType = vm["MixedSyntaxType"].as<int>();
|
||||
if (vm.count("MultiLabel")) params.multiLabel = vm["MultiLabel"].as<int>();
|
||||
if (vm.count("HieroSourceLHS")) params.hieroSourceLHS = true;
|
||||
if (vm.count("MaxSpanFreeNonTermSource")) params.maxSpanFreeNonTermSource = vm["MaxSpanFreeNonTermSource"].as<int>();
|
||||
if (vm.count("NoNieceTerminal")) params.nieceTerminal = false;
|
||||
if (vm.count("MaxScope")) params.maxScope = vm["MaxScope"].as<int>();
|
||||
|
||||
// properties
|
||||
if (vm.count("SpanLength")) params.spanLength = true;
|
||||
if (vm.count("NonTermContext")) params.nonTermContext = true;
|
||||
|
||||
// input files;
|
||||
string pathTarget = argv[1];
|
||||
string pathSource = argv[2];
|
||||
string pathAlignment = argv[3];
|
||||
|
||||
string pathExtract = argv[4];
|
||||
string pathExtractInv = pathExtract + ".inv";
|
||||
if (params.gzOutput) {
|
||||
pathExtract += ".gz";
|
||||
pathExtractInv += ".gz";
|
||||
}
|
||||
|
||||
Moses::InputFileStream strmTarget(pathTarget);
|
||||
Moses::InputFileStream strmSource(pathSource);
|
||||
Moses::InputFileStream strmAlignment(pathAlignment);
|
||||
Moses::OutputFileStream extractFile(pathExtract);
|
||||
Moses::OutputFileStream extractInvFile(pathExtractInv);
|
||||
|
||||
|
||||
// MAIN LOOP
|
||||
int lineNum = 1;
|
||||
string lineTarget, lineSource, lineAlignment;
|
||||
while (getline(strmTarget, lineTarget)) {
|
||||
if (lineNum % 10000 == 0) {
|
||||
cerr << lineNum << " ";
|
||||
}
|
||||
|
||||
bool success;
|
||||
success = getline(strmSource, lineSource);
|
||||
if (!success) {
|
||||
throw "Couldn't read source";
|
||||
}
|
||||
success = getline(strmAlignment, lineAlignment);
|
||||
if (!success) {
|
||||
throw "Couldn't read alignment";
|
||||
}
|
||||
|
||||
/*
|
||||
cerr << "lineTarget=" << lineTarget << endl;
|
||||
cerr << "lineSource=" << lineSource << endl;
|
||||
cerr << "lineAlignment=" << lineAlignment << endl;
|
||||
*/
|
||||
|
||||
AlignedSentence *alignedSentence;
|
||||
|
||||
if (params.sourceSyntax || params.targetSyntax) {
|
||||
alignedSentence = new AlignedSentenceSyntax(lineNum, lineSource, lineTarget, lineAlignment);
|
||||
}
|
||||
else {
|
||||
alignedSentence = new AlignedSentence(lineNum, lineSource, lineTarget, lineAlignment);
|
||||
}
|
||||
|
||||
alignedSentence->Create(params);
|
||||
//cerr << alignedSentence->Debug();
|
||||
|
||||
Rules rules(*alignedSentence);
|
||||
rules.Extend(params);
|
||||
rules.Consolidate(params);
|
||||
//cerr << rules.Debug();
|
||||
|
||||
rules.Output(extractFile, true, params);
|
||||
rules.Output(extractInvFile, false, params);
|
||||
|
||||
delete alignedSentence;
|
||||
|
||||
++lineNum;
|
||||
}
|
||||
|
||||
if (!params.gluePath.empty()) {
|
||||
Moses::OutputFileStream glueFile(params.gluePath);
|
||||
CreateGlueGrammar(glueFile);
|
||||
}
|
||||
|
||||
cerr << "Finished" << endl;
|
||||
}
|
||||
|
||||
// Writes the three fixed glue-grammar rules to glueFile, one per line.
void CreateGlueGrammar(Moses::OutputFileStream &glueFile)
{
  glueFile << "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" << endl;
  glueFile << "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" << endl;
  glueFile << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl;
}
|
12
contrib/other-builds/extract-mixed-syntax/Main.h
Normal file
12
contrib/other-builds/extract-mixed-syntax/Main.h
Normal file
@ -0,0 +1,12 @@
|
||||
/*
|
||||
* Main.h
|
||||
*
|
||||
* Created on: 28 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "OutputFileStream.h"
|
||||
|
||||
void CreateGlueGrammar(Moses::OutputFileStream &glueFile);
|
||||
|
@ -1,13 +1,17 @@
|
||||
all: extract
|
||||
all: extract-mixed-syntax
|
||||
|
||||
clean:
|
||||
rm -f *.o extract-mixed-syntax
|
||||
|
||||
.cpp.o:
|
||||
g++ -O6 -g -c $<
|
||||
g++ -O4 -g -c -I../../../boost/include -I../../../ $<
|
||||
|
||||
extract: tables-core.o extract.o SyntaxTree.o XmlTree.o Tunnel.o Lattice.o LatticeNode.o SentenceAlignment.o Global.o InputFileStream.o TunnelCollection.o RuleCollection.o Rule.o Symbol.o SymbolSequence.o Range.o OutputFileStream.o
|
||||
OBJECTS = AlignedSentence.o ConsistentPhrase.o ConsistentPhrases.o InputFileStream.o \
|
||||
Main.o OutputFileStream.o Parameter.o Phrase.o Rule.o Rules.o RuleSymbol.o \
|
||||
SyntaxTree.o Word.o NonTerm.o RulePhrase.o AlignedSentenceSyntax.o pugixml.o
|
||||
|
||||
g++ tables-core.o extract.o SyntaxTree.o XmlTree.o Tunnel.o Lattice.o LatticeNode.o SentenceAlignment.o Global.o InputFileStream.o TunnelCollection.o RuleCollection.o Rule.o Symbol.o SymbolSequence.o Range.o OutputFileStream.o -lz -lboost_iostreams-mt -o extract-mixed-syntax
|
||||
extract-mixed-syntax: $(OBJECTS)
|
||||
|
||||
g++ $(OBJECTS) -L../../../boost/lib64 -lz -lboost_iostreams-mt -lboost_program_options-mt -o extract-mixed-syntax
|
||||
|
||||
|
||||
|
65
contrib/other-builds/extract-mixed-syntax/NonTerm.cpp
Normal file
65
contrib/other-builds/extract-mixed-syntax/NonTerm.cpp
Normal file
@ -0,0 +1,65 @@
|
||||
/*
|
||||
* NonTerm.cpp
|
||||
*
|
||||
* Created on: 22 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include "NonTerm.h"
|
||||
#include "Word.h"
|
||||
#include "ConsistentPhrase.h"
|
||||
#include "Parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Stores a pointer to consistentPhrase (not a copy) together with copies of
// the source and target labels.
NonTerm::NonTerm(const ConsistentPhrase &consistentPhrase,
                 const std::string &source,
                 const std::string &target)
  : m_consistentPhrase(&consistentPhrase)
  , m_source(source)
  , m_target(target)
{
}
|
||||
|
||||
// Nothing to release: the consistent phrase is only pointed to.
NonTerm::~NonTerm()
{
}
|
||||
|
||||
std::string NonTerm::Debug() const
|
||||
{
|
||||
stringstream out;
|
||||
out << m_source << m_target;
|
||||
out << m_consistentPhrase->Debug();
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void NonTerm::Output(std::ostream &out) const
|
||||
{
|
||||
out << m_source << m_target;
|
||||
}
|
||||
|
||||
void NonTerm::Output(std::ostream &out, Moses::FactorDirection direction) const
|
||||
{
|
||||
out << GetLabel(direction);
|
||||
}
|
||||
|
||||
// Moses::Input selects the source label; any other direction selects the
// target label.
const std::string &NonTerm::GetLabel(Moses::FactorDirection direction) const
{
  if (direction == Moses::Input) {
    return m_source;
  }
  return m_target;
}
|
||||
|
||||
// True when the label on the given side equals params.hieroNonTerm.
// (The parameter was garbled to "¶ms" by HTML-entity decoding of "&params".)
bool NonTerm::IsHiero(Moses::FactorDirection direction, const Parameter &params) const
{
  const std::string &label = NonTerm::GetLabel(direction);
  return label == params.hieroNonTerm;
}
|
||||
|
||||
bool NonTerm::IsHiero(const Parameter ¶ms) const
|
||||
{
|
||||
return IsHiero(Moses::Input, params) && IsHiero(Moses::Output, params);
|
||||
}
|
||||
// Width of the underlying consistent phrase on the given side.
int NonTerm::GetWidth(Moses::FactorDirection direction) const
{
  const ConsistentPhrase &phrase = GetConsistentPhrase();
  return phrase.GetWidth(direction);
}
|
47
contrib/other-builds/extract-mixed-syntax/NonTerm.h
Normal file
47
contrib/other-builds/extract-mixed-syntax/NonTerm.h
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* NonTerm.h
|
||||
*
|
||||
* Created on: 22 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include "RuleSymbol.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
class ConsistentPhrase;
|
||||
class Parameter;
|
||||
|
||||
class NonTerm : public RuleSymbol
|
||||
{
|
||||
public:
|
||||
|
||||
NonTerm(const ConsistentPhrase &consistentPhrase,
|
||||
const std::string &source,
|
||||
const std::string &target);
|
||||
virtual ~NonTerm();
|
||||
|
||||
const ConsistentPhrase &GetConsistentPhrase() const
|
||||
{ return *m_consistentPhrase; }
|
||||
|
||||
int GetWidth(Moses::FactorDirection direction) const;
|
||||
|
||||
virtual bool IsNonTerm() const
|
||||
{ return true; }
|
||||
|
||||
std::string GetString() const
|
||||
{ return m_source + m_target; }
|
||||
|
||||
virtual std::string Debug() const;
|
||||
virtual void Output(std::ostream &out) const;
|
||||
void Output(std::ostream &out, Moses::FactorDirection direction) const;
|
||||
|
||||
const std::string &GetLabel(Moses::FactorDirection direction) const;
|
||||
bool IsHiero(Moses::FactorDirection direction, const Parameter ¶ms) const;
|
||||
bool IsHiero(const Parameter ¶ms) const;
|
||||
|
||||
protected:
|
||||
const ConsistentPhrase *m_consistentPhrase;
|
||||
std::string m_source, m_target;
|
||||
};
|
||||
|
41
contrib/other-builds/extract-mixed-syntax/Parameter.cpp
Normal file
41
contrib/other-builds/extract-mixed-syntax/Parameter.cpp
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Parameter.cpp
|
||||
*
|
||||
* Created on: 17 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#include "Parameter.h"
|
||||
|
||||
// Sets the default value of every option. gluePath is left
// default-constructed (empty).
Parameter::Parameter()
  // size limits
  : maxSpan(10)
  , maxNonTerm(2)
  , maxHieroNonTerm(999)
  , maxSymbolsTarget(999)
  , maxSymbolsSource(5)
  , minHoleSource(2)
  , sentenceOffset(0)
  // general switches
  , nonTermConsecSource(false)
  , requireAlignedWord(true)
  , fractionalCounting(true)
  , gzOutput(false)
  // labels / syntax
  , hieroNonTerm("[X]")
  , sourceSyntax(false)
  , targetSyntax(false)
  // mixed-syntax options
  , mixedSyntaxType(0)
  , multiLabel(0)
  , nonTermConsecSourceMixed(true)
  , hieroSourceLHS(false)
  , maxSpanFreeNonTermSource(0)
  , nieceTerminal(true)
  , maxScope(UNDEFINED)
  // properties
  , spanLength(false)
  , nonTermContext(false)
{
}
|
||||
|
||||
// Nothing to release.
Parameter::~Parameter()
{
}
|
||||
|
51
contrib/other-builds/extract-mixed-syntax/Parameter.h
Normal file
51
contrib/other-builds/extract-mixed-syntax/Parameter.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* Parameter.h
|
||||
*
|
||||
* Created on: 17 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <limits>
|
||||
|
||||
#define UNDEFINED std::numeric_limits<int>::max()
|
||||
|
||||
// Plain bag of extraction options. Defaults are assigned in the constructor
// (Parameter.cpp); all members are public and set directly by the caller.
class Parameter
{
public:
  Parameter();
  virtual ~Parameter();

  // integer limits
  int maxSpan;
  int maxNonTerm;
  int maxHieroNonTerm;
  int maxSymbolsTarget;
  int maxSymbolsSource;
  int minHoleSource;

  long sentenceOffset;

  // general switches
  bool nonTermConsecSource;
  bool requireAlignedWord;
  bool fractionalCounting;
  bool gzOutput;

  // label compared against by NonTerm::IsHiero
  std::string hieroNonTerm;
  // a glue grammar is written to this path when it is non-empty
  std::string gluePath;

  bool sourceSyntax, targetSyntax;

  // mixed-syntax options
  int mixedSyntaxType, multiLabel;
  bool nonTermConsecSourceMixed;
  bool hieroSourceLHS;
  int maxSpanFreeNonTermSource;
  bool nieceTerminal;
  int maxScope;

  // properties
  bool spanLength;
  bool nonTermContext;

};
|
||||
|
14
contrib/other-builds/extract-mixed-syntax/Phrase.cpp
Normal file
14
contrib/other-builds/extract-mixed-syntax/Phrase.cpp
Normal file
@ -0,0 +1,14 @@
|
||||
#include <sstream>
|
||||
#include "Phrase.h"
|
||||
|
||||
std::string Phrase::Debug() const
|
||||
{
|
||||
std::stringstream out;
|
||||
|
||||
for (size_t i = 0; i < size(); ++i) {
|
||||
Word &word = *at(i);
|
||||
out << word.Debug() << " ";
|
||||
}
|
||||
|
||||
return out.str();
|
||||
}
|
19
contrib/other-builds/extract-mixed-syntax/Phrase.h
Normal file
19
contrib/other-builds/extract-mixed-syntax/Phrase.h
Normal file
@ -0,0 +1,19 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include "Word.h"
|
||||
|
||||
// a vector of terminals
|
||||
class Phrase : public std::vector<Word*>
|
||||
{
|
||||
public:
|
||||
Phrase()
|
||||
{}
|
||||
|
||||
Phrase(size_t size)
|
||||
:std::vector<Word*>(size)
|
||||
{}
|
||||
|
||||
std::string Debug() const;
|
||||
|
||||
};
|
@ -1,74 +0,0 @@
|
||||
/*
|
||||
* Range.cpp
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 22/02/2011.
|
||||
* Copyright 2011 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "Range.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
void Range::Merge(const Range &a, const Range &b)
|
||||
{
|
||||
if (a.m_startPos == NOT_FOUND)
|
||||
{ // get the other regardless
|
||||
m_startPos = b.m_startPos;
|
||||
}
|
||||
else if (b.m_startPos == NOT_FOUND)
|
||||
{
|
||||
m_startPos = a.m_startPos;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_startPos = min(a.m_startPos, b.m_startPos);
|
||||
}
|
||||
|
||||
if (a.m_endPos == NOT_FOUND)
|
||||
{ // get the other regardless
|
||||
m_endPos = b.m_endPos;
|
||||
}
|
||||
else if (b.m_endPos == NOT_FOUND)
|
||||
{ // do nothing
|
||||
m_endPos = a.m_endPos;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_endPos = max(a.m_endPos, b.m_endPos);
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
int Range::Compare(const Range &other) const
|
||||
{
|
||||
if (m_startPos < other.m_startPos)
|
||||
return -1;
|
||||
else if (m_startPos > other.m_startPos)
|
||||
return +1;
|
||||
else if (m_endPos < other.m_endPos)
|
||||
return -1;
|
||||
else if (m_endPos > other.m_endPos)
|
||||
return +1;
|
||||
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
bool Range::Overlap(const Range &other) const
|
||||
{
|
||||
if ( other.m_endPos < m_startPos || other.m_startPos > m_endPos)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const Range &range)
|
||||
{
|
||||
out << "[" << range.m_startPos << "-" << range.m_endPos << "]";
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -1,57 +0,0 @@
|
||||
/*
|
||||
* Range.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 22/02/2011.
|
||||
* Copyright 2011 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#pragma once
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
|
||||
// Marker for an unset position.
#define NOT_FOUND std::numeric_limits<size_t>::max()

// A span of positions [m_startPos, m_endPos]; GetWidth() counts both ends.
// A default-constructed Range has both positions set to NOT_FOUND.
class Range
{
  friend std::ostream& operator<<(std::ostream&, const Range&);

  size_t m_startPos, m_endPos;
public:

  Range()
    :m_startPos(NOT_FOUND)
    ,m_endPos(NOT_FOUND)
  {}

  // The parameter was garbled to "©" by HTML-entity decoding of "&copy";
  // restored here.
  Range(const Range &copy)
    :m_startPos(copy.m_startPos)
    ,m_endPos(copy.m_endPos)
  {}

  Range(size_t startPos, size_t endPos)
    :m_startPos(startPos)
    ,m_endPos(endPos)
  {}

  size_t GetStartPos() const
  { return m_startPos; }
  size_t GetEndPos() const
  { return m_endPos; }
  // end - start + 1, i.e. both ends are included
  size_t GetWidth() const
  { return m_endPos - m_startPos + 1; }

  void SetStartPos(size_t startPos)
  { m_startPos = startPos; }
  void SetEndPos(size_t endPos)
  { m_endPos = endPos; }

  // Sets this range to the union of a and b.
  void Merge(const Range &a, const Range &b);

  // Three-way comparison, -1 / 0 / +1.
  int Compare(const Range &other) const;

  bool Overlap(const Range &other) const;


};
|
File diff suppressed because it is too large
Load Diff
@ -1,96 +1,87 @@
|
||||
#pragma once
|
||||
/*
|
||||
* Rule.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 19/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
* Rule.h
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "LatticeNode.h"
|
||||
#include "SymbolSequence.h"
|
||||
#include "Global.h"
|
||||
#include "Phrase.h"
|
||||
#include "RulePhrase.h"
|
||||
#include "moses/TypeDef.h"
|
||||
|
||||
class Lattice;
|
||||
class SentenceAlignment;
|
||||
class Global;
|
||||
class RuleCollection;
|
||||
class SyntaxNode;
|
||||
class TunnelCollection;
|
||||
class Range;
|
||||
class ConsistentPhrase;
|
||||
class AlignedSentence;
|
||||
class NonTerm;
|
||||
class Parameter;
|
||||
|
||||
class RuleElement
|
||||
{
|
||||
protected:
|
||||
const LatticeNode *m_latticeNode;
|
||||
|
||||
class Rule {
|
||||
public:
|
||||
std::pair<size_t, size_t> m_alignmentPos;
|
||||
|
||||
RuleElement(const RuleElement ©);
|
||||
RuleElement(const LatticeNode &latticeNode)
|
||||
:m_latticeNode(&latticeNode)
|
||||
,m_alignmentPos(NOT_FOUND, NOT_FOUND)
|
||||
{}
|
||||
typedef std::set<std::pair<int,int> > Alignments;
|
||||
|
||||
const LatticeNode &GetLatticeNode() const
|
||||
{ return *m_latticeNode; }
|
||||
Rule(const Rule ©); // do not implement
|
||||
|
||||
};
|
||||
// original rule with no non-term
|
||||
Rule(const NonTerm &lhsNonTerm, const AlignedSentence &alignedSentence);
|
||||
|
||||
class Rule
|
||||
{
|
||||
protected:
|
||||
typedef std::vector<RuleElement> CollType;
|
||||
CollType m_coll;
|
||||
|
||||
const LatticeNode *m_lhs;
|
||||
SymbolSequence m_source, m_target;
|
||||
|
||||
bool IsHole(const TunnelCollection &tunnelColl) const;
|
||||
bool NonTermOverlap() const;
|
||||
|
||||
const LatticeNode &GetLatticeNode(size_t ind) const;
|
||||
void CreateSymbols(const Global &global, bool &isValid, const SentenceAlignment &sentence);
|
||||
|
||||
public:
|
||||
// init
|
||||
Rule(const LatticeNode *latticeNode);
|
||||
|
||||
// create new rule by appending node to prev rule
|
||||
Rule(const Rule &prevRule, const LatticeNode *latticeNode);
|
||||
|
||||
// create copy with lhs
|
||||
Rule(const Global &global, bool &isValid, const Rule ©, const LatticeNode *lhs, const SentenceAlignment &sentence);
|
||||
|
||||
// can continue to add to this rule
|
||||
bool CanRecurse(const Global &global, const TunnelCollection &tunnelColl) const;
|
||||
// extend a rule, adding 1 new non-term
|
||||
Rule(const Rule ©, const NonTerm &nonTerm);
|
||||
|
||||
virtual ~Rule();
|
||||
|
||||
// can add this to the set of rules
|
||||
bool IsValid(const Global &global, const TunnelCollection &tunnelColl) const;
|
||||
bool IsValid() const
|
||||
{ return m_isValid; }
|
||||
|
||||
size_t GetNumSymbols() const;
|
||||
bool AdjacentDefaultNonTerms() const;
|
||||
bool MaxNonTerm(const Global &global) const;
|
||||
bool MoreDefaultNonTermThanTerm() const;
|
||||
bool SourceHasEdgeDefaultNonTerm() const;
|
||||
bool CanRecurse() const
|
||||
{ return m_canRecurse; }
|
||||
|
||||
void CreateRules(RuleCollection &rules
|
||||
, const Lattice &lattice
|
||||
, const SentenceAlignment &sentence
|
||||
, const Global &global);
|
||||
|
||||
int Compare(const Rule &compare) const;
|
||||
bool operator<(const Rule &compare) const;
|
||||
|
||||
Range GetSourceRange() const;
|
||||
|
||||
DEBUG_OUTPUT();
|
||||
const NonTerm &GetLHS() const
|
||||
{ return m_lhs; }
|
||||
|
||||
void Output(std::ostream &out) const;
|
||||
void OutputInv(std::ostream &out) const;
|
||||
const ConsistentPhrase &GetConsistentPhrase() const;
|
||||
|
||||
int GetNextSourcePosForNonTerm() const;
|
||||
|
||||
void SetCount(float count)
|
||||
{ m_count = count; }
|
||||
float GetCount() const
|
||||
{ return m_count; }
|
||||
|
||||
const Alignments &GetAlignments() const
|
||||
{ return m_alignments; }
|
||||
|
||||
std::string Debug() const;
|
||||
void Output(std::ostream &out, bool forward, const Parameter &params) const;
|
||||
|
||||
void Prevalidate(const Parameter &params);
|
||||
void CreateTarget(const Parameter &params);
|
||||
|
||||
const RulePhrase &GetPhrase(Moses::FactorDirection direction) const
|
||||
{ return (direction == Moses::Input) ? m_source : m_target; }
|
||||
|
||||
protected:
|
||||
const NonTerm &m_lhs;
|
||||
const AlignedSentence &m_alignedSentence;
|
||||
RulePhrase m_source, m_target;
|
||||
float m_count;
|
||||
|
||||
Alignments m_alignments;
|
||||
|
||||
// in source order
|
||||
std::vector<const NonTerm*> m_nonterms;
|
||||
|
||||
bool m_isValid, m_canRecurse;
|
||||
|
||||
void CreateSource();
|
||||
void CreateAlignments();
|
||||
void CreateAlignments(int sourcePos, const std::set<const Word *> &targetWords);
|
||||
void CreateAlignments(int sourcePos, const RuleSymbol *targetSought);
|
||||
|
||||
bool ContainTerm(const ConsistentPhrase &cp, const std::set<const Word*> &terms) const;
|
||||
int CalcScope() const; // not yet correctly calculated
|
||||
|
||||
void NonTermContext(size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const;
|
||||
|
||||
};
|
||||
|
||||
|
@ -1,102 +0,0 @@
|
||||
/*
|
||||
* RuleCollection.cpp
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 19/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "RuleCollection.h"
|
||||
#include "Rule.h"
|
||||
#include "SentenceAlignment.h"
|
||||
#include "tables-core.h"
|
||||
#include "Lattice.h"
|
||||
#include "SyntaxTree.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Destructor: hands the whole rule container to RemoveAllInColl for clean-up
// (presumably deleting each stored Rule -- the helper is defined elsewhere).
RuleCollection::~RuleCollection()
{
  RemoveAllInColl(m_coll);
}
|
||||
|
||||
void RuleCollection::Add(const Global &global, Rule *rule, const SentenceAlignment &sentence)
|
||||
{
|
||||
Range spanS = rule->GetSourceRange();
|
||||
|
||||
// cartesian product of lhs
|
||||
Stack nontermNodes = sentence.GetLattice().GetNonTermNode(spanS);
|
||||
Stack::const_iterator iterStack;
|
||||
for (iterStack = nontermNodes.begin(); iterStack != nontermNodes.end(); ++iterStack)
|
||||
{
|
||||
const LatticeNode &node = **iterStack;
|
||||
assert(!node.IsTerminal());
|
||||
|
||||
bool isValid;
|
||||
// create rules with LHS
|
||||
//cerr << "old:" << *rule << endl;
|
||||
Rule *newRule = new Rule(global, isValid, *rule, &node, sentence);
|
||||
|
||||
if (!isValid)
|
||||
{ // lhs doesn't match non-term spans
|
||||
delete newRule;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
stringstream s;
|
||||
s << *newRule;
|
||||
if (s.str().find("Wiederaufnahme der [X] ||| resumption of the [X] ||| ||| 1") == 0)
|
||||
{
|
||||
cerr << "READY:" << *newRule << endl;
|
||||
g_debug = true;
|
||||
}
|
||||
else {
|
||||
g_debug = false;
|
||||
}
|
||||
*/
|
||||
|
||||
typedef set<const Rule*, CompareRule>::iterator Iterator;
|
||||
pair<Iterator,bool> ret = m_coll.insert(newRule);
|
||||
|
||||
if (ret.second)
|
||||
{
|
||||
//cerr << "ACCEPTED:" << *newRule << endl;
|
||||
//cerr << "";
|
||||
}
|
||||
else
|
||||
{
|
||||
//cerr << "REJECTED:" << *newRule << endl;
|
||||
delete newRule;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
delete rule;
|
||||
|
||||
}
|
||||
|
||||
void RuleCollection::Output(std::ostream &out) const
|
||||
{
|
||||
RuleCollection::CollType::const_iterator iter;
|
||||
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter)
|
||||
{
|
||||
const Rule &rule = **iter;
|
||||
rule.Output(out);
|
||||
out << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void RuleCollection::OutputInv(std::ostream &out) const
|
||||
{
|
||||
RuleCollection::CollType::const_iterator iter;
|
||||
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter)
|
||||
{
|
||||
const Rule &rule = **iter;
|
||||
rule.OutputInv(out);
|
||||
out << endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,55 +0,0 @@
|
||||
#pragma once
|
||||
/*
|
||||
* RuleCollection.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 19/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include "Rule.h"
|
||||
|
||||
class SentenceAlignment;
|
||||
|
||||
// helper for sort. Don't compare default non-terminals
|
||||
struct CompareRule
|
||||
{
|
||||
bool operator() (const Rule *a, const Rule *b)
|
||||
{
|
||||
/*
|
||||
if (g_debug)
|
||||
{
|
||||
std::cerr << std::endl << (*a) << std::endl << (*b) << " ";
|
||||
}
|
||||
*/
|
||||
bool ret = (*a) < (*b);
|
||||
/*
|
||||
if (g_debug)
|
||||
{
|
||||
std::cerr << ret << std::endl;
|
||||
}
|
||||
*/
|
||||
return ret;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
class RuleCollection
|
||||
{
|
||||
protected:
|
||||
typedef std::set<const Rule*, CompareRule> CollType;
|
||||
CollType m_coll;
|
||||
|
||||
public:
|
||||
~RuleCollection();
|
||||
void Add(const Global &global, Rule *rule, const SentenceAlignment &sentence);
|
||||
size_t GetSize() const
|
||||
{ return m_coll.size(); }
|
||||
|
||||
void Output(std::ostream &out) const;
|
||||
void OutputInv(std::ostream &out) const;
|
||||
|
||||
};
|
||||
|
50
contrib/other-builds/extract-mixed-syntax/RulePhrase.cpp
Normal file
50
contrib/other-builds/extract-mixed-syntax/RulePhrase.cpp
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* RulePhrase.cpp
|
||||
*
|
||||
* Created on: 26 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include "RulePhrase.h"
|
||||
#include "RuleSymbol.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
extern bool g_debug;
|
||||
|
||||
int RulePhrase::Compare(const RulePhrase &other) const
|
||||
{
|
||||
if (GetSize() != other.GetSize()) {
|
||||
return GetSize() < other.GetSize() ? -1 : +1;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < m_coll.size(); ++i) {
|
||||
const RuleSymbol &symbol = *m_coll[i];
|
||||
const RuleSymbol &otherSymbol = *other.m_coll[i];
|
||||
int compare = symbol.Compare(otherSymbol);
|
||||
|
||||
if (compare) {
|
||||
return compare;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void RulePhrase::Output(std::ostream &out) const
|
||||
{
|
||||
for (size_t i = 0; i < m_coll.size(); ++i) {
|
||||
const RuleSymbol &symbol = *m_coll[i];
|
||||
symbol.Output(out);
|
||||
out << " ";
|
||||
}
|
||||
}
|
||||
|
||||
std::string RulePhrase::Debug() const
|
||||
{
|
||||
std::stringstream out;
|
||||
Output(out);
|
||||
return out.str();
|
||||
}
|
||||
|
49
contrib/other-builds/extract-mixed-syntax/RulePhrase.h
Normal file
49
contrib/other-builds/extract-mixed-syntax/RulePhrase.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* RulePhrase.h
|
||||
*
|
||||
* Created on: 26 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#ifndef RULEPHRASE_H_
|
||||
#define RULEPHRASE_H_
|
||||
|
||||
#include <vector>
|
||||
#include <cstddef>
|
||||
#include <iostream>
|
||||
|
||||
class RuleSymbol;

// A phrase of terminals and non-terminals for one side of a rule.
// Only the pointers it is given are stored; this class has no destructor and
// never deletes the symbols.
class RulePhrase
{
public:
  typedef std::vector<const RuleSymbol*> Coll;
  Coll m_coll;

  // Number of symbols in the phrase.
  size_t GetSize() const {
    return m_coll.size();
  }

  // Appends `symbol` at the end of the phrase.
  void Add(const RuleSymbol *symbol) {
    m_coll.insert(m_coll.end(), symbol);
  }

  // Symbol at position `index`; no bounds checking is done.
  const RuleSymbol* operator[](size_t index) const {
    return *(m_coll.begin() + index);
  }

  // First / last symbol. The phrase must not be empty.
  const RuleSymbol* Front() const {
    return *m_coll.begin();
  }
  const RuleSymbol* Back() const {
    return *m_coll.rbegin();
  }

  // Three-way comparison with another phrase.
  int Compare(const RulePhrase &other) const;

  // Print the phrase / return the printed form as a string.
  void Output(std::ostream &out) const;
  std::string Debug() const;
};
|
||||
|
||||
#endif /* RULEPHRASE_H_ */
|
36
contrib/other-builds/extract-mixed-syntax/RuleSymbol.cpp
Normal file
36
contrib/other-builds/extract-mixed-syntax/RuleSymbol.cpp
Normal file
@ -0,0 +1,36 @@
|
||||
/*
|
||||
* RuleSymbol.cpp
|
||||
*
|
||||
* Created on: 21 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include "RuleSymbol.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Default constructor; there is nothing to initialise.
RuleSymbol::RuleSymbol()
{
}
|
||||
|
||||
// Destructor; there is nothing to release.
RuleSymbol::~RuleSymbol()
{
}
|
||||
|
||||
int RuleSymbol::Compare(const RuleSymbol &other) const
|
||||
{
|
||||
if (IsNonTerm() != other.IsNonTerm()) {
|
||||
return IsNonTerm() ? -1 : +1;
|
||||
}
|
||||
|
||||
string str = GetString();
|
||||
string otherStr = other.GetString();
|
||||
|
||||
if (str == otherStr) {
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
return (str < otherStr) ? -1 : +1;
|
||||
}
|
||||
}
|
31
contrib/other-builds/extract-mixed-syntax/RuleSymbol.h
Normal file
31
contrib/other-builds/extract-mixed-syntax/RuleSymbol.h
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* RuleSymbol.h
|
||||
*
|
||||
* Created on: 21 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#ifndef RULESYMBOL_H_
|
||||
#define RULESYMBOL_H_
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
// Abstract base class for the symbols of a rule: a concrete symbol is either a
// terminal or a non-terminal.
class RuleSymbol
{
public:
  RuleSymbol();
  virtual ~RuleSymbol();

  // Distinguishes non-terminals from terminals.
  virtual bool IsNonTerm() const = 0;

  // Textual forms of the symbol: a debugging string, and printing to a stream.
  virtual std::string Debug() const = 0;
  virtual void Output(std::ostream &out) const = 0;

  // String used by Compare() to order two symbols of the same kind.
  virtual std::string GetString() const = 0;

  // Three-way comparison: kind first (IsNonTerm), then GetString().
  int Compare(const RuleSymbol &other) const;
};
|
||||
|
||||
#endif /* RULESYMBOL_H_ */
|
227
contrib/other-builds/extract-mixed-syntax/Rules.cpp
Normal file
227
contrib/other-builds/extract-mixed-syntax/Rules.cpp
Normal file
@ -0,0 +1,227 @@
|
||||
/*
|
||||
* Rules.cpp
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#include <sstream>
|
||||
#include "Rules.h"
|
||||
#include "ConsistentPhrase.h"
|
||||
#include "ConsistentPhrases.h"
|
||||
#include "AlignedSentence.h"
|
||||
#include "Rule.h"
|
||||
#include "Parameter.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
extern bool g_debug;
|
||||
|
||||
// Binds the rule set to the aligned sentence it works on; the reference is
// stored, and no rules are created here.
Rules::Rules(const AlignedSentence &alignedSentence)
  : m_alignedSentence(alignedSentence)
{}
|
||||
|
||||
// Cleans up the rules held in m_keepRules. m_mergeRules is not touched: it is
// only ever filled with pointers taken from m_keepRules.
Rules::~Rules()
{
  Moses::RemoveAllInColl(m_keepRules);
}
|
||||
|
||||
void Rules::CreateRules(const ConsistentPhrase &cp,
|
||||
const Parameter ¶ms)
|
||||
{
|
||||
if (params.hieroSourceLHS) {
|
||||
const NonTerm &nonTerm = cp.GetHieroNonTerm();
|
||||
CreateRule(nonTerm, params);
|
||||
}
|
||||
else {
|
||||
const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
|
||||
for (size_t i = 0; i < nonTerms.size(); ++i) {
|
||||
const NonTerm &nonTerm = nonTerms[i];
|
||||
CreateRule(nonTerm, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Rules::CreateRule(const NonTerm &nonTerm,
|
||||
const Parameter ¶ms)
|
||||
{
|
||||
Rule *rule = new Rule(nonTerm, m_alignedSentence);
|
||||
|
||||
rule->Prevalidate(params);
|
||||
rule->CreateTarget(params);
|
||||
|
||||
|
||||
if (rule->CanRecurse()) {
|
||||
Extend(*rule, params);
|
||||
}
|
||||
|
||||
if (rule->IsValid()) {
|
||||
m_keepRules.insert(rule);
|
||||
}
|
||||
else {
|
||||
delete rule;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void Rules::Extend(const Parameter ¶ms)
|
||||
{
|
||||
const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
|
||||
|
||||
size_t size = m_alignedSentence.GetPhrase(Moses::Input).size();
|
||||
for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
|
||||
for (size_t sourceEnd = sourceStart; sourceEnd < size; ++sourceEnd) {
|
||||
const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
|
||||
|
||||
ConsistentPhrases::Coll::const_iterator iter;
|
||||
for (iter = cps.begin(); iter != cps.end(); ++iter) {
|
||||
const ConsistentPhrase &cp = **iter;
|
||||
CreateRules(cp, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Rules::Extend(const Rule &rule, const Parameter ¶ms)
|
||||
{
|
||||
const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
|
||||
int sourceMin = rule.GetNextSourcePosForNonTerm();
|
||||
|
||||
int ruleStart = rule.GetConsistentPhrase().corners[0];
|
||||
int ruleEnd = rule.GetConsistentPhrase().corners[1];
|
||||
|
||||
for (int sourceStart = sourceMin; sourceStart <= ruleEnd; ++sourceStart) {
|
||||
for (int sourceEnd = sourceStart; sourceEnd <= ruleEnd; ++sourceEnd) {
|
||||
if (sourceStart == ruleStart && sourceEnd == ruleEnd) {
|
||||
// don't cover whole rule with 1 non-term
|
||||
continue;
|
||||
}
|
||||
|
||||
const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
|
||||
Extend(rule, cps, params);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extends `rule` with every consistent phrase in `cps`.
void Rules::Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter &params)
{
  ConsistentPhrases::Coll::const_iterator iter;
  for (iter = cps.begin(); iter != cps.end(); ++iter) {
    const ConsistentPhrase &cp = **iter;
    Extend(rule, cp, params);
  }
}
|
||||
|
||||
void Rules::Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter ¶ms)
|
||||
{
|
||||
const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
|
||||
for (size_t i = 0; i < nonTerms.size(); ++i) {
|
||||
const NonTerm &nonTerm = nonTerms[i];
|
||||
|
||||
Rule *newRule = new Rule(rule, nonTerm);
|
||||
newRule->Prevalidate(params);
|
||||
newRule->CreateTarget(params);
|
||||
|
||||
if (newRule->CanRecurse()) {
|
||||
// recursively extend
|
||||
Extend(*newRule, params);
|
||||
}
|
||||
|
||||
if (newRule->IsValid()) {
|
||||
m_keepRules.insert(newRule);
|
||||
}
|
||||
else {
|
||||
delete newRule;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string Rules::Debug() const
|
||||
{
|
||||
stringstream out;
|
||||
|
||||
std::set<Rule*>::const_iterator iter;
|
||||
out << "m_keepRules:" << endl;
|
||||
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
|
||||
const Rule &rule = **iter;
|
||||
out << rule.Debug() << endl;
|
||||
}
|
||||
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void Rules::Output(std::ostream &out, bool forward, const Parameter ¶ms) const
|
||||
{
|
||||
std::set<Rule*, CompareRules>::const_iterator iter;
|
||||
for (iter = m_mergeRules.begin(); iter != m_mergeRules.end(); ++iter) {
|
||||
const Rule &rule = **iter;
|
||||
rule.Output(out, forward, params);
|
||||
out << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void Rules::Consolidate(const Parameter ¶ms)
|
||||
{
|
||||
if (params.fractionalCounting) {
|
||||
CalcFractionalCount();
|
||||
}
|
||||
else {
|
||||
std::set<Rule*>::iterator iter;
|
||||
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
|
||||
Rule &rule = **iter;
|
||||
rule.SetCount(1);
|
||||
}
|
||||
}
|
||||
|
||||
MergeRules(params);
|
||||
}
|
||||
|
||||
void Rules::MergeRules(const Parameter ¶ms)
|
||||
{
|
||||
typedef std::set<Rule*, CompareRules> MergeRules;
|
||||
|
||||
std::set<Rule*>::const_iterator iterOrig;
|
||||
for (iterOrig = m_keepRules.begin(); iterOrig != m_keepRules.end(); ++iterOrig) {
|
||||
Rule *origRule = *iterOrig;
|
||||
|
||||
pair<MergeRules::iterator, bool> inserted = m_mergeRules.insert(origRule);
|
||||
if (!inserted.second) {
|
||||
// already there, just add count
|
||||
Rule &rule = **inserted.first;
|
||||
float newCount = rule.GetCount() + origRule->GetCount();
|
||||
rule.SetCount(newCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void Rules::CalcFractionalCount()
|
||||
{
|
||||
typedef std::set<Rule*> RuleColl;
|
||||
typedef std::map<const ConsistentPhrase*, RuleColl> RuleByConsistentPhrase;
|
||||
RuleByConsistentPhrase allRules;
|
||||
|
||||
// sort by source AND target ranges
|
||||
std::set<Rule*>::const_iterator iter;
|
||||
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
|
||||
Rule *rule = *iter;
|
||||
const ConsistentPhrase &cp = rule->GetConsistentPhrase();
|
||||
RuleColl &ruleColl = allRules[&cp];
|
||||
ruleColl.insert(rule);
|
||||
}
|
||||
|
||||
// fractional count
|
||||
RuleByConsistentPhrase::iterator iterOuter;
|
||||
for (iterOuter = allRules.begin(); iterOuter != allRules.end(); ++iterOuter) {
|
||||
RuleColl &rules = iterOuter->second;
|
||||
|
||||
RuleColl::iterator iterInner;
|
||||
for (iterInner = rules.begin(); iterInner != rules.end(); ++iterInner) {
|
||||
Rule &rule = **iterInner;
|
||||
rule.SetCount(1.0f / (float) rules.size());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
72
contrib/other-builds/extract-mixed-syntax/Rules.h
Normal file
72
contrib/other-builds/extract-mixed-syntax/Rules.h
Normal file
@ -0,0 +1,72 @@
|
||||
/*
|
||||
* Rules.h
|
||||
*
|
||||
* Created on: 20 Feb 2014
|
||||
* Author: hieu
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include "ConsistentPhrases.h"
|
||||
#include "Rule.h"
|
||||
|
||||
extern bool g_debug;
|
||||
|
||||
class AlignedSentence;
|
||||
class Parameter;
|
||||
|
||||
struct CompareRules {
|
||||
bool operator()(const Rule *a, const Rule *b)
|
||||
{
|
||||
int compare;
|
||||
|
||||
compare = a->GetPhrase(Moses::Input).Compare(b->GetPhrase(Moses::Input));
|
||||
if (compare) return compare < 0;
|
||||
|
||||
compare = a->GetPhrase(Moses::Output).Compare(b->GetPhrase(Moses::Output));
|
||||
if (compare) return compare < 0;
|
||||
|
||||
if (a->GetAlignments() != b->GetAlignments()) {
|
||||
return a->GetAlignments() < b->GetAlignments();
|
||||
}
|
||||
|
||||
if (a->GetLHS().GetString() != b->GetLHS().GetString()) {
|
||||
return a->GetLHS().GetString() < b->GetLHS().GetString();
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
class Rules {
|
||||
public:
|
||||
Rules(const AlignedSentence &alignedSentence);
|
||||
virtual ~Rules();
|
||||
void Extend(const Parameter ¶ms);
|
||||
void Consolidate(const Parameter ¶ms);
|
||||
|
||||
std::string Debug() const;
|
||||
void Output(std::ostream &out, bool forward, const Parameter ¶ms) const;
|
||||
|
||||
protected:
|
||||
const AlignedSentence &m_alignedSentence;
|
||||
std::set<Rule*> m_keepRules;
|
||||
std::set<Rule*, CompareRules> m_mergeRules;
|
||||
|
||||
void Extend(const Rule &rule, const Parameter ¶ms);
|
||||
void Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter ¶ms);
|
||||
void Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter ¶ms);
|
||||
|
||||
// create original rules
|
||||
void CreateRules(const ConsistentPhrase &cp,
|
||||
const Parameter ¶ms);
|
||||
void CreateRule(const NonTerm &nonTerm,
|
||||
const Parameter ¶ms);
|
||||
|
||||
void MergeRules(const Parameter ¶ms);
|
||||
void CalcFractionalCount();
|
||||
|
||||
};
|
||||
|
@ -1,331 +0,0 @@
|
||||
/*
|
||||
* SentenceAlignment.cpp
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 19/01/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include "SentenceAlignment.h"
|
||||
#include "XmlTree.h"
|
||||
#include "tables-core.h"
|
||||
#include "TunnelCollection.h"
|
||||
#include "Lattice.h"
|
||||
#include "LatticeNode.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
extern std::set< std::string > targetLabelCollection, sourceLabelCollection;
|
||||
extern std::map< std::string, int > targetTopLabelCollection, sourceTopLabelCollection;
|
||||
|
||||
// Starts with no tunnel collection and no lattice; FindTunnels() and
// CreateLattice() allocate them later.
SentenceAlignment::SentenceAlignment()
  : m_tunnelCollection(NULL)
  , m_lattice(NULL)
{
}
|
||||
|
||||
// Frees the tunnel collection and the lattice (either may still be NULL, which
// delete accepts).
SentenceAlignment::~SentenceAlignment()
{
  delete m_tunnelCollection;
  delete m_lattice;
}
|
||||
|
||||
int SentenceAlignment::Create( const std::string &targetString, const std::string &sourceString, const std::string &alignmentString, int sentenceID, const Global &global )
|
||||
{
|
||||
|
||||
// tokenizing English (and potentially extract syntax spans)
|
||||
if (global.targetSyntax) {
|
||||
string targetStringCPP = string(targetString);
|
||||
ProcessAndStripXMLTags( targetStringCPP, targetTree, targetLabelCollection , targetTopLabelCollection );
|
||||
target = tokenize( targetStringCPP.c_str() );
|
||||
// cerr << "E: " << targetStringCPP << endl;
|
||||
}
|
||||
else {
|
||||
target = tokenize( targetString.c_str() );
|
||||
}
|
||||
|
||||
// tokenizing source (and potentially extract syntax spans)
|
||||
if (global.sourceSyntax) {
|
||||
string sourceStringCPP = string(sourceString);
|
||||
ProcessAndStripXMLTags( sourceStringCPP, sourceTree, sourceLabelCollection , sourceTopLabelCollection );
|
||||
source = tokenize( sourceStringCPP.c_str() );
|
||||
// cerr << "F: " << sourceStringCPP << endl;
|
||||
}
|
||||
else {
|
||||
source = tokenize( sourceString.c_str() );
|
||||
}
|
||||
|
||||
// check if sentences are empty
|
||||
if (target.size() == 0 || source.size() == 0) {
|
||||
cerr << "no target (" << target.size() << ") or source (" << source.size() << ") words << end insentence " << sentenceID << endl;
|
||||
cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// prepare data structures for alignments
|
||||
for(int i=0; i<source.size(); i++) {
|
||||
alignedCountS.push_back( 0 );
|
||||
}
|
||||
for(int i=0; i<target.size(); i++) {
|
||||
vector< int > dummy;
|
||||
alignedToT.push_back( dummy );
|
||||
}
|
||||
|
||||
//InitTightest(m_s2tTightest, source.size());
|
||||
//InitTightest(m_t2sTightest, target.size());
|
||||
|
||||
|
||||
// reading in alignments
|
||||
vector<string> alignmentSequence = tokenize( alignmentString.c_str() );
|
||||
for(int i=0; i<alignmentSequence.size(); i++) {
|
||||
int s,t;
|
||||
// cout << "scaning " << alignmentSequence[i].c_str() << endl;
|
||||
if (! sscanf(alignmentSequence[i].c_str(), "%d-%d", &s, &t)) {
|
||||
cerr << "WARNING: " << alignmentSequence[i] << " is a bad alignment point in sentence " << sentenceID << endl;
|
||||
cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
|
||||
return 0;
|
||||
}
|
||||
// cout << "alignmentSequence[i] " << alignmentSequence[i] << " is " << s << ", " << t << endl;
|
||||
if (t >= target.size() || s >= source.size()) {
|
||||
cerr << "WARNING: sentence " << sentenceID << " has alignment point (" << s << ", " << t << ") out of bounds (" << source.size() << ", " << target.size() << ")\n";
|
||||
cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
|
||||
return 0;
|
||||
}
|
||||
alignedToT[t].push_back( s );
|
||||
alignedCountS[s]++;
|
||||
|
||||
//SetAlignment(s, t);
|
||||
}
|
||||
|
||||
bool mixed = global.mixed;
|
||||
sourceTree.AddDefaultNonTerms(global.sourceSyntax, mixed, source.size());
|
||||
targetTree.AddDefaultNonTerms(global.targetSyntax, mixed, target.size());
|
||||
|
||||
//CalcTightestSpan(m_s2tTightest);
|
||||
//CalcTightestSpan(m_t2sTightest);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
void SentenceAlignment::InitTightest(Outer &tightest, size_t len)
|
||||
{
|
||||
tightest.resize(len);
|
||||
|
||||
for (size_t posOuter = 0; posOuter < len; ++posOuter)
|
||||
{
|
||||
Inner &inner = tightest[posOuter];
|
||||
size_t innerSize = len - posOuter;
|
||||
inner.resize(innerSize);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void SentenceAlignment::CalcTightestSpan(Outer &tightest)
|
||||
{
|
||||
size_t len = tightest.size();
|
||||
|
||||
for (size_t startPos = 0; startPos < len; ++startPos)
|
||||
{
|
||||
for (size_t endPos = startPos + 1; endPos < len; ++endPos)
|
||||
{
|
||||
const Range &prevRange = GetTightest(tightest, startPos, endPos - 1);
|
||||
const Range &smallRange = GetTightest(tightest, endPos, endPos);
|
||||
Range &newRange = GetTightest(tightest, startPos, endPos);
|
||||
|
||||
newRange.Merge(prevRange, smallRange);
|
||||
//cerr << "[" << startPos << "-" << endPos << "] --> [" << newRange.GetStartPos() << "-" << newRange.GetEndPos() << "]";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Range &SentenceAlignment::GetTightest(Outer &tightest, size_t startPos, size_t endPos)
|
||||
{
|
||||
assert(endPos < tightest.size());
|
||||
assert(endPos >= startPos);
|
||||
|
||||
Inner &inner = tightest[startPos];
|
||||
|
||||
size_t ind = endPos - startPos;
|
||||
Range &ret = inner[ind];
|
||||
return ret;
|
||||
}
|
||||
|
||||
void SentenceAlignment::SetAlignment(size_t source, size_t target)
|
||||
{
|
||||
SetAlignment(m_s2tTightest, source, target);
|
||||
SetAlignment(m_t2sTightest, target, source);
|
||||
}
|
||||
|
||||
void SentenceAlignment::SetAlignment(Outer &tightest, size_t thisPos, size_t thatPos)
|
||||
{
|
||||
|
||||
Range &range = GetTightest(tightest, thisPos, thisPos);
|
||||
if (range.GetStartPos() == NOT_FOUND)
|
||||
{ // not yet set, do them both
|
||||
assert(range.GetEndPos() == NOT_FOUND);
|
||||
range.SetStartPos(thatPos);
|
||||
range.SetEndPos(thatPos);
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(range.GetEndPos() != NOT_FOUND);
|
||||
range.SetStartPos( (range.GetStartPos() > thatPos) ? thatPos : range.GetStartPos() );
|
||||
range.SetEndPos( (range.GetEndPos() < thatPos) ? thatPos : range.GetEndPos() );
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
void SentenceAlignment::FindTunnels(const Global &global )
|
||||
{
|
||||
int countT = target.size();
|
||||
int countS = source.size();
|
||||
int maxSpan = max(global.maxHoleSpanSourceDefault, global.maxHoleSpanSourceSyntax);
|
||||
|
||||
m_tunnelCollection = new TunnelCollection(countS);
|
||||
|
||||
m_tunnelCollection->alignedCountS = alignedCountS;
|
||||
m_tunnelCollection->alignedCountT.resize(alignedToT.size());
|
||||
for (size_t ind = 0; ind < alignedToT.size(); ind++)
|
||||
{
|
||||
m_tunnelCollection->alignedCountT[ind] = alignedToT[ind].size();
|
||||
}
|
||||
|
||||
// phrase repository for creating hiero phrases
|
||||
|
||||
// check alignments for target phrase startT...endT
|
||||
for(int lengthT=1;
|
||||
lengthT <= maxSpan && lengthT <= countT;
|
||||
lengthT++) {
|
||||
for(int startT=0; startT < countT-(lengthT-1); startT++) {
|
||||
|
||||
// that's nice to have
|
||||
int endT = startT + lengthT - 1;
|
||||
|
||||
// if there is target side syntax, there has to be a node
|
||||
if (global.targetSyntax && !targetTree.HasNode(startT,endT))
|
||||
continue;
|
||||
|
||||
// find find aligned source words
|
||||
// first: find minimum and maximum source word
|
||||
int minS = 9999;
|
||||
int maxS = -1;
|
||||
vector< int > usedS = alignedCountS;
|
||||
for(int ti=startT;ti<=endT;ti++) {
|
||||
for(int i=0;i<alignedToT[ti].size();i++) {
|
||||
int si = alignedToT[ti][i];
|
||||
// cerr << "point (" << si << ", " << ti << ")\n";
|
||||
if (si<minS) { minS = si; }
|
||||
if (si>maxS) { maxS = si; }
|
||||
usedS[ si ]--;
|
||||
}
|
||||
}
|
||||
|
||||
// unaligned phrases are not allowed
|
||||
if( maxS == -1 )
|
||||
continue;
|
||||
|
||||
// source phrase has to be within limits
|
||||
if( maxS-minS >= maxSpan )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// check if source words are aligned to out of bound target words
|
||||
bool out_of_bounds = false;
|
||||
for(int si=minS;si<=maxS && !out_of_bounds;si++)
|
||||
{
|
||||
if (usedS[si]>0) {
|
||||
out_of_bounds = true;
|
||||
}
|
||||
}
|
||||
|
||||
// if out of bound, you gotta go
|
||||
if (out_of_bounds)
|
||||
continue;
|
||||
|
||||
if (m_tunnelCollection->NumUnalignedWord(1, startT, endT) >= global.maxUnaligned)
|
||||
continue;
|
||||
|
||||
// done with all the checks, lets go over all consistent phrase pairs
|
||||
// start point of source phrase may retreat over unaligned
|
||||
for(int startS=minS;
|
||||
(startS>=0 &&
|
||||
startS>maxS - maxSpan && // within length limit
|
||||
(startS==minS || alignedCountS[startS]==0)); // unaligned
|
||||
startS--)
|
||||
{
|
||||
// end point of source phrase may advance over unaligned
|
||||
for(int endS=maxS;
|
||||
(endS<countS && endS<startS + maxSpan && // within length limit
|
||||
(endS==maxS || alignedCountS[endS]==0)); // unaligned
|
||||
endS++)
|
||||
{
|
||||
if (m_tunnelCollection->NumUnalignedWord(0, startS, endS) >= global.maxUnaligned)
|
||||
continue;
|
||||
|
||||
// take note that this is a valid phrase alignment
|
||||
m_tunnelCollection->Add(startS, endS, startT, endT);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//cerr << *tunnelCollection << endl;
|
||||
|
||||
}
|
||||
|
||||
void SentenceAlignment::CreateLattice(const Global &global)
|
||||
{
|
||||
size_t countS = source.size();
|
||||
m_lattice = new Lattice(countS);
|
||||
|
||||
for (size_t startPos = 0; startPos < countS; ++startPos)
|
||||
{
|
||||
//cerr << "creating arcs for " << startPos << "=";
|
||||
m_lattice->CreateArcs(startPos, *m_tunnelCollection, *this, global);
|
||||
|
||||
//cerr << LatticeNode::s_count << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void SentenceAlignment::CreateRules(const Global &global)
|
||||
{
|
||||
size_t countS = source.size();
|
||||
|
||||
for (size_t startPos = 0; startPos < countS; ++startPos)
|
||||
{
|
||||
//cerr << "creating rules for " << startPos << "\n";
|
||||
m_lattice->CreateRules(startPos, *this, global);
|
||||
}
|
||||
}
|
||||
|
||||
// Writes every word of `vec` to `out`, each followed by a single space
// (so non-empty output ends with a space).
void OutputSentenceStr(std::ostream &out, const std::vector<std::string> &vec)
{
  typedef std::vector<std::string>::const_iterator WordIter;
  for (WordIter word = vec.begin(); word != vec.end(); ++word)
  {
    out << *word << " ";
  }
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const SentenceAlignment &obj)
|
||||
{
|
||||
OutputSentenceStr(out, obj.target);
|
||||
out << " ==> ";
|
||||
OutputSentenceStr(out, obj.source);
|
||||
out << endl;
|
||||
|
||||
out << *obj.m_tunnelCollection;
|
||||
|
||||
if (obj.m_lattice)
|
||||
out << endl << *obj.m_lattice;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
@ -1,69 +0,0 @@
|
||||
#pragma once
|
||||
/*
|
||||
* SentenceAlignment.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 19/01/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include "SyntaxTree.h"
|
||||
#include "Global.h"
|
||||
#include "Range.h"
|
||||
|
||||
class TunnelCollection;
|
||||
class Lattice;
|
||||
|
||||
class SentenceAlignment
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const SentenceAlignment&);
|
||||
|
||||
public:
|
||||
std::vector<std::string> target;
|
||||
std::vector<std::string> source;
|
||||
std::vector<int> alignedCountS;
|
||||
std::vector< std::vector<int> > alignedToT;
|
||||
SyntaxTree sourceTree, targetTree;
|
||||
|
||||
//typedef std::vector<Range> Inner;
|
||||
//typedef std::vector<Inner> Outer;
|
||||
|
||||
//Outer m_s2tTightest, m_t2sTightest;
|
||||
|
||||
SentenceAlignment();
|
||||
~SentenceAlignment();
|
||||
int Create(const std::string &targetString, const std::string &sourceString, const std::string &alignmentString, int sentenceID, const Global &global);
|
||||
// void clear() { delete(alignment); };
|
||||
void FindTunnels( const Global &global ) ;
|
||||
|
||||
void CreateLattice(const Global &global);
|
||||
void CreateRules(const Global &global);
|
||||
|
||||
const TunnelCollection &GetTunnelCollection() const
|
||||
{
|
||||
assert(m_tunnelCollection);
|
||||
return *m_tunnelCollection;
|
||||
}
|
||||
|
||||
const Lattice &GetLattice() const
|
||||
{
|
||||
assert(m_lattice);
|
||||
return *m_lattice;
|
||||
}
|
||||
|
||||
protected:
|
||||
TunnelCollection *m_tunnelCollection;
|
||||
Lattice *m_lattice;
|
||||
|
||||
/*
|
||||
void CalcTightestSpan(Outer &tightest);
|
||||
void InitTightest(Outer &tightest, size_t len);
|
||||
Range &GetTightest(Outer &tightest, size_t startPos, size_t endPos);
|
||||
void SetAlignment(size_t source, size_t target);
|
||||
void SetAlignment(Outer &tightest, size_t thisPos, size_t thatPos);
|
||||
*/
|
||||
};
|
||||
|
@ -1,101 +0,0 @@
|
||||
/*
|
||||
* Symbol.cpp
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 21/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <cassert>
|
||||
#include "Symbol.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Construct a terminal symbol.
//   label  the word itself
//   pos    position of the word; stored in m_span[0].first
// The remaining span fields keep the value-initialised zeros that
// std::vector gives them. The two syntax flags are meaningless for a
// terminal but are set to false so that copying the object (Symbol is stored
// by value in std::vector<Symbol>) never reads an indeterminate bool.
// Initialisers are listed in member declaration order.
Symbol::Symbol(const std::string &label, size_t pos)
  :m_label(label)
  ,m_span(2)
  ,m_isTerminal(true)
  ,m_isSourceSyntax(false)
  ,m_isTargetSyntax(false)
{
  m_span[0].first = pos;
}
|
||||
|
||||
// Construct a non-terminal symbol.
//   labelS / labelT          source-side and target-side labels
//   startS..endS             source span, stored in m_span[0]
//   startT..endT             target span, stored in m_span[1]
//   isSourceSyntax / isTargetSyntax   stored as given, used by Compare()
// Initialisers are written in member declaration order, which is the order
// in which they run regardless of how they are listed.
Symbol::Symbol(const std::string &labelS, const std::string &labelT
               , size_t startS, size_t endS
               , size_t startT, size_t endT
               , bool isSourceSyntax, bool isTargetSyntax)
  :m_label(labelS)
  ,m_labelT(labelT)
  ,m_span(2)
  ,m_isTerminal(false)
  ,m_isSourceSyntax(isSourceSyntax)
  ,m_isTargetSyntax(isTargetSyntax)
{
  m_span[0].first = startS;
  m_span[0].second = endS;
  m_span[1].first = startT;
  m_span[1].second = endT;
}
|
||||
|
||||
// Three-way comparison (-1 / 0 / +1) of one side of two non-terminals.
//  - A syntax-labelled side orders before a non-syntax one.
//  - Two non-syntax sides are always equal (span and label are ignored).
//  - Two syntax sides are ordered by span first, then by label.
int CompareNonTerm(bool thisIsSyntax, bool otherIsSyntax
                   , const std::pair<size_t, size_t> &thisSpan, const std::pair<size_t, size_t> &otherSpan
                   , std::string thisLabel, std::string otherLabel)
{
  // 1 is [X] & the other is [NP]
  if (thisIsSyntax && !otherIsSyntax)
    return -1;
  if (!thisIsSyntax && otherIsSyntax)
    return +1;

  // from here on both flags agree
  if (!thisIsSyntax)
    return 0;

  if (thisSpan < otherSpan)
    return -1;
  if (otherSpan < thisSpan)
    return +1;

  const int labelOrder = thisLabel.compare(otherLabel);
  if (labelOrder < 0)
    return -1;
  if (labelOrder > 0)
    return +1;

  return 0;
}
|
||||
|
||||
int Symbol::Compare(const Symbol &other) const
|
||||
{
|
||||
if (m_isTerminal != other.m_isTerminal)
|
||||
return m_isTerminal ? -1 : +1;
|
||||
|
||||
assert(m_isTerminal == other.m_isTerminal);
|
||||
if (m_isTerminal)
|
||||
{ // compare labels & pos
|
||||
if (m_span[0].first != other.m_span[0].first)
|
||||
return (m_span[0].first < other.m_span[0].first) ? -1 : +1;
|
||||
|
||||
if (m_label != other.m_label)
|
||||
return (m_label < other.m_label) ? -1 : +1;
|
||||
|
||||
}
|
||||
else
|
||||
{ // non terms
|
||||
int ret = CompareNonTerm(m_isSourceSyntax, other.m_isSourceSyntax
|
||||
,m_span[0], other.m_span[0]
|
||||
,m_label, other.m_label);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = CompareNonTerm(m_isTargetSyntax, other.m_isTargetSyntax
|
||||
,m_span[1], other.m_span[1]
|
||||
,m_label, other.m_label);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const Symbol &obj)
|
||||
{
|
||||
if (obj.m_isTerminal)
|
||||
out << obj.m_label;
|
||||
else
|
||||
out << obj.m_label + obj.m_labelT;
|
||||
|
||||
return out;
|
||||
}
|
||||
|
@ -1,36 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* Symbol.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 21/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
// A rule symbol: either a terminal (a word at a position) or a non-terminal
// that carries a source label, a target label and one span per side.
class Symbol
{
  friend std::ostream& operator<<(std::ostream &out, const Symbol &obj);

protected:
  std::string m_label;   // terminal string, or source label of a non-term
  std::string m_labelT;  // target label, only for non-term

  // Both constructors size this to 2 entries: [0] source, [1] target.
  std::vector<std::pair<size_t, size_t> > m_span;

  bool m_isTerminal;
  bool m_isSourceSyntax;
  bool m_isTargetSyntax;

public:
  // for terminals
  Symbol(const std::string &label, size_t pos);

  // for non-terminals
  Symbol(const std::string &labelS, const std::string &labelT
         , size_t startS, size_t endS
         , size_t startT, size_t endT
         , bool isSourceSyntax, bool isTargetSyntax);

  // three-way comparison, returns -1, 0 or +1
  int Compare(const Symbol &other) const;

};
|
@ -1,56 +0,0 @@
|
||||
/*
|
||||
* SymbolSequence.cpp
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 21/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <cassert>
|
||||
#include <sstream>
|
||||
#include "SymbolSequence.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int SymbolSequence::Compare(const SymbolSequence &other) const
|
||||
{
|
||||
int ret;
|
||||
size_t thisSize = GetSize();
|
||||
size_t otherSize = other.GetSize();
|
||||
if (thisSize != otherSize)
|
||||
{
|
||||
ret = (thisSize < otherSize) ? -1 : +1;
|
||||
return ret;
|
||||
}
|
||||
else
|
||||
{
|
||||
assert(thisSize == otherSize);
|
||||
for (size_t ind = 0; ind < thisSize; ++ind)
|
||||
{
|
||||
const Symbol &thisSymbol = GetSymbol(ind);
|
||||
const Symbol &otherSymbol = other.GetSymbol(ind);
|
||||
ret = thisSymbol.Compare(otherSymbol);
|
||||
if (ret != 0)
|
||||
{
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
assert(ret == 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const SymbolSequence &obj)
|
||||
{
|
||||
SymbolSequence::CollType::const_iterator iterSymbol;
|
||||
for (iterSymbol = obj.m_coll.begin(); iterSymbol != obj.m_coll.end(); ++iterSymbol)
|
||||
{
|
||||
const Symbol &symbol = *iterSymbol;
|
||||
out << symbol << " ";
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -1,42 +0,0 @@
|
||||
#pragma once
|
||||
/*
|
||||
* SymbolSequence.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 21/07/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
#include "Symbol.h"
|
||||
|
||||
class SymbolSequence
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream &out, const SymbolSequence &obj);
|
||||
|
||||
protected:
|
||||
typedef std::vector<Symbol> CollType;
|
||||
CollType m_coll;
|
||||
|
||||
public:
|
||||
typedef CollType::iterator iterator;
|
||||
typedef CollType::const_iterator const_iterator;
|
||||
const_iterator begin() const { return m_coll.begin(); }
|
||||
const_iterator end() const { return m_coll.end(); }
|
||||
|
||||
void Add(const Symbol &symbol)
|
||||
{
|
||||
m_coll.push_back(symbol);
|
||||
}
|
||||
size_t GetSize() const
|
||||
{ return m_coll.size(); }
|
||||
const Symbol &GetSymbol(size_t ind) const
|
||||
{ return m_coll[ind]; }
|
||||
|
||||
void Clear()
|
||||
{ m_coll.clear(); }
|
||||
|
||||
int Compare(const SymbolSequence &other) const;
|
||||
|
||||
};
|
@ -1,245 +1,47 @@
|
||||
// $Id: SyntaxTree.cpp 1960 2008-12-15 12:52:38Z phkoehn $
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2009 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include "SyntaxTree.h"
|
||||
//#include "extract.h"
|
||||
#include "Global.h"
|
||||
|
||||
//extern const Global g_debug;
|
||||
extern const Global *g_global;
|
||||
#include "Parameter.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool SyntaxNode::IsSyntax() const
|
||||
void SyntaxTree::Add(int startPos, int endPos, const std::string &label, const Parameter &params)
|
||||
{
|
||||
bool ret = GetLabel() != "[X]";
|
||||
return ret;
|
||||
}
|
||||
//cerr << "add " << label << " to " << "[" << startPos << "-" << endPos << "]" << endl;
|
||||
|
||||
SyntaxTree::SyntaxTree()
|
||||
:m_defaultLHS(0,0, "[X]")
|
||||
{
|
||||
m_emptyNode.clear();
|
||||
}
|
||||
Range range(startPos, endPos);
|
||||
Labels &labels = m_coll[range];
|
||||
|
||||
// Free every node held in m_nodes (the nodes AddNode() allocated with new).
SyntaxTree::~SyntaxTree()
{
  SyntaxNodes::const_iterator iterNode;
  for (iterNode = m_nodes.begin(); iterNode != m_nodes.end(); ++iterNode) {
    delete *iterNode;
  }
}
|
||||
|
||||
bool HasDuplicates(const SyntaxNodes &nodes)
|
||||
{
|
||||
string prevLabel;
|
||||
SyntaxNodes::const_iterator iter;
|
||||
for (iter = nodes.begin(); iter != nodes.end(); ++iter)
|
||||
{
|
||||
const SyntaxNode &node = **iter;
|
||||
string label = node.GetLabel();
|
||||
if (label == prevLabel)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void SyntaxTree::AddNode( int startPos, int endPos, std::string label )
|
||||
{
|
||||
SyntaxNode* newNode = new SyntaxNode( startPos, endPos, "[" + label + "]");
|
||||
m_nodes.push_back( newNode );
|
||||
|
||||
SyntaxNodes &nodesChart = m_index[ startPos ][ endPos ];
|
||||
|
||||
if (!g_global->uppermostOnly)
|
||||
{
|
||||
nodesChart.push_back( newNode );
|
||||
//assert(!HasDuplicates(m_index[ startPos ][ endPos ]));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (nodesChart.size() > 0)
|
||||
{
|
||||
assert(nodesChart.size() == 1);
|
||||
//delete nodes[0];
|
||||
nodesChart.resize(0);
|
||||
bool add = true;
|
||||
if (labels.size()) {
|
||||
if (params.multiLabel == 1) {
|
||||
// delete the label in collection and add new
|
||||
assert(labels.size() == 1);
|
||||
labels.clear();
|
||||
}
|
||||
assert(nodesChart.size() == 0);
|
||||
nodesChart.push_back( newNode );
|
||||
}
|
||||
}
|
||||
|
||||
ParentNodes SyntaxTree::Parse() {
|
||||
ParentNodes parents;
|
||||
|
||||
int size = m_index.size();
|
||||
|
||||
// looping through all spans of size >= 2
|
||||
for( int length=2; length<=size; length++ )
|
||||
{
|
||||
for( int startPos = 0; startPos <= size-length; startPos++ )
|
||||
{
|
||||
if (HasNode( startPos, startPos+length-1 ))
|
||||
{
|
||||
// processing one (parent) span
|
||||
|
||||
//std::cerr << "# " << startPos << "-" << (startPos+length-1) << ":";
|
||||
SplitPoints splitPoints;
|
||||
splitPoints.push_back( startPos );
|
||||
//std::cerr << " " << startPos;
|
||||
|
||||
int first = 1;
|
||||
int covered = 0;
|
||||
while( covered < length )
|
||||
{
|
||||
// find largest covering subspan (child)
|
||||
// starting at last covered position
|
||||
for( int midPos=length-first; midPos>covered; midPos-- )
|
||||
{
|
||||
if( HasNode( startPos+covered, startPos+midPos-1 ) )
|
||||
{
|
||||
covered = midPos;
|
||||
splitPoints.push_back( startPos+covered );
|
||||
// std::cerr << " " << ( startPos+covered );
|
||||
first = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
// std::cerr << std::endl;
|
||||
parents.push_back( splitPoints );
|
||||
}
|
||||
else if (params.multiLabel == 2) {
|
||||
// ignore this label
|
||||
add = false;
|
||||
}
|
||||
}
|
||||
return parents;
|
||||
}
|
||||
|
||||
bool SyntaxTree::HasNode( int startPos, int endPos ) const
|
||||
{
|
||||
return GetNodes( startPos, endPos).size() > 0;
|
||||
}
|
||||
|
||||
// Look up the nodes stored for the span [startPos, endPos] in the two-level
// index (start position, then end position). Returns a reference to the
// shared empty list m_emptyNode when nothing is stored for that span.
const SyntaxNodes &SyntaxTree::GetNodes( int startPos, int endPos ) const
{
  const SyntaxTreeIndexIterator byStart = m_index.find( startPos );
  if (byStart != m_index.end()) {
    const SyntaxTreeIndex2 &byEnd = byStart->second;
    const SyntaxTreeIndexIterator2 hit = byEnd.find( endPos );
    if (hit != byEnd.end())
      return hit->second;
  }

  return m_emptyNode;
}
|
||||
|
||||
// for printing out tree
|
||||
std::string SyntaxTree::ToString() const
|
||||
{
|
||||
std::stringstream out;
|
||||
out << *this;
|
||||
return out.str();
|
||||
}
|
||||
|
||||
void SyntaxTree::AddDefaultNonTerms(size_t phraseSize)
|
||||
{
|
||||
for (size_t startPos = 0; startPos <= phraseSize; ++startPos)
|
||||
{
|
||||
for (size_t endPos = startPos; endPos < phraseSize; ++endPos)
|
||||
{
|
||||
AddNode(startPos, endPos, "X");
|
||||
}
|
||||
if (add) {
|
||||
labels.push_back(label);
|
||||
}
|
||||
}
|
||||
|
||||
void SyntaxTree::AddDefaultNonTerms(bool isSyntax, bool mixed, size_t phraseSize)
|
||||
void SyntaxTree::AddToAll(const std::string &label)
|
||||
{
|
||||
if (isSyntax)
|
||||
{
|
||||
AddDefaultNonTerms(!mixed, phraseSize);
|
||||
}
|
||||
else
|
||||
{ // add X everywhere
|
||||
AddDefaultNonTerms(phraseSize);
|
||||
Coll::iterator iter;
|
||||
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
|
||||
Labels &labels = iter->second;
|
||||
labels.push_back(label);
|
||||
}
|
||||
}
|
||||
|
||||
void SyntaxTree::AddDefaultNonTerms(bool addEverywhere, size_t phraseSize)
|
||||
const SyntaxTree::Labels &SyntaxTree::Find(int startPos, int endPos) const
|
||||
{
|
||||
//cerr << "GetNumWords()=" << GetNumWords() << endl;
|
||||
//assert(phraseSize == GetNumWords() || GetNumWords() == 1); // 1 if syntax sentence doesn't have any xml. TODO fix syntax tree obj
|
||||
|
||||
for (size_t startPos = 0; startPos <= phraseSize; ++startPos)
|
||||
{
|
||||
for (size_t endPos = startPos; endPos <= phraseSize; ++endPos)
|
||||
{
|
||||
const SyntaxNodes &nodes = GetNodes(startPos, endPos);
|
||||
if (!addEverywhere && nodes.size() > 0)
|
||||
{ // only add if no label
|
||||
continue;
|
||||
}
|
||||
AddNode(startPos, endPos, "X");
|
||||
}
|
||||
}
|
||||
Coll::const_iterator iter;
|
||||
iter = m_coll.find(Range(startPos, endPos));
|
||||
return (iter == m_coll.end()) ? m_defaultLabels : iter->second;
|
||||
}
|
||||
|
||||
// Nodes usable as left-hand side for the span [startPos, endPos]: a copy of
// the nodes stored for that span, or, when there are none, a one-element
// list pointing at the default node m_defaultLHS.
const SyntaxNodes SyntaxTree::GetNodesForLHS( int startPos, int endPos ) const
{
  const SyntaxNodes &spanNodes = GetNodes(startPos, endPos);
  if (!spanNodes.empty())
    return spanNodes;

  SyntaxNodes fallback;
  fallback.push_back(&m_defaultLHS);
  return fallback;
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const SyntaxTree& t)
|
||||
{
|
||||
int size = t.m_index.size();
|
||||
for(size_t length=1; length<=size; length++)
|
||||
{
|
||||
for(size_t space=0; space<length; space++)
|
||||
{
|
||||
os << " ";
|
||||
}
|
||||
for(size_t start=0; start<=size-length; start++)
|
||||
{
|
||||
|
||||
if (t.HasNode( start, start+(length-1) ))
|
||||
{
|
||||
std::string label = t.GetNodes( start, start+(length-1) )[0]->GetLabel() + "#######";
|
||||
|
||||
os << label.substr(0,7) << " ";
|
||||
}
|
||||
else
|
||||
{
|
||||
os << "------- ";
|
||||
}
|
||||
}
|
||||
os << std::endl;
|
||||
}
|
||||
return os;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1,96 +1,32 @@
|
||||
#pragma once
|
||||
|
||||
// $Id: SyntaxTree.h 1960 2008-12-15 12:52:38Z phkoehn $
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2009 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
|
||||
class SyntaxNode;
|
||||
class Parameter;
|
||||
|
||||
typedef std::vector<const SyntaxNode*> SyntaxNodes;
|
||||
|
||||
class SyntaxNode {
|
||||
protected:
|
||||
int m_start, m_end;
|
||||
std::string m_label;
|
||||
SyntaxNodes m_children;
|
||||
SyntaxNode* m_parent;
|
||||
class SyntaxTree
|
||||
{
|
||||
public:
|
||||
SyntaxNode( int startPos, int endPos, const std::string &label)
|
||||
:m_start(startPos)
|
||||
,m_end(endPos)
|
||||
,m_label(label)
|
||||
{}
|
||||
int GetStart() const
|
||||
{ return m_start; }
|
||||
int GetEnd() const
|
||||
{ return m_end; }
|
||||
const std::string &GetLabel() const
|
||||
{ return m_label; }
|
||||
bool IsSyntax() const;
|
||||
typedef std::pair<int, int> Range;
|
||||
typedef std::vector<std::string> Labels;
|
||||
typedef std::map<Range, Labels> Coll;
|
||||
|
||||
void Add(int startPos, int endPos, const std::string &label, const Parameter &params);
|
||||
void AddToAll(const std::string &label);
|
||||
|
||||
const Labels &Find(int startPos, int endPos) const;
|
||||
|
||||
void SetHieroLabel(const std::string &label) {
|
||||
m_defaultLabels.push_back(label);
|
||||
}
|
||||
|
||||
|
||||
protected:
|
||||
|
||||
Coll m_coll;
|
||||
Labels m_defaultLabels;
|
||||
};
|
||||
|
||||
|
||||
typedef std::vector< int > SplitPoints;
|
||||
typedef std::vector< SplitPoints > ParentNodes;
|
||||
|
||||
class SyntaxTree {
|
||||
protected:
|
||||
SyntaxNodes m_nodes;
|
||||
SyntaxNode* m_top;
|
||||
SyntaxNode m_defaultLHS;
|
||||
|
||||
typedef std::map< int, SyntaxNodes > SyntaxTreeIndex2;
|
||||
typedef SyntaxTreeIndex2::const_iterator SyntaxTreeIndexIterator2;
|
||||
typedef std::map< int, SyntaxTreeIndex2 > SyntaxTreeIndex;
|
||||
typedef SyntaxTreeIndex::const_iterator SyntaxTreeIndexIterator;
|
||||
SyntaxTreeIndex m_index;
|
||||
SyntaxNodes m_emptyNode;
|
||||
|
||||
friend std::ostream& operator<<(std::ostream&, const SyntaxTree&);
|
||||
|
||||
public:
|
||||
SyntaxTree();
|
||||
~SyntaxTree();
|
||||
|
||||
void AddNode( int startPos, int endPos, std::string label );
|
||||
ParentNodes Parse();
|
||||
bool HasNode( int startPos, int endPos ) const;
|
||||
const SyntaxNodes &GetNodes( int startPos, int endPos ) const;
|
||||
const SyntaxNodes &GetAllNodes() const { return m_nodes; } ;
|
||||
size_t GetNumWords() const { return m_index.size(); }
|
||||
std::string ToString() const;
|
||||
|
||||
void AddDefaultNonTerms(bool isSyntax, bool addEverywhere, size_t phraseSize);
|
||||
void AddDefaultNonTerms(bool mixed, size_t phraseSize);
|
||||
|
||||
void AddDefaultNonTerms(size_t phraseSize);
|
||||
|
||||
const SyntaxNodes GetNodesForLHS( int startPos, int endPos ) const;
|
||||
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream&, const SyntaxTree&);
|
||||
|
||||
|
@ -1,38 +0,0 @@
|
||||
/*
|
||||
* Tunnel.cpp
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 19/01/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "Tunnel.h"
|
||||
|
||||
|
||||
int Tunnel::Compare(const Tunnel &other) const
|
||||
{
|
||||
int ret = m_sourceRange.Compare(other.m_sourceRange);
|
||||
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
ret = m_targetRange.Compare(other.m_targetRange);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int Tunnel::Compare(const Tunnel &other, size_t direction) const
|
||||
{
|
||||
const Range &thisRange = (direction == 0) ? m_sourceRange : m_targetRange;
|
||||
const Range &otherRange = (direction == 0) ? other.m_sourceRange : other.m_targetRange;
|
||||
|
||||
int ret = thisRange.Compare(otherRange);
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const Tunnel &tunnel)
|
||||
{
|
||||
out << tunnel.m_sourceRange << "==>" << tunnel.m_targetRange;
|
||||
return out;
|
||||
}
|
@ -1,49 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
/*
|
||||
* Tunnel.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 19/01/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include "Range.h"
|
||||
|
||||
// for unaligned source terminal
|
||||
|
||||
class Tunnel
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const Tunnel&);
|
||||
|
||||
protected:
|
||||
|
||||
Range m_sourceRange, m_targetRange;
|
||||
|
||||
public:
|
||||
Tunnel()
|
||||
{}
|
||||
|
||||
Tunnel(const Tunnel ©)
|
||||
:m_sourceRange(copy.m_sourceRange)
|
||||
,m_targetRange(copy.m_targetRange)
|
||||
{}
|
||||
|
||||
Tunnel(const Range &sourceRange, const Range &targetRange)
|
||||
:m_sourceRange(sourceRange)
|
||||
,m_targetRange(targetRange)
|
||||
{}
|
||||
|
||||
const Range &GetRange(size_t direction) const
|
||||
{ return (direction == 0) ? m_sourceRange : m_targetRange; }
|
||||
|
||||
int Compare(const Tunnel &other) const;
|
||||
int Compare(const Tunnel &other, size_t direction) const;
|
||||
};
|
||||
|
||||
typedef std::vector<Tunnel> TunnelList;
|
||||
|
@ -1,70 +0,0 @@
|
||||
/*
|
||||
* TunnelCollection.cpp
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 19/01/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "TunnelCollection.h"
|
||||
#include "Range.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
size_t TunnelCollection::NumUnalignedWord(size_t direction, size_t startPos, size_t endPos) const
|
||||
{
|
||||
assert(startPos <= endPos);
|
||||
|
||||
if (direction == 0)
|
||||
assert(endPos < alignedCountS.size());
|
||||
else
|
||||
assert(endPos < alignedCountT.size());
|
||||
|
||||
size_t ret = 0;
|
||||
for (size_t ind = startPos; ind <= endPos; ++ind)
|
||||
{
|
||||
if (direction == 0 && alignedCountS[ind] == 0)
|
||||
{
|
||||
ret++;
|
||||
}
|
||||
else if (direction == 1 && alignedCountT[ind] == 0)
|
||||
{
|
||||
ret++;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void TunnelCollection::Add(int startS, int endS, int startT, int endT)
|
||||
{
|
||||
// m_phraseExist[startS][endS - startS].push_back(Tunnel(startT, endT));
|
||||
m_coll[startS][endS - startS].push_back(Tunnel(Range(startS, endS), Range(startT, endT)));
|
||||
}
|
||||
|
||||
|
||||
std::ostream& operator<<(std::ostream &out, const TunnelCollection &TunnelCollection)
|
||||
{
|
||||
size_t size = TunnelCollection.GetSize();
|
||||
|
||||
for (size_t startPos = 0; startPos < size; ++startPos)
|
||||
{
|
||||
for (size_t endPos = startPos; endPos < size; ++endPos)
|
||||
{
|
||||
const TunnelList &tunnelList = TunnelCollection.GetTunnels(startPos, endPos);
|
||||
TunnelList::const_iterator iter;
|
||||
for (iter = tunnelList.begin(); iter != tunnelList.end(); ++iter)
|
||||
{
|
||||
const Tunnel &tunnel = *iter;
|
||||
out << tunnel << " ";
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -1,61 +0,0 @@
|
||||
#pragma once
|
||||
/*
|
||||
* TunnelCollection.h
|
||||
* extract
|
||||
*
|
||||
* Created by Hieu Hoang on 19/01/2010.
|
||||
* Copyright 2010 __MyCompanyName__. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include <vector>
|
||||
#include "Tunnel.h"
|
||||
|
||||
// reposity of extracted phrase pairs
|
||||
// which are potential tunnels in larger phrase pairs
|
||||
class TunnelCollection
|
||||
{
|
||||
friend std::ostream& operator<<(std::ostream&, const TunnelCollection&);
|
||||
|
||||
protected:
|
||||
std::vector< std::vector<TunnelList> > m_coll;
|
||||
// indexed by source pos. and source length
|
||||
// maps to list of tunnels where <int, int> are target pos
|
||||
|
||||
public:
|
||||
std::vector<int> alignedCountS, alignedCountT;
|
||||
|
||||
TunnelCollection(const TunnelCollection &);
|
||||
|
||||
TunnelCollection(size_t size)
|
||||
:m_coll(size)
|
||||
{
|
||||
// size is the length of the source sentence
|
||||
for (size_t pos = 0; pos < size; ++pos)
|
||||
{
|
||||
// create empty tunnel lists
|
||||
std::vector<TunnelList> &endVec = m_coll[pos];
|
||||
endVec.resize(size - pos);
|
||||
}
|
||||
}
|
||||
|
||||
void Add(int startS, int endS, int startT, int endT);
|
||||
|
||||
//const TunnelList &GetTargetHoles(int startS, int endS) const
|
||||
//{
|
||||
// const TunnelList &targetHoles = m_phraseExist[startS][endS - startS];
|
||||
// return targetHoles;
|
||||
//}
|
||||
const TunnelList &GetTunnels(int startS, int endS) const
|
||||
{
|
||||
const TunnelList &sourceHoles = m_coll[startS][endS - startS];
|
||||
return sourceHoles;
|
||||
}
|
||||
|
||||
const size_t GetSize() const
|
||||
{ return m_coll.size(); }
|
||||
|
||||
size_t NumUnalignedWord(size_t direction, size_t startPos, size_t endPos) const;
|
||||
|
||||
|
||||
};
|
||||
|
56
contrib/other-builds/extract-mixed-syntax/Word.cpp
Normal file
56
contrib/other-builds/extract-mixed-syntax/Word.cpp
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Word.cpp
|
||||
*
|
||||
* Created on: 18 Feb 2014
|
||||
* Author: s0565741
|
||||
*/
|
||||
#include <limits>
|
||||
#include "Word.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Create a word with the given position and surface string; the alignment
// set starts out empty.
Word::Word(int pos, const std::string &str)
  :m_pos(pos)
  ,m_str(str)
{
}
|
||||
|
||||
// Nothing to release: the pointers in m_alignment are not deleted here.
Word::~Word()
{
}
|
||||
|
||||
void Word::AddAlignment(const Word *other)
|
||||
{
|
||||
m_alignment.insert(other);
|
||||
}
|
||||
|
||||
std::set<int> Word::GetAlignmentIndex() const
|
||||
{
|
||||
std::set<int> ret;
|
||||
|
||||
std::set<const Word *>::const_iterator iter;
|
||||
for (iter = m_alignment.begin(); iter != m_alignment.end(); ++iter) {
|
||||
const Word &otherWord = **iter;
|
||||
int otherPos = otherWord.GetPos();
|
||||
ret.insert(otherPos);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void Word::Output(std::ostream &out) const
|
||||
{
|
||||
out << m_str;
|
||||
}
|
||||
|
||||
std::string Word::Debug() const
|
||||
{
|
||||
return m_str;
|
||||
}
|
||||
|
||||
int Word::CompareString(const Word &other) const
|
||||
{
|
||||
return m_str.compare(other.m_str);
|
||||
}
|
47
contrib/other-builds/extract-mixed-syntax/Word.h
Normal file
47
contrib/other-builds/extract-mixed-syntax/Word.h
Normal file
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Word.h
|
||||
*
|
||||
* Created on: 18 Feb 2014
|
||||
* Author: s0565741
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "RuleSymbol.h"
|
||||
|
||||
// a terminal
|
||||
class Word : public RuleSymbol
|
||||
{
|
||||
public:
|
||||
Word(const Word&); // do not implement
|
||||
Word(int pos, const std::string &str);
|
||||
virtual ~Word();
|
||||
|
||||
virtual bool IsNonTerm() const
|
||||
{ return false; }
|
||||
|
||||
std::string GetString() const
|
||||
{ return m_str; }
|
||||
|
||||
int GetPos() const
|
||||
{ return m_pos; }
|
||||
|
||||
void AddAlignment(const Word *other);
|
||||
|
||||
const std::set<const Word *> &GetAlignment() const
|
||||
{ return m_alignment; }
|
||||
|
||||
std::set<int> GetAlignmentIndex() const;
|
||||
|
||||
void Output(std::ostream &out) const;
|
||||
std::string Debug() const;
|
||||
|
||||
int CompareString(const Word &other) const;
|
||||
|
||||
protected:
|
||||
int m_pos; // original position in sentence, NOT in lattice
|
||||
std::string m_str;
|
||||
std::set<const Word *> m_alignment;
|
||||
};
|
||||
|
@ -1,344 +0,0 @@
|
||||
// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include "SyntaxTree.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
|
||||
// Split str on any of the characters in delimiters. Runs of delimiters count
// as one separator and leading/trailing delimiters are ignored, so no empty
// tokens are produced.
inline std::vector<std::string> Tokenize(const std::string& str,
    const std::string& delimiters = " \t")
{
  std::vector<std::string> tokens;

  // start of the next token, npos once only delimiters (or nothing) remain
  std::string::size_type tokenStart = str.find_first_not_of(delimiters, 0);
  while (tokenStart != std::string::npos) {
    // first delimiter after the token; npos means the token runs to the end
    const std::string::size_type tokenEnd = str.find_first_of(delimiters, tokenStart);
    tokens.push_back(str.substr(tokenStart, tokenEnd - tokenStart));
    tokenStart = str.find_first_not_of(delimiters, tokenEnd);
  }

  return tokens;
}
|
||||
|
||||
// Return str without the leading and trailing characters that occur in
// dropChars. A string made up only of such characters yields "".
const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r")
{
  const std::string::size_type firstKept = str.find_first_not_of(dropChars);
  if (firstKept == std::string::npos)
    return std::string();

  // a kept character exists, so lastKept is valid and >= firstKept
  const std::string::size_type lastKept = str.find_last_not_of(dropChars);
  return str.substr(firstKept, lastKept - firstKept + 1);
}
|
||||
|
||||
// Extract the value of attribute attributeName from the text of an XML tag.
// Looks for attributeName="..." and returns what is between the quotes.
// A quote preceded by a backslash does not end the value (the backslash is
// kept in the result). Returns "" when the attribute is absent, or when the
// closing quote is missing (a message is then written to std::cerr).
//
// The search for the closing quote starts at the first character of the
// value. Starting one character later, as before, skipped the closing quote
// of an empty value (name="") and returned text from the following
// attribute instead.
std::string ParseXmlTagAttribute(const std::string& tag, const std::string& attributeName)
{
  /*TODO deal with unescaping \"*/
  const std::string tagOpen = attributeName + "=\"";
  size_t contentsStart = tag.find(tagOpen);
  if (contentsStart == std::string::npos) return "";
  contentsStart += tagOpen.size();

  size_t contentsEnd = tag.find_first_of('"', contentsStart);
  if (contentsEnd == std::string::npos) {
    std::cerr << "Malformed XML attribute: "<< tag;
    return "";
  }

  // contentsEnd >= contentsStart >= 2, so contentsEnd-1 is always in range;
  // keep moving to the next quote while the current one is escaped
  size_t possibleEnd;
  while (tag.at(contentsEnd-1) == '\\' && (possibleEnd = tag.find_first_of('"',contentsEnd+1)) != std::string::npos) {
    contentsEnd = possibleEnd;
  }
  return tag.substr(contentsStart, contentsEnd-contentsStart);
}
|
||||
|
||||
/**
 * Strip the enclosing "<" and ">" from an XML tag.
 * Anything that is shorter than two characters, or that does not both
 * start with "<" and end with ">", is returned unchanged.
 *
 * \param str xml token to be stripped
 */
std::string TrimXml(const std::string& str)
{
  const std::string::size_type len = str.size();
  const bool isBracketed = len >= 2 && str[0] == '<' && str[len - 1] == '>';
  if (!isBracketed)
    return str;

  return str.substr(1, len - 2);
}
|
||||
|
||||
/**
 * Tell whether a token is an XML tag, i.e. whether it starts with "<".
 *
 * \param tag token to be checked
 * \return true when the first character is '<'; false otherwise
 *         (including for an empty token)
 */
bool isXmlTag(const std::string& tag)
{
  if (tag.empty()) {
    return false;
  }
  return tag[0] == '<';
}
|
||||
|
||||
/**
 * Split the input string into an alternating sequence of XML tags and the
 * text between them.
 * example: this <b> is a </b> test .
 *       => (this ), (<b>), ( is a ), (</b>), ( test .)
 *
 * A "<" without a matching ">" is reported on stderr and ends the scan:
 * the tokens collected before that tag are returned, the text directly in
 * front of it is not.
 *
 * \param str input string
 * \return the tokens in input order
 */
inline std::vector<std::string> TokenizeXml(const std::string& str)
{
  std::vector<std::string> pieces;
  const std::string::size_type length = str.size();
  std::string::size_type cursor = 0;   // first character not yet consumed

  while (cursor != length) {
    const std::string::size_type open = str.find('<', cursor);
    if (open == std::string::npos) {
      // no further tag: the remainder is one text token
      pieces.push_back(str.substr(cursor));
      break;
    }

    const std::string::size_type close = str.find('>', open);
    if (close == std::string::npos) {
      std::cerr << "ERROR: malformed XML: " << str << std::endl;
      return pieces;
    }

    // text in front of the tag, if there is any
    if (open != cursor) {
      pieces.push_back(str.substr(cursor, open - cursor));
    }

    // the tag itself, brackets included
    pieces.push_back(str.substr(open, close - open + 1));
    cursor = close + 1;
  }
  return pieces;
}
|
||||
|
||||
/**
|
||||
* Process a sentence with xml annotation
|
||||
* Xml tags may specifiy additional/replacing translation options
|
||||
* and reordering constraints
|
||||
*
|
||||
* \param line in: sentence, out: sentence without the xml
|
||||
* \param res vector with translation options specified by xml
|
||||
* \param reorderingConstraint reordering constraint zones specified by xml
|
||||
* \param walls reordering constraint walls specified by xml
|
||||
*/
|
||||
/*TODO: we'd only have to return a vector of XML options if we dropped linking. 2-d vector
|
||||
is so we can link things up afterwards. We can't create TranslationOptions as we
|
||||
parse because we don't have the completed source parsed until after this function
|
||||
removes all the markup from it (CreateFromString in Sentence::Read).
|
||||
*/
|
||||
bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &labelCollection, map< string, int > &topLabelCollection ) {
|
||||
//parse XML markup in translation line
|
||||
|
||||
// no xml tag? we're done.
|
||||
if (line.find_first_of('<') == string::npos) { return true; }
|
||||
|
||||
// break up input into a vector of xml tags and text
|
||||
// example: (this), (<b>), (is a), (</b>), (test .)
|
||||
vector<string> xmlTokens = TokenizeXml(line);
|
||||
|
||||
// we need to store opened tags, until they are closed
|
||||
// tags are stored as tripled (tagname, startpos, contents)
|
||||
typedef pair< string, pair< size_t, string > > OpenedTag;
|
||||
vector< OpenedTag > tagStack; // stack that contains active opened tags
|
||||
|
||||
string cleanLine; // return string (text without xml)
|
||||
size_t wordPos = 0; // position in sentence (in terms of number of words)
|
||||
bool isLinked = false;
|
||||
|
||||
// loop through the tokens
|
||||
for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++)
|
||||
{
|
||||
// not a xml tag, but regular text (may contain many words)
|
||||
if(!isXmlTag(xmlTokens[xmlTokenPos]))
|
||||
{
|
||||
// add a space at boundary, if necessary
|
||||
if (cleanLine.size()>0 &&
|
||||
cleanLine[cleanLine.size() - 1] != ' ' &&
|
||||
xmlTokens[xmlTokenPos][0] != ' ')
|
||||
{
|
||||
cleanLine += " ";
|
||||
}
|
||||
cleanLine += xmlTokens[xmlTokenPos]; // add to output
|
||||
wordPos = Tokenize(cleanLine).size(); // count all the words
|
||||
}
|
||||
|
||||
// process xml tag
|
||||
else
|
||||
{
|
||||
// *** get essential information about tag ***
|
||||
|
||||
// strip extra boundary spaces and "<" and ">"
|
||||
string tag = Trim(TrimXml(xmlTokens[xmlTokenPos]));
|
||||
// cerr << "XML TAG IS: " << tag << std::endl;
|
||||
|
||||
if (tag.size() == 0)
|
||||
{
|
||||
cerr << "ERROR: empty tag name: " << line << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// check if unary (e.g., "<wall/>")
|
||||
bool isUnary = ( tag[tag.size() - 1] == '/' );
|
||||
|
||||
// check if opening tag (e.g. "<a>", not "</a>")g
|
||||
bool isClosed = ( tag[0] == '/' );
|
||||
bool isOpen = !isClosed;
|
||||
|
||||
if (isClosed && isUnary)
|
||||
{
|
||||
cerr << "ERROR: can't have both closed and unary tag <" << tag << ">: " << line << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (isClosed)
|
||||
tag = tag.substr(1); // remove "/" at the beginning
|
||||
if (isUnary)
|
||||
tag = tag.substr(0,tag.size()-1); // remove "/" at the end
|
||||
|
||||
// find the tag name and contents
|
||||
string::size_type endOfName = tag.find_first_of(' ');
|
||||
string tagName = tag;
|
||||
string tagContent = "";
|
||||
if (endOfName != string::npos) {
|
||||
tagName = tag.substr(0,endOfName);
|
||||
tagContent = tag.substr(endOfName+1);
|
||||
}
|
||||
|
||||
// *** process new tag ***
|
||||
|
||||
if (isOpen || isUnary)
|
||||
{
|
||||
// put the tag on the tag stack
|
||||
OpenedTag openedTag = make_pair( tagName, make_pair( wordPos, tagContent ) );
|
||||
tagStack.push_back( openedTag );
|
||||
// cerr << "XML TAG " << tagName << " (" << tagContent << ") added to stack, now size " << tagStack.size() << endl;
|
||||
}
|
||||
|
||||
// *** process completed tag ***
|
||||
|
||||
if (isClosed || isUnary)
|
||||
{
|
||||
// pop last opened tag from stack;
|
||||
if (tagStack.size() == 0)
|
||||
{
|
||||
cerr << "ERROR: tag " << tagName << " closed, but not opened" << ":" << line << endl;
|
||||
return false;
|
||||
}
|
||||
OpenedTag openedTag = tagStack.back();
|
||||
tagStack.pop_back();
|
||||
|
||||
// tag names have to match
|
||||
if (openedTag.first != tagName)
|
||||
{
|
||||
cerr << "ERROR: tag " << openedTag.first << " closed by tag " << tagName << ": " << line << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// assemble remaining information about tag
|
||||
size_t startPos = openedTag.second.first;
|
||||
string tagContent = openedTag.second.second;
|
||||
size_t endPos = wordPos;
|
||||
|
||||
// span attribute overwrites position
|
||||
string span = ParseXmlTagAttribute(tagContent,"span");
|
||||
if (! span.empty())
|
||||
{
|
||||
vector<string> ij = Tokenize(span, "-");
|
||||
if (ij.size() != 1 && ij.size() != 2) {
|
||||
cerr << "ERROR: span attribute must be of the form \"i-j\" or \"i\": " << line << endl;
|
||||
return false;
|
||||
}
|
||||
startPos = atoi(ij[0].c_str());
|
||||
if (ij.size() == 1) endPos = startPos + 1;
|
||||
else endPos = atoi(ij[1].c_str()) + 1;
|
||||
}
|
||||
|
||||
// cerr << "XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl;
|
||||
|
||||
if (startPos >= endPos)
|
||||
{
|
||||
cerr << "ERROR: tag " << tagName << " must span at least one word (" << startPos << "-" << endPos << "): " << line << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
string label = ParseXmlTagAttribute(tagContent,"label");
|
||||
labelCollection.insert( label );
|
||||
|
||||
// report what we have processed so far
|
||||
if (0) {
|
||||
cerr << "XML TAG NAME IS: '" << tagName << "'" << endl;
|
||||
cerr << "XML TAG LABEL IS: '" << label << "'" << endl;
|
||||
cerr << "XML SPAN IS: " << startPos << "-" << (endPos-1) << endl;
|
||||
}
|
||||
tree.AddNode( startPos, endPos-1, label );
|
||||
}
|
||||
}
|
||||
}
|
||||
// we are done. check if there are tags that are still open
|
||||
if (tagStack.size() > 0)
|
||||
{
|
||||
cerr << "ERROR: some opened tags were never closed: " << line << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
// collect top labels
|
||||
const SyntaxNodes &topNodes = tree.GetNodes( 0, wordPos-1 );
|
||||
for( SyntaxNodes::const_iterator node = topNodes.begin(); node != topNodes.end(); node++ )
|
||||
{
|
||||
const SyntaxNode *n = *node;
|
||||
const string &label = n->GetLabel();
|
||||
if (topLabelCollection.find( label ) == topLabelCollection.end())
|
||||
topLabelCollection[ label ] = 0;
|
||||
topLabelCollection[ label ]++;
|
||||
}
|
||||
|
||||
// return de-xml'ed sentence in line
|
||||
line = cleanLine;
|
||||
return true;
|
||||
}
|
@ -1,35 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include "SyntaxTree.h"
|
||||
|
||||
std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName);
|
||||
std::string TrimXml(const std::string& str);
|
||||
bool isXmlTag(const std::string& tag);
|
||||
inline std::vector<std::string> TokenizeXml(const std::string& str);
|
||||
bool ProcessAndStripXMLTags(std::string &line, SyntaxTree &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection );
|
@ -1,310 +0,0 @@
|
||||
// $Id: extract.cpp 2828 2010-02-01 16:07:58Z hieuhoang1972 $
|
||||
// vim:tabstop=2
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2009 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <cstdio>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <time.h>
|
||||
#include <cstring>
|
||||
#include <sstream>
|
||||
#include <iostream>
|
||||
#include "extract.h"
|
||||
#include "InputFileStream.h"
|
||||
#include "OutputFileStream.h"
|
||||
#include "Lattice.h"
|
||||
|
||||
#ifdef WIN32
|
||||
// Include Visual Leak Detector
|
||||
#include <vld.h>
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
|
||||
void writeGlueGrammar(const string &, Global &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection);
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
cerr << "Extract v2.0, written by Philipp Koehn\n"
|
||||
<< "rule extraction from an aligned parallel corpus\n";
|
||||
//time_t starttime = time(NULL);
|
||||
|
||||
Global *global = new Global();
|
||||
g_global = global;
|
||||
int sentenceOffset = 0;
|
||||
|
||||
if (argc < 5) {
|
||||
cerr << "syntax: extract-mixed-syntax corpus.target corpus.source corpus.align extract "
|
||||
<< " [ --Hierarchical | --Orientation"
|
||||
<< " | --GlueGrammar FILE | --UnknownWordLabel FILE"
|
||||
<< " | --OnlyDirect"
|
||||
|
||||
<< " | --MinHoleSpanSourceDefault[" << global->minHoleSpanSourceDefault << "]"
|
||||
<< " | --MaxHoleSpanSourceDefault[" << global->maxHoleSpanSourceDefault << "]"
|
||||
<< " | --MinHoleSpanSourceSyntax[" << global->minHoleSpanSourceSyntax << "]"
|
||||
<< " | --MaxHoleSpanSourceSyntax[" << global->maxHoleSpanSourceSyntax << "]"
|
||||
|
||||
<< " | --MaxSymbols[" << global->maxSymbols<< "]"
|
||||
<< " | --MaxNonTerm[" << global->maxNonTerm << "]"
|
||||
<< " | --SourceSyntax | --TargetSyntax"
|
||||
<< " | --UppermostOnly[" << g_global->uppermostOnly << "]"
|
||||
<< endl;
|
||||
exit(1);
|
||||
}
|
||||
char* &fileNameT = argv[1];
|
||||
char* &fileNameS = argv[2];
|
||||
char* &fileNameA = argv[3];
|
||||
string fileNameGlueGrammar;
|
||||
string fileNameUnknownWordLabel;
|
||||
string fileNameExtract = string(argv[4]);
|
||||
|
||||
int optionInd = 5;
|
||||
|
||||
for(int i=optionInd;i<argc;i++)
|
||||
{
|
||||
if (strcmp(argv[i],"--MinHoleSpanSourceDefault") == 0) {
|
||||
global->minHoleSpanSourceDefault = atoi(argv[++i]);
|
||||
if (global->minHoleSpanSourceDefault < 1) {
|
||||
cerr << "extract error: --minHoleSourceDefault should be at least 1" << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
else if (strcmp(argv[i],"--MaxHoleSpanSourceDefault") == 0) {
|
||||
global->maxHoleSpanSourceDefault = atoi(argv[++i]);
|
||||
if (global->maxHoleSpanSourceDefault < 1) {
|
||||
cerr << "extract error: --maxHoleSourceDefault should be at least 1" << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
else if (strcmp(argv[i],"--MinHoleSpanSourceSyntax") == 0) {
|
||||
global->minHoleSpanSourceSyntax = atoi(argv[++i]);
|
||||
if (global->minHoleSpanSourceSyntax < 1) {
|
||||
cerr << "extract error: --minHoleSourceSyntax should be at least 1" << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
else if (strcmp(argv[i],"--UppermostOnly") == 0) {
|
||||
global->uppermostOnly = atoi(argv[++i]);
|
||||
}
|
||||
else if (strcmp(argv[i],"--MaxHoleSpanSourceSyntax") == 0) {
|
||||
global->maxHoleSpanSourceSyntax = atoi(argv[++i]);
|
||||
if (global->maxHoleSpanSourceSyntax < 1) {
|
||||
cerr << "extract error: --maxHoleSourceSyntax should be at least 1" << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// maximum number of words in hierarchical phrase
|
||||
else if (strcmp(argv[i],"--maxSymbols") == 0) {
|
||||
global->maxSymbols = atoi(argv[++i]);
|
||||
if (global->maxSymbols < 1) {
|
||||
cerr << "extract error: --maxSymbols should be at least 1" << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
// maximum number of non-terminals
|
||||
else if (strcmp(argv[i],"--MaxNonTerm") == 0) {
|
||||
global->maxNonTerm = atoi(argv[++i]);
|
||||
if (global->maxNonTerm < 1) {
|
||||
cerr << "extract error: --MaxNonTerm should be at least 1" << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
// allow consecutive non-terminals (X Y | X Y)
|
||||
else if (strcmp(argv[i],"--TargetSyntax") == 0) {
|
||||
global->targetSyntax = true;
|
||||
}
|
||||
else if (strcmp(argv[i],"--SourceSyntax") == 0) {
|
||||
global->sourceSyntax = true;
|
||||
}
|
||||
// do not create many part00xx files!
|
||||
else if (strcmp(argv[i],"--NoFileLimit") == 0) {
|
||||
// now default
|
||||
}
|
||||
else if (strcmp(argv[i],"--GlueGrammar") == 0) {
|
||||
global->glueGrammarFlag = true;
|
||||
if (++i >= argc)
|
||||
{
|
||||
cerr << "ERROR: Option --GlueGrammar requires a file name" << endl;
|
||||
exit(0);
|
||||
}
|
||||
fileNameGlueGrammar = string(argv[i]);
|
||||
cerr << "creating glue grammar in '" << fileNameGlueGrammar << "'" << endl;
|
||||
}
|
||||
else if (strcmp(argv[i],"--UnknownWordLabel") == 0) {
|
||||
global->unknownWordLabelFlag = true;
|
||||
if (++i >= argc)
|
||||
{
|
||||
cerr << "ERROR: Option --UnknownWordLabel requires a file name" << endl;
|
||||
exit(0);
|
||||
}
|
||||
fileNameUnknownWordLabel = string(argv[i]);
|
||||
cerr << "creating unknown word labels in '" << fileNameUnknownWordLabel << "'" << endl;
|
||||
}
|
||||
// TODO: this should be a useful option
|
||||
//else if (strcmp(argv[i],"--ZipFiles") == 0) {
|
||||
// zipFiles = true;
|
||||
//}
|
||||
// if an source phrase is paired with two target phrases, then count(t|s) = 0.5
|
||||
else if (strcmp(argv[i],"--Mixed") == 0) {
|
||||
global->mixed = true;
|
||||
}
|
||||
else if (strcmp(argv[i],"--AllowDefaultNonTermEdge") == 0) {
|
||||
global->allowDefaultNonTermEdge = atoi(argv[++i]);
|
||||
}
|
||||
else if (strcmp(argv[i], "--GZOutput") == 0) {
|
||||
global->gzOutput = true;
|
||||
}
|
||||
else if (strcmp(argv[i],"--MaxSpan") == 0) {
|
||||
// ignore
|
||||
++i;
|
||||
}
|
||||
else if (strcmp(argv[i],"--SentenceOffset") == 0) {
|
||||
if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
|
||||
cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
|
||||
exit(1);
|
||||
}
|
||||
sentenceOffset = atoi(argv[++i]);
|
||||
}
|
||||
else {
|
||||
cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// open input files
|
||||
Moses::InputFileStream tFile(fileNameT);
|
||||
Moses::InputFileStream sFile(fileNameS);
|
||||
Moses::InputFileStream aFile(fileNameA);
|
||||
|
||||
// open output files
|
||||
string fileNameExtractInv = fileNameExtract + ".inv";
|
||||
if (global->gzOutput) {
|
||||
fileNameExtract += ".gz";
|
||||
fileNameExtractInv += ".gz";
|
||||
}
|
||||
|
||||
Moses::OutputFileStream extractFile;
|
||||
Moses::OutputFileStream extractFileInv;
|
||||
extractFile.Open(fileNameExtract.c_str());
|
||||
extractFileInv.Open(fileNameExtractInv.c_str());
|
||||
|
||||
|
||||
// loop through all sentence pairs
|
||||
int i = sentenceOffset;
|
||||
while(true) {
|
||||
i++;
|
||||
|
||||
if (i % 1000 == 0) {
|
||||
cerr << i << " " << flush;
|
||||
}
|
||||
|
||||
string targetString;
|
||||
string sourceString;
|
||||
string alignmentString;
|
||||
|
||||
bool ok = getline(tFile, targetString);
|
||||
if (!ok)
|
||||
break;
|
||||
getline(sFile, sourceString);
|
||||
getline(aFile, alignmentString);
|
||||
|
||||
//cerr << endl << targetString << endl << sourceString << endl << alignmentString << endl;
|
||||
|
||||
//time_t currTime = time(NULL);
|
||||
//cerr << "A " << (currTime - starttime) << endl;
|
||||
|
||||
SentenceAlignment sentencePair;
|
||||
if (sentencePair.Create( targetString, sourceString, alignmentString, i, *global ))
|
||||
{
|
||||
//cerr << sentence.sourceTree << endl;
|
||||
//cerr << sentence.targetTree << endl;
|
||||
|
||||
sentencePair.FindTunnels(*g_global);
|
||||
//cerr << "C " << (time(NULL) - starttime) << endl;
|
||||
//cerr << sentencePair << endl;
|
||||
|
||||
sentencePair.CreateLattice(*g_global);
|
||||
//cerr << "D " << (time(NULL) - starttime) << endl;
|
||||
//cerr << sentencePair << endl;
|
||||
|
||||
sentencePair.CreateRules(*g_global);
|
||||
//cerr << "E " << (time(NULL) - starttime) << endl;
|
||||
|
||||
//cerr << sentence.lattice->GetRules().GetSize() << endl;
|
||||
sentencePair.GetLattice().GetRules().Output(extractFile);
|
||||
sentencePair.GetLattice().GetRules().OutputInv(extractFileInv);
|
||||
}
|
||||
}
|
||||
|
||||
tFile.Close();
|
||||
sFile.Close();
|
||||
aFile.Close();
|
||||
|
||||
extractFile.Close();
|
||||
extractFileInv.Close();
|
||||
|
||||
if (global->glueGrammarFlag) {
|
||||
writeGlueGrammar(fileNameGlueGrammar, *global, targetLabelCollection, targetTopLabelCollection);
|
||||
}
|
||||
|
||||
delete global;
|
||||
}
|
||||
|
||||
|
||||
void writeGlueGrammar( const string & fileName, Global &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection )
|
||||
{
|
||||
ofstream grammarFile;
|
||||
grammarFile.open(fileName.c_str());
|
||||
if (!options.targetSyntax) {
|
||||
grammarFile << "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" << endl
|
||||
<< "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" << endl
|
||||
<< "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl;
|
||||
} else {
|
||||
// chose a top label that is not already a label
|
||||
string topLabel = "QQQQQQ";
|
||||
for( unsigned int i=1; i<=topLabel.length(); i++) {
|
||||
if(targetLabelCollection.find( topLabel.substr(0,i) ) == targetLabelCollection.end() ) {
|
||||
topLabel = topLabel.substr(0,i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// basic rules
|
||||
grammarFile << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| " << endl
|
||||
<< "[X][" << topLabel << "] </s> [X] ||| [X][" << topLabel << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 " << endl;
|
||||
|
||||
// top rules
|
||||
for( map<string,int>::const_iterator i = targetTopLabelCollection.begin();
|
||||
i != targetTopLabelCollection.end(); i++ ) {
|
||||
grammarFile << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 1-1" << endl;
|
||||
}
|
||||
|
||||
// glue rules
|
||||
for( set<string>::const_iterator i = targetLabelCollection.begin();
|
||||
i != targetLabelCollection.end(); i++ ) {
|
||||
grammarFile << "[X][" << topLabel << "] [X][" << *i << "] [X] ||| [X][" << topLabel << "] [X][" << *i << "] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1" << endl;
|
||||
}
|
||||
grammarFile << "[X][" << topLabel << "] [X][X] [X] ||| [X][" << topLabel << "] [X][X] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1 " << endl; // glue rule for unknown word...
|
||||
}
|
||||
grammarFile.close();
|
||||
}
|
||||
|
@ -1,34 +0,0 @@
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include "SyntaxTree.h"
|
||||
#include "XmlTree.h"
|
||||
#include "Tunnel.h"
|
||||
#include "TunnelCollection.h"
|
||||
#include "SentenceAlignment.h"
|
||||
#include "Global.h"
|
||||
|
||||
std::vector<std::string> tokenize( const char [] );
|
||||
|
||||
// Read one _DELIM-terminated line from stream _IS into the char buffer
// _LINE of capacity _SIZE. When the line does not fit into the buffer an
// error is printed and the program exits with status 1.
//
// Every argument is parenthesised so that expressions such as *streamPtr
// can be passed directly. Overflow is detected through failbit (set by
// getline() when the buffer fills up before the delimiter is found);
// gcount() alone is not sufficient because it also counts the extracted
// delimiter, so a line of exactly _SIZE-2 characters yields _SIZE-1 too.
// The expansion site must provide <iostream> and exit().
#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
    (_IS).getline((_LINE), (_SIZE), (_DELIM)); \
    if ((_IS).fail() && !(_IS).bad() && !(_IS).eof()) { \
      (_IS).clear(); \
      if ((_IS).gcount() == (_SIZE)-1) { \
        std::cerr << "Line too long! Buffer overflow. Delete lines >=" \
                  << (_SIZE) << " chars or raise LINE_MAX_LENGTH in phrase-extract/extract.cpp" \
                  << std::endl; \
        exit(1); \
      } \
    } \
  }
|
||||
#define LINE_MAX_LENGTH 1000000
|
||||
|
||||
const Global *g_global;
|
||||
|
||||
std::set< std::string > targetLabelCollection, sourceLabelCollection;
|
||||
std::map< std::string, int > targetTopLabelCollection, sourceTopLabelCollection;
|
27
contrib/other-builds/extract-mixed-syntax/filter-by-source-word-count.perl
Executable file
27
contrib/other-builds/extract-mixed-syntax/filter-by-source-word-count.perl
Executable file
@ -0,0 +1,27 @@
|
||||
#!/usr/bin/perl

# Filter lines read from STDIN by the number of tokens on their source side.
#
# usage: filter-by-source-word-count.perl MAX_NUM_WORDS < in > out
#
# A line is copied to STDOUT when the number of space-separated tokens in
# front of its first "|||" is at most MAX_NUM_WORDS. A line without any
# "|||" is counted as consisting of source tokens only.

use strict;

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

die "usage: $0 MAX_NUM_WORDS < in > out\n" unless defined($ARGV[0]);
my $maxNumWords = $ARGV[0];

while (my $line = <STDIN>) {
  chomp($line);
  my @toks = split(/ /,$line);

  # Count the tokens up to the first "|||". The scan is bounded by the
  # number of tokens: without the bound a line lacking "|||" would compare
  # undef against "|||" forever.
  my $numSourceWords = 0;
  while ($numSourceWords < scalar(@toks) && $toks[$numSourceWords] ne "|||") {
    ++$numSourceWords;
  }

  if ($numSourceWords <= $maxNumWords) {
    print "$line\n";
  }
}
|
||||
|
||||
|
33
contrib/other-builds/extract-mixed-syntax/learnable/equal.perl
Executable file
33
contrib/other-builds/extract-mixed-syntax/learnable/equal.perl
Executable file
@ -0,0 +1,33 @@
|
||||
#! /usr/bin/perl -w

# Count the line pairs of two files that are equal once leading and
# trailing whitespace is ignored, and print the count.
#
# usage: equal.perl FILE1 FILE2
#
# Lines are compared pairwise by line number; comparison stops at the end
# of the shorter file.

use strict;

sub trim($);

my $file1 = $ARGV[0];
my $file2 = $ARGV[1];
die "usage: $0 FILE1 FILE2\n" unless defined($file1) && defined($file2);

open (FILE1, $file1) or die "Cannot open $file1: $!\n";
open (FILE2, $file2) or die "Cannot open $file2: $!\n";

my $countEqual = 0;
while (my $line1 = <FILE1>) {
  my $line2 = <FILE2>;
  # FILE2 is exhausted: a missing line must not count as an empty one
  last unless defined($line2);

  if (trim($line1) eq trim($line2)) {
    ++$countEqual;
  }
}

close(FILE1);
close(FILE2);

print $countEqual ."\n";


######################
# Perl trim function to remove whitespace from the start and end of the string
sub trim($) {
  my $string = shift;
  $string =~ s/^\s+//;
  $string =~ s/\s+$//;
  return $string;
}
|
||||
|
||||
|
29
contrib/other-builds/extract-mixed-syntax/learnable/get-by-line-number.perl
Executable file
29
contrib/other-builds/extract-mixed-syntax/learnable/get-by-line-number.perl
Executable file
@ -0,0 +1,29 @@
|
||||
#! /usr/bin/perl -w

# Print selected lines of STDIN.
#
# usage: get-by-line-number.perl LINE_NUMBER_FILE < in > out
#
# LINE_NUMBER_FILE holds one 1-based line number per line. The numbers are
# consumed in file order while STDIN is read once, so only numbers that
# appear in increasing order can match.

use strict;

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

my $fileLineNum = $ARGV[0];
die "usage: $0 LINE_NUMBER_FILE < in > out\n" unless defined($fileLineNum);

# fail loudly: reading from an unopened handle would just print nothing
open (FILE_LINE_NUM, $fileLineNum) or die "Cannot open $fileLineNum: $!\n";

my $nextLineNum = <FILE_LINE_NUM>;

my $lineNum = 1;
while (my $line = <STDIN>) {
  if (defined($nextLineNum) && $lineNum == $nextLineNum) {
    # matches. output line
    chomp($line);
    print "$line\n";

    # next line number
    $nextLineNum = <FILE_LINE_NUM>;
  }

  ++$lineNum;
}

close(FILE_LINE_NUM);
|
||||
|
||||
|
||||
|
108
contrib/other-builds/extract-mixed-syntax/learnable/learnable.perl
Executable file
108
contrib/other-builds/extract-mixed-syntax/learnable/learnable.perl
Executable file
@ -0,0 +1,108 @@
|
||||
#! /usr/bin/perl -w

# For every sentence pair of a parallel corpus: extract and score a phrase
# table from that single pair, then decode the source sentence with the
# decoder constrained to the target sentence.
#
# usage: learnable.perl INI_PATH IS_HIERO DECODER_EXEC EXTRACT_EXEC TMP_NAME
#
# The files "source", "target" and "alignment" are read from the current
# directory. Work files go to ./TMP_NAME/work<lineNum>. DECODER_EXEC and
# EXTRACT_EXEC are names inside $MOSES_DIR/bin. The Moses directory is
# taken from the environment variable MOSES_DIR when it is set, otherwise
# the historical default below is used.

use strict;

die "usage: $0 INI_PATH IS_HIERO DECODER_EXEC EXTRACT_EXEC TMP_NAME\n"
  unless scalar(@ARGV) >= 5;

my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];

my $WORK_DIR = `pwd`;
chomp($WORK_DIR);

my $MOSES_DIR = defined($ENV{"MOSES_DIR"})
  ? $ENV{"MOSES_DIR"}
  : "~/workspace/github/mosesdecoder.hieu";

$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";

# prefer the GNU tools under their g-prefixed names when they are installed
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
  $SPLIT_EXEC = 'gsplit';
}
else {
  $SPLIT_EXEC = 'split';
}

my $SORT_EXEC = `gsort --help 2>/dev/null`;
if($SORT_EXEC) {
  $SORT_EXEC = 'gsort';
}
else {
  $SORT_EXEC = 'sort';
}


my $hieroFlag = "";
if ($isHiero == 1) {
  $hieroFlag = "--Hierarchical";
}

print STDERR "WORK_DIR=$WORK_DIR \n";

my $cmd;

open (SOURCE, "source") or die "Cannot open source: $!\n";
open (TARGET, "target") or die "Cannot open target: $!\n";
open (ALIGNMENT, "alignment") or die "Cannot open alignment: $!\n";

my $lineNum = 0;
my ($source, $target, $alignment);
while ($source = <SOURCE>) {
  chomp($source);
  $target = <TARGET>;
  $alignment = <ALIGNMENT>;
  # the three files are parallel: a missing line means they are out of sync
  die "target/alignment has fewer lines than source (line $lineNum)\n"
    unless defined($target) && defined($alignment);
  chomp($target);
  chomp($alignment);

  #print STDERR "$source ||| $target ||| $alignment \n";

  # write out 1 line
  my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
  `mkdir -p $tmpDir`;

  open (SOURCE1, ">$tmpDir/source") or die "Cannot write $tmpDir/source: $!\n";
  open (TARGET1, ">$tmpDir/target") or die "Cannot write $tmpDir/target: $!\n";
  open (ALIGNMENT1, ">$tmpDir/alignment") or die "Cannot write $tmpDir/alignment: $!\n";

  print SOURCE1 "$source\n";
  print TARGET1 "$target\n";
  print ALIGNMENT1 "$alignment\n";

  close (SOURCE1);
  close (TARGET1);
  close (ALIGNMENT1);

  # train
  if ($isHiero == 1) {
    $cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract --GZOutput";
  }
  else {
    # pb
    $cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract 7 --GZOutput";
  }
  $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
  print STDERR "Executing: $cmd\n";
  `$cmd`;

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
  `$cmd`;

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
  `$cmd`;

  $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
  `$cmd`;

  # decode
  $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source -feature-add \"ConstrainedDecoding path=$tmpDir/target\"";
  print STDERR "Executing: $cmd\n";
  `$cmd`;

  # `rm -rf $tmpDir`;

  ++$lineNum;
}

close(SOURCE);
close(TARGET);
close(ALIGNMENT);
|
||||
|
151
contrib/other-builds/extract-mixed-syntax/learnable/num-deriv.perl
Executable file
151
contrib/other-builds/extract-mixed-syntax/learnable/num-deriv.perl
Executable file
@ -0,0 +1,151 @@
|
||||
#! /usr/bin/perl -w

# For every line number in [startLine, endLine) of the parallel corpus in the
# current directory (files "source", "target" and "alignment"):
#   1. write the corpus with that one line held out into a scratch directory,
#   2. run extraction, scoring and consolidation on the held-out corpus,
#   3. decode the held-out source sentence with the resulting table, asking
#      for a distinct 10000-best list,
#   4. append "wc -l" of that n-best list to the file "out",
#   5. delete the scratch directory.
#
# Usage: num-deriv.perl iniPath isHiero decoderExec extractExec tmpName startLine endLine

use strict;

sub Write1Line;
sub WriteCorpus1Holdout;

die "Usage: $0 iniPath isHiero decoderExec extractExec tmpName startLine endLine\n"
  unless scalar(@ARGV) >= 7;

my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];
my $startLine = $ARGV[5];
my $endLine = $ARGV[6];

print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";

my $WORK_DIR = `pwd`;
chomp($WORK_DIR);

# NOTE(review): hard-coded checkout location. The leading "~" is never
# expanded by Perl; it only works because every use below goes through a shell.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";

$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";

# Use the g-prefixed split/sort when "<tool> --help" prints anything,
# otherwise fall back to the unprefixed names.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null` ? 'gsplit' : 'split';
my $SORT_EXEC = `gsort --help 2>/dev/null` ? 'gsort' : 'sort';

my $hieroFlag = "";
if ($isHiero == 1) {
  $hieroFlag = "--Hierarchical";
}

print STDERR "WORK_DIR=$WORK_DIR \n";

my $cmd;

open (SOURCE, "source") or die "Cannot open source: $!";
open (TARGET, "target") or die "Cannot open target: $!";
open (ALIGNMENT, "alignment") or die "Cannot open alignment: $!";

my $numLines = `cat source | wc -l`;

for (my $lineNum = 0; $lineNum < $numLines; ++$lineNum) {
  # Nothing at or beyond $endLine is ever processed, so stop reading here.
  last if $lineNum >= $endLine;

  # All three files are read in step, including for skipped lines.
  my $source = <SOURCE>;
  my $target = <TARGET>;
  my $alignment = <ALIGNMENT>;

  next if $lineNum < $startLine;

  die "Line $lineNum: source, target or alignment ended early\n"
    unless defined($source) && defined($target) && defined($alignment);
  chomp($source);
  chomp($target);
  chomp($alignment);

  #print STDERR "$source ||| $target ||| $alignment \n";
  # write out 1 line
  my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
  `mkdir -p $tmpDir`;

  Write1Line($source, $tmpDir, "source.1");
  Write1Line($target, $tmpDir, "target.1");
  Write1Line($alignment, $tmpDir, "alignment.1");

  WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
  WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
  WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");

  # train
  # NOTE(review): the exit status of the commands below is not checked.
  if ($isHiero == 1) {
    $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
  }
  else {
    # pb
    $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
  }
  $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
  print STDERR "Executing: $cmd\n";
  `$cmd`;

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
  `$cmd`;

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
  `$cmd`;

  $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
  `$cmd`;

  # decode
  $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -n-best-list $tmpDir/nbest 10000 distinct -v 2";
  print STDERR "Executing: $cmd\n";
  `$cmd`;

  # count the number of translation in nbest list
  $cmd = "wc -l $tmpDir/nbest >> out";
  `$cmd`;

  `rm -rf $tmpDir`;
}

close(SOURCE);
close(TARGET);
close(ALIGNMENT);
|
||||
|
||||
|
||||
######################
|
||||
# Write $line followed by a newline to the file $tmpDir/$fileName,
# truncating the file if it already exists. Dies if the file cannot be
# opened or closed.
sub Write1Line
{
  my ($line, $tmpDir, $fileName) = @_;

  my $path = "$tmpDir/$fileName";
  open (my $out, ">", $path) or die "Cannot write $path: $!";
  print $out "$line\n";
  close ($out) or die "Cannot close $path: $!";
}
|
||||
|
||||
# Copy $inFilePath to $tmpDir/$outFileName, omitting the single line whose
# 0-based index equals $holdoutLineNum. Every copied line is written with a
# trailing newline. Dies if either file cannot be opened.
sub WriteCorpus1Holdout
{
  my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;

  my $outPath = "$tmpDir/$outFileName";
  open (my $in, "<", $inFilePath) or die "Cannot read $inFilePath: $!";
  open (my $out, ">", $outPath) or die "Cannot write $outPath: $!";

  my $lineNum = 0;
  while (my $line = <$in>) {
    chomp($line);
    print $out "$line\n" if $lineNum != $holdoutLineNum;
    ++$lineNum;
  }

  close ($out) or die "Cannot close $outPath: $!";
  close ($in);
}
|
||||
|
||||
|
147
contrib/other-builds/extract-mixed-syntax/learnable/reachable.perl
Executable file
147
contrib/other-builds/extract-mixed-syntax/learnable/reachable.perl
Executable file
@ -0,0 +1,147 @@
|
||||
#! /usr/bin/perl -w

# For every line number in [startLine, endLine) of the parallel corpus in the
# current directory (files "source", "target" and "alignment"):
#   1. write the corpus with that one line held out into a scratch directory,
#   2. run extraction, scoring and consolidation on the held-out corpus,
#   3. decode the held-out source sentence with the resulting table, adding a
#      ConstrainedDecoding feature that points at the held-out target line,
#   4. delete the scratch directory.
#
# Usage: reachable.perl iniPath isHiero decoderExec extractExec tmpName startLine endLine

use strict;

sub Write1Line;
sub WriteCorpus1Holdout;

die "Usage: $0 iniPath isHiero decoderExec extractExec tmpName startLine endLine\n"
  unless scalar(@ARGV) >= 7;

my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];
my $startLine = $ARGV[5];
my $endLine = $ARGV[6];

print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";

my $WORK_DIR = `pwd`;
chomp($WORK_DIR);

# NOTE(review): hard-coded checkout location. The leading "~" is never
# expanded by Perl; it only works because every use below goes through a shell.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";

$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";

# Use the g-prefixed split/sort when "<tool> --help" prints anything,
# otherwise fall back to the unprefixed names.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null` ? 'gsplit' : 'split';
my $SORT_EXEC = `gsort --help 2>/dev/null` ? 'gsort' : 'sort';

my $hieroFlag = "";
if ($isHiero == 1) {
  $hieroFlag = "--Hierarchical";
}

print STDERR "WORK_DIR=$WORK_DIR \n";

my $cmd;

open (SOURCE, "source") or die "Cannot open source: $!";
open (TARGET, "target") or die "Cannot open target: $!";
open (ALIGNMENT, "alignment") or die "Cannot open alignment: $!";

my $numLines = `cat source | wc -l`;

for (my $lineNum = 0; $lineNum < $numLines; ++$lineNum) {
  # Nothing at or beyond $endLine is ever processed, so stop reading here.
  last if $lineNum >= $endLine;

  # All three files are read in step, including for skipped lines.
  my $source = <SOURCE>;
  my $target = <TARGET>;
  my $alignment = <ALIGNMENT>;

  next if $lineNum < $startLine;

  die "Line $lineNum: source, target or alignment ended early\n"
    unless defined($source) && defined($target) && defined($alignment);
  chomp($source);
  chomp($target);
  chomp($alignment);

  #print STDERR "$source ||| $target ||| $alignment \n";
  # write out 1 line
  my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
  `mkdir -p $tmpDir`;

  Write1Line($source, $tmpDir, "source.1");
  Write1Line($target, $tmpDir, "target.1");
  Write1Line($alignment, $tmpDir, "alignment.1");

  WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
  WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
  WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");

  # train
  # NOTE(review): the exit status of the commands below is not checked.
  if ($isHiero == 1) {
    $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
  }
  else {
    # pb
    $cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
  }
  $cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
  print STDERR "Executing: $cmd\n";
  `$cmd`;

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
  `$cmd`;

  $cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
  `$cmd`;

  $cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
  `$cmd`;

  # decode
  $cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -feature-add \"ConstrainedDecoding path=$tmpDir/target.1\" -v 2";
  print STDERR "Executing: $cmd\n";
  `$cmd`;

  `rm -rf $tmpDir`;
}

close(SOURCE);
close(TARGET);
close(ALIGNMENT);
|
||||
|
||||
|
||||
######################
|
||||
# Write $line followed by a newline to the file $tmpDir/$fileName,
# truncating the file if it already exists. Dies if the file cannot be
# opened or closed.
sub Write1Line
{
  my ($line, $tmpDir, $fileName) = @_;

  my $path = "$tmpDir/$fileName";
  open (my $out, ">", $path) or die "Cannot write $path: $!";
  print $out "$line\n";
  close ($out) or die "Cannot close $path: $!";
}
|
||||
|
||||
# Copy $inFilePath to $tmpDir/$outFileName, omitting the single line whose
# 0-based index equals $holdoutLineNum. Every copied line is written with a
# trailing newline. Dies if either file cannot be opened.
sub WriteCorpus1Holdout
{
  my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;

  my $outPath = "$tmpDir/$outFileName";
  open (my $in, "<", $inFilePath) or die "Cannot read $inFilePath: $!";
  open (my $out, ">", $outPath) or die "Cannot write $outPath: $!";

  my $lineNum = 0;
  while (my $line = <$in>) {
    chomp($line);
    print $out "$line\n" if $lineNum != $holdoutLineNum;
    ++$lineNum;
  }

  close ($out) or die "Cannot close $outPath: $!";
  close ($in);
}
|
||||
|
||||
|
17
contrib/other-builds/extract-mixed-syntax/learnable/run-parallel.perl
Executable file
17
contrib/other-builds/extract-mixed-syntax/learnable/run-parallel.perl
Executable file
@ -0,0 +1,17 @@
|
||||
#! /usr/bin/perl -w

# Start one background reachable.perl job per slice of $SPLIT_LINES lines of
# the file "source" in the current directory. Each job's output goes to
# out.reachable.<startLine>.
#
# Usage: run-parallel.perl iniPath

die "Usage: $0 iniPath\n" unless defined($ARGV[0]);

my $iniPath = $ARGV[0];

my $SPLIT_LINES = 200;
my $lineCount = `cat source | wc -l`;
print STDERR "lineCount=$lineCount \n";

for (my $startLine = 0; $startLine < $lineCount; $startLine += $SPLIT_LINES) {
  my $endLine = $startLine + $SPLIT_LINES;

  # system() hands this string to /bin/sh. The bash-only "&>file" form is
  # parsed by a POSIX sh as "run in background" followed by "> file", which
  # loses the job's output, so the portable ">file 2>&1" form is used.
  my $cmd = "../../scripts/reachable.perl $iniPath 1 moses_chart extract-rules tmp-reachable $startLine $endLine >out.reachable.$startLine 2>&1 &";
  print STDERR "Executing: $cmd \n";
  system($cmd);
}
|
||||
|
69
contrib/other-builds/extract-mixed-syntax/pugiconfig.hpp
Normal file
69
contrib/other-builds/extract-mixed-syntax/pugiconfig.hpp
Normal file
@ -0,0 +1,69 @@
|
||||
/**
|
||||
* pugixml parser - version 1.2
|
||||
* --------------------------------------------------------
|
||||
* Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at http://pugixml.org/
|
||||
*
|
||||
* This library is distributed under the MIT License. See notice at the end
|
||||
* of this file.
|
||||
*
|
||||
* This work is based on the pugxml parser, which is:
|
||||
* Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
|
||||
*/
|
||||
|
||||
#ifndef HEADER_PUGICONFIG_HPP
|
||||
#define HEADER_PUGICONFIG_HPP
|
||||
|
||||
// Uncomment this to enable wchar_t mode
|
||||
// #define PUGIXML_WCHAR_MODE
|
||||
|
||||
// Uncomment this to disable XPath
|
||||
// #define PUGIXML_NO_XPATH
|
||||
|
||||
// Uncomment this to disable STL
|
||||
// #define PUGIXML_NO_STL
|
||||
|
||||
// Uncomment this to disable exceptions
|
||||
// #define PUGIXML_NO_EXCEPTIONS
|
||||
|
||||
// Set this to control attributes for public classes/functions, i.e.:
|
||||
// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
|
||||
// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
|
||||
// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
|
||||
// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
|
||||
|
||||
// Uncomment this to switch to header-only version
|
||||
// #define PUGIXML_HEADER_ONLY
|
||||
// #include "pugixml.cpp"
|
||||
|
||||
// Tune these constants to adjust memory-related behavior
|
||||
// #define PUGIXML_MEMORY_PAGE_SIZE 32768
|
||||
// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
|
||||
// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Copyright (c) 2006-2012 Arseny Kapoulkine
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
10250
contrib/other-builds/extract-mixed-syntax/pugixml.cpp
Normal file
10250
contrib/other-builds/extract-mixed-syntax/pugixml.cpp
Normal file
File diff suppressed because it is too large
Load Diff
1265
contrib/other-builds/extract-mixed-syntax/pugixml.hpp
Normal file
1265
contrib/other-builds/extract-mixed-syntax/pugixml.hpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,110 +0,0 @@
|
||||
// $Id: tables-core.cpp 3131 2010-04-13 16:29:55Z pjwilliams $
|
||||
//#include "beammain.h"
|
||||
//#include "SafeGetLine.h"
|
||||
#include "tables-core.h"
|
||||
|
||||
#define TABLE_LINE_MAX_LENGTH 1000
|
||||
#define UNKNOWNSTR "UNK"
|
||||
|
||||
// as in beamdecoder/tables.cpp
|
||||
/**
 * Split a NUL-terminated string into tokens separated by runs of spaces
 * and/or tabs. Leading and trailing separators produce no empty tokens.
 *
 * @param input  string to split; a null pointer is treated as empty input.
 * @return the tokens in order of appearance (empty if there are none).
 */
std::vector<std::string> tokenize( const char* input ) {
  std::vector<std::string> token;
  if (!input)
    return token;

  // Start of the token currently being scanned; null while between tokens.
  const char* wordStart = 0;
  const char* p = input;
  for(; *p != '\0'; ++p) {
    const bool isSpace = (*p == ' ' || *p == '\t');

    if (!isSpace && !wordStart) {
      wordStart = p;
    }
    else if (isSpace && wordStart) {
      token.push_back( std::string( wordStart, p ) );
      wordStart = 0;
    }
  }
  // Flush a token that runs up to the terminating NUL.
  if (wordStart)
    token.push_back( std::string( wordStart, p ) );
  return token;
}
|
||||
|
||||
// Return the ID already recorded for `word`; if there is none, append the
// word to `vocab`, record its position as its ID in `lookup`, and return
// that new ID.
WORD_ID Vocabulary::storeIfNew( const WORD& word ) {
  const map<WORD, WORD_ID>::const_iterator known = lookup.find( word );
  if( known == lookup.end() ) {
    // The new ID is the index the word is about to occupy in `vocab`.
    const WORD_ID freshId = vocab.size();
    vocab.push_back( word );
    lookup[ word ] = freshId;
    return freshId;
  }
  return known->second;
}
|
||||
|
||||
// Look up the ID recorded for `word` without modifying the vocabulary.
// Returns 0 when the word is not present.
// NOTE(review): 0 is also the ID storeIfNew() hands out when `vocab` is
// empty, so a 0 result may be ambiguous; confirm how callers deal with it.
WORD_ID Vocabulary::getWordID( const WORD& word ) {
  const map<WORD, WORD_ID>::const_iterator hit = lookup.find( word );
  return ( hit != lookup.end() ) ? hit->second : 0;
}
|
||||
|
||||
// Return the ID already recorded for `phrase`; if there is none, append the
// phrase to `phraseTable`, record its position as its ID in `lookup`, and
// return that new ID.
PHRASE_ID PhraseTable::storeIfNew( const PHRASE& phrase ) {
  const map< PHRASE, PHRASE_ID >::const_iterator known = lookup.find( phrase );
  if( known == lookup.end() ) {
    // The new ID is the index the phrase is about to occupy in `phraseTable`.
    const PHRASE_ID freshId = phraseTable.size();
    phraseTable.push_back( phrase );
    lookup[ phrase ] = freshId;
    return freshId;
  }
  return known->second;
}
|
||||
|
||||
// Look up the ID recorded for `phrase` without modifying the table.
// Returns 0 when the phrase is not present.
// NOTE(review): 0 is also the ID storeIfNew() hands out when `phraseTable`
// is empty, so a 0 result may be ambiguous; confirm how callers deal with it.
PHRASE_ID PhraseTable::getPhraseID( const PHRASE& phrase ) {
  const map< PHRASE, PHRASE_ID >::const_iterator hit = lookup.find( phrase );
  return ( hit != lookup.end() ) ? hit->second : 0;
}
|
||||
|
||||
// Empty both the ID-to-phrase vector and the phrase-to-ID map.
void PhraseTable::clear() {
  phraseTable.clear();
  lookup.clear();
}
|
||||
|
||||
void DTable::init() {
|
||||
for(int i = -10; i<10; i++)
|
||||
dtable[i] = -abs( i );
|
||||
}
|
||||
|
||||
/*
|
||||
void DTable::load( const string& fileName ) {
|
||||
ifstream inFile;
|
||||
inFile.open(fileName.c_str());
|
||||
istream *inFileP = &inFile;
|
||||
|
||||
char line[TABLE_LINE_MAX_LENGTH];
|
||||
int i=0;
|
||||
while(true) {
|
||||
i++;
|
||||
SAFE_GETLINE((*inFileP), line, TABLE_LINE_MAX_LENGTH, '\n', __FILE__);
|
||||
if (inFileP->eof()) break;
|
||||
|
||||
vector<string> token = tokenize( line );
|
||||
if (token.size() < 2) {
|
||||
cerr << "line " << i << " in " << fileName << " too short, skipping\n";
|
||||
continue;
|
||||
}
|
||||
|
||||
int d = atoi( token[0].c_str() );
|
||||
double prob = log( atof( token[1].c_str() ) );
|
||||
dtable[ d ] = prob;
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
double DTable::get( int distortion ) {
|
||||
if (dtable.find( distortion ) == dtable.end())
|
||||
return log( 0.00001 );
|
||||
return dtable[ distortion ];
|
||||
}
|
||||
|
@ -1,72 +0,0 @@
|
||||
#pragma once
|
||||
// $Id: tables-core.h 2416 2009-07-30 11:07:38Z hieuhoang1972 $
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <queue>
|
||||
#include <map>
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define TABLE_LINE_MAX_LENGTH 1000
|
||||
#define UNKNOWNSTR "UNK"
|
||||
|
||||
vector<string> tokenize( const char[] );
|
||||
|
||||
//! Call delete on every element of a collection of pointers (e.g. a vector,
//! list or set of T*), then empty the collection.
template<class COLL>
void RemoveAllInColl(COLL &coll)
{
  typename COLL::const_iterator cur = coll.begin();
  const typename COLL::const_iterator stop = coll.end();
  while (cur != stop)
  {
    delete (*cur);
    ++cur;
  }
  coll.clear();
}
|
||||
|
||||
// A word is stored as a plain string and identified by an unsigned index.
typedef std::string WORD;
typedef unsigned int WORD_ID;

// Two-way mapping between words and their numeric IDs.
class Vocabulary {
public:
  // word -> ID
  std::map<WORD, WORD_ID> lookup;
  // ID -> word (a word's ID is its index in this vector)
  std::vector< WORD > vocab;

  // Return the ID of the word, adding it first if it is not yet known.
  WORD_ID storeIfNew( const WORD& );
  // Return the ID of the word, or 0 if it is not known.
  WORD_ID getWordID( const WORD& );

  // Return a mutable reference to the word stored at index `id`, even when
  // called on a const Vocabulary. `id` is not range-checked.
  WORD &getWord( WORD_ID id ) const {
    return const_cast<WORD&>( vocab[ id ] );
  }
};
|
||||
|
||||
typedef vector< WORD_ID > PHRASE;
|
||||
typedef unsigned int PHRASE_ID;
|
||||
|
||||
class PhraseTable {
|
||||
public:
|
||||
map< PHRASE, PHRASE_ID > lookup;
|
||||
vector< PHRASE > phraseTable;
|
||||
PHRASE_ID storeIfNew( const PHRASE& );
|
||||
PHRASE_ID getPhraseID( const PHRASE& );
|
||||
void clear();
|
||||
inline PHRASE &getPhrase( const PHRASE_ID id ) { return phraseTable[ id ]; }
|
||||
};
|
||||
|
||||
typedef vector< pair< PHRASE_ID, double > > PHRASEPROBVEC;
|
||||
|
||||
class TTable {
|
||||
public:
|
||||
map< PHRASE_ID, vector< pair< PHRASE_ID, double > > > ttable;
|
||||
map< PHRASE_ID, vector< pair< PHRASE_ID, vector< double > > > > ttableMulti;
|
||||
};
|
||||
|
||||
class DTable {
|
||||
public:
|
||||
map< int, double > dtable;
|
||||
void init();
|
||||
void load( const string& );
|
||||
double get( int );
|
||||
};
|
||||
|
||||
|
@ -1,135 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.499747849" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.798364121" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract-ordering}/Debug" id="cdt.managedbuild.builder.gnu.cross.1976289814" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1699460827" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1324749613" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.1750299246" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.719498215" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1317297964" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.251118848" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.99297656" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.106920816" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1327002489" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1844372739" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1178164658" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<option id="gnu.cpp.link.option.libs.1434184833" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_system-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.paths.974811544" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/lib64""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.904916320" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1005231499" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1318928675" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.604255673" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.818331963">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.818331963" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.818331963" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.818331963." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1489025499" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1052477856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract-ordering}/Release" id="cdt.managedbuild.builder.gnu.cross.33925527" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1505710417" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1884790737" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.197048136" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.106898878" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.157115446" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.1920378037" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.37950410" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.683027595" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1197641703" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1356351201" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2053623412" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1988048517" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1494470963" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1553727957" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="extract-ordering.cdt.managedbuild.target.gnu.cross.exe.1840421491" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.818331963;cdt.managedbuild.config.gnu.cross.exe.release.818331963.;cdt.managedbuild.tool.gnu.cross.c.compiler.1505710417;cdt.managedbuild.tool.gnu.c.compiler.input.106898878">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.818331963;cdt.managedbuild.config.gnu.cross.exe.release.818331963.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.157115446;cdt.managedbuild.tool.gnu.cpp.compiler.input.683027595">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127;cdt.managedbuild.config.gnu.cross.exe.debug.1624346127.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1317297964;cdt.managedbuild.tool.gnu.cpp.compiler.input.1327002489">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127;cdt.managedbuild.config.gnu.cross.exe.debug.1624346127.;cdt.managedbuild.tool.gnu.cross.c.compiler.1699460827;cdt.managedbuild.tool.gnu.c.compiler.input.719498215">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract-ordering"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract-ordering"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
</cproject>
|
@ -1,137 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.124769989" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.266544803" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract-rules}/Debug" id="cdt.managedbuild.builder.gnu.cross.335858926" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1376077469" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.947547329" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.426953885" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.include.paths.1671695899" name="Include paths (-I)" superClass="gnu.c.compiler.option.include.paths"/>
|
||||
<option id="gnu.c.compiler.option.include.files.1838960067" name="Include files (-include)" superClass="gnu.c.compiler.option.include.files"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.985831394" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.53480540" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.1726371873" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.899893408" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1099087456" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.88958138" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1616232021" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1411857637" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<option id="gnu.cpp.link.option.libs.109133121" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_system-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.paths.1030374421" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/lib64""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.272393234" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1391783790" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2066621509" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1945638157" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1113964425" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1722595316" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract-rules}/Release" id="cdt.managedbuild.builder.gnu.cross.691589832" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.593530229" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1320426973" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.947026588" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1217031668" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1401773863" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.1504181086" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.1645775798" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1484987112" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1807515346" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.44234391" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1468234013" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.467923425" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1673313707" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.518252425" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="extract-rules.cdt.managedbuild.target.gnu.cross.exe.1916763759" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292;cdt.managedbuild.config.gnu.cross.exe.debug.1438215292.;cdt.managedbuild.tool.gnu.cross.c.compiler.1376077469;cdt.managedbuild.tool.gnu.c.compiler.input.985831394">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292;cdt.managedbuild.config.gnu.cross.exe.debug.1438215292.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.53480540;cdt.managedbuild.tool.gnu.cpp.compiler.input.88958138">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1200693544;cdt.managedbuild.config.gnu.cross.exe.release.1200693544.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1401773863;cdt.managedbuild.tool.gnu.cpp.compiler.input.1484987112">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1200693544;cdt.managedbuild.config.gnu.cross.exe.release.1200693544.;cdt.managedbuild.tool.gnu.cross.c.compiler.593530229;cdt.managedbuild.tool.gnu.c.compiler.input.1217031668">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract-rules"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract-rules"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
</cproject>
|
@ -1 +0,0 @@
|
||||
/Debug
|
@ -25,26 +25,6 @@
|
||||
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
|
||||
</natures>
|
||||
<linkedResources>
|
||||
<link>
|
||||
<name>ExtractedRule.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/ExtractedRule.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>Hole.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/Hole.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>HoleCollection.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>HoleCollection.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>InputFileStream.cpp</name>
|
||||
<type>1</type>
|
||||
@ -65,11 +45,6 @@
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>RuleExtractionOptions.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/RuleExtractionOptions.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>SentenceAlignment.cpp</name>
|
||||
<type>1</type>
|
||||
@ -111,14 +86,9 @@
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>extract-rules-main.cpp</name>
|
||||
<name>extract-main.cpp</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>gzfilebuf.h</name>
|
||||
<type>1</type>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/gzfilebuf.h</locationURI>
|
||||
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-main.cpp</locationURI>
|
||||
</link>
|
||||
<link>
|
||||
<name>tables-core.cpp</name>
|
||||
|
@ -1,135 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.671913278" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1231657738" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract}/Debug" id="cdt.managedbuild.builder.gnu.cross.571044108" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.332036857" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1292572253" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.1873227592" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1165888615" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1342023600" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.698819695" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.1451916947" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1702398011" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.579278848" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1856691234" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1699542791" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<option id="gnu.cpp.link.option.libs.1880730637" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_system-mt"/>
|
||||
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.paths.298225069" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/lib64""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1339210059" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.976825054" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1971927463" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.704926167" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.140124152">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.140124152" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.140124152" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.140124152." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1250240843" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
|
||||
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.597335968" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
|
||||
<builder buildPath="${workspace_loc:/extract}/Release" id="cdt.managedbuild.builder.gnu.cross.95066247" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2096762162" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.88795016" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.option.debugging.level.383328020" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.681105644" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1806684544" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
|
||||
<option id="gnu.cpp.compiler.option.optimization.level.553394848" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.debugging.level.1420596769" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1726759263" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.234409052" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.320346578" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2045242811" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.417132714" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1944597759" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.203400619" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="extract.cdt.managedbuild.target.gnu.cross.exe.1220534104" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.140124152;cdt.managedbuild.config.gnu.cross.exe.release.140124152.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1806684544;cdt.managedbuild.tool.gnu.cpp.compiler.input.1726759263">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.140124152;cdt.managedbuild.config.gnu.cross.exe.release.140124152.;cdt.managedbuild.tool.gnu.cross.c.compiler.2096762162;cdt.managedbuild.tool.gnu.c.compiler.input.681105644">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.386290689;cdt.managedbuild.config.gnu.cross.exe.debug.386290689.;cdt.managedbuild.tool.gnu.cross.c.compiler.332036857;cdt.managedbuild.tool.gnu.c.compiler.input.1165888615">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.386290689;cdt.managedbuild.config.gnu.cross.exe.debug.386290689.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1342023600;cdt.managedbuild.tool.gnu.cpp.compiler.input.579278848">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/extract"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
</cproject>
|
@ -1,137 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.1133345948">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1133345948" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1133345948" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1133345948." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1405862229" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.605722566" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/extractor/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.238577912" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1956867596" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1512268277" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.2143789149" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.285958391" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.966722418" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1839105433" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.554846982" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.538786560" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.exe.debug.option.debugging.level.2125704556" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.100176353" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1048685119" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1295498016" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
|
||||
<option id="gnu.cpp.link.option.paths.338150127" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/../../boost/lib64""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/mert_lib/Debug""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc:}/util/Debug""/>
|
||||
</option>
|
||||
<option id="gnu.cpp.link.option.libs.585257079" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
|
||||
<listOptionValue builtIn="false" value="mert_lib"/>
|
||||
<listOptionValue builtIn="false" value="boost_system-mt"/>
|
||||
<listOptionValue builtIn="false" value="util"/>
|
||||
<listOptionValue builtIn="false" value="z"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.656319745" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1361889787" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.955209559" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.1385955159">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1385955159" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1385955159" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1385955159." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.887500021" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
|
||||
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1965146498" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/extractor/Release}" id="cdt.managedbuild.target.gnu.builder.exe.release.1583162909" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.base.141140356" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.2048722912" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
|
||||
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.1971624451" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.582466413" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1466533418" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.328232610" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.447164665" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.exe.release.option.debugging.level.28848417" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1088446293" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1134906841" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.450257401" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.813260151" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.205332755" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.337244768" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="extractor.cdt.managedbuild.target.gnu.exe.1336860963" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1385955159;cdt.managedbuild.config.gnu.exe.release.1385955159.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.328232610;cdt.managedbuild.tool.gnu.c.compiler.input.1088446293">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1133345948;cdt.managedbuild.config.gnu.exe.debug.1133345948.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1512268277;cdt.managedbuild.tool.gnu.cpp.compiler.input.1839105433">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1385955159;cdt.managedbuild.config.gnu.exe.release.1385955159.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.2048722912;cdt.managedbuild.tool.gnu.cpp.compiler.input.1466533418">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1133345948;cdt.managedbuild.config.gnu.exe.debug.1133345948.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.554846982;cdt.managedbuild.tool.gnu.c.compiler.input.100176353">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/extractor"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/extractor"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
</cproject>
|
@ -4,6 +4,7 @@
|
||||
<comment></comment>
|
||||
<projects>
|
||||
<project>mert_lib</project>
|
||||
<project>util</project>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
|
@ -1,144 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
|
||||
<storageModule moduleId="org.eclipse.cdt.core.settings">
|
||||
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" moduleId="org.eclipse.cdt.core.settings" name="Debug">
|
||||
<externalSettings>
|
||||
<externalSetting>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/lm"/>
|
||||
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/lm/Debug"/>
|
||||
<entry flags="RESOLVED" kind="libraryFile" name="lm" srcPrefixMapping="" srcRootPath=""/>
|
||||
</externalSetting>
|
||||
</externalSettings>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
|
||||
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.640882096" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
|
||||
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.793478365" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
|
||||
<builder buildPath="${workspace_loc:/lm/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.36011795" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1252826468" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1024598065" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.139111896" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.62265891" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.588438623" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.775866405" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
|
||||
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.586969644" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.7139692" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.option.include.paths.1988092227" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
|
||||
<listOptionValue builtIn="false" value="/opt/local/include"/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../boost/include""/>
|
||||
<listOptionValue builtIn="false" value=""${workspace_loc}/../../""/>
|
||||
</option>
|
||||
<option id="gnu.cpp.compiler.option.preprocessor.def.1980966336" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
|
||||
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
|
||||
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
|
||||
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
|
||||
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
|
||||
</option>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
|
||||
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.934764060" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.2078705375" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1028526865" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
<sourceEntries>
|
||||
<entry excluding="left_test.cc|model_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
|
||||
</sourceEntries>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.203229648">
|
||||
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.203229648" moduleId="org.eclipse.cdt.core.settings" name="Release">
|
||||
<externalSettings/>
|
||||
<extensions>
|
||||
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
|
||||
</extensions>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.203229648" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
|
||||
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.203229648." name="/" resourcePath="">
|
||||
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.1942852701" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
|
||||
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.2107180060" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
|
||||
<builder buildPath="${workspace_loc:/lm/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.127652112" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.1668850519" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
|
||||
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.934899611" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.794276660" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
|
||||
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
|
||||
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
|
||||
</inputType>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.362272521" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
|
||||
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.370659018" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.2103660404" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
|
||||
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1671568858" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
|
||||
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.230723898" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
|
||||
</tool>
|
||||
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
|
||||
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1934130159" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
|
||||
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1848737807" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
|
||||
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1294441742" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
|
||||
</tool>
|
||||
</toolChain>
|
||||
</folderInfo>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
|
||||
</cconfiguration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
|
||||
<project id="lm.cdt.managedbuild.target.macosx.exe.1399596076" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
|
||||
</storageModule>
|
||||
<storageModule moduleId="scannerConfiguration">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750;cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140;cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.203229648;cdt.managedbuild.config.macosx.exe.release.203229648.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990;cdt.managedbuild.tool.gnu.c.compiler.input.1294441742">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750;cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722;cdt.managedbuild.tool.gnu.c.compiler.input.1028526865">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
|
||||
</scannerConfigBuildInfo>
|
||||
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.203229648;cdt.managedbuild.config.macosx.exe.release.203229648.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795;cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602">
|
||||
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
|
||||
</scannerConfigBuildInfo>
|
||||
</storageModule>
|
||||
<storageModule moduleId="refreshScope" versionNumber="2">
|
||||
<configuration configurationName="Release">
|
||||
<resource resourceType="PROJECT" workspacePath="/lm"/>
|
||||
</configuration>
|
||||
<configuration configurationName="Debug">
|
||||
<resource resourceType="PROJECT" workspacePath="/lm"/>
|
||||
</configuration>
|
||||
</storageModule>
|
||||
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
|
||||
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
|
||||
</cproject>
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue
Block a user