merged master into this branch

This commit is contained in:
Marcin Junczys-Dowmunt 2014-07-21 10:39:59 +02:00
commit 8f861a7f9e
375 changed files with 23468 additions and 7837 deletions

15
Jamroot
View File

@ -114,10 +114,24 @@ requirements += [ option.get "with-mm" : : <define>PT_UG ] ;
requirements += [ option.get "with-mm" : : <define>MAX_NUM_FACTORS=4 ] ;
requirements += [ option.get "unlabelled-source" : : <define>UNLABELLED_SOURCE ] ;
if [ option.get "with-lbllm" ] {
external-lib boost_serialization ;
external-lib gomp ;
requirements += <library>boost_serialization ;
requirements += <library>gomp ;
}
if [ option.get "with-cmph" ] {
requirements += <define>HAVE_CMPH ;
}
if [ option.get "with-probing-pt" : : "yes" ]
{
external-lib boost_serialization ;
requirements += <define>HAVE_PROBINGPT ;
requirements += <library>boost_serialization ;
}
project : default-build
<threading>multi
<warnings>on
@ -145,6 +159,7 @@ build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses
if [ option.get "with-mm" : : "yes" ]
{
alias mm :
moses/TranslationModel/UG//ptable-lookup
moses/TranslationModel/UG/mm//mtt-build
moses/TranslationModel/UG/mm//mtt-dump
moses/TranslationModel/UG/mm//symal2mam

View File

@ -66,10 +66,9 @@ int main (int argc, char * const argv[])
PhraseNode &rootNode = onDiskWrapper.GetRootSourceNode();
size_t lineNum = 0;
char line[100000];
string line;
//while(getline(inStream, line))
while(inStream.getline(line, 100000)) {
while(getline(inStream, line)) {
lineNum++;
if (lineNum%1000 == 0) cerr << "." << flush;
if (lineNum%10000 == 0) cerr << ":" << flush;
@ -107,8 +106,13 @@ bool Flush(const OnDiskPt::SourcePhrase *prevSourcePhrase, const OnDiskPt::Sourc
return ret;
}
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, char *line, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhrase, const std::string &lineStr, OnDiskWrapper &onDiskWrapper, int numScores, vector<float> &misc)
{
char line[lineStr.size() + 1];
strcpy(line, lineStr.c_str());
stringstream sparseFeatures, property;
size_t scoreInd = 0;
// MAIN LOOP
@ -118,6 +122,7 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
2 = scores
3 = align
4 = count
7 = properties
*/
char *tok = strtok (line," ");
OnDiskPt::PhrasePtr out(new Phrase());
@ -148,29 +153,20 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
targetPhrase.CreateAlignFromString(tok);
break;
}
case 4:
++stage;
break;
/* case 5: {
// count info. Only store the 2nd one
float val = Moses::Scan<float>(tok);
misc[0] = val;
++stage;
break;
}*/
case 4: {
// store only the 3rd one (rule count)
float val = Moses::Scan<float>(tok);
misc[0] = val;
break;
}
case 5: {
// count info. Only store the 2nd one
//float val = Moses::Scan<float>(tok);
//misc[0] = val;
++stage;
// sparse features
sparseFeatures << tok << " ";
break;
}
case 6: {
// store only the 3rd one (rule count)
float val = Moses::Scan<float>(tok);
misc[0] = val;
++stage;
break;
property << tok << " ";
break;
}
default:
cerr << "ERROR in line " << line << endl;
@ -183,6 +179,8 @@ OnDiskPt::PhrasePtr Tokenize(SourcePhrase &sourcePhrase, TargetPhrase &targetPhr
} // while (tok != NULL)
assert(scoreInd == numScores);
targetPhrase.SetSparseFeatures(Moses::Trim(sparseFeatures.str()));
targetPhrase.SetProperty(Moses::Trim(property.str()));
targetPhrase.SortAlign();
return out;
} // Tokenize()

View File

@ -29,7 +29,7 @@ OnDiskPt::WordPtr Tokenize(OnDiskPt::Phrase &phrase
, const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
, OnDiskPt::OnDiskWrapper &onDiskWrapper, int retSourceTarget);
OnDiskPt::PhrasePtr Tokenize(OnDiskPt::SourcePhrase &sourcePhrase, OnDiskPt::TargetPhrase &targetPhrase
, char *line, OnDiskPt::OnDiskWrapper &onDiskWrapper
, const std::string &lineStr, OnDiskPt::OnDiskWrapper &onDiskWrapper
, int numScores
, std::vector<float> &misc);

View File

@ -31,7 +31,7 @@ using namespace std;
namespace OnDiskPt
{
int OnDiskWrapper::VERSION_NUM = 5;
int OnDiskWrapper::VERSION_NUM = 7;
OnDiskWrapper::OnDiskWrapper()
{

View File

@ -162,10 +162,14 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// allocate mem
size_t numScores = onDiskWrapper.GetNumScores()
,numAlign = GetAlign().size();
size_t sparseFeatureSize = m_sparseFeatures.size();
size_t propSize = m_property.size();
size_t memNeeded = sizeof(UINT64); // file pos (phrase id)
memNeeded += sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign; // align
memNeeded += sizeof(float) * numScores; // scores
size_t memNeeded = sizeof(UINT64) // file pos (phrase id)
+ sizeof(UINT64) + 2 * sizeof(UINT64) * numAlign // align
+ sizeof(float) * numScores // scores
+ sizeof(UINT64) + sparseFeatureSize // sparse features string
+ sizeof(UINT64) + propSize; // property string
char *mem = (char*) malloc(memNeeded);
//memset(mem, 0, memNeeded);
@ -183,11 +187,33 @@ char *TargetPhrase::WriteOtherInfoToMemory(OnDiskWrapper &onDiskWrapper, size_t
// scores
memUsed += WriteScoresToMemory(mem + memUsed);
// sparse features
memUsed += WriteStringToMemory(mem + memUsed, m_sparseFeatures);
// property string
memUsed += WriteStringToMemory(mem + memUsed, m_property);
//DebugMem(mem, memNeeded);
assert(memNeeded == memUsed);
return mem;
}
size_t TargetPhrase::WriteStringToMemory(char *mem, const std::string &str) const
{
size_t memUsed = 0;
UINT64 *memTmp = (UINT64*) mem;
size_t strSize = str.size();
memTmp[0] = strSize;
memUsed += sizeof(UINT64);
const char *charStr = str.c_str();
memcpy(mem + memUsed, charStr, strSize);
memUsed += strSize;
return memUsed;
}
size_t TargetPhrase::WriteAlignToMemory(char *mem) const
{
size_t memUsed = 0;
@ -279,6 +305,13 @@ Moses::TargetPhrase *TargetPhrase::ConvertToMoses(const std::vector<Moses::Facto
// scores
ret->GetScoreBreakdown().Assign(&phraseDict, m_scores);
// sparse features
ret->GetScoreBreakdown().Assign(&phraseDict, m_sparseFeatures);
// property
ret->SetProperties(m_property);
ret->Evaluate(mosesSP, phraseDict.GetFeaturesToApply());
return ret;
@ -299,9 +332,36 @@ UINT64 TargetPhrase::ReadOtherInfoFromFile(UINT64 filePos, std::fstream &fileTPC
memUsed += ReadScoresFromFile(fileTPColl);
assert((memUsed + filePos) == (UINT64)fileTPColl.tellg());
// sparse features
memUsed += ReadStringFromFile(fileTPColl, m_sparseFeatures);
// properties
memUsed += ReadStringFromFile(fileTPColl, m_property);
return memUsed;
}
/**
 * Read a string stored as a UINT64 byte count followed by that many raw bytes.
 *
 * @param fileTPColl  stream positioned at the length prefix.
 * @param outStr      receives exactly the stored bytes; it is emptied when the
 *                    stored length is zero.
 * @return            number of bytes consumed (prefix plus payload).
 */
UINT64 TargetPhrase::ReadStringFromFile(std::fstream &fileTPColl, std::string &outStr)
{
  UINT64 bytesRead = 0;

  // Initialised so that a failed read yields an empty string instead of an
  // allocation of indeterminate size.
  UINT64 strSize = 0;
  fileTPColl.read(reinterpret_cast<char*>(&strSize), sizeof(UINT64));
  bytesRead += sizeof(UINT64);

  // Read straight into the string: no manual malloc/free, and the result is
  // length-based, so payload bytes equal to '\0' are preserved.
  outStr.resize(strSize);
  if (strSize) {
    fileTPColl.read(&outStr[0], strSize);
    bytesRead += strSize;
  }

  return bytesRead;
}
UINT64 TargetPhrase::ReadFromFile(std::fstream &fileTP)
{
UINT64 bytesRead = 0;

View File

@ -50,15 +50,18 @@ class TargetPhrase: public Phrase
protected:
AlignType m_align;
PhrasePtr m_sourcePhrase;
std::string m_sparseFeatures, m_property;
std::vector<float> m_scores;
UINT64 m_filePos;
size_t WriteAlignToMemory(char *mem) const;
size_t WriteScoresToMemory(char *mem) const;
size_t WriteStringToMemory(char *mem, const std::string &str) const;
UINT64 ReadAlignFromFile(std::fstream &fileTPColl);
UINT64 ReadScoresFromFile(std::fstream &fileTPColl);
UINT64 ReadStringFromFile(std::fstream &fileTPColl, std::string &outStr);
public:
TargetPhrase() {
@ -110,6 +113,15 @@ public:
virtual void DebugPrint(std::ostream &out, const Vocab &vocab) const;
// Replace the stored property string (m_property) with a copy of value.
void SetProperty(const std::string &value)
{
m_property = value;
}
// Replace the stored sparse-feature string (m_sparseFeatures) with a copy of value.
void SetSparseFeatures(const std::string &value)
{
m_sparseFeatures = value;
}
};
}

View File

@ -104,14 +104,20 @@ void Word::ConvertToMoses(
Moses::FactorCollection &factorColl = Moses::FactorCollection::Instance();
overwrite = Moses::Word(m_isNonTerminal);
// TODO: this conversion should have been done at load time.
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
if (m_isNonTerminal) {
const std::string &tok = vocab.GetString(m_vocabId);
overwrite.SetFactor(0, factorColl.AddFactor(tok, m_isNonTerminal));
}
else {
// TODO: this conversion should have been done at load time.
util::TokenIter<util::SingleCharacter> tok(vocab.GetString(m_vocabId), '|');
for (std::vector<Moses::FactorType>::const_iterator t = outputFactorsVec.begin(); t != outputFactorsVec.end(); ++t, ++tok) {
UTIL_THROW_IF2(!tok, "Too few factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
overwrite.SetFactor(*t, factorColl.AddFactor(*tok, m_isNonTerminal));
}
UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
UTIL_THROW_IF2(tok, "Too many factors in \"" << vocab.GetString(m_vocabId) << "\"; was expecting " << outputFactorsVec.size());
}
int Word::Compare(const Word &compare) const

View File

@ -0,0 +1,122 @@
# Moses speedtesting framework
### Description
This is an automatic test framework that is designed to test the day to day performance changes in Moses.
### Set up
#### Set up a Moses repo
Set up a Moses repo and build it with the desired configuration.
```bash
git clone https://github.com/moses-smt/mosesdecoder.git
cd mosesdecoder
./bjam -j10 --with-cmph=/usr/include/
```
You need to build Moses first, so that the testsuite knows what command you want it to use when rebuilding against newer revisions.
#### Create a parent directory.
Create a parent directory where the **runtests.py** and related scripts and configuration file should reside.
This should also be the location of the TEST_DIR and TEST_LOG_DIR as explained in the next section.
#### Set up a global configuration file.
You need a configuration file for the testsuite. A sample configuration file is provided in **testsuite\_config**
<pre>
MOSES_REPO_PATH: /home/moses-speedtest/moses-standard/mosesdecoder
DROP_CACHES_COMM: sys_drop_caches 3
TEST_DIR: /home/moses-speedtest/phrase_tables/tests
TEST_LOG_DIR: /home/moses-speedtest/phrase_tables/testlogs
BASEBRANCH: RELEASE-2.1.1
</pre>
The _MOSES\_REPO\_PATH_ is the place where you have set up and built moses.
The _DROP\_CACHES\_COMM_ is the command that will be used to drop caches. It should run without needing root access.
_TEST\_DIR_ is the directory where all the tests will reside.
_TEST\_LOG\_DIR_ is the directory where the performance logs will be gathered. It should be created before running the testsuite for the first time.
_BASEBRANCH_ is the branch against which all new tests will be compared. It should normally be set to be the latest Moses stable release.
### Creating tests
In order to create a test one should go into the TEST_DIR and create a new folder. That folder will be used for the name of the test.
Inside that folder one should place a configuration file named **config**. The naming is mandatory.
An example such configuration file is **test\_config**
<pre>
Command: moses -f ... -i fff #Looks for the command in the /bin directory of the repo specified in the testsuite_config
LDPRE: ldpreloads #Comma separated LD_LIBRARY_PATH:/,
Variants: vanilla, cached, ldpre #Can't have cached without ldpre or vanilla
</pre>
The _Command:_ line specifies the executable (which is looked up in the /bin directory of the repo.) and any arguments necessary. Before running the test, the script cds to the current test directory so you can use relative paths.
The _LDPRE:_ specifies if tests should be run with any LD\_PRELOAD flags.
The _Variants:_ line specifies what type of tests should we run. This particular line will run the following tests:
1. A Vanilla test meaning just the command after _Command_ will be issued.
2. A vanilla cached test meaning that after the vanilla test, the test will be run again without dropping caches in order to benchmark performance on cached filesystem.
3. A test that runs the command with LD_PRELOAD set, repeated once for each library in the comma-separated _LDPRE_ list.
4. A cached version of all LD_PRELOAD tests.
### Running tests.
Running the tests is done through the **runtests.py** script.
#### Running all tests.
To run all tests with the base branch and the latest revision (and generate new base branch test data if it is missing), run:
```bash
python3 runtests.py -c testsuite_config
```
#### Running specific tests.
The script allows the user to manually run a particular test or to test against a specific branch or revision:
<pre>
moses-speedtest@crom:~/phrase_tables$ python3 runtests.py --help
usage: runtests.py [-h] -c CONFIGFILE [-s SINGLETESTDIR] [-r REVISION]
[-b BRANCH]
A python based speedtest suite for moses.
optional arguments:
-h, --help show this help message and exit
-c CONFIGFILE, --configfile CONFIGFILE
Specify test config file
-s SINGLETESTDIR, --singletest SINGLETESTDIR
Single test name directory. Specify directory name,
not full path!
-r REVISION, --revision REVISION
Specify a specific revison for the test.
-b BRANCH, --branch BRANCH
Specify a branch for the test.
</pre>
### Generating HTML report.
To generate a summary of the test results use the **html\_gen.py** script. It places a file named *index.html* in the current script directory.
```bash
python3 html_gen.py testsuite_config
```
You should use the generated file with the **style.css** file provided in the html directory.
### Command line regression testing.
Alternatively you could check for regressions from the command line using the **check\_for\_regression.py** script:
```bash
python3 check_for_regression.py TESTLOGS_DIRECTORY
```
The results of all tests are also logged inside the specified TESTLOGS directory, so you can manually check them for additional information such as date, time, revision, branch, etc.
### Create a cron job:
Create a cron job to run the tests daily and generate an html report. An example *cronjob* is available.
```bash
#!/bin/sh
cd /home/moses-speedtest/phrase_tables
python3 runtests.py -c testsuite_config #Run the tests.
python3 html_gen.py testsuite_config #Generate html
cp index.html /fs/thor4/html/www/speed-test/ #Update the html
```
Place the script in _/etc/cron.daily_ for daily testing.
###### Author
Nikolay Bogoychev, 2014
###### License
This software is licensed under the LGPL.

View File

@ -0,0 +1,63 @@
"""Checks if any of the latests tests has performed considerably different than
the previous ones. Takes the log directory as an argument."""
import os
import sys
from testsuite_common import Result, processLogLine, bcolors, getLastTwoLines
# First command line argument: the directory that contains the test logs.
LOGDIR = sys.argv[1] #Get the log directory as an argument
# Threshold used below to classify a test as regressed or improved.
PERCENTAGE = 5 #Default value for how much a test should change
# An optional second command line argument overrides the default threshold.
if len(sys.argv) == 3:
PERCENTAGE = float(sys.argv[2]) #Default is 5%, but a different value can be
#given as the second command line parameter
def printResults(regressed, better, unchanged, firsttime):
"""Pretty print the results in different colours.

Each argument is a list; a list that is empty produces no output.
Items in regressed and better are printed with their testname, previous,
current, percentage and revision attributes; items in unchanged with
testname and revision; items in firsttime with testname, real and revision.
"""
# NOTE(review): the indentation of this file was lost, so it is not visible
# here whether each print('\n') belongs to the loop or follows it - confirm
# against the original before restructuring.
# Regressions, in red.
if regressed != []:
for item in regressed:
print(bcolors.RED + "REGRESSION! " + item.testname + " Was: "\
+ str(item.previous) + " Is: " + str(item.current) + " Change: "\
+ str(abs(item.percentage)) + "%. Revision: " + item.revision\
+ bcolors.ENDC)
print('\n')
# Tests without a significant change, in blue.
if unchanged != []:
for item in unchanged:
print(bcolors.BLUE + "UNCHANGED: " + item.testname + " Revision: " +\
item.revision + bcolors.ENDC)
print('\n')
# Improvements, in green.
if better != []:
for item in better:
print(bcolors.GREEN + "IMPROVEMENT! " + item.testname + " Was: "\
+ str(item.previous) + " Is: " + str(item.current) + " Change: "\
+ str(abs(item.percentage)) + "%. Revision: " + item.revision\
+ bcolors.ENDC)
# Tests that have only a single log entry so far, in purple.
if firsttime != []:
for item in firsttime:
print(bcolors.PURPLE + "First time test! " + item.testname +\
" Took: " + str(item.real) + " seconds. Revision: " +\
item.revision + bcolors.ENDC)
all_files = os.listdir(LOGDIR)
regressed = []
better = []
unchanged = []
firsttime = []

# PERCENTAGE is expressed in percent (default 5). The HTML report in this
# suite treats Result.percentage as a fraction (0.05 means 5%), so convert the
# threshold to the same unit before comparing.
# NOTE(review): confirm against the Result class in testsuite_common.
THRESHOLD = PERCENTAGE / 100.0

#Go through all log files and find which tests have performed better.
for logfile in all_files:
    (line1, line2) = getLastTwoLines(logfile, LOGDIR)
    log1 = processLogLine(line1)
    if line2 == '\n': # Empty line, only one test ever run
        firsttime.append(log1)
        continue
    log2 = processLogLine(line2)
    res = Result(log1.testname, log1.real, log2.real, log2.revision,\
        log2.branch, log1.revision, log1.branch)
    # Use the same relative measure for both directions; previously the
    # improvement branch compared a different attribute (res.change) against
    # the percentage threshold.
    if res.percentage < -THRESHOLD:
        regressed.append(res)
    elif res.percentage > THRESHOLD:
        better.append(res)
    else:
        unchanged.append(res)

printResults(regressed, better, unchanged, firsttime)

View File

@ -0,0 +1,7 @@
#!/bin/sh
cd /home/moses-speedtest/phrase_tables
python3 runtests.py -c testsuite_config #Run the tests.
python3 html_gen.py testsuite_config #Generate html
cp index.html /fs/thor4/html/www/speed-test/ #Update the html

View File

@ -0,0 +1,5 @@
###Helpers
This is a python script that basically gives you the equivalent of:
```echo 3 > /proc/sys/vm/drop_caches```
You need to set it up so it is executed with root access without needing a password so that the tests can be automated.

View File

@ -0,0 +1,22 @@
#!/usr/bin/spython
"""Write the requested flush type (1, 2 or 3) to /proc/sys/vm/drop_caches.

Usage: sys_drop_caches <1|2|3>
Exits with status 0 on success and 1 on any error.
"""
from sys import argv, stderr, exit
from os import linesep as ls

procfile = "/proc/sys/vm/drop_caches"
options = ["1","2","3"]
flush_type = None
try:
    # Only the first character of the argument is considered.
    flush_type = argv[1][0:1]
    if not flush_type in options:
        # Reuse the missing-argument handler for invalid values.
        raise IndexError("not in options")
    with open(procfile, "w") as f:
        f.write("%s%s" % (flush_type,ls))
    exit(0)
# The call form of raise and the bare except clauses are valid in both
# Python 2 and Python 3; the previous "raise X, msg" / "except X, e" /
# StandardError spelling only parses under Python 2.
except IndexError:
    stderr.write("Argument %s required.%s" % (options, ls))
except IOError:
    stderr.write("Error writing to file.%s" % ls)
except Exception:
    stderr.write("Unknown Error.%s" % ls)
exit(1)

View File

@ -0,0 +1,5 @@
###HTML files.
_index.html_ is a sample generated file by this testsuite.
_style.css_ should be placed in the html directory in which _index.html_ will be placed in order to visualize the test results in a browser.

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,21 @@
/* Thin black borders, collapsed so neighbouring cells share one line. */
table,th,td
{
border:1px solid black;
border-collapse:collapse
}
/* Shade every odd row. */
tr:nth-child(odd) {
background-color: Gainsboro;
}
/* Text colours for the three outcome classes: better, worse, unchanged. */
.better {
color: Green;
}
.worse {
color: Red;
}
.unchanged {
color: SkyBlue;
}

View File

@ -0,0 +1,192 @@
"""Generates HTML page containing the testresults"""
from testsuite_common import Result, processLogLine, getLastTwoLines
from runtests import parse_testconfig
import os
import sys
from datetime import datetime, timedelta
# Opening of the report page: document head, stylesheet link and <body> tag.
HTML_HEADING = """<html>
<head>
<title>Moses speed testing</title>
<link rel="stylesheet" type="text/css" href="style.css"></head><body>"""
# Closes the table opened by TABLE_HEADING, then the body and the document.
HTML_ENDING = "</table></body></html>\n"
# Opens the results table and emits its header row: the latest run, the
# comparison with the previous run and with the base branch, then one
# time/change column pair per past date.
TABLE_HEADING = """<table><tr class="heading">
<th>Date</th>
<th>Time</th>
<th>Testname</th>
<th>Revision</th>
<th>Branch</th>
<th>Time</th>
<th>Prevtime</th>
<th>Prevrev</th>
<th>Change (%)</th>
<th>Time (Basebranch)</th>
<th>Change (%, Basebranch)</th>
<th>Time (Days -2)</th>
<th>Change (%, Days -2)</th>
<th>Time (Days -3)</th>
<th>Change (%, Days -3)</th>
<th>Time (Days -4)</th>
<th>Change (%, Days -4)</th>
<th>Time (Days -5)</th>
<th>Change (%, Days -5)</th>
<th>Time (Days -6)</th>
<th>Change (%, Days -6)</th>
<th>Time (Days -7)</th>
<th>Change (%, Days -7)</th>
<th>Time (Days -14)</th>
<th>Change (%, Days -14)</th>
<th>Time (Years -1)</th>
<th>Change (%, Years -1)</th>
</tr>"""
def get_prev_days(date, numdays):
    """Return the date lying ``numdays`` days before ``date``.

    Both the argument and the result are strings in ``DD.MM.YYYY`` form,
    which is the format of the first field of a log line.
    """
    date_format = '%d.%m.%Y'
    reference_day = datetime.strptime(date, date_format).date()
    return (reference_day - timedelta(days=numdays)).strftime(date_format)
def gather_necessary_lines(logfile, date):
    """Collect the log entries recorded a fixed number of days before ``date``.

    Returns a dictionary keyed by date string (``DD.MM.YYYY``) for the days
    2-7, 14 and 365 before ``date``. Each value is a tuple
    ``(label, entry)`` where ``label`` is the negative day offset as a string
    (for example ``'-14'``) and ``entry`` is the parsed log line for that
    date, or ``None`` when the log has no line for it. If several lines share
    a date, the last one in the file wins.
    """
    dates = {}
    for numdays in (2, 3, 4, 5, 6, 7, 14, 365):
        dates[get_prev_days(date, numdays)] = ('-' + str(numdays), None)

    # The with-statement closes the log even if parsing a line raises.
    with open(logfile, 'r') as openfile:
        for line in openfile:
            fields = line.split()
            if not fields:
                continue  # A blank line has no date field to inspect.
            run_date = fields[0]
            if run_date in dates:
                day = dates[run_date][0]
                dates[run_date] = (day, processLogLine(line))
    return dates
def append_date_to_table(resline):
    """Build the two table cells for one past-date comparison.

    The first cell holds ``resline.previous``; the second holds
    ``resline.percentage`` and carries the CSS class ``better`` (above 0.05),
    ``worse`` (below -0.05) or ``unchanged`` (anything in between).
    """
    change = resline.percentage
    if change > 0.05:
        css_class = 'better'      # Improvement of more than 5%
    elif change < -0.05:
        css_class = 'worse'       # Regression of more than 5%
    else:
        css_class = 'unchanged'
    previous_cell = '<td>' + str(resline.previous) + '</td>'
    change_cell = '<td class="' + css_class + '">' + str(change) + '</td>'
    return previous_cell + change_cell
def compare_rev(filename, rev1, rev2, branch1=False, branch2=False):
    """Compare the logged results of two runs found in ``filename``.

    ``rev1`` identifies the base run and ``rev2`` the later run. Each is
    matched against the revision field of a log line (whitespace-separated
    field 2) unless the corresponding ``branch1``/``branch2`` flag is set, in
    which case it is matched against the branch field (field 12).

    Returns a ``Result`` built from the two matching lines.
    Raises ``ValueError`` if either run is not present in the log.
    """
    firstidx = 12 if branch1 else 2
    secondidx = 12 if branch2 else 2
    needed_fields = max(firstidx, secondidx) + 1

    rev1line = ''
    rev2line = ''
    # The with-statement closes the log even if an exception is raised.
    with open(filename, 'r') as resfile:
        for line in resfile:
            fields = line.split()
            if len(fields) < needed_fields:
                # Blank or truncated line: it cannot match, and indexing it
                # would raise IndexError.
                continue
            if rev1 == fields[firstidx]:
                rev1line = line
            elif rev2 == fields[secondidx]:
                rev2line = line
            if rev1line != '' and rev2line != '':
                break

    if rev1line == '':
        raise ValueError('Revision ' + rev1 + " was not found!")
    if rev2line == '':
        raise ValueError('Revision ' + rev2 + " was not found!")

    logLine1 = processLogLine(rev1line)
    logLine2 = processLogLine(rev2line)
    res = Result(logLine1.testname, logLine1.real, logLine2.real,\
        logLine2.revision, logLine2.branch, logLine1.revision, logLine1.branch)
    return res
def produce_html(path, global_config):
    """Produce the HTML report and write it to ``path``.

    One table row is generated per file in ``global_config.testlogs``. A row
    holds the latest run, its comparison with the previous run, its
    comparison with the run on ``global_config.basebranch`` and comparisons
    with the runs logged 2-7, 14 and 365 days before the latest run
    (``N/A`` where no such run exists).
    """
    def change_cell(percentage):
        """Return a <td> for a relative change, coloured by the 5% limits."""
        if percentage > 0.05: #If we have improvement of more than 5%
            css_class = 'better'
        elif percentage < -0.05: #We have a regression of more than 5%
            css_class = 'worse'
        else:
            css_class = 'unchanged'
        return '<td class="' + css_class + '">' + str(percentage) + '</td>'

    html = '' #The table HTML
    res2 = None #Base branch comparison of the most recently processed log
    for filenam in os.listdir(global_config.testlogs):
        #Generate html for the newest two lines of the log file
        (ll1, ll2) = getLastTwoLines(filenam, global_config.testlogs)
        logLine1 = processLogLine(ll1)
        logLine2 = processLogLine(ll2) #This is the line from the latest revision

        res1 = Result(logLine1.testname, logLine1.real, logLine2.real,\
            logLine2.revision, logLine2.branch, logLine1.revision, logLine1.branch)
        html = html + '<tr><td>' + logLine2.date + '</td><td>' + logLine2.time + '</td><td>' +\
            res1.testname + '</td><td>' + res1.revision[:10] + '</td><td>' + res1.branch + '</td><td>' +\
            str(res1.current) + '</td><td>' + str(res1.previous) + '</td><td>' + res1.prevrev[:10] + '</td>'
        html = html + change_cell(res1.percentage)

        #Get comparison against the base version
        logpath = global_config.testlogs + '/' + filenam #Get proper directory
        res2 = compare_rev(logpath, global_config.basebranch, res1.revision, branch1=True)
        html = html + '<td>' + str(res2.previous) + '</td>'
        html = html + change_cell(res2.percentage)

        #Add comparisons against earlier dates if they exist
        past_dates = list(range(2, 8)) + [14, 365] # 365: the day one year ago
        linesdict = gather_necessary_lines(logpath, logLine2.date)
        for days in past_dates:
            act_date = get_prev_days(logLine2.date, days)
            logline_date = linesdict[act_date][1]
            if logline_date is not None:
                restemp = Result(logline_date.testname, logline_date.real, logLine2.real,\
                    logLine2.revision, logLine2.branch, logline_date.revision, logline_date.branch)
                html = html + append_date_to_table(restemp)
            else:
                html = html + '<td>N/A</td><td>N/A</td>'

        html = html + '</tr>' #End row

    #Write out the file
    if res2 is None:
        #No log files were found: there is no base revision to report, and
        #reading res2 here would otherwise raise NameError.
        basebranch_info = '<text><b>Basebranch:</b> ' + global_config.basebranch +\
            ' <b>Revision:</b> N/A</text>'
    else:
        basebranch_info = '<text><b>Basebranch:</b> ' + res2.prevbranch + ' <b>Revision:</b> ' +\
            res2.prevrev + '</text>'
    writeoutstr = HTML_HEADING + basebranch_info + TABLE_HEADING + html + HTML_ENDING
    #The with-statement closes the report even if the write fails.
    with open(path, 'w') as writefile:
        writefile.write(writeoutstr)
# Script entry point: the first command line argument is the testsuite
# configuration file; the report is written to index.html.
if __name__ == '__main__':
CONFIG = parse_testconfig(sys.argv[1])
produce_html('index.html', CONFIG)

View File

@ -0,0 +1,293 @@
"""Given a config file, runs tests"""
import os
import subprocess
import time
from argparse import ArgumentParser
from testsuite_common import processLogLine
def parse_cmd():
    """Parse the command line and return the argparse namespace.

    The namespace has the attributes ``configfile`` (required),
    ``singletestdir``, ``revision`` and ``branch`` (each ``None`` when the
    option is not given).
    """
    parser = ArgumentParser(
        description="A python based speedtest suite for moses.")
    parser.add_argument("-c", "--configfile", dest="configfile",
                        action="store", required=True,
                        help="Specify test config file")
    parser.add_argument("-s", "--singletest", dest="singletestdir",
                        action="store", default=None,
                        help="Single test name directory. Specify directory name,\
                        not full path!")
    parser.add_argument("-r", "--revision", dest="revision",
                        action="store", default=None,
                        help="Specify a specific revison for the test.")
    parser.add_argument("-b", "--branch", dest="branch",
                        action="store", default=None,
                        help="Specify a branch for the test.")
    return parser.parse_args()
def repoinit(testconfig):
    """Check out the requested state of the repository and return its revision.

    Changes the working directory to ``testconfig.repo``, checks out
    ``testconfig.branch`` and then, if ``testconfig.revision`` is set, that
    specific revision. When testing ``master`` without an explicit revision
    the branch is updated with ``git pull`` first.

    Returns the revision as a string: the explicit revision when one was
    requested, otherwise the hash of HEAD.
    """
    def current_head():
        """Return the hash of HEAD as text without the trailing newline."""
        rev, _ = subprocess.Popen(['git', 'rev-parse', 'HEAD'],\
            stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
        # communicate() returns bytes; decode them rather than editing the
        # repr() of the bytes object.
        return rev.decode().strip()

    revision = ''
    #Update the repo
    os.chdir(testconfig.repo)
    #Checkout specific branch, else maintain main branch
    if testconfig.branch != 'master':
        subprocess.call(['git', 'checkout', testconfig.branch])
        revision = current_head()
    else:
        subprocess.call(['git', 'checkout', 'master'])

    #Check a specific revision. Else update master.
    if testconfig.revision:
        subprocess.call(['git', 'checkout', testconfig.revision])
        revision = testconfig.revision
    elif testconfig.branch == 'master':
        subprocess.call(['git', 'pull'])
        revision = current_head()

    return revision
class Configuration:
    """Holds the settings of the whole testsuite.

    The constructor arguments come from the testsuite configuration file;
    ``singletest``, ``revision`` and ``branch`` are filled in afterwards from
    the command line and from the repository state.
    """
    def __init__(self, repo, drop_caches, tests, testlogs, basebranch, baserev):
        # Values read from the configuration file.
        self.repo, self.drop_caches = repo, drop_caches
        self.tests, self.testlogs = tests, testlogs
        self.basebranch, self.baserev = basebranch, baserev
        # Values that may be overridden later.
        self.singletest = None
        self.revision = None
        self.branch = 'master' # Default branch

    def additional_args(self, singletest, revision, branch):
        """Apply command line settings.

        ``singletest`` is always stored; ``revision`` and ``branch`` only
        replace the current values when they are not ``None``.
        """
        self.singletest = singletest
        self.revision = self.revision if revision is None else revision
        self.branch = self.branch if branch is None else branch

    def set_revision(self, revision):
        """Record the revision that is currently being tested."""
        self.revision = revision
class Test:
    """Describes one test case: its name, command line and variants.

    ``ldopts`` is given as a comma separated string and stored as a list
    with all spaces removed.
    """
    def __init__(self, name, command, ldopts, permutations):
        self.name = name
        self.command = command
        self.permutations = permutations
        without_spaces = ''.join(ldopts.split(' '))
        self.ldopts = without_spaces.split(',') #Not tested yet
def parse_configfile(conffile, testdir, moses_repo):
    """Parse the configuration file of a single test.

    Recognised options are ``Command:``, ``LDPRE:`` and ``Variants:``;
    everything after a ``#`` is a comment. The command is prefixed with
    ``<moses_repo>/bin/``.

    Returns a ``Test`` named after ``testdir``.
    Raises ``ValueError`` for an unrecognised option.
    """
    command, ldopts = '', ''
    permutations = []
    # The with-statement closes the file even when an option is rejected.
    with open(conffile, 'r') as fileopen:
        for line in fileopen:
            # Discard comments and surrounding whitespace, so that indented
            # comments and whitespace-only lines are skipped as well.
            line = line.split('#')[0].strip()
            if not line:
                continue
            parts = line.split(None, 1) # Option name, then its arguments
            opt = parts[0]
            args = parts[1].strip() if len(parts) > 1 else ''
            if opt == 'Command:':
                command = moses_repo + '/bin/' + args
            elif opt == 'LDPRE:':
                ldopts = args
            elif opt == 'Variants:':
                permutations = args.replace(' ', '').split(',')
            else:
                raise ValueError('Unrecognized option ' + opt)
    #We use the testdir as the name.
    return Test(testdir, command, ldopts, permutations)
def parse_testconfig(conffile):
    """Parse the configuration file of the whole testsuite.

    Recognised options are ``MOSES_REPO_PATH:``, ``DROP_CACHES_COMM:``,
    ``TEST_DIR:``, ``TEST_LOG_DIR:``, ``BASEBRANCH:`` and ``BASEREV:``;
    everything after a ``#`` is a comment. Options that are absent default to
    the empty string.

    Returns a ``Configuration``.
    Raises ``ValueError`` for an unrecognised option.
    """
    # Maps each option name to the Configuration constructor argument it sets.
    option_names = {
        'MOSES_REPO_PATH:': 'repo',
        'DROP_CACHES_COMM:': 'drop_caches',
        'TEST_DIR:': 'tests',
        'TEST_LOG_DIR:': 'testlogs',
        'BASEBRANCH:': 'basebranch',
        'BASEREV:': 'baserev',
    }
    values = dict((name, '') for name in option_names.values())
    # The with-statement closes the file even when an option is rejected.
    with open(conffile, 'r') as fileopen:
        for line in fileopen:
            # Discard comments and surrounding whitespace, so that indented
            # comments and whitespace-only lines are skipped as well.
            line = line.split('#')[0].strip()
            if not line:
                continue
            parts = line.split(None, 1) # Option name, then its arguments
            opt = parts[0]
            args = parts[1].strip() if len(parts) > 1 else ''
            if opt not in option_names:
                raise ValueError('Unrecognized option ' + opt)
            values[option_names[opt]] = args
    return Configuration(values['repo'], values['drop_caches'], values['tests'],\
        values['testlogs'], values['basebranch'], values['baserev'])
def get_config():
    """Assemble the run configuration.

    Combines the configuration file named on the command line with the
    command line overrides, prepares the repository and stores the revision
    that ended up checked out.
    """
    cmdline = parse_cmd()
    config = parse_testconfig(cmdline.configfile)
    config.additional_args(cmdline.singletestdir, cmdline.revision, cmdline.branch)
    config.set_revision(repoinit(config))
    return config
def check_for_basever(testlogfile, basebranch):
    """Return True if ``testlogfile`` has an entry whose branch is ``basebranch``."""
    # The with-statement closes the log on every path; previously the file
    # object was never closed.
    with open(testlogfile, 'r') as filetoopen:
        for line in filetoopen:
            if processLogLine(line).branch == basebranch:
                return True
    return False
def split_time(filename):
    """Read the three timings written by ``time -p -o <filename>``.

    The timings are written to a file because many programs print to stderr,
    which makes it difficult to isolate the output of ``time``.

    The first three lines of the file must each hold a label followed by a
    number; the numbers are returned as ``(realtime, usertime, systime)``.
    """
    # The with-statement closes the file even if a line cannot be parsed.
    with open(filename, 'r') as timefile:
        realtime = float(timefile.readline().split()[1])
        usertime = float(timefile.readline().split()[1])
        systime = float(timefile.readline().split()[1])
    return (realtime, usertime, systime)
def write_log(time_file, logname, config):
    """Append one result line to the log ``<config.testlogs>/<logname>``.

    The line holds the current date and time, ``config.revision``, the log
    name as test name, the real/user/system times read from ``time_file``
    and ``config.branch``.
    """
    date_run = time.strftime("%d.%m.%Y %H:%M:%S") # Get the time of the test
    # Read the timings before touching the log, so that a missing or
    # malformed timing file neither creates an empty log nor leaks its handle.
    realtime, usertime, systime = split_time(time_file)

    writestr = date_run + " " + config.revision + " Testname: " + logname +\
        " RealTime: " + str(realtime) + " UserTime: " + str(usertime) +\
        " SystemTime: " + str(systime) + " Branch: " + config.branch +'\n'
    with open(config.testlogs + '/' + logname, 'a') as log_write:
        log_write.write(writestr)
def execute_tests(testcase, cur_directory, config):
    """Run the timed variants of one test and log every result.

    The working directory is changed to ``<config.tests>/<cur_directory>``.
    Depending on ``testcase.permutations`` the command is run:

    * ``vanilla``: once after dropping the caches, and once more without
      dropping them when ``cached`` is also requested;
    * ``ldpre``: the same pair of runs for every library in
      ``testcase.ldopts``, with that library in ``LD_PRELOAD``.
    """
    time_file = '/tmp/time_moses_tests'
    timed_command = 'time -p -o ' + time_file + ' ' + testcase.command
    cached = 'cached' in testcase.permutations

    def clear_caches():
        """Flush dirty pages and run the configured drop-caches command."""
        subprocess.call(['sync'], shell=True)
        subprocess.call([config.drop_caches], shell=True)

    def run_and_log(shell_command, logname, stderr=None):
        """Run one timed command through the shell and log its timings."""
        subprocess.Popen([shell_command], stdout=None,\
            stderr=stderr, shell=True).communicate()
        write_log(time_file, logname, config)

    #Change to the current test directory
    os.chdir(config.tests + '/' + cur_directory)
    clear_caches()

    #Perform vanilla test and if a cached test exists - as well
    print(testcase.name)
    if 'vanilla' in testcase.permutations:
        print(testcase.command)
        run_and_log(timed_command, testcase.name + '_vanilla',\
            stderr=subprocess.PIPE)
        if cached:
            run_and_log(timed_command, testcase.name + '_vanilla_cached')

    #Now perform LD_PRELOAD tests
    if 'ldpre' in testcase.permutations:
        for opt in testcase.ldopts:
            clear_caches()
            # The shell only treats NAME=value as an environment assignment;
            # "LD_PRELOAD <lib> ..." would try to run a program called
            # LD_PRELOAD instead of the test.
            preloaded_command = 'LD_PRELOAD=' + opt + ' ' + timed_command
            run_and_log(preloaded_command, testcase.name + '_ldpre_' + opt)
            if cached:
                run_and_log(preloaded_command,\
                    testcase.name + '_ldpre_' + opt + '_cached')
# Go through all the test directories and executes tests
if __name__ == '__main__':
CONFIG = get_config()
# Every entry of the tests directory is treated as one test case.
ALL_DIR = os.listdir(CONFIG.tests)
#We should first check if any of the tests is run for the first time.
#If some of them are run for the first time we should first get their
#time with the base version (usually the previous release)
FIRSTTIME = []
TESTLOGS = []
#Strip filenames of test underscores
# NOTE(review): logs written as <name>_ldpre_<opt> keep the '_<opt>' part
# after these replacements, so they may not match a directory name -- confirm.
for listline in os.listdir(CONFIG.testlogs):
listline = listline.replace('_vanilla', '')
listline = listline.replace('_cached', '')
listline = listline.replace('_ldpre', '')
TESTLOGS.append(listline)
# A test directory with no matching log name has never been run.
for directory in ALL_DIR:
if directory not in TESTLOGS:
FIRSTTIME.append(directory)
#Sometimes even though we have the log files, we will need to rerun them
#Against a base version, because we require a different baseversion (for
#example when a new version of Moses is released.) Therefore we should
#Check if the version of Moses that we have as a base version is in all
#of the log files.
for logfile in os.listdir(CONFIG.testlogs):
logfile_name = CONFIG.testlogs + '/' + logfile
if not check_for_basever(logfile_name, CONFIG.basebranch):
logfile = logfile.replace('_vanilla', '')
logfile = logfile.replace('_cached', '')
logfile = logfile.replace('_ldpre', '')
FIRSTTIME.append(logfile)
FIRSTTIME = list(set(FIRSTTIME)) #Deduplicate
if FIRSTTIME != []:
#Create a new configuration for base version tests:
BASECONFIG = Configuration(CONFIG.repo, CONFIG.drop_caches,\
CONFIG.tests, CONFIG.testlogs, CONFIG.basebranch,\
CONFIG.baserev)
BASECONFIG.additional_args(None, CONFIG.baserev, CONFIG.basebranch)
#Set up the repository and get its revision:
REVISION = repoinit(BASECONFIG)
BASECONFIG.set_revision(REVISION)
#Build
os.chdir(BASECONFIG.repo)
subprocess.call(['./previous.sh'], shell=True)
#Perform tests
for directory in FIRSTTIME:
cur_testcase = parse_configfile(BASECONFIG.tests + '/' + directory +\
'/config', directory, BASECONFIG.repo)
execute_tests(cur_testcase, directory, BASECONFIG)
#Reset back the repository to the normal configuration
# NOTE(review): whether this reset and the build below run only after a
# base-version pass or on every invocation depends on the original
# indentation -- confirm.
repoinit(CONFIG)
#Builds moses
os.chdir(CONFIG.repo)
subprocess.call(['./previous.sh'], shell=True)
# Run either the single requested test or every test directory.
if CONFIG.singletest:
TESTCASE = parse_configfile(CONFIG.tests + '/' +\
CONFIG.singletest + '/config', CONFIG.singletest, CONFIG.repo)
execute_tests(TESTCASE, CONFIG.singletest, CONFIG)
else:
for directory in ALL_DIR:
cur_testcase = parse_configfile(CONFIG.tests + '/' + directory +\
'/config', directory, CONFIG.repo)
execute_tests(cur_testcase, directory, CONFIG)

View File

@ -0,0 +1,22 @@
#!/usr/bin/spython
from sys import argv, stderr, exit
from os import linesep as ls
# Write the requested flush level to the kernel's drop_caches control file.
# Exits with status 0 on success; on any failure a message is written to
# stderr and the exit status is 1.
procfile = "/proc/sys/vm/drop_caches"
options = ["1","2","3"]
flush_type = None
try:
    # Only the first character of the first argument is looked at; a
    # missing argument raises IndexError by itself.
    flush_type = argv[1][0:1]
    if flush_type not in options:
        # Reuse the IndexError path so that a bad value and a missing
        # argument produce the same usage message.
        raise IndexError("not in options")
    with open(procfile, "w") as f:
        f.write("%s%s" % (flush_type,ls))
    # exit() raises SystemExit, which none of the handlers below catch.
    exit(0)
except IndexError:
    stderr.write("Argument %s required.%s" % (options, ls))
except IOError:
    stderr.write("Error writing to file.%s" % ls)
except Exception:
    # Exception instead of StandardError: the latter does not exist in
    # Python 3.
    stderr.write("Unknown Error.%s" % ls)
exit(1)

View File

@ -0,0 +1,3 @@
Command: moses -f ... -i fff #Looks for the command in the /bin directory of the repo specified in the testsuite_config
LDPRE: ldpreloads #Comma separated LD_LIBRARY_PATH:/,
Variants: vanilla, cached, ldpre #Can't have cached without ldpre or vanilla

View File

@ -0,0 +1,54 @@
"""Common functions of the testsuite"""
import os
#Colour constants
class bcolors:
    """Escape sequences for coloured terminal output.

    ENDC is the reset sequence; the other attributes each select one
    foreground colour.
    """
    PURPLE = '\033[95m'
    BLUE = '\033[94m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    RED = '\033[91m'
    # Reset sequence.
    ENDC = '\033[0m'
class LogLine:
    """Plain record holding the fields of one line of a test log."""
    def __init__(self, date, time, revision, testname, real, user, system, branch):
        # When and for which revision/branch the test was run.
        self.date, self.time = date, time
        self.revision, self.branch = revision, branch
        self.testname = testname
        # The three timings reported for the run.
        self.real, self.user, self.system = real, user, system
class Result:
    """Comparison of a previous and a current benchmark timing.

    Besides the values passed in, two derived attributes are stored:
    ``change`` is ``previous - current`` and ``percentage`` is
    ``1 - current/previous`` cut down to four decimal places.
    """
    def __init__(self, testname, previous, current, revision, branch, prevrev, prevbranch):
        self.testname = testname
        # The two timings and their absolute difference.
        self.previous, self.current = previous, current
        self.change = previous - current
        # Where the current and the previous timing come from.
        self.revision, self.branch = revision, branch
        self.prevrev, self.prevbranch = prevrev, prevbranch
        # Going through a formatted string limits the number of digits.
        relative_gain = 1 - current/previous
        self.percentage = float(format(relative_gain, '.4f'))
def processLogLine(logline):
    """Turn one whitespace-separated log line into a LogLine object.

    Fields 0, 1 and 2 are taken as date, time and revision. Fields 4, 6,
    8, 10 and 12 are taken as test name, real, user and system time and
    branch; the three timings are converted to float. The fields in
    between are skipped.
    """
    fields = logline.split()
    date, clock, revision = fields[0], fields[1], fields[2]
    return LogLine(date, clock, revision, fields[4],
                   float(fields[6]), float(fields[8]), float(fields[10]),
                   fields[12])
def getLastTwoLines(filename, logdir):
    """Return the last two lines of a log file as a tuple.

    filename -- name of the log file
    logdir   -- directory that contains it

    If the file holds fewer than two lines (a new test), the first line,
    or '' for an empty file, is returned together with '\\n' as the
    second element.

    The file is read directly instead of through an external ``tail``
    process, so names containing spaces or shell metacharacters are
    handled and no process handle is left open.
    """
    before_last = None
    last = None
    with open(logdir + '/' + filename) as logfile:
        # Slide a two-line window over the file; only the final two
        # lines are kept.
        for line in logfile:
            before_last, last = last, line
    if before_last is None:  # Check for new tests
        return (last if last is not None else '', '\n')
    return (before_last, last)

View File

@ -0,0 +1,5 @@
MOSES_REPO_PATH: /home/moses-speedtest/moses-standard/mosesdecoder
DROP_CACHES_COMM: sys_drop_caches 3
TEST_DIR: /home/moses-speedtest/phrase_tables/tests
TEST_LOG_DIR: /home/moses-speedtest/phrase_tables/testlogs
BASEBRANCH: RELEASE-2.1.1

View File

@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>CreateOnDiskPt</name>
<comment></comment>
<projects>
<project>lm</project>
<project>moses</project>
<project>OnDiskPt</project>
<project>search</project>
<project>util</project>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.genmakebuilder</name>
<triggers>clean,full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder</name>
<triggers>full,incremental,</triggers>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.cdt.core.cnature</nature>
<nature>org.eclipse.cdt.core.ccnature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.managedBuildNature</nature>
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>Main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.cpp</locationURI>
</link>
<link>
<name>Main.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/OnDiskPt/Main.h</locationURI>
</link>
</linkedResources>
</projectDescription>

View File

@ -1,146 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings>
<externalSetting>
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/OnDiskPt"/>
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/OnDiskPt/Debug"/>
<entry flags="RESOLVED" kind="libraryFile" name="OnDiskPt" srcPrefixMapping="" srcRootPath=""/>
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.725420545" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.1586272140" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
<builder buildPath="${workspace_loc:/OnDiskPt/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.1909553559" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.30521110" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.478334849" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1328561226" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.108239817" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1825070846" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.901309550" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.676959181" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.1484480101" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1556683035" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="${workspace_loc}/../../"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.1052680347" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
<listOptionValue builtIn="false" value="WITH_THREADS"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.576529322" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.426851981" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1925590121" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
<fileInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.726316251" name="Main.h" rcbsApplicability="disable" resourcePath="Main.h" toolsToInvoke=""/>
<sourceEntries>
<entry excluding="Main.h|Main.cpp" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.701931933">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.701931933" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.701931933" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.701931933." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.5036266" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.396818757" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
<builder buildPath="${workspace_loc:/OnDiskPt/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.1081186575" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.894082374" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.640159085" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.1673993744" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.596082362" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.851420859" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.385722535" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.21058138" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.1704184753" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.171488636" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.843129626" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1014721928" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="OnDiskPt.cdt.managedbuild.target.macosx.exe.542902806" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.701931933;cdt.managedbuild.config.macosx.exe.release.701931933.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.1029035384;cdt.managedbuild.tool.gnu.c.compiler.input.1014721928">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978;cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.1161943634;cdt.managedbuild.tool.gnu.c.compiler.input.1925590121">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.846397978;cdt.managedbuild.config.gnu.macosx.exe.debug.846397978.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.2001028511;cdt.managedbuild.tool.gnu.cpp.compiler.input.1930757481">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.701931933;cdt.managedbuild.config.macosx.exe.release.701931933.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.983488413;cdt.managedbuild.tool.gnu.cpp.compiler.input.1034344194">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/OnDiskPt"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/OnDiskPt"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
</cproject>

View File

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>extract-ordering</name>
<name>consolidate</name>
<comment></comment>
<projects>
</projects>
@ -46,19 +46,9 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
<name>SentenceAlignment.cpp</name>
<name>consolidate-main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp</locationURI>
</link>
<link>
<name>SentenceAlignment.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h</locationURI>
</link>
<link>
<name>extract-ordering-main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ordering-main.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/consolidate-main.cpp</locationURI>
</link>
<link>
<name>tables-core.cpp</name>

View File

@ -1,138 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.1035891586" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.242178856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-ghkm/Debug}" id="cdt.managedbuild.builder.gnu.cross.430400318" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.251687262" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.962699619" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.230503798" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.433137197" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.971749711" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.984190691" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1374841264" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../phrase-extract&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1026620601" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1419857560" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.paths.668926503" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/lib64&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.2091468346" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="boost_program_options-mt"/>
<listOptionValue builtIn="false" value="boost_thread-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="bz2"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1684298294" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.320160974" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2021657841" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1689419664" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1825927494." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.2000920404" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1106451881" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-ghkm/Release}" id="cdt.managedbuild.builder.gnu.cross.727887705" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.819016498" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1057468997" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1130475273" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.164617278" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.406333630" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1059243022" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1068655225" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1213865062" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.764325642" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1299258961" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.896866692" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.276294580" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extract-ghkm.cdt.managedbuild.target.gnu.cross.exe.1830080171" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1312144641;cdt.managedbuild.tool.gnu.cpp.compiler.input.1204977083">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.c.compiler.251687262;cdt.managedbuild.tool.gnu.c.compiler.input.433137197">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1825927494;cdt.managedbuild.config.gnu.cross.exe.release.1825927494.;cdt.managedbuild.tool.gnu.cross.c.compiler.819016498;cdt.managedbuild.tool.gnu.c.compiler.input.164617278">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1410559002;cdt.managedbuild.config.gnu.cross.exe.debug.1410559002.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.367822268;cdt.managedbuild.tool.gnu.cpp.compiler.input.2075381818">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/extract-ghkm"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/extract-ghkm"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -26,49 +26,19 @@
</natures>
<linkedResources>
<link>
<name>Alignment.cpp</name>
<name>Hole.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/Hole.h</locationURI>
</link>
<link>
<name>Alignment.h</name>
<name>HoleCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Alignment.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.cpp</locationURI>
</link>
<link>
<name>AlignmentGraph.cpp</name>
<name>HoleCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.cpp</locationURI>
</link>
<link>
<name>AlignmentGraph.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/AlignmentGraph.h</locationURI>
</link>
<link>
<name>ComposedRule.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.cpp</locationURI>
</link>
<link>
<name>ComposedRule.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ComposedRule.h</locationURI>
</link>
<link>
<name>Exception.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Exception.h</locationURI>
</link>
<link>
<name>ExtractGHKM.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.cpp</locationURI>
</link>
<link>
<name>ExtractGHKM.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ExtractGHKM.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.h</locationURI>
</link>
<link>
<name>InputFileStream.cpp</name>
@ -80,31 +50,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/InputFileStream.h</locationURI>
</link>
<link>
<name>Jamfile</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Jamfile</locationURI>
</link>
<link>
<name>Main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Main.cpp</locationURI>
</link>
<link>
<name>Node.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.cpp</locationURI>
</link>
<link>
<name>Node.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Node.h</locationURI>
</link>
<link>
<name>Options.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Options.h</locationURI>
</link>
<link>
<name>OutputFileStream.cpp</name>
<type>1</type>
@ -116,54 +61,24 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
<name>ParseTree.cpp</name>
<name>SentenceAlignment.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.cpp</locationURI>
</link>
<link>
<name>ParseTree.h</name>
<name>SentenceAlignment.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ParseTree.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignment.h</locationURI>
</link>
<link>
<name>ScfgRule.cpp</name>
<name>SentenceAlignmentWithSyntax.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.cpp</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.cpp</locationURI>
</link>
<link>
<name>ScfgRule.h</name>
<name>SentenceAlignmentWithSyntax.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRule.h</locationURI>
</link>
<link>
<name>ScfgRuleWriter.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.cpp</locationURI>
</link>
<link>
<name>ScfgRuleWriter.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/ScfgRuleWriter.h</locationURI>
</link>
<link>
<name>Span.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.cpp</locationURI>
</link>
<link>
<name>Span.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Span.h</locationURI>
</link>
<link>
<name>Subgraph.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.cpp</locationURI>
</link>
<link>
<name>Subgraph.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/Subgraph.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/SentenceAlignmentWithSyntax.h</locationURI>
</link>
<link>
<name>SyntaxTree.cpp</name>
@ -186,14 +101,9 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
</link>
<link>
<name>XmlTreeParser.cpp</name>
<name>extract-rules-main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.cpp</locationURI>
</link>
<link>
<name>XmlTreeParser.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-ghkm/XmlTreeParser.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp</locationURI>
</link>
<link>
<name>tables-core.cpp</name>

View File

@ -1,134 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.456080129" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.582801917" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-mixed-syntax/Debug}" id="cdt.managedbuild.builder.gnu.cross.1220166455" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1245611568" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.2055012191" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1768196213" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.2007889843" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.855436310" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.506549229" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1497326561" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.606353571" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.740521305" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.1946120010" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
</option>
<option id="gnu.cpp.link.option.paths.1563475751" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.106010037" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.136661991" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2112208574" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.172930211" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.715007893">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.715007893" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.715007893." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.99436307" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.801178939" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-mixed-syntax/Release}" id="cdt.managedbuild.builder.gnu.cross.1999547547" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2138817906" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1481537766" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1967527847" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.442342681" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1604862038" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1847950300" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1130138972" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.870650754" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.158429528" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.2020667840" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1372779734" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.371006952" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1770045040" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.707592414" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extract-mixed-syntax.cdt.managedbuild.target.gnu.cross.exe.1868010260" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.715007893;cdt.managedbuild.config.gnu.cross.exe.release.715007893.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1604862038;cdt.managedbuild.tool.gnu.cpp.compiler.input.870650754">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.715007893;cdt.managedbuild.config.gnu.cross.exe.release.715007893.;cdt.managedbuild.tool.gnu.cross.c.compiler.2138817906;cdt.managedbuild.tool.gnu.c.compiler.input.442342681">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982;cdt.managedbuild.config.gnu.cross.exe.debug.1919499982.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1194558915;cdt.managedbuild.tool.gnu.cpp.compiler.input.2118510064">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1919499982;cdt.managedbuild.config.gnu.cross.exe.debug.1919499982.;cdt.managedbuild.tool.gnu.cross.c.compiler.1245611568;cdt.managedbuild.tool.gnu.c.compiler.input.2007889843">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/extract-mixed-syntax"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/extract-mixed-syntax"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.internal.ui.text.commentOwnerProjectMappings"/>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -0,0 +1,189 @@
/*
* AlignedSentence.cpp
*
* Created on: 18 Feb 2014
* Author: s0565741
*/
#include <sstream>
#include "moses/Util.h"
#include "AlignedSentence.h"
#include "Parameter.h"
using namespace std;
/////////////////////////////////////////////////////////////////////////////////
// Builds an aligned sentence pair from one line of each input: tokenises the
// source and target strings into words, then links the words to each other
// according to the alignment string.
AlignedSentence::AlignedSentence(int lineNum,
const std::string &source,
const std::string &target,
const std::string &alignment)
:m_lineNum(lineNum)
{
PopulateWordVec(m_source, source);
PopulateWordVec(m_target, target);
// must run last: alignment points index into the already populated m_source/m_target
PopulateAlignment(alignment);
}
// Hands both word vectors to Moses::RemoveAllInColl.
// NOTE(review): the Word objects are allocated with new in PopulateWordVec;
// RemoveAllInColl presumably deletes every element - confirm in moses/Util.h.
AlignedSentence::~AlignedSentence() {
Moses::RemoveAllInColl(m_source);
Moses::RemoveAllInColl(m_target);
}
// Tokenises `line` and fills `vec` with one heap-allocated Word per token.
// Each Word is constructed with its zero-based position in the sentence and
// the token text. `vec` is resized to exactly the number of tokens.
void AlignedSentence::PopulateWordVec(Phrase &vec, const std::string &line)
{
  std::vector<string> tokens;
  Moses::Tokenize(tokens, line);

  const size_t numTokens = tokens.size();
  vec.resize(numTokens);

  size_t pos = 0;
  while (pos < numTokens) {
    vec[pos] = new Word(pos, tokens[pos]);
    ++pos;
  }
}
// Parses a list of "source-target" alignment points (one token per point) and
// links the corresponding words of m_source and m_target to each other.
// m_source and m_target must already be populated.
//
// A malformed point (not exactly two numbers) or a position outside either
// sentence is reported on stderr and aborts the program. These checks are
// explicit rather than assert()-based so that they are still performed when
// the code is compiled with NDEBUG, where assert() expands to nothing and the
// bad index would otherwise be used to access the word vectors.
void AlignedSentence::PopulateAlignment(const std::string &line)
{
  vector<string> alignStr;
  Moses::Tokenize(alignStr, line);

  for (size_t i = 0; i < alignStr.size(); ++i) {
    vector<int> alignPair;
    Moses::Tokenize(alignPair, alignStr[i], "-");
    if (alignPair.size() != 2) {
      cerr << "ERROR0:malformed alignment point '" << alignStr[i] << "'" << endl;
      cerr << "AlignedSentence=" << Debug() << endl;
      abort();
    }

    int sourcePos = alignPair[0];
    int targetPos = alignPair[1];

    if (sourcePos < 0 || static_cast<size_t>(sourcePos) >= m_source.size()) {
      cerr << "ERROR1:AlignedSentence=" << Debug() << endl;
      cerr << "m_source=" << m_source.size() << endl;
      abort();
    }
    if (targetPos < 0 || static_cast<size_t>(targetPos) >= m_target.size()) {
      cerr << "ERROR2:AlignedSentence=" << Debug() << endl;
      cerr << "m_target=" << m_target.size() << endl;
      abort();
    }

    Word *sourceWord = m_source[sourcePos];
    Word *targetWord = m_target[targetPos];

    // the link is recorded on both words
    sourceWord->AddAlignment(targetWord);
    targetWord->AddAlignment(sourceWord);
  }
}
std::string AlignedSentence::Debug() const
{
stringstream out;
out << "m_lineNum:";
out << m_lineNum;
out << endl;
out << "m_source:";
out << m_source.Debug();
out << endl;
out << "m_target:";
out << m_target.Debug();
out << endl;
out << "consistent phrases:" << endl;
out << m_consistentPhrases.Debug();
out << endl;
return out.str();
}
std::vector<int> AlignedSentence::GetSourceAlignmentCount() const
{
vector<int> ret(m_source.size());
for (size_t i = 0; i < m_source.size(); ++i) {
const Word &word = *m_source[i];
ret[i] = word.GetAlignmentIndex().size();
}
return ret;
}
// Extracts the consistent phrase pairs of this sentence pair and then calls
// AddHieroNonTerms on the resulting collection.
// NOTE(review): AddHieroNonTerms presumably attaches the hiero non-terminal to
// every extracted phrase - confirm in ConsistentPhrases.
void AlignedSentence::Create(const Parameter &params)
{
CreateConsistentPhrases(params);
m_consistentPhrases.AddHieroNonTerms(params);
}
void AlignedSentence::CreateConsistentPhrases(const Parameter &params)
{
int countT = m_target.size();
int countS = m_source.size();
m_consistentPhrases.Initialize(countS);
// check alignments for target phrase startT...endT
for(int lengthT=1;
lengthT <= params.maxSpan && lengthT <= countT;
lengthT++) {
for(int startT=0; startT < countT-(lengthT-1); startT++) {
// that's nice to have
int endT = startT + lengthT - 1;
// find find aligned source words
// first: find minimum and maximum source word
int minS = 9999;
int maxS = -1;
vector< int > usedS = GetSourceAlignmentCount();
for(int ti=startT; ti<=endT; ti++) {
const Word &word = *m_target[ti];
const std::set<int> &alignment = word.GetAlignmentIndex();
std::set<int>::const_iterator iterAlign;
for(iterAlign = alignment.begin(); iterAlign != alignment.end(); ++iterAlign) {
int si = *iterAlign;
if (si<minS) {
minS = si;
}
if (si>maxS) {
maxS = si;
}
usedS[ si ]--;
}
}
// unaligned phrases are not allowed
if( maxS == -1 )
continue;
// source phrase has to be within limits
if( maxS-minS >= params.maxSpan )
continue;
// check if source words are aligned to out of bound target words
bool out_of_bounds = false;
for(int si=minS; si<=maxS && !out_of_bounds; si++)
if (usedS[si]>0) {
out_of_bounds = true;
}
// if out of bound, you gotta go
if (out_of_bounds)
continue;
// done with all the checks, lets go over all consistent phrase pairs
// start point of source phrase may retreat over unaligned
for(int startS=minS;
(startS>=0 &&
startS>maxS - params.maxSpan && // within length limit
(startS==minS || m_source[startS]->GetAlignment().size()==0)); // unaligned
startS--) {
// end point of source phrase may advance over unaligned
for(int endS=maxS;
(endS<countS && endS<startS + params.maxSpan && // within length limit
(endS==maxS || m_source[endS]->GetAlignment().size()==0)); // unaligned
endS++) {
// take note that this is a valid phrase alignment
m_consistentPhrases.Add(startS, endS, startT, endT, params);
}
}
}
}
}

View File

@ -0,0 +1,51 @@
/*
* AlignedSentence.h
*
* Created on: 18 Feb 2014
* Author: s0565741
*/
#pragma once
#include <string>
#include <set>
#include "ConsistentPhrases.h"
#include "Phrase.h"
#include "moses/TypeDef.h"
class Parameter;
// A sentence pair (source and target phrase) together with its word alignment
// and the consistent phrase pairs extracted from it.
//
// NOTE(review): the destructor releases the contents of m_source and m_target,
// and copying is not disabled; if Phrase stores raw Word pointers, a copied
// AlignedSentence would release the same words twice - confirm and avoid
// copying instances.
class AlignedSentence {
public:
  // Creates a sentence pair that only knows its line number; subclasses
  // populate the phrases themselves. `explicit` prevents a plain int from
  // being converted to an AlignedSentence by accident.
  explicit AlignedSentence(int lineNum)
    :m_lineNum(lineNum)
  {}

  // Creates a sentence pair from the raw source, target and alignment lines.
  AlignedSentence(int lineNum,
                  const std::string &source,
                  const std::string &target,
                  const std::string &alignment);

  virtual ~AlignedSentence();

  // Extracts the consistent phrase pairs according to `params`.
  virtual void Create(const Parameter &params);

  // Returns the source phrase for Moses::Input, the target phrase otherwise.
  const Phrase &GetPhrase(Moses::FactorDirection direction) const
  { return (direction == Moses::Input) ? m_source : m_target; }

  const ConsistentPhrases &GetConsistentPhrases() const
  { return m_consistentPhrases; }

  // Human-readable dump of the whole sentence pair.
  virtual std::string Debug() const;

  // line number passed to the constructor
  int m_lineNum;
protected:
  Phrase m_source, m_target;
  ConsistentPhrases m_consistentPhrases;

  // Fills m_consistentPhrases from the alignment stored on the words.
  void CreateConsistentPhrases(const Parameter &params);

  // Tokenises `line` into newly allocated words stored in `vec`.
  void PopulateWordVec(Phrase &vec, const std::string &line);

  // m_source and m_target MUST be populated before calling this
  void PopulateAlignment(const std::string &line);

  // Number of alignment points of each source word, in sentence order.
  std::vector<int> GetSourceAlignmentCount() const;
};

View File

@ -0,0 +1,183 @@
/*
* AlignedSentenceSyntax.cpp
*
* Created on: 26 Feb 2014
* Author: hieu
*/
#include "AlignedSentenceSyntax.h"
#include <iostream>
#include "Parameter.h"
#include "pugixml.hpp"
#include "moses/Util.h"
using namespace std;
// Stores the raw source, target and alignment lines; nothing is parsed here.
// Create() later turns the stored strings into words, trees and alignments,
// because parsing depends on the Parameter settings passed to Create().
AlignedSentenceSyntax::AlignedSentenceSyntax(int lineNum,
const std::string &source,
const std::string &target,
const std::string &alignment)
:AlignedSentence(lineNum)
,m_sourceStr(source)
,m_targetStr(target)
,m_alignmentStr(alignment)
{
}
AlignedSentenceSyntax::~AlignedSentenceSyntax() {
// intentionally empty: this class has no clean-up of its own to perform here
}
// Fills `phrase` and `tree` for one side of the sentence pair.
//  - Without syntax: `line` is tokenised as plain text and the tree only gets
//    the hiero label.
//  - With syntax: `line` is wrapped in a root <tree label="X"> element and
//    parsed as XML. For any mixedSyntaxType other than 0 the hiero label is
//    set on the tree as well, and for type 2 AddToAll is additionally called
//    with that label.
void AlignedSentenceSyntax::Populate(bool isSyntax, int mixedSyntaxType, const Parameter &params,
                                     string line, Phrase &phrase, SyntaxTree &tree)
{
  if (!isSyntax) {
    // plain text side
    PopulateWordVec(phrase, line);
    tree.SetHieroLabel(params.hieroNonTerm);
    return;
  }

  // parse source and target string
  line = "<xml><tree label=\"X\">" + line + "</tree></xml>";
  XMLParse(phrase, tree, line, params);

  if (mixedSyntaxType == 0) {
    // pure syntax: keep only the labels found in the XML
    return;
  }

  // mixed syntax. Always add [X] where there isn't 1
  tree.SetHieroLabel(params.hieroNonTerm);
  if (mixedSyntaxType == 2) {
    tree.AddToAll(params.hieroNonTerm);
  }
}
// Builds the full representation of the sentence pair from the stored strings:
// the words (and syntax labels, where enabled) of each side, the word
// alignment, the consistent phrase pairs and finally the non-terminal labels
// of those phrase pairs.
void AlignedSentenceSyntax::Create(const Parameter &params)
{
// each side is parsed as XML only if its syntax flag is set (see Populate)
Populate(params.sourceSyntax, params.mixedSyntaxType, params, m_sourceStr,
m_source, m_sourceTree);
Populate(params.targetSyntax, params.mixedSyntaxType, params, m_targetStr,
m_target, m_targetTree);
// both sides must be populated before the alignment can refer to their words
PopulateAlignment(m_alignmentStr);
CreateConsistentPhrases(params);
// create labels
CreateNonTerms();
}
// Replaces, in place, the characters & | < > ' " [ ] in `text` with their
// entity / numeric character reference forms.
void Escape(string &text)
{
  // (pattern, replacement) pairs, applied strictly in this order: "&" comes
  // first so that the ampersands introduced by the later replacements are not
  // escaped a second time
  static const char *const kReplacements[][2] = {
    {"&", "&amp;"},
    {"|", "&#124;"},
    {"<", "&lt;"},
    {">", "&gt;"},
    {"'", "&apos;"},
    {"\"", "&quot;"},
    {"[", "&#91;"},
    {"]", "&#93;"}
  };
  const size_t numReplacements = sizeof(kReplacements) / sizeof(kReplacements[0]);

  for (size_t i = 0; i < numReplacements; ++i) {
    text = Moses::Replace(text, kReplacements[i][0], kReplacements[i][1]);
  }
}
// Walks the children of `parentNode` in document order, appending the words
// found to `output` and registering every labelled span in `tree`.
//
// For each child:
//  - if it has a name, its "label" attribute is read and its own children are
//    processed recursively first, so nested spans are handled depth-first;
//  - its value() is escaped, tokenised and appended to `output` as new Words
//    (NOTE(review): relies on pugixml giving text nodes an empty name and
//    element nodes an empty value - confirm in the pugixml documentation);
//  - if a label was found, the span from the first to the last word added for
//    this child (inclusive) is stored in `tree` as "[label]".
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const pugi::xml_node &parentNode,
                                     const Parameter &params)
{
  for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling())
  {
    string nodeName = childNode.name();

    // span label
    string label;
    // position of the first word this child contributes
    int startPos = output.size();

    if (!nodeName.empty()) {
      pugi::xml_attribute attribute = childNode.attribute("label");
      label = attribute.as_string();

      // recursively call this function. For proper recursive trees
      XMLParse(output, tree, childNode, params);
    }

    // fill phrase vector
    string text = childNode.value();
    Escape(text);

    std::vector<string> toks;
    Moses::Tokenize(toks, text);

    for (size_t i = 0; i < toks.size(); ++i) {
      const string &tok = toks[i];
      // each word records its own position in the phrase
      Word *word = new Word(output.size(), tok);
      output.push_back(word);
    }

    // is it a labelled span?
    // endPos is startPos - 1 when the child contributed no words
    int endPos = output.size() - 1;

    // fill syntax labels
    if (!label.empty()) {
      label = "[" + label + "]";
      tree.Add(startPos, endPos, label, params);
    }
  }
}
// Parses `input` as an XML document and fills `output` and `tree` from the
// children of its top-level <xml> element.
//
// A parse failure is reported on stderr together with the sentence's line
// number instead of being silently ignored. Processing then continues exactly
// as before with whatever document pugixml produced, so existing callers see
// no change in control flow.
void AlignedSentenceSyntax::XMLParse(Phrase &output,
                                     SyntaxTree &tree,
                                     const std::string input,
                                     const Parameter &params)
{
  pugi::xml_document doc;
  pugi::xml_parse_result result = doc.load(input.c_str(),
                                  pugi::parse_default | pugi::parse_comments);
  if (!result) {
    cerr << "WARNING: XML parse error in sentence " << m_lineNum
         << ": " << result.description()
         << " (offset " << result.offset << ")" << endl;
  }

  pugi::xml_node topNode = doc.child("xml");
  XMLParse(output, tree, topNode, params);
}
void AlignedSentenceSyntax::CreateNonTerms()
{
for (int sourceStart = 0; sourceStart < m_source.size(); ++sourceStart) {
for (int sourceEnd = sourceStart; sourceEnd < m_source.size(); ++sourceEnd) {
ConsistentPhrases::Coll &coll = m_consistentPhrases.GetColl(sourceStart, sourceEnd);
const SyntaxTree::Labels &sourceLabels = m_sourceTree.Find(sourceStart, sourceEnd);
ConsistentPhrases::Coll::iterator iter;
for (iter = coll.begin(); iter != coll.end(); ++iter) {
ConsistentPhrase &cp = **iter;
int targetStart = cp.corners[2];
int targetEnd = cp.corners[3];
const SyntaxTree::Labels &targetLabels = m_targetTree.Find(targetStart, targetEnd);
CreateNonTerms(cp, sourceLabels, targetLabels);
}
}
}
}
/**
 * Adds one non-terminal to cp for every (source label, target label)
 * combination, i.e. the cartesian product of the two label sets.
 */
void AlignedSentenceSyntax::CreateNonTerms(ConsistentPhrase &cp,
    const SyntaxTree::Labels &sourceLabels,
    const SyntaxTree::Labels &targetLabels)
{
  for (SyntaxTree::Labels::const_iterator src = sourceLabels.begin();
       src != sourceLabels.end(); ++src) {
    for (SyntaxTree::Labels::const_iterator tgt = targetLabels.begin();
         tgt != targetLabels.end(); ++tgt) {
      cp.AddNonTerms(*src, *tgt);
    }
  }
}

View File

@ -0,0 +1,46 @@
/*
* AlignedSentenceSyntax.h
*
* Created on: 26 Feb 2014
* Author: hieu
*/
#pragma once
#include "AlignedSentence.h"
#include "SyntaxTree.h"
#include "pugixml.hpp"
// Aligned sentence pair whose source and/or target side may carry an XML
// parse tree. Keeps the raw input strings plus one SyntaxTree per side.
class AlignedSentenceSyntax : public AlignedSentence
{
public:
// Arguments are the line number and the raw source, target and alignment lines.
AlignedSentenceSyntax(int lineNum,
const std::string &source,
const std::string &target,
const std::string &alignment);
virtual ~AlignedSentenceSyntax();
void Create(const Parameter &params);
//virtual std::string Debug() const;
protected:
// raw input lines
std::string m_sourceStr, m_targetStr, m_alignmentStr;
// span labels read from the source / target side
SyntaxTree m_sourceTree, m_targetTree;
// Parses `input` as XML and processes the children of its <xml> element.
void XMLParse(Phrase &output,
SyntaxTree &tree,
const std::string input,
const Parameter &params);
// Recursively turns the children of parentNode into words (appended to
// output) and labelled spans (added to tree).
void XMLParse(Phrase &output,
SyntaxTree &tree,
const pugi::xml_node &parentNode,
const Parameter &params);
// Attaches syntactic non-terminals to every stored consistent phrase.
void CreateNonTerms();
// Adds the cartesian product of sourceLabels x targetLabels to cp.
void CreateNonTerms(ConsistentPhrase &cp,
const SyntaxTree::Labels &sourceLabels,
const SyntaxTree::Labels &targetLabels);
// NOTE(review): definition not visible in this header; presumably fills
// phrase/tree from one input line - confirm against the .cpp.
void Populate(bool isSyntax, int mixedSyntaxType, const Parameter &params,
std::string line, Phrase &phrase, SyntaxTree &tree);
};

View File

@ -0,0 +1,66 @@
/*
* ConsistentPhrase.cpp
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#include <sstream>
#include "ConsistentPhrase.h"
#include "Word.h"
#include "NonTerm.h"
#include "Parameter.h"
using namespace std;
/**
 * Builds a consistent phrase pair covering source words
 * [sourceStart, sourceEnd] and target words [targetStart, targetEnd].
 * The four bounds are stored, in that order, in corners[0..3]. The built-in
 * Hiero non-terminal uses params.hieroNonTerm for both source and target.
 */
ConsistentPhrase::ConsistentPhrase(
  int sourceStart, int sourceEnd,
  int targetStart, int targetEnd,
  const Parameter &params)
  : corners(4)
  , m_hieroNonTerm(*this, params.hieroNonTerm, params.hieroNonTerm)
{
  const int bounds[4] = { sourceStart, sourceEnd, targetStart, targetEnd };
  corners.assign(bounds, bounds + 4);
}
// Nothing to release explicitly: all members clean up after themselves.
ConsistentPhrase::~ConsistentPhrase() {
}
// Orders phrases lexicographically by their four corner positions
// (source start, source end, target start, target end).
bool ConsistentPhrase::operator<(const ConsistentPhrase &other) const
{
  return other.corners > corners;
}
void ConsistentPhrase::AddNonTerms(const std::string &source,
const std::string &target)
{
m_nonTerms.push_back(NonTerm(*this, source, target));
}
// True when the target spans (corners[2]..corners[3], inclusive) of the two
// phrases share at least one position.
bool ConsistentPhrase::TargetOverlap(const ConsistentPhrase &other) const
{
  const bool otherEndsBeforeUs = other.corners[3] < corners[2];
  const bool otherStartsAfterUs = other.corners[2] > corners[3];
  return !otherEndsBeforeUs && !otherStartsAfterUs;
}
std::string ConsistentPhrase::Debug() const
{
stringstream out;
out << "[" << corners[0] << "-" << corners[1]
<< "][" << corners[2] << "-" << corners[3] << "]";
out << "NT:";
for (size_t i = 0; i < m_nonTerms.size(); ++i) {
const NonTerm &nonTerm = m_nonTerms[i];
out << nonTerm.GetLabel(Moses::Input) << ":" << nonTerm.GetLabel(Moses::Output);
}
return out.str();
}

View File

@ -0,0 +1,51 @@
/*
* ConsistentPhrase.h
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#pragma once
#include <cassert>
#include <vector>
#include <iostream>
#include "moses/TypeDef.h"
#include "NonTerm.h"
// A phrase pair, described by inclusive source and target word spans,
// together with the non-terminal labels that may stand for it in a rule.
class ConsistentPhrase
{
public:
typedef std::vector<NonTerm> NonTerms;
// corners[0..1] = source start/end, corners[2..3] = target start/end (inclusive)
std::vector<int> corners;
// Declared but intentionally left undefined so that copying fails at link time.
ConsistentPhrase(const ConsistentPhrase &copy); // do not implement
ConsistentPhrase(int sourceStart, int sourceEnd,
int targetStart, int targetEnd,
const Parameter &params);
virtual ~ConsistentPhrase();
// Number of words covered on the source (Moses::Input) or target side.
int GetWidth(Moses::FactorDirection direction) const
{ return (direction == Moses::Input) ? corners[1] - corners[0] + 1 : corners[3] - corners[2] + 1; }
// Appends a non-terminal with the given source/target labels.
void AddNonTerms(const std::string &source,
const std::string &target);
const NonTerms &GetNonTerms() const
{ return m_nonTerms;}
const NonTerm &GetHieroNonTerm() const
{ return m_hieroNonTerm;}
// True when the target spans of the two phrases intersect.
bool TargetOverlap(const ConsistentPhrase &other) const;
// Lexicographic comparison of the corner vectors.
bool operator<(const ConsistentPhrase &other) const;
std::string Debug() const;
protected:
// non-terminals added through AddNonTerms()
NonTerms m_nonTerms;
// non-terminal built in the constructor from params.hieroNonTerm
NonTerm m_hieroNonTerm;
};

View File

@ -0,0 +1,103 @@
/*
* ConsistentPhrases.cpp
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#include <sstream>
#include <cassert>
#include "ConsistentPhrases.h"
#include "NonTerm.h"
#include "Parameter.h"
#include "moses/Util.h"
using namespace std;
// Creates an empty collection; Initialize() must size it before use.
ConsistentPhrases::ConsistentPhrases()
{
}
// Hands every per-span set to Moses::RemoveAllInColl, which releases the
// ConsistentPhrase objects allocated in Add().
ConsistentPhrases::~ConsistentPhrases()
{
  typedef std::vector< std::vector<Coll> > Table;
  for (Table::iterator row = m_coll.begin(); row != m_coll.end(); ++row) {
    for (std::vector<Coll>::iterator cell = row->begin(); cell != row->end(); ++cell) {
      Moses::RemoveAllInColl(*cell);
    }
  }
}
// Sizes the triangular table: row `start` gets one slot per possible source
// end position, i.e. size - start slots.
void ConsistentPhrases::Initialize(size_t size)
{
  m_coll.resize(size);
  for (size_t start = 0; start < size; ++start) {
    m_coll[start].resize(size - start);
  }
}
void ConsistentPhrases::Add(int sourceStart, int sourceEnd,
int targetStart, int targetEnd,
const Parameter &params)
{
Coll &coll = m_coll[sourceStart][sourceEnd - sourceStart];
ConsistentPhrase *cp = new ConsistentPhrase(sourceStart, sourceEnd,
targetStart, targetEnd,
params);
pair<Coll::iterator, bool> inserted = coll.insert(cp);
assert(inserted.second);
}
// Read-only access to the phrases stored for source span [sourceStart, sourceEnd].
const ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd) const
{
  return m_coll[sourceStart][sourceEnd - sourceStart];
}
// Mutable access to the phrases stored for source span [sourceStart, sourceEnd].
ConsistentPhrases::Coll &ConsistentPhrases::GetColl(int sourceStart, int sourceEnd)
{
  return m_coll[sourceStart][sourceEnd - sourceStart];
}
std::string ConsistentPhrases::Debug() const
{
std::stringstream out;
for (int start = 0; start < m_coll.size(); ++start) {
const std::vector<Coll> &allSourceStart = m_coll[start];
for (int size = 0; size < allSourceStart.size(); ++size) {
const Coll &coll = allSourceStart[size];
Coll::const_iterator iter;
for (iter = coll.begin(); iter != coll.end(); ++iter) {
const ConsistentPhrase &consistentPhrase = **iter;
out << consistentPhrase.Debug() << endl;
}
}
}
return out.str();
}
void ConsistentPhrases::AddHieroNonTerms(const Parameter &params)
{
// add [X] labels everywhere
for (int i = 0; i < m_coll.size(); ++i) {
vector<Coll> &inner = m_coll[i];
for (int j = 0; j < inner.size(); ++j) {
ConsistentPhrases::Coll &coll = inner[j];
ConsistentPhrases::Coll::iterator iter;
for (iter = coll.begin(); iter != coll.end(); ++iter) {
ConsistentPhrase &cp = **iter;
cp.AddNonTerms(params.hieroNonTerm, params.hieroNonTerm);
}
}
}
}

View File

@ -0,0 +1,40 @@
/*
* ConsistentPhrases.h
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#pragma once
#include <set>
#include <vector>
#include <iostream>
#include "ConsistentPhrase.h"
class Word;
class Parameter;
// Table of all consistent phrases of one sentence pair, indexed by source
// span. Owns the ConsistentPhrase objects it stores.
class ConsistentPhrases {
public:
// NOTE(review): a set of raw pointers is ordered by pointer value, not by
// ConsistentPhrase::operator<, so iteration order follows allocation
// addresses - confirm this is intended.
typedef std::set<ConsistentPhrase*> Coll;
ConsistentPhrases();
virtual ~ConsistentPhrases();
// Allocates the table for a source sentence of `size` words.
void Initialize(size_t size);
// Creates and stores a phrase for the given inclusive spans.
void Add(int sourceStart, int sourceEnd,
int targetStart, int targetEnd,
const Parameter &params);
// Adds the Hiero non-terminal label to every stored phrase.
void AddHieroNonTerms(const Parameter &params);
// Phrases whose source span is exactly [sourceStart, sourceEnd].
const Coll &GetColl(int sourceStart, int sourceEnd) const;
Coll &GetColl(int sourceStart, int sourceEnd);
std::string Debug() const;
protected:
// m_coll[sourceStart][sourceEnd - sourceStart]
std::vector< std::vector<Coll> > m_coll;
};

View File

@ -1,37 +0,0 @@
/*
* Global.cpp
* extract
*
* Created by Hieu Hoang on 01/02/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include "Global.h"
bool g_debug = false;
// Default extraction settings. Initialisers are listed in the order the
// members are declared in Global.h, which is the order they are run in.
// (Historic, never-enabled settings: minHoleSize(1), minSubPhraseSize(1),
// zipFiles(false).)
Global::Global()
  : minHoleSpanSourceDefault(2)
  , maxHoleSpanSourceDefault(7)
  , minHoleSpanSourceSyntax(1)
  , maxHoleSpanSourceSyntax(1000)
  , maxSymbols(5)
  , glueGrammarFlag(false)
  , unknownWordLabelFlag(false)
  , maxNonTerm(3)
  , maxNonTermDefault(2)
  , sourceSyntax(true)
  , targetSyntax(false)
  , mixed(true)
  , maxUnaligned(5)
  , uppermostOnly(true)
  , allowDefaultNonTermEdge(true)
  , gzOutput(false)
{}

View File

@ -1,45 +0,0 @@
#pragma once
/*
* Global.h
* extract
*
* Created by Hieu Hoang on 01/02/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <set>
#include <map>
#include <string>
// Bag of extraction settings; defaults are set in Global::Global().
class Global
{
public:
// min/max source span of a hole, for default and for syntactic non-terminals
int minHoleSpanSourceDefault;
int maxHoleSpanSourceDefault;
int minHoleSpanSourceSyntax;
int maxHoleSpanSourceSyntax;
int maxSymbols;
bool glueGrammarFlag;
bool unknownWordLabelFlag;
int maxNonTerm;
int maxNonTermDefault;
bool sourceSyntax;
bool targetSyntax;
bool mixed;
int maxUnaligned;
bool uppermostOnly;
bool allowDefaultNonTermEdge;
bool gzOutput;
Global();
// NOTE(review): declared only; no definition is visible in Global.cpp, so
// copying would fail at link time - presumably intentional.
Global(const Global&);
};
extern bool g_debug;
#define DEBUG_OUTPUT() void DebugOutput() const;

View File

@ -1,180 +0,0 @@
/*
* Lattice.cpp
* extract
*
* Created by Hieu Hoang on 18/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <cassert>
#include "Lattice.h"
#include "LatticeNode.h"
#include "Tunnel.h"
#include "TunnelCollection.h"
#include "SyntaxTree.h"
#include "SentenceAlignment.h"
#include "tables-core.h"
#include "Rule.h"
#include "RuleCollection.h"
using namespace std;
// Allocates sourceSize + 1 empty stacks, indexed by source start position.
Lattice::Lattice(size_t sourceSize)
:m_stacks(sourceSize + 1)
{
}
// Hands every stack to RemoveAllInColl to release the LatticeNode objects
// created in CreateArcs / CreateArcsUsing1Hole.
Lattice::~Lattice()
{
  for (size_t pos = 0; pos < m_stacks.size(); ++pos) {
    RemoveAllInColl(m_stacks[pos]);
  }
}
void Lattice::CreateArcs(size_t startPos, const TunnelCollection &tunnelColl, const SentenceAlignment &sentence, const Global &global)
{
// term
Stack &startStack = GetStack(startPos);
LatticeNode *node = new LatticeNode(startPos, &sentence);
startStack.push_back(node);
// non-term
for (size_t endPos = startPos + 1; endPos <= sentence.source.size(); ++endPos)
{
const TunnelList &tunnels = tunnelColl.GetTunnels(startPos, endPos - 1);
TunnelList::const_iterator iterHole;
for (iterHole = tunnels.begin(); iterHole != tunnels.end(); ++iterHole)
{
const Tunnel &tunnel = *iterHole;
CreateArcsUsing1Hole(tunnel, sentence, global);
}
}
}
void Lattice::CreateArcsUsing1Hole(const Tunnel &tunnel, const SentenceAlignment &sentence, const Global &global)
{
size_t startPos = tunnel.GetRange(0).GetStartPos()
, endPos = tunnel.GetRange(0).GetEndPos();
size_t numSymbols = tunnel.GetRange(0).GetWidth();
assert(numSymbols > 0);
Stack &startStack = GetStack(startPos);
// non-terms. cartesian product of source & target labels
assert(startPos == tunnel.GetRange(0).GetStartPos() && endPos == tunnel.GetRange(0).GetEndPos());
size_t startT = tunnel.GetRange(1).GetStartPos()
,endT = tunnel.GetRange(1).GetEndPos();
const SyntaxNodes &nodesS = sentence.sourceTree.GetNodes(startPos, endPos);
const SyntaxNodes &nodesT = sentence.targetTree.GetNodes(startT, endT );
SyntaxNodes::const_iterator iterS, iterT;
for (iterS = nodesS.begin(); iterS != nodesS.end(); ++iterS)
{
const SyntaxNode *syntaxNodeS = *iterS;
for (iterT = nodesT.begin(); iterT != nodesT.end(); ++iterT)
{
const SyntaxNode *syntaxNodeT = *iterT;
bool isSyntax = syntaxNodeS->IsSyntax() || syntaxNodeT->IsSyntax();
size_t maxSourceNonTermSpan = isSyntax ? global.maxHoleSpanSourceSyntax : global.maxHoleSpanSourceDefault;
if (maxSourceNonTermSpan >= endPos - startPos)
{
LatticeNode *node = new LatticeNode(tunnel, syntaxNodeS, syntaxNodeT);
startStack.push_back(node);
}
}
}
}
// Mutable stack of nodes whose source span starts at startPos.
// Asserts that startPos indexes an existing stack.
Stack &Lattice::GetStack(size_t startPos)
{
assert(startPos < m_stacks.size());
return m_stacks[startPos];
}
// Read-only stack of nodes whose source span starts at startPos.
// Asserts that startPos indexes an existing stack.
const Stack &Lattice::GetStack(size_t startPos) const
{
assert(startPos < m_stacks.size());
return m_stacks[startPos];
}
// Seeds one rule from every node in the stack at startPos, lets each seed
// grow further rules into m_rules, and then either hands the seed itself to
// m_rules or deletes it.
void Lattice::CreateRules(size_t startPos, const SentenceAlignment &sentence, const Global &global)
{
const Stack &startStack = GetStack(startPos);
Stack::const_iterator iterStack;
for (iterStack = startStack.begin(); iterStack != startStack.end(); ++iterStack)
{
const LatticeNode *node = *iterStack;
Rule *initRule = new Rule(node);
if (initRule->CanRecurse(global, sentence.GetTunnelCollection()))
{ // may or may not be valid itself, but longer rules can still be built on it
initRule->CreateRules(m_rules, *this, sentence, global);
}
// The validity check must come after the expansion above: the seed is
// needed for expansion even when it is about to be discarded.
if (initRule->IsValid(global, sentence.GetTunnelCollection()))
{ // add to rule collection
// NOTE(review): presumably m_rules takes ownership of initRule here - confirm in RuleCollection::Add
m_rules.Add(global, initRule, sentence);
}
else
{
delete initRule;
}
}
}
// Returns the non-terminal nodes whose source span equals sourceRange.
// The returned pointers are borrowed from the lattice's own stacks.
Stack Lattice::GetNonTermNode(const Range &sourceRange) const
{
  const Stack &candidates = GetStack(sourceRange.GetStartPos());

  Stack matches;
  for (size_t i = 0; i < candidates.size(); ++i) {
    LatticeNode *candidate = candidates[i];
    const Range &candidateRange = candidate->GetSourceRange();
    assert(candidateRange.GetStartPos() == sourceRange.GetStartPos());

    if (candidate->IsTerminal()) {
      continue;
    }
    if (candidateRange.GetEndPos() != sourceRange.GetEndPos()) {
      continue;
    }
    matches.push_back(candidate);
  }
  return matches;
}
// Streams every node of every stack, each followed by a single space.
std::ostream& operator<<(std::ostream &out, const Lattice &obj)
{
  for (size_t pos = 0; pos < obj.m_stacks.size(); ++pos) {
    const Stack &stack = obj.m_stacks[pos];
    for (size_t n = 0; n < stack.size(); ++n) {
      out << *stack[n] << " ";
    }
  }
  return out;
}

View File

@ -1,47 +0,0 @@
#pragma once
/*
* Lattice.h
* extract
*
* Created by Hieu Hoang on 18/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <iostream>
#include <vector>
#include "RuleCollection.h"
class Global;
class LatticeNode;
class Tunnel;
class TunnelCollection;
class SentenceAlignment;
typedef std::vector<LatticeNode*> Stack;
// Per-sentence lattice: one stack of LatticeNode pointers per source start
// position, plus the collection of rules extracted from it.
class Lattice
{
friend std::ostream& operator<<(std::ostream&, const Lattice&);
// m_stacks[startPos] holds the nodes starting at startPos; nodes are
// released in the destructor
std::vector<Stack> m_stacks;
RuleCollection m_rules;
Stack &GetStack(size_t endPos);
void CreateArcsUsing1Hole(const Tunnel &tunnel, const SentenceAlignment &sentence, const Global &global);
public:
// Allocates sourceSize + 1 stacks.
Lattice(size_t sourceSize);
~Lattice();
// Fills the stack at startPos with terminal and non-terminal nodes.
void CreateArcs(size_t startPos, const TunnelCollection &tunnelColl, const SentenceAlignment &sentence, const Global &global);
// Builds rules seeded from every node in the stack at startPos.
void CreateRules(size_t startPos, const SentenceAlignment &sentence, const Global &global);
const Stack &GetStack(size_t startPos) const;
const RuleCollection &GetRules() const
{ return m_rules; }
// Non-terminal nodes whose source span equals sourceRange.
Stack GetNonTermNode(const Range &sourceRange) const;
};

View File

@ -1,149 +0,0 @@
/*
* LatticeNode.cpp
* extract
*
* Created by Hieu Hoang on 18/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <sstream>
#include "LatticeNode.h"
#include "SyntaxTree.h"
#include "Tunnel.h"
#include "SentenceAlignment.h"
#include "SymbolSequence.h"
size_t LatticeNode::s_count = 0;
using namespace std;
// for terms
// Terminal node for the single source word at `pos` of `sentence`.
// Initialisers follow the member declaration order in LatticeNode.h.
LatticeNode::LatticeNode(size_t pos, const SentenceAlignment *sentence)
  : m_isTerminal(true)
  , m_sourceRange(pos, pos)
  , m_tunnel(NULL)
  , m_sourceTreeNode(NULL)
  , m_targetTreeNode(NULL)
  , m_sentence(sentence)
{
  ++s_count;
}
// for non-terms
// Non-terminal node for `tunnel`, labelled with the given source and target
// syntax nodes. The source range is copied from the tunnel's range 0.
// Initialisers follow the member declaration order in LatticeNode.h.
LatticeNode::LatticeNode(const Tunnel &tunnel, const SyntaxNode *sourceTreeNode, const SyntaxNode *targetTreeNode)
  : m_isTerminal(false)
  , m_sourceRange(tunnel.GetRange(0))
  , m_tunnel(&tunnel)
  , m_sourceTreeNode(sourceTreeNode)
  , m_targetTreeNode(targetTreeNode)
  , m_sentence(NULL)
{
  ++s_count;
}
// True when either the source or the target label of this non-terminal is
// syntactic. Must not be called on terminal nodes.
bool LatticeNode::IsSyntax() const
{
  assert(!m_isTerminal);
  return m_sourceTreeNode->IsSyntax() || m_targetTreeNode->IsSyntax();
}
// A node always counts as exactly one symbol; `direction` is ignored.
size_t LatticeNode::GetNumSymbols(size_t direction) const
{
return 1;
}
// Three-way comparison returning -1, 0 or +1.
//  - A terminal sorts before a non-terminal.
//  - Two terminals are ordered by source start position.
//  - Two non-terminals are ordered by source tunnel range and source label
//    (only when this node's source label is syntactic), then by target
//    tunnel range and target label (only when this node's target label is
//    syntactic).
int LatticeNode::Compare(const LatticeNode &otherNode) const
{
int ret = 0;
if (m_isTerminal != otherNode.m_isTerminal)
{
ret = m_isTerminal ? -1 : 1;
}
// both term or non-term
else if (m_isTerminal)
{ // term. compare source span
if (m_sourceRange.GetStartPos() == otherNode.m_sourceRange.GetStartPos())
ret = 0;
else
ret = (m_sourceRange.GetStartPos() < otherNode.m_sourceRange.GetStartPos()) ? -1 : +1;
}
else
{ // non-term. compare source span and BOTH label
assert(!m_isTerminal);
assert(!otherNode.m_isTerminal);
// NOTE(review): only this node's IsSyntax() is consulted, not otherNode's
if (m_sourceTreeNode->IsSyntax())
{
ret = m_tunnel->Compare(*otherNode.m_tunnel, 0);
if (ret == 0 && m_sourceTreeNode->GetLabel() != otherNode.m_sourceTreeNode->GetLabel())
{
ret = (m_sourceTreeNode->GetLabel() < otherNode.m_sourceTreeNode->GetLabel()) ? -1 : +1;
}
}
// the target side only breaks ties left by the source side
if (ret == 0 && m_targetTreeNode->IsSyntax())
{
ret = m_tunnel->Compare(*otherNode.m_tunnel, 1);
if (ret == 0 && m_targetTreeNode->GetLabel() != otherNode.m_targetTreeNode->GetLabel())
{
ret = (m_targetTreeNode->GetLabel() < otherNode.m_targetTreeNode->GetLabel()) ? -1 : +1;
}
}
}
return ret;
}
// Appends the symbol for this node to `symbols`. Only non-terminals produce
// a symbol (carrying both labels, both ranges and both syntax flags);
// terminal output is currently disabled, so terminals add nothing.
// `direction` is not used.
void LatticeNode::CreateSymbols(size_t direction, SymbolSequence &symbols) const
{
  if (m_isTerminal) {
    // terminal symbols are intentionally not emitted here
    return;
  }

  // non-terminal: output both sides
  Symbol symbol(m_sourceTreeNode->GetLabel(), m_targetTreeNode->GetLabel()
                , m_tunnel->GetRange(0).GetStartPos(), m_tunnel->GetRange(0).GetEndPos()
                , m_tunnel->GetRange(1).GetStartPos(), m_tunnel->GetRange(1).GetEndPos()
                , m_sourceTreeNode->IsSyntax(), m_targetTreeNode->IsSyntax());
  symbols.Add(symbol);
}
std::ostream& operator<<(std::ostream &out, const LatticeNode &obj)
{
if (obj.m_isTerminal)
{
assert(obj.m_sourceRange.GetWidth() == 1);
size_t pos = obj.m_sourceRange.GetStartPos();
const SentenceAlignment &sentence = *obj.m_sentence;
out << obj.m_sourceRange << "=" << sentence.source[pos];
}
else
{
assert(obj.m_tunnel);
out << obj.GetTunnel() << "=" << obj.m_sourceTreeNode->GetLabel() << obj.m_targetTreeNode->GetLabel() << " ";
}
return out;
}

View File

@ -1,77 +0,0 @@
#pragma once
/*
* LatticeNode.h
* extract
*
* Created by Hieu Hoang on 18/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <vector>
#include <iostream>
#include <cassert>
#include "Range.h"
class Tunnel;
class SyntaxNode;
class SentenceAlignment;
class SymbolSequence;
// One arc of the extraction lattice: either a terminal (a single source
// word) or a non-terminal (a tunnel with a source and a target label).
class LatticeNode
{
friend std::ostream& operator<<(std::ostream&, const LatticeNode&);
bool m_isTerminal;
// for terms & non-term
Range m_sourceRange;
// non-terms. source range should be same as m_sourceRange
// (NULL for terminals)
const Tunnel *m_tunnel;
public:
// incremented by every constructor call
static size_t s_count;
// syntax labels of a non-terminal; NULL for terminals
const SyntaxNode *m_sourceTreeNode, *m_targetTreeNode;
// sentence the terminal belongs to; NULL for non-terminals
const SentenceAlignment *m_sentence;
// for terms
LatticeNode(size_t pos, const SentenceAlignment *sentence);
// for non-terms
LatticeNode(const Tunnel &tunnel, const SyntaxNode *sourceTreeNode, const SyntaxNode *targetTreeNode);
bool IsTerminal() const
{ return m_isTerminal; }
// Non-terminals only: true when either label is syntactic.
bool IsSyntax() const;
size_t GetNumSymbols(size_t direction) const;
// NOTE(review): no definition is visible in LatticeNode.cpp - confirm it exists or remove
std::string ToString() const;
// Three-way comparison returning -1, 0 or +1.
int Compare(const LatticeNode &otherNode) const;
void CreateSymbols(size_t direction, SymbolSequence &symbols) const;
// Asserts that the node has a tunnel, i.e. is a non-terminal.
const Tunnel &GetTunnel() const
{
assert(m_tunnel);
return *m_tunnel;
}
const Range &GetSourceRange() const
{
return m_sourceRange;
}
// direction 0 = source label, anything else = target label; asserts non-NULL.
const SyntaxNode &GetSyntaxNode(size_t direction) const
{
const SyntaxNode *node = direction == 0 ? m_sourceTreeNode : m_targetTreeNode;
assert(node);
return *node;
}
};

View File

@ -0,0 +1,174 @@
#include <iostream>
#include <cstdlib>
#include <boost/program_options.hpp>
#include "Main.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "AlignedSentence.h"
#include "AlignedSentenceSyntax.h"
#include "Parameter.h"
#include "Rules.h"
using namespace std;
bool g_debug = false;
/**
 * Entry point of the mixed-syntax rule extractor.
 *
 * Usage: <prog> target source alignment extract [options...]
 *
 * Reads the target, source and alignment files line by line in lock-step,
 * extracts rules for every sentence pair and writes them to <extract> and
 * <extract>.inv (with a ".gz" suffix when --GZOutput is given). Optionally
 * writes a glue grammar to the path given by --GlueGrammar.
 *
 * @return EXIT_SUCCESS on success or when --help was requested;
 *         EXIT_FAILURE on bad options, missing positional arguments, or when
 *         the source/alignment file has fewer lines than the target file.
 */
int main(int argc, char** argv)
{
  cerr << "Starting" << endl;

  Parameter params;

  namespace po = boost::program_options;
  po::options_description desc("Options");
  desc.add_options()
  ("help", "Print help messages")
  ("MaxSpan", po::value<int>()->default_value(params.maxSpan), "Max (source) span of a rule. ie. number of words in the source")
  ("GlueGrammar", po::value<string>()->default_value(params.gluePath), "Output glue grammar to here")
  ("SentenceOffset", po::value<long>()->default_value(params.sentenceOffset), "Starting sentence id. Not used")
  ("GZOutput", "Compress extract files")
  ("MaxNonTerm", po::value<int>()->default_value(params.maxNonTerm), "Maximum number of non-terms allowed per rule")
  ("MaxHieroNonTerm", po::value<int>()->default_value(params.maxHieroNonTerm), "Maximum number of Hiero non-term. Usually, --MaxNonTerm is the normal constraint")
  ("MinHoleSource", po::value<int>()->default_value(params.minHoleSource), "Minimum source span for a non-term.")
  ("SourceSyntax", "Source sentence is a parse tree")
  ("TargetSyntax", "Target sentence is a parse tree")
  ("MixedSyntaxType", po::value<int>()->default_value(params.mixedSyntaxType), "Hieu's Mixed syntax type. 0(default)=no mixed syntax, 1=add [X] only if no syntactic label. 2=add [X] everywhere")
  ("MultiLabel", po::value<int>()->default_value(params.multiLabel), "What to do with multiple labels on the same span. 0(default)=keep them all, 1=keep only top-most, 2=keep only bottom-most")
  ("HieroSourceLHS", "Always use Hiero source LHS? Default = 0")
  ("MaxSpanFreeNonTermSource", po::value<int>()->default_value(params.maxSpanFreeNonTermSource), "Max number of words covered by beginning/end NT. Default = 0 (no limit)")
  ("NoNieceTerminal", "Don't extract rule if 1 of the non-term covers the same word as 1 of the terminals")
  ("MaxScope", po::value<int>()->default_value(params.maxScope), "maximum scope (see Hopkins and Langmead (2010)). Default is HIGH")
  ("SpanLength", "Property - span length of RHS each non-term")
  ("NonTermContext", "Property - left and right, inside and outside words of each non-term");

  po::variables_map vm;
  try {
    po::store(po::parse_command_line(argc, argv, desc),
              vm); // can throw

    // --help, or not enough positional arguments (4 paths are required)
    const bool helpRequested = vm.count("help") > 0;
    if (helpRequested || argc < 5) {
      std::cout << argv[0] << " target source alignment extract [options...]" << std::endl
                << desc << std::endl;
      // asking for help is fine; missing arguments are an error
      return helpRequested ? EXIT_SUCCESS : EXIT_FAILURE;
    }

    po::notify(vm); // throws on error, so do after help in case
    // there are any problems
  } catch(po::error& e) {
    std::cerr << "ERROR: " << e.what() << std::endl << std::endl;
    std::cerr << desc << std::endl;
    return EXIT_FAILURE;
  }

  if (vm.count("MaxSpan")) params.maxSpan = vm["MaxSpan"].as<int>();
  if (vm.count("GZOutput")) params.gzOutput = true;
  if (vm.count("GlueGrammar")) params.gluePath = vm["GlueGrammar"].as<string>();
  if (vm.count("SentenceOffset")) params.sentenceOffset = vm["SentenceOffset"].as<long>();
  if (vm.count("MaxNonTerm")) params.maxNonTerm = vm["MaxNonTerm"].as<int>();
  if (vm.count("MaxHieroNonTerm")) params.maxHieroNonTerm = vm["MaxHieroNonTerm"].as<int>();
  if (vm.count("MinHoleSource")) params.minHoleSource = vm["MinHoleSource"].as<int>();
  if (vm.count("SourceSyntax")) params.sourceSyntax = true;
  if (vm.count("TargetSyntax")) params.targetSyntax = true;
  if (vm.count("MixedSyntaxType")) params.mixedSyntaxType = vm["MixedSyntaxType"].as<int>();
  if (vm.count("MultiLabel")) params.multiLabel = vm["MultiLabel"].as<int>();
  if (vm.count("HieroSourceLHS")) params.hieroSourceLHS = true;
  if (vm.count("MaxSpanFreeNonTermSource")) params.maxSpanFreeNonTermSource = vm["MaxSpanFreeNonTermSource"].as<int>();
  if (vm.count("NoNieceTerminal")) params.nieceTerminal = false;
  if (vm.count("MaxScope")) params.maxScope = vm["MaxScope"].as<int>();

  // properties
  if (vm.count("SpanLength")) params.spanLength = true;
  if (vm.count("NonTermContext")) params.nonTermContext = true;

  // input files;
  string pathTarget = argv[1];
  string pathSource = argv[2];
  string pathAlignment = argv[3];

  string pathExtract = argv[4];
  string pathExtractInv = pathExtract + ".inv";
  if (params.gzOutput) {
    pathExtract += ".gz";
    pathExtractInv += ".gz";
  }

  Moses::InputFileStream strmTarget(pathTarget);
  Moses::InputFileStream strmSource(pathSource);
  Moses::InputFileStream strmAlignment(pathAlignment);
  Moses::OutputFileStream extractFile(pathExtract);
  Moses::OutputFileStream extractInvFile(pathExtractInv);

  // MAIN LOOP
  int lineNum = 1;
  string lineTarget, lineSource, lineAlignment;
  while (getline(strmTarget, lineTarget)) {
    if (lineNum % 10000 == 0) {
      cerr << lineNum << " ";
    }

    // The three inputs must stay in lock-step. Test the stream directly:
    // assigning it to a bool does not compile since C++11, where the
    // conversion is explicit.
    if (!getline(strmSource, lineSource)) {
      cerr << "ERROR: Couldn't read source, line " << lineNum << endl;
      return EXIT_FAILURE;
    }
    if (!getline(strmAlignment, lineAlignment)) {
      cerr << "ERROR: Couldn't read alignment, line " << lineNum << endl;
      return EXIT_FAILURE;
    }

    /*
    cerr << "lineTarget=" << lineTarget << endl;
    cerr << "lineSource=" << lineSource << endl;
    cerr << "lineAlignment=" << lineAlignment << endl;
    */

    // syntax-aware sentence only when at least one side is a parse tree
    AlignedSentence *alignedSentence;
    if (params.sourceSyntax || params.targetSyntax) {
      alignedSentence = new AlignedSentenceSyntax(lineNum, lineSource, lineTarget, lineAlignment);
    } else {
      alignedSentence = new AlignedSentence(lineNum, lineSource, lineTarget, lineAlignment);
    }

    alignedSentence->Create(params);
    //cerr << alignedSentence->Debug();

    {
      // scoped so that rules is destroyed before the sentence it refers to
      Rules rules(*alignedSentence);
      rules.Extend(params);
      rules.Consolidate(params);
      //cerr << rules.Debug();

      rules.Output(extractFile, true, params);
      rules.Output(extractInvFile, false, params);
    }

    delete alignedSentence;

    ++lineNum;
  }

  if (!params.gluePath.empty()) {
    Moses::OutputFileStream glueFile(params.gluePath);
    CreateGlueGrammar(glueFile);
  }

  cerr << "Finished" << endl;
  return EXIT_SUCCESS;
}
// Writes the three fixed glue rules (sentence start, sentence end, and
// monotone concatenation) to glueFile, one rule per line.
void CreateGlueGrammar(Moses::OutputFileStream &glueFile)
{
  glueFile << "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" << endl;
  glueFile << "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" << endl;
  glueFile << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl;
}

View File

@ -0,0 +1,12 @@
/*
* Main.h
*
* Created on: 28 Feb 2014
* Author: hieu
*/
#pragma once
#include "OutputFileStream.h"
// Writes the fixed glue-grammar rules to glueFile (defined in Main.cpp).
void CreateGlueGrammar(Moses::OutputFileStream &glueFile);

View File

@ -1,13 +1,17 @@
all: extract
all: extract-mixed-syntax
clean:
rm -f *.o extract-mixed-syntax
.cpp.o:
g++ -O6 -g -c $<
g++ -O4 -g -c -I../../../boost/include -I../../../ $<
extract: tables-core.o extract.o SyntaxTree.o XmlTree.o Tunnel.o Lattice.o LatticeNode.o SentenceAlignment.o Global.o InputFileStream.o TunnelCollection.o RuleCollection.o Rule.o Symbol.o SymbolSequence.o Range.o OutputFileStream.o
OBJECTS = AlignedSentence.o ConsistentPhrase.o ConsistentPhrases.o InputFileStream.o \
Main.o OutputFileStream.o Parameter.o Phrase.o Rule.o Rules.o RuleSymbol.o \
SyntaxTree.o Word.o NonTerm.o RulePhrase.o AlignedSentenceSyntax.o pugixml.o
g++ tables-core.o extract.o SyntaxTree.o XmlTree.o Tunnel.o Lattice.o LatticeNode.o SentenceAlignment.o Global.o InputFileStream.o TunnelCollection.o RuleCollection.o Rule.o Symbol.o SymbolSequence.o Range.o OutputFileStream.o -lz -lboost_iostreams-mt -o extract-mixed-syntax
extract-mixed-syntax: $(OBJECTS)
g++ $(OBJECTS) -L../../../boost/lib64 -lz -lboost_iostreams-mt -lboost_program_options-mt -o extract-mixed-syntax

View File

@ -0,0 +1,65 @@
/*
* NonTerm.cpp
*
* Created on: 22 Feb 2014
* Author: hieu
*/
#include <sstream>
#include "NonTerm.h"
#include "Word.h"
#include "ConsistentPhrase.h"
#include "Parameter.h"
using namespace std;
// Creates a non-terminal with the given source/target labels. Only the
// address of consistentPhrase is kept, so that object must outlive this one.
NonTerm::NonTerm(const ConsistentPhrase &consistentPhrase,
const std::string &source,
const std::string &target)
:m_consistentPhrase(&consistentPhrase)
,m_source(source)
,m_target(target)
{
}
// Nothing to release: the referenced ConsistentPhrase is not deleted here.
NonTerm::~NonTerm() {
}
// Source label, target label and the owning phrase's Debug() string,
// concatenated without separators.
std::string NonTerm::Debug() const
{
  return m_source + m_target + m_consistentPhrase->Debug();
}
// Writes the source label immediately followed by the target label.
void NonTerm::Output(std::ostream &out) const
{
  out << m_source;
  out << m_target;
}
// Writes only the label of the requested side.
void NonTerm::Output(std::ostream &out, Moses::FactorDirection direction) const
{
  const std::string &label = GetLabel(direction);
  out << label;
}
// Source label for Moses::Input, target label for any other direction.
const std::string &NonTerm::GetLabel(Moses::FactorDirection direction) const
{
  if (direction == Moses::Input) {
    return m_source;
  }
  return m_target;
}
// True when the label on the given side equals params.hieroNonTerm.
bool NonTerm::IsHiero(Moses::FactorDirection direction, const Parameter &params) const
{
  return NonTerm::GetLabel(direction) == params.hieroNonTerm;
}
// True only when both the source and the target label are the Hiero label.
bool NonTerm::IsHiero(const Parameter &params) const
{
  if (!IsHiero(Moses::Input, params)) {
    return false;
  }
  return IsHiero(Moses::Output, params);
}
// Number of words the underlying consistent phrase covers on the given side.
int NonTerm::GetWidth(Moses::FactorDirection direction) const
{
  return m_consistentPhrase->GetWidth(direction);
}

View File

@ -0,0 +1,47 @@
/*
* NonTerm.h
*
* Created on: 22 Feb 2014
* Author: hieu
*/
#pragma once
#include <string>
#include "RuleSymbol.h"
#include "moses/TypeDef.h"
class ConsistentPhrase;
class Parameter;
// Non-terminal rule symbol: a (source label, target label) pair attached to
// the consistent phrase it stands for.
class NonTerm : public RuleSymbol
{
public:
// Keeps a pointer to consistentPhrase; does not take ownership.
NonTerm(const ConsistentPhrase &consistentPhrase,
const std::string &source,
const std::string &target);
virtual ~NonTerm();
const ConsistentPhrase &GetConsistentPhrase() const
{ return *m_consistentPhrase; }
// Width of the underlying phrase on the given side.
int GetWidth(Moses::FactorDirection direction) const;
virtual bool IsNonTerm() const
{ return true; }
// Source label followed directly by target label.
std::string GetString() const
{ return m_source + m_target; }
virtual std::string Debug() const;
// Writes both labels.
virtual void Output(std::ostream &out) const;
// Writes the label of one side only.
void Output(std::ostream &out, Moses::FactorDirection direction) const;
const std::string &GetLabel(Moses::FactorDirection direction) const;
// Is the label on the given side the Hiero label (params.hieroNonTerm)?
bool IsHiero(Moses::FactorDirection direction, const Parameter &params) const;
// Are both labels the Hiero label?
bool IsHiero(const Parameter &params) const;
protected:
const ConsistentPhrase *m_consistentPhrase;
std::string m_source, m_target;
};

View File

@ -0,0 +1,41 @@
/*
* Parameter.cpp
*
* Created on: 17 Feb 2014
* Author: hieu
*/
#include "Parameter.h"
// Default extraction settings. gluePath is left empty (no glue grammar is
// written unless a path is supplied).
Parameter::Parameter()
:maxSpan(10)
,maxNonTerm(2)
,maxHieroNonTerm(999)
,maxSymbolsTarget(999)
,maxSymbolsSource(5)
,minHoleSource(2)
,sentenceOffset(0)
,nonTermConsecSource(false)
,requireAlignedWord(true)
,fractionalCounting(true)
,gzOutput(false)
,hieroNonTerm("[X]")
,sourceSyntax(false)
,targetSyntax(false)
,mixedSyntaxType(0)
,multiLabel(0)
,nonTermConsecSourceMixed(true)
,hieroSourceLHS(false)
,maxSpanFreeNonTermSource(0)
,nieceTerminal(true)
// UNDEFINED is the largest int
,maxScope(UNDEFINED)
,spanLength(false)
,nonTermContext(false)
{}
// Nothing to release explicitly.
Parameter::~Parameter() {
}

View File

@ -0,0 +1,51 @@
/*
* Parameter.h
*
* Created on: 17 Feb 2014
* Author: hieu
*/
#pragma once
#include <string>
#include <limits>
#define UNDEFINED std::numeric_limits<int>::max()
// Settings of the rule extractor. Defaults are set in the constructor; the
// option descriptions quoted below are the command-line help strings in Main.cpp.
class Parameter
{
public:
Parameter();
virtual ~Parameter();
// --MaxSpan: max number of source words covered by a rule
int maxSpan;
// --MaxNonTerm: max non-terminals per rule
int maxNonTerm;
// --MaxHieroNonTerm: max Hiero non-terminals per rule
int maxHieroNonTerm;
int maxSymbolsTarget;
int maxSymbolsSource;
// --MinHoleSource: min source span of a non-terminal
int minHoleSource;
// --SentenceOffset: starting sentence id ("Not used" according to the help text)
long sentenceOffset;
bool nonTermConsecSource;
bool requireAlignedWord;
bool fractionalCounting;
// --GZOutput: compress the extract files
bool gzOutput;
// label used for Hiero non-terminals, "[X]" by default
std::string hieroNonTerm;
// --GlueGrammar: output path of the glue grammar; empty = none
std::string gluePath;
// --SourceSyntax / --TargetSyntax: that side is a parse tree
bool sourceSyntax, targetSyntax;
// --MixedSyntaxType (0, 1 or 2) and --MultiLabel (0, 1 or 2)
int mixedSyntaxType, multiLabel;
bool nonTermConsecSourceMixed;
// --HieroSourceLHS
bool hieroSourceLHS;
// --MaxSpanFreeNonTermSource: 0 = no limit
int maxSpanFreeNonTermSource;
// cleared by --NoNieceTerminal
bool nieceTerminal;
// --MaxScope; UNDEFINED (largest int) by default
int maxScope;
// properties
bool spanLength;
bool nonTermContext;
};

View File

@ -0,0 +1,14 @@
#include <sstream>
#include "Phrase.h"
std::string Phrase::Debug() const
{
std::stringstream out;
for (size_t i = 0; i < size(); ++i) {
Word &word = *at(i);
out << word.Debug() << " ";
}
return out.str();
}

View File

@ -0,0 +1,19 @@
#pragma once
#include <vector>
#include "Word.h"
// a vector of terminals
// A sequence of terminals, stored as raw Word pointers.
// NOTE(review): nothing in this class deletes the pointed-to words; ownership
// is presumably handled by whoever fills the phrase - confirm.
class Phrase : public std::vector<Word*>
{
public:
Phrase()
{}
// Creates a phrase of `size` null-initialised Word pointers.
Phrase(size_t size)
:std::vector<Word*>(size)
{}
std::string Debug() const;
};

View File

@ -1,74 +0,0 @@
/*
* Range.cpp
* extract
*
* Created by Hieu Hoang on 22/02/2011.
* Copyright 2011 __MyCompanyName__. All rights reserved.
*
*/
#include "Range.h"
using namespace std;
// Sets this range to the smallest range covering both a and b. An end point
// equal to NOT_FOUND is treated as "unset" and the other range's value is
// taken instead.
void Range::Merge(const Range &a, const Range &b)
{
  // start: the smaller of the two, ignoring an unset one
  m_startPos = (a.m_startPos == NOT_FOUND) ? b.m_startPos
               : (b.m_startPos == NOT_FOUND) ? a.m_startPos
               : min(a.m_startPos, b.m_startPos);

  // end: the larger of the two, ignoring an unset one
  m_endPos = (a.m_endPos == NOT_FOUND) ? b.m_endPos
             : (b.m_endPos == NOT_FOUND) ? a.m_endPos
             : max(a.m_endPos, b.m_endPos);
}
// Three-way comparison: orders by start position first, then by end position.
// Returns -1, 0 or +1.
int Range::Compare(const Range &other) const
{
  if (m_startPos != other.m_startPos) {
    return (m_startPos < other.m_startPos) ? -1 : +1;
  }
  if (m_endPos != other.m_endPos) {
    return (m_endPos < other.m_endPos) ? -1 : +1;
  }
  return 0;
}
// True when the two inclusive ranges share at least one position, i.e. the
// other range neither ends before this one starts nor starts after it ends.
bool Range::Overlap(const Range &other) const
{
  return other.m_endPos >= m_startPos && other.m_startPos <= m_endPos;
}
// Prints the range as "[start-end]".
std::ostream& operator<<(std::ostream &out, const Range &range)
{
  return out << "[" << range.m_startPos << "-" << range.m_endPos << "]";
}

View File

@ -1,57 +0,0 @@
/*
* Range.h
* extract
*
* Created by Hieu Hoang on 22/02/2011.
* Copyright 2011 __MyCompanyName__. All rights reserved.
*
*/
#pragma once
#include <string>
#include <iostream>
#include <limits>
// Sentinel position meaning "not set".
#define NOT_FOUND std::numeric_limits<size_t>::max()

// Inclusive [start, end] span of positions. Either bound may be NOT_FOUND.
class Range
{
  friend std::ostream& operator<<(std::ostream&, const Range&);

  size_t m_startPos, m_endPos;

public:
  // Unset range: both bounds are NOT_FOUND.
  Range() : m_startPos(NOT_FOUND), m_endPos(NOT_FOUND) {}

  Range(const Range &copy) : m_startPos(copy.m_startPos), m_endPos(copy.m_endPos) {}

  Range(size_t startPos, size_t endPos) : m_startPos(startPos), m_endPos(endPos) {}

  // accessors
  size_t GetStartPos() const { return m_startPos; }
  size_t GetEndPos() const { return m_endPos; }

  // Number of positions covered; both bounds are counted.
  size_t GetWidth() const { return m_endPos - m_startPos + 1; }

  // mutators
  void SetStartPos(size_t startPos) { m_startPos = startPos; }
  void SetEndPos(size_t endPos) { m_endPos = endPos; }

  // Set this range to the union of a and b.
  void Merge(const Range &a, const Range &b);

  // Three-way ordering by start, then end.
  int Compare(const Range &other) const;

  // True if the two ranges share at least one position.
  bool Overlap(const Range &other) const;
};

File diff suppressed because it is too large Load Diff

View File

@ -1,96 +1,87 @@
#pragma once
/*
* Rule.h
* extract
*
* Created by Hieu Hoang on 19/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
* Rule.h
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#pragma once
#include <vector>
#include <iostream>
#include "LatticeNode.h"
#include "SymbolSequence.h"
#include "Global.h"
#include "Phrase.h"
#include "RulePhrase.h"
#include "moses/TypeDef.h"
class Lattice;
class SentenceAlignment;
class Global;
class RuleCollection;
class SyntaxNode;
class TunnelCollection;
class Range;
class ConsistentPhrase;
class AlignedSentence;
class NonTerm;
class Parameter;
class RuleElement
{
protected:
const LatticeNode *m_latticeNode;
class Rule {
public:
std::pair<size_t, size_t> m_alignmentPos;
RuleElement(const RuleElement &copy);
RuleElement(const LatticeNode &latticeNode)
:m_latticeNode(&latticeNode)
,m_alignmentPos(NOT_FOUND, NOT_FOUND)
{}
typedef std::set<std::pair<int,int> > Alignments;
const LatticeNode &GetLatticeNode() const
{ return *m_latticeNode; }
Rule(const Rule &copy); // do not implement
};
// original rule with no non-term
Rule(const NonTerm &lhsNonTerm, const AlignedSentence &alignedSentence);
class Rule
{
protected:
typedef std::vector<RuleElement> CollType;
CollType m_coll;
const LatticeNode *m_lhs;
SymbolSequence m_source, m_target;
bool IsHole(const TunnelCollection &tunnelColl) const;
bool NonTermOverlap() const;
const LatticeNode &GetLatticeNode(size_t ind) const;
void CreateSymbols(const Global &global, bool &isValid, const SentenceAlignment &sentence);
public:
// init
Rule(const LatticeNode *latticeNode);
// create new rule by appending node to prev rule
Rule(const Rule &prevRule, const LatticeNode *latticeNode);
// create copy with lhs
Rule(const Global &global, bool &isValid, const Rule &copy, const LatticeNode *lhs, const SentenceAlignment &sentence);
// can continue to add to this rule
bool CanRecurse(const Global &global, const TunnelCollection &tunnelColl) const;
// extend a rule, adding 1 new non-term
Rule(const Rule &copy, const NonTerm &nonTerm);
virtual ~Rule();
// can add this to the set of rules
bool IsValid(const Global &global, const TunnelCollection &tunnelColl) const;
bool IsValid() const
{ return m_isValid; }
size_t GetNumSymbols() const;
bool AdjacentDefaultNonTerms() const;
bool MaxNonTerm(const Global &global) const;
bool MoreDefaultNonTermThanTerm() const;
bool SourceHasEdgeDefaultNonTerm() const;
bool CanRecurse() const
{ return m_canRecurse; }
void CreateRules(RuleCollection &rules
, const Lattice &lattice
, const SentenceAlignment &sentence
, const Global &global);
int Compare(const Rule &compare) const;
bool operator<(const Rule &compare) const;
Range GetSourceRange() const;
DEBUG_OUTPUT();
const NonTerm &GetLHS() const
{ return m_lhs; }
void Output(std::ostream &out) const;
void OutputInv(std::ostream &out) const;
const ConsistentPhrase &GetConsistentPhrase() const;
int GetNextSourcePosForNonTerm() const;
void SetCount(float count)
{ m_count = count; }
float GetCount() const
{ return m_count; }
const Alignments &GetAlignments() const
{ return m_alignments; }
std::string Debug() const;
void Output(std::ostream &out, bool forward, const Parameter &params) const;
void Prevalidate(const Parameter &params);
void CreateTarget(const Parameter &params);
const RulePhrase &GetPhrase(Moses::FactorDirection direction) const
{ return (direction == Moses::Input) ? m_source : m_target; }
protected:
const NonTerm &m_lhs;
const AlignedSentence &m_alignedSentence;
RulePhrase m_source, m_target;
float m_count;
Alignments m_alignments;
// in source order
std::vector<const NonTerm*> m_nonterms;
bool m_isValid, m_canRecurse;
void CreateSource();
void CreateAlignments();
void CreateAlignments(int sourcePos, const std::set<const Word *> &targetWords);
void CreateAlignments(int sourcePos, const RuleSymbol *targetSought);
bool ContainTerm(const ConsistentPhrase &cp, const std::set<const Word*> &terms) const;
int CalcScope() const; // not yet correctly calculated
void NonTermContext(size_t ntInd, const ConsistentPhrase &cp, std::ostream &out) const;
};

View File

@ -1,102 +0,0 @@
/*
* RuleCollection.cpp
* extract
*
* Created by Hieu Hoang on 19/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include "RuleCollection.h"
#include "Rule.h"
#include "SentenceAlignment.h"
#include "tables-core.h"
#include "Lattice.h"
#include "SyntaxTree.h"
using namespace std;
// Hands every stored rule to RemoveAllInColl for disposal.
// NOTE(review): RemoveAllInColl is defined elsewhere; presumably it deletes
// each element and empties the container - confirm.
RuleCollection::~RuleCollection()
{
  RemoveAllInColl(m_coll);
}
void RuleCollection::Add(const Global &global, Rule *rule, const SentenceAlignment &sentence)
{
Range spanS = rule->GetSourceRange();
// cartesian product of lhs
Stack nontermNodes = sentence.GetLattice().GetNonTermNode(spanS);
Stack::const_iterator iterStack;
for (iterStack = nontermNodes.begin(); iterStack != nontermNodes.end(); ++iterStack)
{
const LatticeNode &node = **iterStack;
assert(!node.IsTerminal());
bool isValid;
// create rules with LHS
//cerr << "old:" << *rule << endl;
Rule *newRule = new Rule(global, isValid, *rule, &node, sentence);
if (!isValid)
{ // lhs doesn't match non-term spans
delete newRule;
continue;
}
/*
stringstream s;
s << *newRule;
if (s.str().find("Wiederaufnahme der [X] ||| resumption of the [X] ||| ||| 1") == 0)
{
cerr << "READY:" << *newRule << endl;
g_debug = true;
}
else {
g_debug = false;
}
*/
typedef set<const Rule*, CompareRule>::iterator Iterator;
pair<Iterator,bool> ret = m_coll.insert(newRule);
if (ret.second)
{
//cerr << "ACCEPTED:" << *newRule << endl;
//cerr << "";
}
else
{
//cerr << "REJECTED:" << *newRule << endl;
delete newRule;
}
}
delete rule;
}
void RuleCollection::Output(std::ostream &out) const
{
RuleCollection::CollType::const_iterator iter;
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter)
{
const Rule &rule = **iter;
rule.Output(out);
out << endl;
}
}
void RuleCollection::OutputInv(std::ostream &out) const
{
RuleCollection::CollType::const_iterator iter;
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter)
{
const Rule &rule = **iter;
rule.OutputInv(out);
out << endl;
}
}

View File

@ -1,55 +0,0 @@
#pragma once
/*
* RuleCollection.h
* extract
*
* Created by Hieu Hoang on 19/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <set>
#include <iostream>
#include "Rule.h"
class SentenceAlignment;
// Ordering functor for sets of Rule pointers: compares the pointed-to rules
// with Rule::operator< instead of comparing addresses.
// NOTE(review): the original comment here said "Don't compare default
// non-terminals"; that behaviour would live in Rule::operator<, which is not
// visible in this header - confirm there.
struct CompareRule
{
  // Must be const: ordered containers invoke their comparator through a
  // const object, and newer standard libraries reject a non-const operator().
  bool operator() (const Rule *a, const Rule *b) const
  {
    return (*a) < (*b);
  }
};
// A set of rules kept unique (and sorted) according to CompareRule.
// Rules are handed over as raw pointers through Add().
class RuleCollection
{
protected:
  typedef std::set<const Rule*, CompareRule> CollType;
  CollType m_coll;

public:
  ~RuleCollection();

  // Expands `rule` over all matching left-hand sides and stores the results;
  // `rule` itself is consumed.
  void Add(const Global &global, Rule *rule, const SentenceAlignment &sentence);

  // Number of distinct rules currently stored.
  size_t GetSize() const { return m_coll.size(); }

  // Print all rules, one per line, in normal / inverse form.
  void Output(std::ostream &out) const;
  void OutputInv(std::ostream &out) const;
};

View File

@ -0,0 +1,50 @@
/*
* RulePhrase.cpp
*
* Created on: 26 Feb 2014
* Author: hieu
*/
#include <sstream>
#include "RulePhrase.h"
#include "RuleSymbol.h"
using namespace std;
extern bool g_debug;
int RulePhrase::Compare(const RulePhrase &other) const
{
if (GetSize() != other.GetSize()) {
return GetSize() < other.GetSize() ? -1 : +1;
}
for (size_t i = 0; i < m_coll.size(); ++i) {
const RuleSymbol &symbol = *m_coll[i];
const RuleSymbol &otherSymbol = *other.m_coll[i];
int compare = symbol.Compare(otherSymbol);
if (compare) {
return compare;
}
}
return 0;
}
void RulePhrase::Output(std::ostream &out) const
{
for (size_t i = 0; i < m_coll.size(); ++i) {
const RuleSymbol &symbol = *m_coll[i];
symbol.Output(out);
out << " ";
}
}
// Returns exactly what Output() would print, captured as a string.
std::string RulePhrase::Debug() const
{
  std::ostringstream captured;
  Output(captured);
  return captured.str();
}

View File

@ -0,0 +1,49 @@
/*
* RulePhrase.h
*
* Created on: 26 Feb 2014
* Author: hieu
*/
#ifndef RULEPHRASE_H_
#define RULEPHRASE_H_
#include <vector>
#include <cstddef>
#include <iostream>
class RuleSymbol;

// One side of a rule: an ordered list of pointers to its symbols
// (terminals and non-terminals). The symbols are only referenced here.
class RulePhrase
{
public:
  typedef std::vector<const RuleSymbol*> Coll;
  Coll m_coll;

  // Number of symbols.
  size_t GetSize() const { return m_coll.size(); }

  // Appends a symbol at the end.
  void Add(const RuleSymbol *symbol) { m_coll.insert(m_coll.end(), symbol); }

  // Unchecked positional access.
  const RuleSymbol* operator[](size_t index) const { return m_coll[index]; }

  // First / last symbol; the phrase must not be empty.
  const RuleSymbol* Front() const { return m_coll.front(); }
  const RuleSymbol* Back() const { return m_coll.back(); }

  // Three-way comparison: by length, then symbol by symbol.
  int Compare(const RulePhrase &other) const;

  // Print the symbols separated (and followed) by a space.
  void Output(std::ostream &out) const;

  // Same text as Output(), returned as a string.
  std::string Debug() const;
};
#endif /* RULEPHRASE_H_ */

View File

@ -0,0 +1,36 @@
/*
* RuleSymbol.cpp
*
* Created on: 21 Feb 2014
* Author: hieu
*/
#include "RuleSymbol.h"
using namespace std;
// The base class has no data members, so there is nothing to set up.
RuleSymbol::RuleSymbol()
{
}
// The base class has no data members, so there is nothing to tear down.
RuleSymbol::~RuleSymbol()
{
}
int RuleSymbol::Compare(const RuleSymbol &other) const
{
if (IsNonTerm() != other.IsNonTerm()) {
return IsNonTerm() ? -1 : +1;
}
string str = GetString();
string otherStr = other.GetString();
if (str == otherStr) {
return 0;
}
else {
return (str < otherStr) ? -1 : +1;
}
}

View File

@ -0,0 +1,31 @@
/*
* RuleSymbol.h
*
* Created on: 21 Feb 2014
* Author: hieu
*/
#ifndef RULESYMBOL_H_
#define RULESYMBOL_H_
#include <iostream>
#include <string>
// Abstract base for anything that can appear in a rule phrase:
// either a terminal or a non-terminal.
class RuleSymbol
{
public:
  RuleSymbol();
  virtual ~RuleSymbol();

  // True for non-terminals, false for terminals.
  virtual bool IsNonTerm() const = 0;

  // Diagnostic text for this symbol.
  virtual std::string Debug() const = 0;

  // Writes the symbol to `out`.
  virtual void Output(std::ostream &out) const = 0;

  // Text used by Compare() to order symbols of the same kind.
  virtual std::string GetString() const = 0;

  // Three-way comparison: kind first (non-terminals before terminals),
  // then GetString().
  int Compare(const RuleSymbol &other) const;
};
#endif /* RULESYMBOL_H_ */

View File

@ -0,0 +1,227 @@
/*
* Rules.cpp
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#include <sstream>
#include "Rules.h"
#include "ConsistentPhrase.h"
#include "ConsistentPhrases.h"
#include "AlignedSentence.h"
#include "Rule.h"
#include "Parameter.h"
#include "moses/Util.h"
using namespace std;
extern bool g_debug;
// Binds the rule set to the sentence it will extract from. Only a reference
// is kept, so the sentence must outlive this object.
Rules::Rules(const AlignedSentence &alignedSentence)
  : m_alignedSentence(alignedSentence)
{}
// Disposes of the rules held in m_keepRules. m_mergeRules needs no separate
// clean-up: MergeRules() only inserts pointers taken from m_keepRules.
// NOTE(review): Moses::RemoveAllInColl is defined elsewhere; presumably it
// deletes every element - confirm.
Rules::~Rules()
{
  Moses::RemoveAllInColl(m_keepRules);
}
void Rules::CreateRules(const ConsistentPhrase &cp,
const Parameter &params)
{
if (params.hieroSourceLHS) {
const NonTerm &nonTerm = cp.GetHieroNonTerm();
CreateRule(nonTerm, params);
}
else {
const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
for (size_t i = 0; i < nonTerms.size(); ++i) {
const NonTerm &nonTerm = nonTerms[i];
CreateRule(nonTerm, params);
}
}
}
void Rules::CreateRule(const NonTerm &nonTerm,
const Parameter &params)
{
Rule *rule = new Rule(nonTerm, m_alignedSentence);
rule->Prevalidate(params);
rule->CreateTarget(params);
if (rule->CanRecurse()) {
Extend(*rule, params);
}
if (rule->IsValid()) {
m_keepRules.insert(rule);
}
else {
delete rule;
}
}
void Rules::Extend(const Parameter &params)
{
const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
size_t size = m_alignedSentence.GetPhrase(Moses::Input).size();
for (size_t sourceStart = 0; sourceStart < size; ++sourceStart) {
for (size_t sourceEnd = sourceStart; sourceEnd < size; ++sourceEnd) {
const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
ConsistentPhrases::Coll::const_iterator iter;
for (iter = cps.begin(); iter != cps.end(); ++iter) {
const ConsistentPhrase &cp = **iter;
CreateRules(cp, params);
}
}
}
}
void Rules::Extend(const Rule &rule, const Parameter &params)
{
const ConsistentPhrases &allCPS = m_alignedSentence.GetConsistentPhrases();
int sourceMin = rule.GetNextSourcePosForNonTerm();
int ruleStart = rule.GetConsistentPhrase().corners[0];
int ruleEnd = rule.GetConsistentPhrase().corners[1];
for (int sourceStart = sourceMin; sourceStart <= ruleEnd; ++sourceStart) {
for (int sourceEnd = sourceStart; sourceEnd <= ruleEnd; ++sourceEnd) {
if (sourceStart == ruleStart && sourceEnd == ruleEnd) {
// don't cover whole rule with 1 non-term
continue;
}
const ConsistentPhrases::Coll &cps = allCPS.GetColl(sourceStart, sourceEnd);
Extend(rule, cps, params);
}
}
}
// Extends `rule` with every consistent phrase in `cps`, in iteration order.
void Rules::Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter &params)
{
  for (ConsistentPhrases::Coll::const_iterator it = cps.begin(); it != cps.end(); ++it) {
    const ConsistentPhrase &phrase = **it;
    Extend(rule, phrase, params);
  }
}
void Rules::Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter &params)
{
const ConsistentPhrase::NonTerms &nonTerms = cp.GetNonTerms();
for (size_t i = 0; i < nonTerms.size(); ++i) {
const NonTerm &nonTerm = nonTerms[i];
Rule *newRule = new Rule(rule, nonTerm);
newRule->Prevalidate(params);
newRule->CreateTarget(params);
if (newRule->CanRecurse()) {
// recursively extend
Extend(*newRule, params);
}
if (newRule->IsValid()) {
m_keepRules.insert(newRule);
}
else {
delete newRule;
}
}
}
std::string Rules::Debug() const
{
stringstream out;
std::set<Rule*>::const_iterator iter;
out << "m_keepRules:" << endl;
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
const Rule &rule = **iter;
out << rule.Debug() << endl;
}
return out.str();
}
void Rules::Output(std::ostream &out, bool forward, const Parameter &params) const
{
std::set<Rule*, CompareRules>::const_iterator iter;
for (iter = m_mergeRules.begin(); iter != m_mergeRules.end(); ++iter) {
const Rule &rule = **iter;
rule.Output(out, forward, params);
out << endl;
}
}
void Rules::Consolidate(const Parameter &params)
{
if (params.fractionalCounting) {
CalcFractionalCount();
}
else {
std::set<Rule*>::iterator iter;
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
Rule &rule = **iter;
rule.SetCount(1);
}
}
MergeRules(params);
}
void Rules::MergeRules(const Parameter &params)
{
typedef std::set<Rule*, CompareRules> MergeRules;
std::set<Rule*>::const_iterator iterOrig;
for (iterOrig = m_keepRules.begin(); iterOrig != m_keepRules.end(); ++iterOrig) {
Rule *origRule = *iterOrig;
pair<MergeRules::iterator, bool> inserted = m_mergeRules.insert(origRule);
if (!inserted.second) {
// already there, just add count
Rule &rule = **inserted.first;
float newCount = rule.GetCount() + origRule->GetCount();
rule.SetCount(newCount);
}
}
}
void Rules::CalcFractionalCount()
{
typedef std::set<Rule*> RuleColl;
typedef std::map<const ConsistentPhrase*, RuleColl> RuleByConsistentPhrase;
RuleByConsistentPhrase allRules;
// sort by source AND target ranges
std::set<Rule*>::const_iterator iter;
for (iter = m_keepRules.begin(); iter != m_keepRules.end(); ++iter) {
Rule *rule = *iter;
const ConsistentPhrase &cp = rule->GetConsistentPhrase();
RuleColl &ruleColl = allRules[&cp];
ruleColl.insert(rule);
}
// fractional count
RuleByConsistentPhrase::iterator iterOuter;
for (iterOuter = allRules.begin(); iterOuter != allRules.end(); ++iterOuter) {
RuleColl &rules = iterOuter->second;
RuleColl::iterator iterInner;
for (iterInner = rules.begin(); iterInner != rules.end(); ++iterInner) {
Rule &rule = **iterInner;
rule.SetCount(1.0f / (float) rules.size());
}
}
}

View File

@ -0,0 +1,72 @@
/*
* Rules.h
*
* Created on: 20 Feb 2014
* Author: hieu
*/
#pragma once
#include <set>
#include <iostream>
#include "ConsistentPhrases.h"
#include "Rule.h"
extern bool g_debug;
class AlignedSentence;
class Parameter;
// Strict weak ordering used to merge equal rules. Two rules are equivalent
// when their source phrase, target phrase, alignments and left-hand-side
// string all match; the keys are compared in that order.
struct CompareRules {
  // Must be const: ordered containers invoke their comparator through a
  // const object, and newer standard libraries reject a non-const operator().
  bool operator()(const Rule *a, const Rule *b) const
  {
    int compare;

    // 1. source side
    compare = a->GetPhrase(Moses::Input).Compare(b->GetPhrase(Moses::Input));
    if (compare) return compare < 0;

    // 2. target side
    compare = a->GetPhrase(Moses::Output).Compare(b->GetPhrase(Moses::Output));
    if (compare) return compare < 0;

    // 3. word alignments
    if (a->GetAlignments() != b->GetAlignments()) {
      return a->GetAlignments() < b->GetAlignments();
    }

    // 4. left-hand side; equal strings yield false, i.e. "equivalent"
    return a->GetLHS().GetString() < b->GetLHS().GetString();
  }
};
// All rules extracted from one aligned sentence.
// Typical use, as far as this file shows: Extend(params) to create the
// rules, Consolidate(params) to count and merge them, Output(...) to print.
class Rules {
public:
  Rules(const AlignedSentence &alignedSentence);
  virtual ~Rules();

  // Create every rule for the sentence.
  void Extend(const Parameter &params);

  // Assign counts, then merge equal rules into m_mergeRules.
  void Consolidate(const Parameter &params);

  // Diagnostic listing of m_keepRules.
  std::string Debug() const;

  // Print the merged rules, one per line.
  void Output(std::ostream &out, bool forward, const Parameter &params) const;

protected:
  const AlignedSentence &m_alignedSentence;

  // every valid rule created, ordered by address
  std::set<Rule*> m_keepRules;
  // one representative per group of equal rules; shares pointers with m_keepRules
  std::set<Rule*, CompareRules> m_mergeRules;

  // add one further non-terminal to an existing rule
  void Extend(const Rule &rule, const Parameter &params);
  void Extend(const Rule &rule, const ConsistentPhrases::Coll &cps, const Parameter &params);
  void Extend(const Rule &rule, const ConsistentPhrase &cp, const Parameter &params);

  // create original rules
  void CreateRules(const ConsistentPhrase &cp,
                   const Parameter &params);
  void CreateRule(const NonTerm &nonTerm,
                  const Parameter &params);

  void MergeRules(const Parameter &params);
  void CalcFractionalCount();
};

View File

@ -1,331 +0,0 @@
/*
* SentenceAlignment.cpp
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <set>
#include <map>
#include <sstream>
#include "SentenceAlignment.h"
#include "XmlTree.h"
#include "tables-core.h"
#include "TunnelCollection.h"
#include "Lattice.h"
#include "LatticeNode.h"
using namespace std;
extern std::set< std::string > targetLabelCollection, sourceLabelCollection;
extern std::map< std::string, int > targetTopLabelCollection, sourceTopLabelCollection;
// Starts with neither tunnels nor lattice; FindTunnels() and CreateLattice()
// allocate them later.
SentenceAlignment::SentenceAlignment()
  : m_tunnelCollection(NULL)
  , m_lattice(NULL)
{
}
// Frees the lattice and tunnel collection, if they were ever created
// (deleting a null pointer is a no-op).
SentenceAlignment::~SentenceAlignment()
{
  delete m_tunnelCollection;
  delete m_lattice;
}
int SentenceAlignment::Create( const std::string &targetString, const std::string &sourceString, const std::string &alignmentString, int sentenceID, const Global &global )
{
// tokenizing English (and potentially extract syntax spans)
if (global.targetSyntax) {
string targetStringCPP = string(targetString);
ProcessAndStripXMLTags( targetStringCPP, targetTree, targetLabelCollection , targetTopLabelCollection );
target = tokenize( targetStringCPP.c_str() );
// cerr << "E: " << targetStringCPP << endl;
}
else {
target = tokenize( targetString.c_str() );
}
// tokenizing source (and potentially extract syntax spans)
if (global.sourceSyntax) {
string sourceStringCPP = string(sourceString);
ProcessAndStripXMLTags( sourceStringCPP, sourceTree, sourceLabelCollection , sourceTopLabelCollection );
source = tokenize( sourceStringCPP.c_str() );
// cerr << "F: " << sourceStringCPP << endl;
}
else {
source = tokenize( sourceString.c_str() );
}
// check if sentences are empty
if (target.size() == 0 || source.size() == 0) {
cerr << "no target (" << target.size() << ") or source (" << source.size() << ") words << end insentence " << sentenceID << endl;
cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
return 0;
}
// prepare data structures for alignments
for(int i=0; i<source.size(); i++) {
alignedCountS.push_back( 0 );
}
for(int i=0; i<target.size(); i++) {
vector< int > dummy;
alignedToT.push_back( dummy );
}
//InitTightest(m_s2tTightest, source.size());
//InitTightest(m_t2sTightest, target.size());
// reading in alignments
vector<string> alignmentSequence = tokenize( alignmentString.c_str() );
for(int i=0; i<alignmentSequence.size(); i++) {
int s,t;
// cout << "scaning " << alignmentSequence[i].c_str() << endl;
if (! sscanf(alignmentSequence[i].c_str(), "%d-%d", &s, &t)) {
cerr << "WARNING: " << alignmentSequence[i] << " is a bad alignment point in sentence " << sentenceID << endl;
cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
return 0;
}
// cout << "alignmentSequence[i] " << alignmentSequence[i] << " is " << s << ", " << t << endl;
if (t >= target.size() || s >= source.size()) {
cerr << "WARNING: sentence " << sentenceID << " has alignment point (" << s << ", " << t << ") out of bounds (" << source.size() << ", " << target.size() << ")\n";
cerr << "T: " << targetString << endl << "S: " << sourceString << endl;
return 0;
}
alignedToT[t].push_back( s );
alignedCountS[s]++;
//SetAlignment(s, t);
}
bool mixed = global.mixed;
sourceTree.AddDefaultNonTerms(global.sourceSyntax, mixed, source.size());
targetTree.AddDefaultNonTerms(global.targetSyntax, mixed, target.size());
//CalcTightestSpan(m_s2tTightest);
//CalcTightestSpan(m_t2sTightest);
return 1;
}
/*
void SentenceAlignment::InitTightest(Outer &tightest, size_t len)
{
tightest.resize(len);
for (size_t posOuter = 0; posOuter < len; ++posOuter)
{
Inner &inner = tightest[posOuter];
size_t innerSize = len - posOuter;
inner.resize(innerSize);
}
}
void SentenceAlignment::CalcTightestSpan(Outer &tightest)
{
size_t len = tightest.size();
for (size_t startPos = 0; startPos < len; ++startPos)
{
for (size_t endPos = startPos + 1; endPos < len; ++endPos)
{
const Range &prevRange = GetTightest(tightest, startPos, endPos - 1);
const Range &smallRange = GetTightest(tightest, endPos, endPos);
Range &newRange = GetTightest(tightest, startPos, endPos);
newRange.Merge(prevRange, smallRange);
//cerr << "[" << startPos << "-" << endPos << "] --> [" << newRange.GetStartPos() << "-" << newRange.GetEndPos() << "]";
}
}
}
Range &SentenceAlignment::GetTightest(Outer &tightest, size_t startPos, size_t endPos)
{
assert(endPos < tightest.size());
assert(endPos >= startPos);
Inner &inner = tightest[startPos];
size_t ind = endPos - startPos;
Range &ret = inner[ind];
return ret;
}
void SentenceAlignment::SetAlignment(size_t source, size_t target)
{
SetAlignment(m_s2tTightest, source, target);
SetAlignment(m_t2sTightest, target, source);
}
void SentenceAlignment::SetAlignment(Outer &tightest, size_t thisPos, size_t thatPos)
{
Range &range = GetTightest(tightest, thisPos, thisPos);
if (range.GetStartPos() == NOT_FOUND)
{ // not yet set, do them both
assert(range.GetEndPos() == NOT_FOUND);
range.SetStartPos(thatPos);
range.SetEndPos(thatPos);
}
else
{
assert(range.GetEndPos() != NOT_FOUND);
range.SetStartPos( (range.GetStartPos() > thatPos) ? thatPos : range.GetStartPos() );
range.SetEndPos( (range.GetEndPos() < thatPos) ? thatPos : range.GetEndPos() );
}
}
*/
// Enumerates the source/target span pairs that are consistent with the word
// alignment and records each one in a freshly allocated TunnelCollection.
//
// For every target span of at most maxSpan words the smallest source span
// containing all of its alignment points is computed. The pair is discarded
// if the target span is unaligned, if the source span is too long, or if a
// word inside the source span is also aligned to a target word outside the
// target span. Surviving pairs are then widened on the source side over
// neighbouring unaligned words, and every variant is added.
//
// NOTE(review): m_tunnelCollection is overwritten without freeing an earlier
// one, so a second call on the same object would leak the first collection.
void SentenceAlignment::FindTunnels(const Global &global )
{
int countT = target.size();
int countS = source.size();
// the larger of the two configured hole-span limits bounds both sides below
int maxSpan = max(global.maxHoleSpanSourceDefault, global.maxHoleSpanSourceSyntax);
m_tunnelCollection = new TunnelCollection(countS);
// hand the per-word alignment counts over to the collection
m_tunnelCollection->alignedCountS = alignedCountS;
m_tunnelCollection->alignedCountT.resize(alignedToT.size());
for (size_t ind = 0; ind < alignedToT.size(); ind++)
{
// number of source words aligned to target word ind
m_tunnelCollection->alignedCountT[ind] = alignedToT[ind].size();
}
// phrase repository for creating hiero phrases
// check alignments for target phrase startT...endT
for(int lengthT=1;
lengthT <= maxSpan && lengthT <= countT;
lengthT++) {
for(int startT=0; startT < countT-(lengthT-1); startT++) {
// that's nice to have
int endT = startT + lengthT - 1;
// if there is target side syntax, there has to be a node
if (global.targetSyntax && !targetTree.HasNode(startT,endT))
continue;
// find aligned source words
// first: find minimum and maximum source word
// NOTE(review): 9999 acts as "larger than any source index"; this assumes
// sentences of fewer than 9999 words
int minS = 9999;
int maxS = -1;
// usedS starts as the total alignment count of each source word; every
// alignment point inside the target span is subtracted, so a value that
// stays positive means the word is also aligned outside the span
vector< int > usedS = alignedCountS;
for(int ti=startT;ti<=endT;ti++) {
for(int i=0;i<alignedToT[ti].size();i++) {
int si = alignedToT[ti][i];
// cerr << "point (" << si << ", " << ti << ")\n";
if (si<minS) { minS = si; }
if (si>maxS) { maxS = si; }
usedS[ si ]--;
}
}
// unaligned phrases are not allowed
if( maxS == -1 )
continue;
// source phrase has to be within limits
if( maxS-minS >= maxSpan )
{
continue;
}
// check if source words are aligned to out of bound target words
bool out_of_bounds = false;
for(int si=minS;si<=maxS && !out_of_bounds;si++)
{
if (usedS[si]>0) {
out_of_bounds = true;
}
}
// if out of bound, you gotta go
if (out_of_bounds)
continue;
// NOTE(review): the first argument (1 here, 0 below) presumably selects the
// target vs. source side inside NumUnalignedWord - confirm
if (m_tunnelCollection->NumUnalignedWord(1, startT, endT) >= global.maxUnaligned)
continue;
// done with all the checks, lets go over all consistent phrase pairs
// start point of source phrase may retreat over unaligned
for(int startS=minS;
(startS>=0 &&
startS>maxS - maxSpan && // within length limit
(startS==minS || alignedCountS[startS]==0)); // unaligned
startS--)
{
// end point of source phrase may advance over unaligned
for(int endS=maxS;
(endS<countS && endS<startS + maxSpan && // within length limit
(endS==maxS || alignedCountS[endS]==0)); // unaligned
endS++)
{
// skip this widened source span if it holds too many unaligned words
if (m_tunnelCollection->NumUnalignedWord(0, startS, endS) >= global.maxUnaligned)
continue;
// take note that this is a valid phrase alignment
m_tunnelCollection->Add(startS, endS, startT, endT);
}
}
}
}
//cerr << *tunnelCollection << endl;
}
void SentenceAlignment::CreateLattice(const Global &global)
{
size_t countS = source.size();
m_lattice = new Lattice(countS);
for (size_t startPos = 0; startPos < countS; ++startPos)
{
//cerr << "creating arcs for " << startPos << "=";
m_lattice->CreateArcs(startPos, *m_tunnelCollection, *this, global);
//cerr << LatticeNode::s_count << endl;
}
}
void SentenceAlignment::CreateRules(const Global &global)
{
size_t countS = source.size();
for (size_t startPos = 0; startPos < countS; ++startPos)
{
//cerr << "creating rules for " << startPos << "\n";
m_lattice->CreateRules(startPos, *this, global);
}
}
// Writes every token followed by one space (so a non-empty sentence ends
// with a trailing space; an empty one writes nothing).
void OutputSentenceStr(std::ostream &out, const std::vector<std::string> &vec)
{
  typedef std::vector<std::string>::const_iterator TokenIter;
  for (TokenIter token = vec.begin(); token != vec.end(); ++token) {
    out << *token << " ";
  }
}
// Prints "<target words> ==> <source words>" on one line, followed by the
// tunnel collection and, after a line break, the lattice. Each of the two is
// printed only if it has been created: both pointers are NULL until
// FindTunnels() / CreateLattice() have run, so printing a freshly built
// object must not dereference them.
std::ostream& operator<<(std::ostream &out, const SentenceAlignment &obj)
{
  OutputSentenceStr(out, obj.target);
  out << " ==> ";
  OutputSentenceStr(out, obj.source);
  out << endl;

  if (obj.m_tunnelCollection)
    out << *obj.m_tunnelCollection;

  if (obj.m_lattice)
    out << endl << *obj.m_lattice;

  return out;
}

View File

@ -1,69 +0,0 @@
#pragma once
/*
* SentenceAlignment.h
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <vector>
#include <cassert>
#include <iostream>
#include "SyntaxTree.h"
#include "Global.h"
#include "Range.h"
class TunnelCollection;
class Lattice;
// One word-aligned sentence pair plus the structures derived from it.
// Create() fills the public data; FindTunnels(), CreateLattice() and
// CreateRules() build on it, each depending on the previous step.
class SentenceAlignment
{
  friend std::ostream& operator<<(std::ostream&, const SentenceAlignment&);

public:
  // tokens of each side
  std::vector<std::string> target;
  std::vector<std::string> source;

  // alignedCountS[s]: number of alignment points touching source word s
  std::vector<int> alignedCountS;
  // alignedToT[t]: source indices aligned to target word t
  std::vector< std::vector<int> > alignedToT;

  SyntaxTree sourceTree, targetTree;

  SentenceAlignment();
  ~SentenceAlignment();

  // Parse one sentence pair; returns 1 on success and 0 on bad input.
  int Create(const std::string &targetString, const std::string &sourceString, const std::string &alignmentString, int sentenceID, const Global &global);

  void FindTunnels( const Global &global ) ;
  void CreateLattice(const Global &global);
  void CreateRules(const Global &global);

  // Only valid after FindTunnels().
  const TunnelCollection &GetTunnelCollection() const
  {
    assert(m_tunnelCollection);
    return *m_tunnelCollection;
  }

  // Only valid after CreateLattice().
  const Lattice &GetLattice() const
  {
    assert(m_lattice);
    return *m_lattice;
  }

protected:
  // both are allocated lazily and released in the destructor
  TunnelCollection *m_tunnelCollection;
  Lattice *m_lattice;
};

View File

@ -1,101 +0,0 @@
/*
* Symbol.cpp
* extract
*
* Created by Hieu Hoang on 21/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <cassert>
#include "Symbol.h"
using namespace std;
// Terminal symbol: `label` is the word, `pos` its position, stored as the
// first element of the source-side span.
//
// The two syntax flags are meaningless for terminals but are still copied
// whenever a Symbol is copied (e.g. inside a vector), so they get a defined
// value instead of being left uninitialized. Initializers are listed in
// member declaration order, which is the order they actually run in.
Symbol::Symbol(const std::string &label, size_t pos)
  :m_label(label)
  ,m_span(2)
  ,m_isTerminal(true)
  ,m_isSourceSyntax(false)
  ,m_isTargetSyntax(false)
{
  m_span[0].first = pos;
}
// Non-terminal symbol with a source label/span, a target label/span, and a
// flag per side saying whether that side's label comes from syntax.
// m_span[0] holds the source span, m_span[1] the target span.
Symbol::Symbol(const std::string &labelS, const std::string &labelT
               , size_t startS, size_t endS
               , size_t startT, size_t endT
               , bool isSourceSyntax, bool isTargetSyntax)
  :m_label(labelS)
  ,m_labelT(labelT)
  ,m_span(2)
  ,m_isTerminal(false)
  ,m_isSourceSyntax(isSourceSyntax)
  ,m_isTargetSyntax(isTargetSyntax)
{
  m_span[0].first = startS;
  m_span[0].second = endS;
  m_span[1].first = startT;
  m_span[1].second = endT;
}
// Three-way comparison of one side (source or target) of two non-terminals.
//  - If only one of them is a syntax label, the syntax one sorts first.
//  - If neither is a syntax label they are considered equal on this side.
//  - If both are, they are ordered by span and then by label.
// Returns -1, 0 or +1.
int CompareNonTerm(bool thisIsSyntax, bool otherIsSyntax
                   , const std::pair<size_t, size_t> &thisSpan, const std::pair<size_t, size_t> &otherSpan
                   , std::string thisLabel, std::string otherLabel)
{
  if (thisIsSyntax != otherIsSyntax) {
    // 1 is [X] & the other is [NP]
    return thisIsSyntax ? -1 : +1;
  }

  if (!thisIsSyntax) {
    // two default non-terminals: nothing further to compare
    return 0;
  }

  if (thisSpan < otherSpan) {
    return -1;
  }
  if (otherSpan < thisSpan) {
    return +1;
  }

  const int labelOrder = thisLabel.compare(otherLabel);
  if (labelOrder < 0) {
    return -1;
  }
  if (labelOrder > 0) {
    return +1;
  }
  return 0;
}
int Symbol::Compare(const Symbol &other) const
{
if (m_isTerminal != other.m_isTerminal)
return m_isTerminal ? -1 : +1;
assert(m_isTerminal == other.m_isTerminal);
if (m_isTerminal)
{ // compare labels & pos
if (m_span[0].first != other.m_span[0].first)
return (m_span[0].first < other.m_span[0].first) ? -1 : +1;
if (m_label != other.m_label)
return (m_label < other.m_label) ? -1 : +1;
}
else
{ // non terms
int ret = CompareNonTerm(m_isSourceSyntax, other.m_isSourceSyntax
,m_span[0], other.m_span[0]
,m_label, other.m_label);
if (ret != 0)
return ret;
ret = CompareNonTerm(m_isTargetSyntax, other.m_isTargetSyntax
,m_span[1], other.m_span[1]
,m_label, other.m_label);
if (ret != 0)
return ret;
}
return 0;
}
// Prints a terminal as its label; a non-terminal as its source label
// immediately followed by its target label, written as one string.
std::ostream& operator<<(std::ostream &out, const Symbol &obj)
{
  if (!obj.m_isTerminal) {
    out << obj.m_label + obj.m_labelT;
    return out;
  }

  out << obj.m_label;
  return out;
}

View File

@ -1,36 +0,0 @@
#pragma once
/*
* Symbol.h
* extract
*
* Created by Hieu Hoang on 21/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <string>
#include <iostream>
#include <vector>
// A single rule symbol: either a terminal (word + position) or a
// non-terminal (source/target labels and spans).
class Symbol
{
  friend std::ostream& operator<<(std::ostream &out, const Symbol &obj);

protected:
  // m_label: word or source-side label; m_labelT: target-side label (non-terms only)
  std::string m_label, m_labelT;
  // two entries: [0] source side, [1] target side
  std::vector<std::pair<size_t, size_t> > m_span;
  bool m_isTerminal, m_isSourceSyntax, m_isTargetSyntax;

public:
  // for terminals
  Symbol(const std::string &label, size_t pos);

  // for non-terminals
  Symbol(const std::string &labelS, const std::string &labelT
         , size_t startS, size_t endS
         , size_t startT, size_t endT
         , bool isSourceSyntax, bool isTargetSyntax);

  // Three-way comparison; returns -1, 0 or +1.
  int Compare(const Symbol &other) const;
};

View File

@ -1,56 +0,0 @@
/*
* SymbolSequence.cpp
* extract
*
* Created by Hieu Hoang on 21/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <cassert>
#include <sstream>
#include "SymbolSequence.h"
using namespace std;
int SymbolSequence::Compare(const SymbolSequence &other) const
{
int ret;
size_t thisSize = GetSize();
size_t otherSize = other.GetSize();
if (thisSize != otherSize)
{
ret = (thisSize < otherSize) ? -1 : +1;
return ret;
}
else
{
assert(thisSize == otherSize);
for (size_t ind = 0; ind < thisSize; ++ind)
{
const Symbol &thisSymbol = GetSymbol(ind);
const Symbol &otherSymbol = other.GetSymbol(ind);
ret = thisSymbol.Compare(otherSymbol);
if (ret != 0)
{
return ret;
}
}
}
assert(ret == 0);
return ret;
}
/**
 * Write every symbol of the sequence to the stream, each followed by a
 * single space (so a non-empty sequence ends with a trailing space).
 */
std::ostream& operator<<(std::ostream &out, const SymbolSequence &obj)
{
  const size_t count = obj.m_coll.size();
  for (size_t i = 0; i < count; ++i) {
    out << obj.m_coll[i] << " ";
  }
  return out;
}

View File

@ -1,42 +0,0 @@
#pragma once
/*
* SymbolSequence.h
* extract
*
* Created by Hieu Hoang on 21/07/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <iostream>
#include <vector>
#include "Symbol.h"
// An ordered collection of Symbol objects stored by value in a vector.
class SymbolSequence
{
  friend std::ostream& operator<<(std::ostream &out, const SymbolSequence &obj);

protected:
  typedef std::vector<Symbol> CollType;
  CollType m_coll;

public:
  typedef CollType::iterator iterator;
  typedef CollType::const_iterator const_iterator;

  // read-only iteration over the stored symbols
  const_iterator begin() const {
    return m_coll.begin();
  }
  const_iterator end() const {
    return m_coll.end();
  }

  // append a copy of the symbol
  void Add(const Symbol &symbol) {
    m_coll.push_back(symbol);
  }

  // number of stored symbols
  size_t GetSize() const {
    return m_coll.size();
  }

  // unchecked access; ind must be smaller than GetSize()
  const Symbol &GetSymbol(size_t ind) const {
    return m_coll[ind];
  }

  // remove all symbols
  void Clear() {
    m_coll.clear();
  }

  // three-way comparison, defined in SymbolSequence.cpp
  int Compare(const SymbolSequence &other) const;
};

View File

@ -1,245 +1,47 @@
// $Id: SyntaxTree.cpp 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <iostream>
#include <cassert>
#include <iostream>
#include "SyntaxTree.h"
//#include "extract.h"
#include "Global.h"
//extern const Global g_debug;
extern const Global *g_global;
#include "Parameter.h"
using namespace std;
bool SyntaxNode::IsSyntax() const
void SyntaxTree::Add(int startPos, int endPos, const std::string &label, const Parameter &params)
{
bool ret = GetLabel() != "[X]";
return ret;
}
//cerr << "add " << label << " to " << "[" << startPos << "-" << endPos << "]" << endl;
// Build an empty tree. The default left-hand-side node covers (0,0) with
// label "[X]". m_top is explicitly nulled: it is a raw pointer member that
// was previously left uninitialised.
SyntaxTree::SyntaxTree()
  :m_top(0)
  ,m_defaultLHS(0,0, "[X]")
{
  m_emptyNode.clear();
}
Range range(startPos, endPos);
Labels &labels = m_coll[range];
// Release every node recorded in m_nodes.
SyntaxTree::~SyntaxTree()
{
  for (SyntaxNodes::const_iterator node = m_nodes.begin(); node != m_nodes.end(); ++node) {
    delete *node;
  }
}
bool HasDuplicates(const SyntaxNodes &nodes)
{
string prevLabel;
SyntaxNodes::const_iterator iter;
for (iter = nodes.begin(); iter != nodes.end(); ++iter)
{
const SyntaxNode &node = **iter;
string label = node.GetLabel();
if (label == prevLabel)
return true;
}
return false;
}
void SyntaxTree::AddNode( int startPos, int endPos, std::string label )
{
SyntaxNode* newNode = new SyntaxNode( startPos, endPos, "[" + label + "]");
m_nodes.push_back( newNode );
SyntaxNodes &nodesChart = m_index[ startPos ][ endPos ];
if (!g_global->uppermostOnly)
{
nodesChart.push_back( newNode );
//assert(!HasDuplicates(m_index[ startPos ][ endPos ]));
}
else
{
if (nodesChart.size() > 0)
{
assert(nodesChart.size() == 1);
//delete nodes[0];
nodesChart.resize(0);
bool add = true;
if (labels.size()) {
if (params.multiLabel == 1) {
// delete the label in collection and add new
assert(labels.size() == 1);
labels.clear();
}
assert(nodesChart.size() == 0);
nodesChart.push_back( newNode );
}
}
ParentNodes SyntaxTree::Parse() {
ParentNodes parents;
int size = m_index.size();
// looping through all spans of size >= 2
for( int length=2; length<=size; length++ )
{
for( int startPos = 0; startPos <= size-length; startPos++ )
{
if (HasNode( startPos, startPos+length-1 ))
{
// processing one (parent) span
//std::cerr << "# " << startPos << "-" << (startPos+length-1) << ":";
SplitPoints splitPoints;
splitPoints.push_back( startPos );
//std::cerr << " " << startPos;
int first = 1;
int covered = 0;
while( covered < length )
{
// find largest covering subspan (child)
// starting at last covered position
for( int midPos=length-first; midPos>covered; midPos-- )
{
if( HasNode( startPos+covered, startPos+midPos-1 ) )
{
covered = midPos;
splitPoints.push_back( startPos+covered );
// std::cerr << " " << ( startPos+covered );
first = 0;
}
}
}
// std::cerr << std::endl;
parents.push_back( splitPoints );
}
else if (params.multiLabel == 2) {
// ignore this label
add = false;
}
}
return parents;
}
bool SyntaxTree::HasNode( int startPos, int endPos ) const
{
return GetNodes( startPos, endPos).size() > 0;
}
// Look up the nodes stored for the span [startPos, endPos] in the two-level
// index (start position, then end position). Returns the shared empty list
// when either level has no entry.
const SyntaxNodes &SyntaxTree::GetNodes( int startPos, int endPos ) const
{
  const SyntaxTreeIndexIterator byStart = m_index.find( startPos );
  if (byStart != m_index.end()) {
    const SyntaxTreeIndex2 &byEnd = byStart->second;
    const SyntaxTreeIndexIterator2 hit = byEnd.find( endPos );
    if (hit != byEnd.end())
      return hit->second;
  }
  return m_emptyNode;
}
// Render the tree into a string using the stream operator for SyntaxTree.
std::string SyntaxTree::ToString() const
{
  std::ostringstream buffer;
  buffer << *this;
  return buffer.str();
}
void SyntaxTree::AddDefaultNonTerms(size_t phraseSize)
{
for (size_t startPos = 0; startPos <= phraseSize; ++startPos)
{
for (size_t endPos = startPos; endPos < phraseSize; ++endPos)
{
AddNode(startPos, endPos, "X");
}
if (add) {
labels.push_back(label);
}
}
void SyntaxTree::AddDefaultNonTerms(bool isSyntax, bool mixed, size_t phraseSize)
void SyntaxTree::AddToAll(const std::string &label)
{
if (isSyntax)
{
AddDefaultNonTerms(!mixed, phraseSize);
}
else
{ // add X everywhere
AddDefaultNonTerms(phraseSize);
Coll::iterator iter;
for (iter = m_coll.begin(); iter != m_coll.end(); ++iter) {
Labels &labels = iter->second;
labels.push_back(label);
}
}
void SyntaxTree::AddDefaultNonTerms(bool addEverywhere, size_t phraseSize)
const SyntaxTree::Labels &SyntaxTree::Find(int startPos, int endPos) const
{
//cerr << "GetNumWords()=" << GetNumWords() << endl;
//assert(phraseSize == GetNumWords() || GetNumWords() == 1); // 1 if syntax sentence doesn't have any xml. TODO fix syntax tree obj
for (size_t startPos = 0; startPos <= phraseSize; ++startPos)
{
for (size_t endPos = startPos; endPos <= phraseSize; ++endPos)
{
const SyntaxNodes &nodes = GetNodes(startPos, endPos);
if (!addEverywhere && nodes.size() > 0)
{ // only add if no label
continue;
}
AddNode(startPos, endPos, "X");
}
}
Coll::const_iterator iter;
iter = m_coll.find(Range(startPos, endPos));
return (iter == m_coll.end()) ? m_defaultLabels : iter->second;
}
// Return a copy of the nodes stored for [startPos, endPos]; when the span has
// none, return a one-element list pointing at the default LHS node instead.
const SyntaxNodes SyntaxTree::GetNodesForLHS( int startPos, int endPos ) const
{
  const SyntaxNodes &stored = GetNodes(startPos, endPos);
  if (!stored.empty())
    return stored;

  SyntaxNodes fallback;
  fallback.push_back(&m_defaultLHS);
  return fallback;
}
std::ostream& operator<<(std::ostream& os, const SyntaxTree& t)
{
int size = t.m_index.size();
for(size_t length=1; length<=size; length++)
{
for(size_t space=0; space<length; space++)
{
os << " ";
}
for(size_t start=0; start<=size-length; start++)
{
if (t.HasNode( start, start+(length-1) ))
{
std::string label = t.GetNodes( start, start+(length-1) )[0]->GetLabel() + "#######";
os << label.substr(0,7) << " ";
}
else
{
os << "------- ";
}
}
os << std::endl;
}
return os;
}

View File

@ -1,96 +1,32 @@
#pragma once
// $Id: SyntaxTree.h 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <string>
#include <vector>
#include <map>
#include <sstream>
#include <string>
class SyntaxNode;
class Parameter;
typedef std::vector<const SyntaxNode*> SyntaxNodes;
class SyntaxNode {
protected:
int m_start, m_end;
std::string m_label;
SyntaxNodes m_children;
SyntaxNode* m_parent;
class SyntaxTree
{
public:
SyntaxNode( int startPos, int endPos, const std::string &label)
:m_start(startPos)
,m_end(endPos)
,m_label(label)
{}
int GetStart() const
{ return m_start; }
int GetEnd() const
{ return m_end; }
const std::string &GetLabel() const
{ return m_label; }
bool IsSyntax() const;
typedef std::pair<int, int> Range;
typedef std::vector<std::string> Labels;
typedef std::map<Range, Labels> Coll;
void Add(int startPos, int endPos, const std::string &label, const Parameter &params);
void AddToAll(const std::string &label);
const Labels &Find(int startPos, int endPos) const;
void SetHieroLabel(const std::string &label) {
m_defaultLabels.push_back(label);
}
protected:
Coll m_coll;
Labels m_defaultLabels;
};
typedef std::vector< int > SplitPoints;
typedef std::vector< SplitPoints > ParentNodes;
// Collection of labelled spans (SyntaxNode objects) over a sentence, indexed
// by start position and then end position.
class SyntaxTree {
protected:
// every node created by AddNode(); the destructor deletes these
SyntaxNodes m_nodes;
// NOTE(review): no visible code assigns or reads m_top - confirm whether it is still needed
SyntaxNode* m_top;
// node handed out by GetNodesForLHS() when a span has no node of its own
SyntaxNode m_defaultLHS;
// second index level: end position -> nodes
typedef std::map< int, SyntaxNodes > SyntaxTreeIndex2;
typedef SyntaxTreeIndex2::const_iterator SyntaxTreeIndexIterator2;
// first index level: start position -> second level
typedef std::map< int, SyntaxTreeIndex2 > SyntaxTreeIndex;
typedef SyntaxTreeIndex::const_iterator SyntaxTreeIndexIterator;
SyntaxTreeIndex m_index;
// empty list returned by GetNodes() when a span has no entry
SyntaxNodes m_emptyNode;
friend std::ostream& operator<<(std::ostream&, const SyntaxTree&);
public:
SyntaxTree();
// NOTE(review): the destructor deletes the nodes in m_nodes but copying is not
// disabled, so a copied tree would delete the same nodes twice - confirm the
// class is never copied
~SyntaxTree();
// create a node labelled "[label]" for the span and register it in the index
void AddNode( int startPos, int endPos, std::string label );
ParentNodes Parse();
// true when the span has at least one node
bool HasNode( int startPos, int endPos ) const;
// nodes stored for the span, or an empty list
const SyntaxNodes &GetNodes( int startPos, int endPos ) const;
const SyntaxNodes &GetAllNodes() const { return m_nodes; } ;
// number of distinct start positions in the index
size_t GetNumWords() const { return m_index.size(); }
std::string ToString() const;
void AddDefaultNonTerms(bool isSyntax, bool addEverywhere, size_t phraseSize);
void AddDefaultNonTerms(bool mixed, size_t phraseSize);
void AddDefaultNonTerms(size_t phraseSize);
// like GetNodes(), but falls back to the default LHS node for empty spans
const SyntaxNodes GetNodesForLHS( int startPos, int endPos ) const;
};
std::ostream& operator<<(std::ostream&, const SyntaxTree&);

View File

@ -1,38 +0,0 @@
/*
* Tunnel.cpp
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include "Tunnel.h"
// Three-way comparison: order by source range first; the target range only
// decides when the source ranges compare equal.
int Tunnel::Compare(const Tunnel &other) const
{
  const int sourceOrder = m_sourceRange.Compare(other.m_sourceRange);
  if (sourceOrder == 0)
    return m_targetRange.Compare(other.m_targetRange);
  return sourceOrder;
}
int Tunnel::Compare(const Tunnel &other, size_t direction) const
{
const Range &thisRange = (direction == 0) ? m_sourceRange : m_targetRange;
const Range &otherRange = (direction == 0) ? other.m_sourceRange : other.m_targetRange;
int ret = thisRange.Compare(otherRange);
return ret;
}
// Print a tunnel as "<source range>==><target range>".
std::ostream& operator<<(std::ostream &out, const Tunnel &tunnel)
{
  out << tunnel.m_sourceRange;
  out << "==>";
  out << tunnel.m_targetRange;
  return out;
}

View File

@ -1,49 +0,0 @@
#pragma once
/*
* Tunnel.h
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <vector>
#include <cassert>
#include <string>
#include <iostream>
#include "Range.h"
// for unaligned source terminal
// Pairs a source-side range with a target-side range.
class Tunnel
{
  friend std::ostream& operator<<(std::ostream&, const Tunnel&);

protected:
  Range m_sourceRange, m_targetRange;

public:
  // both ranges default-constructed
  Tunnel()
  {
  }

  // member-wise copy
  Tunnel(const Tunnel &copy)
    : m_sourceRange(copy.m_sourceRange)
    , m_targetRange(copy.m_targetRange)
  {
  }

  Tunnel(const Range &sourceRange, const Range &targetRange)
    : m_sourceRange(sourceRange)
    , m_targetRange(targetRange)
  {
  }

  // direction 0 selects the source range, anything else the target range
  const Range &GetRange(size_t direction) const
  {
    if (direction == 0)
      return m_sourceRange;
    return m_targetRange;
  }

  int Compare(const Tunnel &other) const;
  int Compare(const Tunnel &other, size_t direction) const;
};
typedef std::vector<Tunnel> TunnelList;

View File

@ -1,70 +0,0 @@
/*
* TunnelCollection.cpp
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include "TunnelCollection.h"
#include "Range.h"
using namespace std;
/**
 * Count positions in [startPos, endPos] (inclusive) whose alignment count is
 * zero.
 *
 * \param direction 0 reads alignedCountS, 1 reads alignedCountT; any other
 *                  value counts nothing
 * \param startPos  first position, must not exceed endPos
 * \param endPos    last position, must be a valid index of the selected
 *                  vector (alignedCountT is checked for every non-zero
 *                  direction)
 */
size_t TunnelCollection::NumUnalignedWord(size_t direction, size_t startPos, size_t endPos) const
{
  assert(startPos <= endPos);

  const bool useSource = (direction == 0);
  if (useSource) {
    assert(endPos < alignedCountS.size());
  } else {
    assert(endPos < alignedCountT.size());
  }

  size_t unaligned = 0;
  for (size_t pos = startPos; pos <= endPos; ++pos) {
    const bool isUnaligned = useSource
                             ? (alignedCountS[pos] == 0)
                             : (direction == 1 && alignedCountT[pos] == 0);
    if (isUnaligned)
      ++unaligned;
  }
  return unaligned;
}
void TunnelCollection::Add(int startS, int endS, int startT, int endT)
{
// m_phraseExist[startS][endS - startS].push_back(Tunnel(startT, endT));
m_coll[startS][endS - startS].push_back(Tunnel(Range(startS, endS), Range(startT, endT)));
}
std::ostream& operator<<(std::ostream &out, const TunnelCollection &TunnelCollection)
{
size_t size = TunnelCollection.GetSize();
for (size_t startPos = 0; startPos < size; ++startPos)
{
for (size_t endPos = startPos; endPos < size; ++endPos)
{
const TunnelList &tunnelList = TunnelCollection.GetTunnels(startPos, endPos);
TunnelList::const_iterator iter;
for (iter = tunnelList.begin(); iter != tunnelList.end(); ++iter)
{
const Tunnel &tunnel = *iter;
out << tunnel << " ";
}
}
}
return out;
}

View File

@ -1,61 +0,0 @@
#pragma once
/*
* TunnelCollection.h
* extract
*
* Created by Hieu Hoang on 19/01/2010.
* Copyright 2010 __MyCompanyName__. All rights reserved.
*
*/
#include <vector>
#include "Tunnel.h"
// repository of extracted phrase pairs
// which are potential tunnels in larger phrase pairs
class TunnelCollection
{
friend std::ostream& operator<<(std::ostream&, const TunnelCollection&);
protected:
std::vector< std::vector<TunnelList> > m_coll;
// indexed by source pos. and source length
// maps to list of tunnels where <int, int> are target pos
public:
// per-position alignment counts read by NumUnalignedWord(); a count of 0
// means the position is unaligned
std::vector<int> alignedCountS, alignedCountT;
// NOTE(review): copy constructor is declared but no definition is visible in
// this header - presumably meant to forbid copying; confirm
TunnelCollection(const TunnelCollection &);
TunnelCollection(size_t size)
:m_coll(size)
{
// size is the length of the source sentence
for (size_t pos = 0; pos < size; ++pos)
{
// create empty tunnel lists
std::vector<TunnelList> &endVec = m_coll[pos];
// one cell per span that starts at pos and ends before size
endVec.resize(size - pos);
}
}
// store a tunnel for the given source and target spans
void Add(int startS, int endS, int startT, int endT);
//const TunnelList &GetTargetHoles(int startS, int endS) const
//{
// const TunnelList &targetHoles = m_phraseExist[startS][endS - startS];
// return targetHoles;
//}
// tunnels stored for source span [startS, endS]; indices are not range-checked
const TunnelList &GetTunnels(int startS, int endS) const
{
const TunnelList &sourceHoles = m_coll[startS][endS - startS];
return sourceHoles;
}
// number of source positions the collection was built for
const size_t GetSize() const
{ return m_coll.size(); }
// number of positions in [startPos, endPos] with a zero alignment count
size_t NumUnalignedWord(size_t direction, size_t startPos, size_t endPos) const;
};

View File

@ -0,0 +1,56 @@
/*
* Word.cpp
*
* Created on: 18 Feb 2014
* Author: s0565741
*/
#include <limits>
#include "Word.h"
using namespace std;
// Store the word's position and surface string; the alignment set starts empty.
Word::Word(int pos, const std::string &str)
  : m_pos(pos)
  , m_str(str)
{
}
// Nothing to release: the alignment set holds non-owning pointers only.
Word::~Word()
{
}
void Word::AddAlignment(const Word *other)
{
m_alignment.insert(other);
}
std::set<int> Word::GetAlignmentIndex() const
{
std::set<int> ret;
std::set<const Word *>::const_iterator iter;
for (iter = m_alignment.begin(); iter != m_alignment.end(); ++iter) {
const Word &otherWord = **iter;
int otherPos = otherWord.GetPos();
ret.insert(otherPos);
}
return ret;
}
// Write the word's surface string to the stream, with no separator.
void Word::Output(std::ostream &out) const
{
  const std::string &surface = m_str;
  out << surface;
}
// Debug representation: a copy of the surface string.
std::string Word::Debug() const
{
  std::string text(m_str);
  return text;
}
// Compare the surface strings with std::string::compare(): negative, zero or
// positive. Position and alignment are ignored.
int Word::CompareString(const Word &other) const
{
  const std::string &theirs = other.m_str;
  return m_str.compare(theirs);
}

View File

@ -0,0 +1,47 @@
/*
* Word.h
*
* Created on: 18 Feb 2014
* Author: s0565741
*/
#pragma once
#include <string>
#include <set>
#include "RuleSymbol.h"
// a terminal
class Word : public RuleSymbol
{
public:
Word(const Word&); // do not implement
// pos is stored in m_pos, str in m_str
Word(int pos, const std::string &str);
virtual ~Word();
// a Word is never a non-terminal
virtual bool IsNonTerm() const
{ return false; }
// returns a copy of the surface string
std::string GetString() const
{ return m_str; }
int GetPos() const
{ return m_pos; }
// register an aligned word; the pointer is stored, not the object
void AddAlignment(const Word *other);
const std::set<const Word *> &GetAlignment() const
{ return m_alignment; }
// positions (GetPos()) of all aligned words
std::set<int> GetAlignmentIndex() const;
// writes the surface string to the stream
void Output(std::ostream &out) const;
// returns the surface string
std::string Debug() const;
// std::string::compare() on the surface strings
int CompareString(const Word &other) const;
protected:
int m_pos; // original position in sentence, NOT in lattice
std::string m_str;
// non-owning pointers to the words this word is aligned to
std::set<const Word *> m_alignment;
};

View File

@ -1,344 +0,0 @@
// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <vector>
#include <string>
#include <set>
#include <iostream>
#include <stdlib.h>
#include "SyntaxTree.h"
using namespace std;
/**
 * Split a string into tokens separated by any of the delimiter characters.
 * Runs of delimiters are treated as one separator and leading or trailing
 * delimiters produce no empty tokens.
 *
 * \param str        text to split
 * \param delimiters set of separator characters (default: space and tab)
 * \return the tokens in order of appearance; empty if str has no token
 */
inline std::vector<std::string> Tokenize(const std::string& str,
    const std::string& delimiters = " \t")
{
  std::vector<std::string> tokens;

  // start of the current token, or npos when no token is left
  std::string::size_type tokenBegin = str.find_first_not_of(delimiters, 0);
  while (tokenBegin != std::string::npos) {
    // first delimiter after the token (npos: token runs to the end)
    const std::string::size_type tokenEnd = str.find_first_of(delimiters, tokenBegin);
    tokens.push_back(str.substr(tokenBegin, tokenEnd - tokenBegin));
    tokenBegin = str.find_first_not_of(delimiters, tokenEnd);
  }
  return tokens;
}
/**
 * Remove leading and trailing characters that appear in dropChars.
 *
 * \param str       text to trim
 * \param dropChars characters to strip (default: space, tab, newline, CR)
 * \return the trimmed copy; empty if str consists only of dropChars
 */
const std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r")
{
  const std::string::size_type first = str.find_first_not_of(dropChars);
  if (first == std::string::npos) {
    // nothing but dropChars (or empty input)
    return std::string();
  }
  const std::string::size_type last = str.find_last_not_of(dropChars);
  return str.substr(first, last - first + 1);
}
/**
 * Extract the value of attributeName="value" from the text of an XML tag.
 *
 * A double quote preceded by a backslash does not end the value; the
 * backslash itself is kept in the returned text (no unescaping is done).
 *
 * \param tag           tag text to search
 * \param attributeName name of the attribute to look for
 * \return the raw attribute value; an empty string if the attribute is
 *         missing, has an empty value, or has no closing quote (in the last
 *         case a message is also written to stderr)
 */
std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName)
{
  /*TODO deal with unescaping \"*/
  const std::string tagOpen = attributeName + "=\"";
  std::string::size_type contentsStart = tag.find(tagOpen);
  if (contentsStart == std::string::npos) return "";
  contentsStart += tagOpen.size();

  // Search for the closing quote starting AT contentsStart. Starting one
  // character later skipped the closing quote of an empty value (attr="")
  // and returned text belonging to the following attribute.
  std::string::size_type contentsEnd = tag.find('"', contentsStart);
  if (contentsEnd == std::string::npos) {
    std::cerr << "Malformed XML attribute: "<< tag;
    return "";
  }

  // a quote preceded by a backslash is escaped: keep looking for the real end
  std::string::size_type possibleEnd;
  while (tag.at(contentsEnd-1) == '\\' && (possibleEnd = tag.find('"', contentsEnd+1)) != std::string::npos) {
    contentsEnd = possibleEnd;
  }
  return tag.substr(contentsStart, contentsEnd-contentsStart);
}
/**
 * Strip the enclosing "<" and ">" from an XML tag token.
 *
 * \param str token to be stripped
 * \return the text between the brackets when str is at least two characters
 *         long, starts with '<' and ends with '>'; otherwise str unchanged
 */
std::string TrimXml(const std::string& str)
{
  const std::string::size_type len = str.size();
  const bool bracketed = len >= 2 && str[0] == '<' && str[len - 1] == '>';
  if (!bracketed) {
    // too short or not an xml token -> do nothing
    return str;
  }
  return str.substr(1, len - 2);
}
/**
 * Check whether a token is an XML tag, i.e. its first character is "<".
 *
 * \param tag token to be checked
 * \return true if the token starts with '<'; false otherwise, including for
 *         an empty token
 */
bool isXmlTag(const std::string& tag)
{
  if (tag.empty())
    return false;
  return tag[0] == '<';
}
/**
 * Split the input string into tokens that are either XML tags or the text
 * between them.
 * example: this <b> is a </b> test .
 * => (this ), (<b>), ( is a ), (</b>), ( test .)
 *
 * If a "<" has no matching ">", an error is written to stderr and the tokens
 * collected so far are returned; the text directly before the broken tag is
 * not included.
 *
 * \param str input string
 * \return tokens in order of appearance
 */
inline std::vector<std::string> TokenizeXml(const std::string& str)
{
  std::vector<std::string> pieces; // tokens to be returned
  std::string::size_type cursor = 0; // first character not yet consumed

  // walk through the string until everything has been consumed
  while (cursor != str.size()) {
    // next opening "<" of an xml tag
    const std::string::size_type open = str.find('<', cursor);
    if (open == std::string::npos) {
      // no more tags: the rest is one text token
      pieces.push_back(str.substr(cursor));
      break;
    }

    // sanity check: there has to be a closing ">"
    const std::string::size_type close = str.find('>', open);
    if (close == std::string::npos) {
      std::cerr << "ERROR: malformed XML: " << str << std::endl;
      return pieces;
    }

    // text before the tag, if there is any
    if (open != cursor)
      pieces.push_back(str.substr(cursor, open - cursor));

    // the tag itself, brackets included
    pieces.push_back(str.substr(open, close - open + 1));
    cursor = close + 1;
  }
  return pieces;
}
/**
* Process a sentence with xml annotation:
* every completed tag adds one labelled span to the syntax tree, and the
* markup is removed from the sentence.
*
* \param line in: sentence with xml markup; out: sentence without the xml
*             (only rewritten when the function returns true and the line
*             contained a "<")
* \param tree receives one node per completed tag via AddNode(); the label
*             is the tag's "label" attribute
* \param labelCollection every "label" attribute value seen is inserted here
* \param topLabelCollection count per label of the nodes that span the
*             whole sentence (positions 0 to number of words - 1)
* \return true on success or when the line has no "<"; false after printing
*             an error to stderr when the markup is malformed
*/
bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &labelCollection, map< string, int > &topLabelCollection ) {
//parse XML markup in translation line
// no xml tag? we're done.
if (line.find_first_of('<') == string::npos) { return true; }
// break up input into a vector of xml tags and text
// example: (this), (<b>), (is a), (</b>), (test .)
vector<string> xmlTokens = TokenizeXml(line);
// we need to store opened tags, until they are closed
// tags are stored as triples (tagname, startpos, contents)
typedef pair< string, pair< size_t, string > > OpenedTag;
vector< OpenedTag > tagStack; // stack that contains active opened tags
string cleanLine; // return string (text without xml)
size_t wordPos = 0; // position in sentence (in terms of number of words)
// NOTE(review): isLinked is never read in this function
bool isLinked = false;
// loop through the tokens
for (size_t xmlTokenPos = 0 ; xmlTokenPos < xmlTokens.size() ; xmlTokenPos++)
{
// not a xml tag, but regular text (may contain many words)
if(!isXmlTag(xmlTokens[xmlTokenPos]))
{
// add a space at boundary, if necessary
if (cleanLine.size()>0 &&
cleanLine[cleanLine.size() - 1] != ' ' &&
xmlTokens[xmlTokenPos][0] != ' ')
{
cleanLine += " ";
}
cleanLine += xmlTokens[xmlTokenPos]; // add to output
// re-tokenizes the whole clean line after every text token
wordPos = Tokenize(cleanLine).size(); // count all the words
}
// process xml tag
else
{
// *** get essential information about tag ***
// strip extra boundary spaces and "<" and ">"
string tag = Trim(TrimXml(xmlTokens[xmlTokenPos]));
// cerr << "XML TAG IS: " << tag << std::endl;
if (tag.size() == 0)
{
cerr << "ERROR: empty tag name: " << line << endl;
return false;
}
// check if unary (e.g., "<wall/>")
bool isUnary = ( tag[tag.size() - 1] == '/' );
// check if opening tag (e.g. "<a>", not "</a>")
bool isClosed = ( tag[0] == '/' );
bool isOpen = !isClosed;
if (isClosed && isUnary)
{
cerr << "ERROR: can't have both closed and unary tag <" << tag << ">: " << line << endl;
return false;
}
if (isClosed)
tag = tag.substr(1); // remove "/" at the beginning
if (isUnary)
tag = tag.substr(0,tag.size()-1); // remove "/" at the end
// find the tag name and contents
string::size_type endOfName = tag.find_first_of(' ');
string tagName = tag;
string tagContent = "";
if (endOfName != string::npos) {
tagName = tag.substr(0,endOfName);
tagContent = tag.substr(endOfName+1);
}
// *** process new tag ***
if (isOpen || isUnary)
{
// put the tag on the tag stack
OpenedTag openedTag = make_pair( tagName, make_pair( wordPos, tagContent ) );
tagStack.push_back( openedTag );
// cerr << "XML TAG " << tagName << " (" << tagContent << ") added to stack, now size " << tagStack.size() << endl;
}
// *** process completed tag ***
// a unary tag was pushed just above and is popped again here
if (isClosed || isUnary)
{
// pop last opened tag from stack;
if (tagStack.size() == 0)
{
cerr << "ERROR: tag " << tagName << " closed, but not opened" << ":" << line << endl;
return false;
}
OpenedTag openedTag = tagStack.back();
tagStack.pop_back();
// tag names have to match
if (openedTag.first != tagName)
{
cerr << "ERROR: tag " << openedTag.first << " closed by tag " << tagName << ": " << line << endl;
return false;
}
// assemble remaining information about tag
// (attributes are taken from the opening tag, not from the closing one)
size_t startPos = openedTag.second.first;
string tagContent = openedTag.second.second;
size_t endPos = wordPos;
// span attribute overwrites position
string span = ParseXmlTagAttribute(tagContent,"span");
if (! span.empty())
{
vector<string> ij = Tokenize(span, "-");
if (ij.size() != 1 && ij.size() != 2) {
cerr << "ERROR: span attribute must be of the form \"i-j\" or \"i\": " << line << endl;
return false;
}
// span values are inclusive; endPos is kept exclusive, hence the +1
startPos = atoi(ij[0].c_str());
if (ij.size() == 1) endPos = startPos + 1;
else endPos = atoi(ij[1].c_str()) + 1;
}
// cerr << "XML TAG " << tagName << " (" << tagContent << ") spanning " << startPos << " to " << (endPos-1) << " complete, commence processing" << endl;
// a tag that covers no word (e.g. a unary tag without a span attribute) is rejected
if (startPos >= endPos)
{
cerr << "ERROR: tag " << tagName << " must span at least one word (" << startPos << "-" << endPos << "): " << line << endl;
return false;
}
string label = ParseXmlTagAttribute(tagContent,"label");
labelCollection.insert( label );
// report what we have processed so far
if (0) {
cerr << "XML TAG NAME IS: '" << tagName << "'" << endl;
cerr << "XML TAG LABEL IS: '" << label << "'" << endl;
cerr << "XML SPAN IS: " << startPos << "-" << (endPos-1) << endl;
}
tree.AddNode( startPos, endPos-1, label );
}
}
}
// we are done. check if there are tags that are still open
if (tagStack.size() > 0)
{
cerr << "ERROR: some opened tags were never closed: " << line << endl;
return false;
}
// collect top labels
// NOTE(review): wordPos is unsigned; when the line holds no words, wordPos-1 wraps around before it is passed on - confirm GetNodes() copes with that
const SyntaxNodes &topNodes = tree.GetNodes( 0, wordPos-1 );
for( SyntaxNodes::const_iterator node = topNodes.begin(); node != topNodes.end(); node++ )
{
const SyntaxNode *n = *node;
const string &label = n->GetLabel();
if (topLabelCollection.find( label ) == topLabelCollection.end())
topLabelCollection[ label ] = 0;
topLabelCollection[ label ]++;
}
// return de-xml'ed sentence in line
line = cleanLine;
return true;
}

View File

@ -1,35 +0,0 @@
#pragma once
// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <string>
#include <vector>
#include <set>
#include <map>
#include "SyntaxTree.h"
// Value of attributeName="..." inside the tag text; empty string if absent.
std::string ParseXmlTagAttribute(const std::string& tag,const std::string& attributeName);
// Strip the enclosing "<" and ">" from a tag token.
std::string TrimXml(const std::string& str);
// True if the token starts with "<".
bool isXmlTag(const std::string& tag);
// Split a line into alternating text and tag tokens.
// NOTE(review): declared inline but no definition is visible in this header;
// an inline function needs a definition in every translation unit that calls
// it - confirm this declaration is usable outside XmlOption.cpp.
inline std::vector<std::string> TokenizeXml(const std::string& str);
// Remove xml markup from line and record the tagged spans in tree; returns
// false when the markup is malformed.
bool ProcessAndStripXMLTags(std::string &line, SyntaxTree &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection );

View File

@ -1,310 +0,0 @@
// $Id: extract.cpp 2828 2010-02-01 16:07:58Z hieuhoang1972 $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cstdio>
#include <stdlib.h>
#include <assert.h>
#include <time.h>
#include <cstring>
#include <sstream>
#include <iostream>
#include "extract.h"
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "Lattice.h"
#ifdef WIN32
// Include Visual Leak Detector
#include <vld.h>
#endif
using namespace std;
void writeGlueGrammar(const string &, Global &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection);
int main(int argc, char* argv[])
{
cerr << "Extract v2.0, written by Philipp Koehn\n"
<< "rule extraction from an aligned parallel corpus\n";
//time_t starttime = time(NULL);
Global *global = new Global();
g_global = global;
int sentenceOffset = 0;
if (argc < 5) {
cerr << "syntax: extract-mixed-syntax corpus.target corpus.source corpus.align extract "
<< " [ --Hierarchical | --Orientation"
<< " | --GlueGrammar FILE | --UnknownWordLabel FILE"
<< " | --OnlyDirect"
<< " | --MinHoleSpanSourceDefault[" << global->minHoleSpanSourceDefault << "]"
<< " | --MaxHoleSpanSourceDefault[" << global->maxHoleSpanSourceDefault << "]"
<< " | --MinHoleSpanSourceSyntax[" << global->minHoleSpanSourceSyntax << "]"
<< " | --MaxHoleSpanSourceSyntax[" << global->maxHoleSpanSourceSyntax << "]"
<< " | --MaxSymbols[" << global->maxSymbols<< "]"
<< " | --MaxNonTerm[" << global->maxNonTerm << "]"
<< " | --SourceSyntax | --TargetSyntax"
<< " | --UppermostOnly[" << g_global->uppermostOnly << "]"
<< endl;
exit(1);
}
char* &fileNameT = argv[1];
char* &fileNameS = argv[2];
char* &fileNameA = argv[3];
string fileNameGlueGrammar;
string fileNameUnknownWordLabel;
string fileNameExtract = string(argv[4]);
int optionInd = 5;
for(int i=optionInd;i<argc;i++)
{
if (strcmp(argv[i],"--MinHoleSpanSourceDefault") == 0) {
global->minHoleSpanSourceDefault = atoi(argv[++i]);
if (global->minHoleSpanSourceDefault < 1) {
cerr << "extract error: --minHoleSourceDefault should be at least 1" << endl;
exit(1);
}
}
else if (strcmp(argv[i],"--MaxHoleSpanSourceDefault") == 0) {
global->maxHoleSpanSourceDefault = atoi(argv[++i]);
if (global->maxHoleSpanSourceDefault < 1) {
cerr << "extract error: --maxHoleSourceDefault should be at least 1" << endl;
exit(1);
}
}
else if (strcmp(argv[i],"--MinHoleSpanSourceSyntax") == 0) {
global->minHoleSpanSourceSyntax = atoi(argv[++i]);
if (global->minHoleSpanSourceSyntax < 1) {
cerr << "extract error: --minHoleSourceSyntax should be at least 1" << endl;
exit(1);
}
}
else if (strcmp(argv[i],"--UppermostOnly") == 0) {
global->uppermostOnly = atoi(argv[++i]);
}
else if (strcmp(argv[i],"--MaxHoleSpanSourceSyntax") == 0) {
global->maxHoleSpanSourceSyntax = atoi(argv[++i]);
if (global->maxHoleSpanSourceSyntax < 1) {
cerr << "extract error: --maxHoleSourceSyntax should be at least 1" << endl;
exit(1);
}
}
// maximum number of words in hierarchical phrase
else if (strcmp(argv[i],"--maxSymbols") == 0) {
global->maxSymbols = atoi(argv[++i]);
if (global->maxSymbols < 1) {
cerr << "extract error: --maxSymbols should be at least 1" << endl;
exit(1);
}
}
// maximum number of non-terminals
else if (strcmp(argv[i],"--MaxNonTerm") == 0) {
global->maxNonTerm = atoi(argv[++i]);
if (global->maxNonTerm < 1) {
cerr << "extract error: --MaxNonTerm should be at least 1" << endl;
exit(1);
}
}
// allow consecutive non-terminals (X Y | X Y)
else if (strcmp(argv[i],"--TargetSyntax") == 0) {
global->targetSyntax = true;
}
else if (strcmp(argv[i],"--SourceSyntax") == 0) {
global->sourceSyntax = true;
}
// do not create many part00xx files!
else if (strcmp(argv[i],"--NoFileLimit") == 0) {
// now default
}
else if (strcmp(argv[i],"--GlueGrammar") == 0) {
global->glueGrammarFlag = true;
if (++i >= argc)
{
cerr << "ERROR: Option --GlueGrammar requires a file name" << endl;
exit(0);
}
fileNameGlueGrammar = string(argv[i]);
cerr << "creating glue grammar in '" << fileNameGlueGrammar << "'" << endl;
}
else if (strcmp(argv[i],"--UnknownWordLabel") == 0) {
global->unknownWordLabelFlag = true;
if (++i >= argc)
{
cerr << "ERROR: Option --UnknownWordLabel requires a file name" << endl;
exit(0);
}
fileNameUnknownWordLabel = string(argv[i]);
cerr << "creating unknown word labels in '" << fileNameUnknownWordLabel << "'" << endl;
}
// TODO: this should be a useful option
//else if (strcmp(argv[i],"--ZipFiles") == 0) {
// zipFiles = true;
//}
// if an source phrase is paired with two target phrases, then count(t|s) = 0.5
else if (strcmp(argv[i],"--Mixed") == 0) {
global->mixed = true;
}
else if (strcmp(argv[i],"--AllowDefaultNonTermEdge") == 0) {
global->allowDefaultNonTermEdge = atoi(argv[++i]);
}
else if (strcmp(argv[i], "--GZOutput") == 0) {
global->gzOutput = true;
}
else if (strcmp(argv[i],"--MaxSpan") == 0) {
// ignore
++i;
}
else if (strcmp(argv[i],"--SentenceOffset") == 0) {
if (i+1 >= argc || argv[i+1][0] < '0' || argv[i+1][0] > '9') {
cerr << "extract: syntax error, used switch --SentenceOffset without a number" << endl;
exit(1);
}
sentenceOffset = atoi(argv[++i]);
}
else {
cerr << "extract: syntax error, unknown option '" << string(argv[i]) << "'\n";
exit(1);
}
}
// open input files
Moses::InputFileStream tFile(fileNameT);
Moses::InputFileStream sFile(fileNameS);
Moses::InputFileStream aFile(fileNameA);
// open output files
string fileNameExtractInv = fileNameExtract + ".inv";
if (global->gzOutput) {
fileNameExtract += ".gz";
fileNameExtractInv += ".gz";
}
Moses::OutputFileStream extractFile;
Moses::OutputFileStream extractFileInv;
extractFile.Open(fileNameExtract.c_str());
extractFileInv.Open(fileNameExtractInv.c_str());
// loop through all sentence pairs
int i = sentenceOffset;
while(true) {
i++;
if (i % 1000 == 0) {
cerr << i << " " << flush;
}
string targetString;
string sourceString;
string alignmentString;
bool ok = getline(tFile, targetString);
if (!ok)
break;
getline(sFile, sourceString);
getline(aFile, alignmentString);
//cerr << endl << targetString << endl << sourceString << endl << alignmentString << endl;
//time_t currTime = time(NULL);
//cerr << "A " << (currTime - starttime) << endl;
SentenceAlignment sentencePair;
if (sentencePair.Create( targetString, sourceString, alignmentString, i, *global ))
{
//cerr << sentence.sourceTree << endl;
//cerr << sentence.targetTree << endl;
sentencePair.FindTunnels(*g_global);
//cerr << "C " << (time(NULL) - starttime) << endl;
//cerr << sentencePair << endl;
sentencePair.CreateLattice(*g_global);
//cerr << "D " << (time(NULL) - starttime) << endl;
//cerr << sentencePair << endl;
sentencePair.CreateRules(*g_global);
//cerr << "E " << (time(NULL) - starttime) << endl;
//cerr << sentence.lattice->GetRules().GetSize() << endl;
sentencePair.GetLattice().GetRules().Output(extractFile);
sentencePair.GetLattice().GetRules().OutputInv(extractFileInv);
}
}
tFile.Close();
sFile.Close();
aFile.Close();
extractFile.Close();
extractFileInv.Close();
if (global->glueGrammarFlag) {
writeGlueGrammar(fileNameGlueGrammar, *global, targetLabelCollection, targetTopLabelCollection);
}
delete global;
}
// Write the glue grammar to fileName.
//
// Without target syntax (options.targetSyntax false) three fixed rules over
// the labels [X] and [S] are written. With target syntax a top label is
// chosen, then the basic rules, one "top" rule per entry of
// targetTopLabelCollection, one glue rule per entry of targetLabelCollection
// and a final glue rule for [X] are written.
//
// If the file cannot be opened an error is reported on stderr and nothing is
// written.
void writeGlueGrammar( const string & fileName, Global &options, set< string > &targetLabelCollection, map< string, int > &targetTopLabelCollection )
{
  ofstream grammarFile;
  grammarFile.open(fileName.c_str());
  if (!grammarFile) {
    // previously every write below silently became a no-op in this case
    cerr << "ERROR: could not open glue grammar file '" << fileName << "' for writing" << endl;
    return;
  }

  if (!options.targetSyntax) {
    grammarFile << "<s> [X] ||| <s> [S] ||| 1 ||| ||| 0" << endl
                << "[X][S] </s> [X] ||| [X][S] </s> [S] ||| 1 ||| 0-0 ||| 0" << endl
                << "[X][S] [X][X] [X] ||| [X][S] [X][X] [S] ||| 2.718 ||| 0-0 1-1 ||| 0" << endl;
  } else {
    // choose a top label that is not already a label: the shortest prefix of
    // "QQQQQQ" absent from targetLabelCollection (if every prefix is taken,
    // the full string is used regardless)
    string topLabel = "QQQQQQ";
    for( unsigned int i=1; i<=topLabel.length(); i++) {
      if(targetLabelCollection.find( topLabel.substr(0,i) ) == targetLabelCollection.end() ) {
        topLabel = topLabel.substr(0,i);
        break;
      }
    }

    // basic rules
    grammarFile << "<s> [X] ||| <s> [" << topLabel << "] ||| 1 ||| " << endl
                << "[X][" << topLabel << "] </s> [X] ||| [X][" << topLabel << "] </s> [" << topLabel << "] ||| 1 ||| 0-0 " << endl;

    // top rules
    for( map<string,int>::const_iterator i = targetTopLabelCollection.begin();
         i != targetTopLabelCollection.end(); i++ ) {
      grammarFile << "<s> [X][" << i->first << "] </s> [X] ||| <s> [X][" << i->first << "] </s> [" << topLabel << "] ||| 1 ||| 1-1" << endl;
    }

    // glue rules
    for( set<string>::const_iterator i = targetLabelCollection.begin();
         i != targetLabelCollection.end(); i++ ) {
      grammarFile << "[X][" << topLabel << "] [X][" << *i << "] [X] ||| [X][" << topLabel << "] [X][" << *i << "] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1" << endl;
    }
    grammarFile << "[X][" << topLabel << "] [X][X] [X] ||| [X][" << topLabel << "] [X][X] [" << topLabel << "] ||| 2.718 ||| 0-0 1-1 " << endl; // glue rule for unknown word...
  }
  grammarFile.close();
}

View File

@ -1,34 +0,0 @@
#pragma once
#include <vector>
#include <list>
#include <map>
#include <set>
#include <string>
#include <fstream>
#include <algorithm>
#include "SyntaxTree.h"
#include "XmlTree.h"
#include "Tunnel.h"
#include "TunnelCollection.h"
#include "SentenceAlignment.h"
#include "Global.h"
// Declared here only; the definition is not in this header.
// Takes a C string and returns a vector of strings.
std::vector<std::string> tokenize( const char [] );
// SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM): read one _DELIM-terminated line
// from stream _IS into the character buffer _LINE of capacity _SIZE.
// - A fail state that is neither bad nor eof is cleared after the read.
// - If the read filled the buffer (gcount() == _SIZE-1) the line is treated
//   as too long: a message is printed to cerr and the program exits with 1.
// The expansion uses unqualified cerr, endl and exit, so those names must be
// visible at the point of use.
#define SAFE_GETLINE(_IS, _LINE, _SIZE, _DELIM) { \
_IS.getline(_LINE, _SIZE, _DELIM); \
if(_IS.fail() && !_IS.bad() && !_IS.eof()) _IS.clear(); \
if (_IS.gcount() == _SIZE-1) { \
cerr << "Line too long! Buffer overflow. Delete lines >=" \
<< _SIZE << " chars or raise LINE_MAX_LENGTH in phrase-extract/extract.cpp" \
<< endl; \
exit(1); \
} \
}
// Buffer size named in the SAFE_GETLINE error message above.
#define LINE_MAX_LENGTH 1000000
// NOTE(review): these are definitions, not extern declarations. Including
// this header from more than one translation unit would define each variable
// more than once — confirm it is only included from a single .cpp file.
// Pointer to the program-wide settings object (read-only through this pointer).
const Global *g_global;
// Sets of label strings for the target and source side.
std::set< std::string > targetLabelCollection, sourceLabelCollection;
// Maps from label string to an int for the target and source side.
std::map< std::string, int > targetTopLabelCollection, sourceTopLabelCollection;

View File

@ -0,0 +1,27 @@
#!/usr/bin/perl

# Copy to STDOUT every STDIN line whose number of space-separated tokens
# before the first "|||" token is at most the limit given as first argument.
# Lines that contain no "|||" token are measured by their total token count.

use strict;

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

my $maxNumWords = $ARGV[0];
die "usage: $0 max-num-source-words < input > output\n" unless defined($maxNumWords);

while (my $line = <STDIN>) {
  chomp($line);
  my @toks = split(/ /,$line);

  # Count the tokens in front of the first "|||". The scan is bounded by the
  # number of tokens: without that bound a line lacking "|||" would compare
  # undef against "|||" forever and the script would never terminate.
  my $numSourceWords = 0;
  while ($numSourceWords < scalar(@toks) && $toks[$numSourceWords] ne "|||") {
    ++$numSourceWords;
  }

  if ($numSourceWords <= $maxNumWords) {
    print "$line\n";
  }
}

View File

@ -0,0 +1,33 @@
#! /usr/bin/perl -w

# Compare two files line by line and print how many line pairs are equal
# once leading and trailing whitespace is ignored.

use strict;

sub trim($);

my $file1 = $ARGV[0];
my $file2 = $ARGV[1];
die "usage: $0 file1 file2\n" unless defined($file1) && defined($file2);

# Fail loudly: an unopened handle would otherwise just yield a count of 0.
open (FILE1, $file1) or die "Cannot open $file1: $!\n";
open (FILE2, $file2) or die "Cannot open $file2: $!\n";

my $countEqual = 0;
while (my $line1 = <FILE1>) {
  my $line2 = <FILE2>;
  # The second file is shorter: there is nothing left to compare against.
  last unless defined($line2);
  if (trim($line1) eq trim($line2)) {
    ++$countEqual;
  }
}

close (FILE1);
close (FILE2);

print $countEqual ."\n";
######################
# Return a copy of the argument without leading and trailing whitespace.
sub trim($) {
  my ($text) = @_;
  # strip the whitespace run at the start and the one at the end
  $text =~ s/^\s+|\s+$//g;
  return $text;
}

View File

@ -0,0 +1,29 @@
#! /usr/bin/perl -w

# Print the lines of STDIN whose 1-based line number is listed in the file
# given as first argument (one number per line). The next number is only
# read after the current one has matched, so the list has to be in ascending
# order for every listed line to be printed.

use strict;

binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

my $fileLineNum = $ARGV[0];
die "usage: $0 line-number-file < input > output\n" unless defined($fileLineNum);

# Fail loudly: with an unopened handle the script would print nothing at all.
open (FILE_LINE_NUM, $fileLineNum) or die "Cannot open $fileLineNum: $!\n";

my $nextLineNum = <FILE_LINE_NUM>;

my $lineNum = 1;
while (my $line = <STDIN>) {
  if (defined($nextLineNum) && $lineNum == $nextLineNum) {
    # matches. output line
    chomp($line);
    print "$line\n";

    # next line number
    $nextLineNum = <FILE_LINE_NUM>;
  }

  ++$lineNum;
}

close (FILE_LINE_NUM);

View File

@ -0,0 +1,108 @@
#! /usr/bin/perl -w
# For every line of the files "source", "target" and "alignment" in the
# current directory: write the three lines to their own work directory, build
# a translation table from that single sentence pair, and run the decoder on
# the source line with a ConstrainedDecoding feature pointing at the target.
use strict;
# Command-line arguments: ini file for the decoder, hierarchical flag (1 = on),
# decoder and extractor executable names, and the name of the temp directory.
my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];
my $WORK_DIR = `pwd`;
chomp($WORK_DIR);
# NOTE(review): hard-coded, user-specific checkout location; the "~" is only
# expanded because every command below is run through the shell.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu";
# Both executables are looked up in the bin directory of that checkout.
$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";
# Use gsplit/gsort when "<tool> --help" produces output, else split/sort.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
$SPLIT_EXEC = 'gsplit';
}
else {
$SPLIT_EXEC = 'split';
}
my $SORT_EXEC = `gsort --help 2>/dev/null`;
if($SORT_EXEC) {
$SORT_EXEC = 'gsort';
}
else {
$SORT_EXEC = 'sort';
}
# Extra flag handed to the scoring and consolidation steps for hierarchical models.
my $hieroFlag = "";
if ($isHiero == 1) {
$hieroFlag = "--Hierarchical";
}
print STDERR "WORK_DIR=$WORK_DIR \n";
my $cmd;
# NOTE(review): none of these opens is checked; a missing file ends the loop
# immediately or yields undefined lines.
open (SOURCE, "source");
open (TARGET, "target");
open (ALIGNMENT, "alignment");
my $lineNum = 0;
my ($source, $target, $alignment);
# The three files are read in lock-step, driven by the source file.
while ($source = <SOURCE>) {
chomp($source);
$target = <TARGET>; chomp($target);
$alignment = <ALIGNMENT>; chomp($alignment);
#print STDERR "$source ||| $target ||| $alignment \n";
# write out 1 line
my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
`mkdir -p $tmpDir`;
open (SOURCE1, ">$tmpDir/source");
open (TARGET1, ">$tmpDir/target");
open (ALIGNMENT1, ">$tmpDir/alignment");
print SOURCE1 "$source\n";
print TARGET1 "$target\n";
print ALIGNMENT1 "$alignment\n";
close (SOURCE1);
close (TARGET1);
close (ALIGNMENT1);
# train
if ($isHiero == 1) {
$cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract --GZOutput";
}
else {
# pb
# (the extra "7" argument is presumably the maximum phrase length — confirm
# against the extractor's usage)
$cmd = "$extractExec $tmpDir/target $tmpDir/source $tmpDir/alignment $tmpDir/extract 7 --GZOutput";
}
# The extract command line is passed as trailing arguments to extract-parallel.perl.
$cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
print STDERR "Executing: $cmd\n";
`$cmd`;
# Score both directions, then consolidate the two half tables into $tmpDir/pt.
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
`$cmd`;
# decode
$cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source -feature-add \"ConstrainedDecoding path=$tmpDir/target\"";
print STDERR "Executing: $cmd\n";
`$cmd`;
# Work directories are kept: the clean-up below is commented out.
# `rm -rf $tmpDir`;
++$lineNum;
}
close(SOURCE);
close(TARGET);
close(ALIGNMENT);

View File

@ -0,0 +1,151 @@
#! /usr/bin/perl -w
# Leave-one-out experiment over the files "source", "target" and "alignment"
# in the current directory. For every line number in [startLine, endLine):
# train a translation table on the corpus without that line, decode the
# held-out source line with an n-best list, and append the n-best line count
# to the file "out".
use strict;
sub Write1Line;
sub WriteCorpus1Holdout;
# Command-line arguments: ini file for the decoder, hierarchical flag (1 = on),
# decoder and extractor executable names, temp directory name, and the
# half-open range of 0-based line numbers to process.
my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];
my $startLine = $ARGV[5];
my $endLine = $ARGV[6];
print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";
my $WORK_DIR = `pwd`;
chomp($WORK_DIR);
# NOTE(review): hard-coded, user-specific checkout location; the "~" is only
# expanded because every command below is run through the shell.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";
# Both executables are looked up in the bin directory of that checkout.
$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";
# Use gsplit/gsort when "<tool> --help" produces output, else split/sort.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
$SPLIT_EXEC = 'gsplit';
}
else {
$SPLIT_EXEC = 'split';
}
my $SORT_EXEC = `gsort --help 2>/dev/null`;
if($SORT_EXEC) {
$SORT_EXEC = 'gsort';
}
else {
$SORT_EXEC = 'sort';
}
# Extra flag handed to the scoring and consolidation steps for hierarchical models.
my $hieroFlag = "";
if ($isHiero == 1) {
$hieroFlag = "--Hierarchical";
}
print STDERR "WORK_DIR=$WORK_DIR \n";
my $cmd;
# NOTE(review): none of these opens is checked.
open (SOURCE, "source");
open (TARGET, "target");
open (ALIGNMENT, "alignment");
# The loop bound comes from wc, not from reaching end-of-file on the handles.
my $numLines = `cat source | wc -l`;
for (my $lineNum = 0; $lineNum < $numLines; ++$lineNum) {
# All three files are advanced on every iteration, including skipped ones,
# so the handles stay aligned with $lineNum.
my $source = <SOURCE>; chomp($source);
my $target = <TARGET>; chomp($target);
my $alignment = <ALIGNMENT>; chomp($alignment);
if ($lineNum < $startLine || $lineNum >= $endLine) {
next;
}
#print STDERR "$source ||| $target ||| $alignment \n";
# write out 1 line
my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
`mkdir -p $tmpDir`;
Write1Line($source, $tmpDir, "source.1");
Write1Line($target, $tmpDir, "target.1");
Write1Line($alignment, $tmpDir, "alignment.1");
# Training corpus: the full files minus the held-out line.
WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");
# train
if ($isHiero == 1) {
$cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
}
else {
# pb
# (the extra "7" argument is presumably the maximum phrase length — confirm
# against the extractor's usage)
$cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
}
# The extract command line is passed as trailing arguments to extract-parallel.perl.
$cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
print STDERR "Executing: $cmd\n";
`$cmd`;
# Score both directions, then consolidate the two half tables into $tmpDir/pt.
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
`$cmd`;
# decode
$cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -n-best-list $tmpDir/nbest 10000 distinct -v 2";
print STDERR "Executing: $cmd\n";
`$cmd`;
# count the number of translation in nbest list
$cmd = "wc -l $tmpDir/nbest >> out";
`$cmd`;
# The work directory is removed once its n-best count has been recorded.
`rm -rf $tmpDir`;
}
close(SOURCE);
close(TARGET);
close(ALIGNMENT);
######################
# Write $line, followed by a newline, to the file $fileName inside $tmpDir,
# replacing any previous content. Dies if the file cannot be written, so the
# steps that follow never run on a missing or stale file.
sub Write1Line
{
  my ($line, $tmpDir, $fileName) = @_;
  my $path = "$tmpDir/$fileName";

  # lexical handle: no shared global HANDLE that another sub could clobber
  open (my $handle, ">", $path) or die "Cannot write $path: $!\n";
  print $handle "$line\n";
  close ($handle) or die "Cannot close $path: $!\n";
}
# Copy $inFilePath to $tmpDir/$outFileName, leaving out the single line whose
# 0-based line number equals $holdoutLineNum. Dies if either file cannot be
# opened, so training never runs on an empty or stale corpus.
sub WriteCorpus1Holdout
{
  my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;
  my $outPath = "$tmpDir/$outFileName";

  # lexical handles: no shared global INFILE/OUTFILE
  open (my $in, "<", $inFilePath) or die "Cannot read $inFilePath: $!\n";
  open (my $out, ">", $outPath) or die "Cannot write $outPath: $!\n";

  my $lineNum = 0;
  while (my $line = <$in>) {
    chomp($line);

    if ($lineNum != $holdoutLineNum) {
      print $out "$line\n";
    }

    ++$lineNum;
  }

  close ($out) or die "Cannot close $outPath: $!\n";
  close ($in);
}

View File

@ -0,0 +1,147 @@
#! /usr/bin/perl -w
# Leave-one-out experiment over the files "source", "target" and "alignment"
# in the current directory. For every line number in [startLine, endLine):
# train a translation table on the corpus without that line, then decode the
# held-out source line with a ConstrainedDecoding feature pointing at the
# held-out target line.
use strict;
sub Write1Line;
sub WriteCorpus1Holdout;
# Command-line arguments: ini file for the decoder, hierarchical flag (1 = on),
# decoder and extractor executable names, temp directory name, and the
# half-open range of 0-based line numbers to process.
my $iniPath = $ARGV[0];
my $isHiero = $ARGV[1];
my $decoderExec = $ARGV[2];
my $extractExec = $ARGV[3];
my $tmpName = $ARGV[4];
my $startLine = $ARGV[5];
my $endLine = $ARGV[6];
print STDERR "iniPath=$iniPath \n isHiero=$isHiero \n decoderExec=$decoderExec \n extractExec=$extractExec \n";
my $WORK_DIR = `pwd`;
chomp($WORK_DIR);
# NOTE(review): hard-coded, user-specific checkout location; the "~" is only
# expanded because every command below is run through the shell.
my $MOSES_DIR = "~/workspace/github/mosesdecoder.hieu.gna";
# Both executables are looked up in the bin directory of that checkout.
$decoderExec = "$MOSES_DIR/bin/$decoderExec";
$extractExec = "$MOSES_DIR/bin/$extractExec";
# Use gsplit/gsort when "<tool> --help" produces output, else split/sort.
my $SPLIT_EXEC = `gsplit --help 2>/dev/null`;
if($SPLIT_EXEC) {
$SPLIT_EXEC = 'gsplit';
}
else {
$SPLIT_EXEC = 'split';
}
my $SORT_EXEC = `gsort --help 2>/dev/null`;
if($SORT_EXEC) {
$SORT_EXEC = 'gsort';
}
else {
$SORT_EXEC = 'sort';
}
# Extra flag handed to the scoring and consolidation steps for hierarchical models.
my $hieroFlag = "";
if ($isHiero == 1) {
$hieroFlag = "--Hierarchical";
}
print STDERR "WORK_DIR=$WORK_DIR \n";
my $cmd;
# NOTE(review): none of these opens is checked.
open (SOURCE, "source");
open (TARGET, "target");
open (ALIGNMENT, "alignment");
# The loop bound comes from wc, not from reaching end-of-file on the handles.
my $numLines = `cat source | wc -l`;
for (my $lineNum = 0; $lineNum < $numLines; ++$lineNum) {
# All three files are advanced on every iteration, including skipped ones,
# so the handles stay aligned with $lineNum.
my $source = <SOURCE>; chomp($source);
my $target = <TARGET>; chomp($target);
my $alignment = <ALIGNMENT>; chomp($alignment);
if ($lineNum < $startLine || $lineNum >= $endLine) {
next;
}
#print STDERR "$source ||| $target ||| $alignment \n";
# write out 1 line
my $tmpDir = "$WORK_DIR/$tmpName/work$lineNum";
`mkdir -p $tmpDir`;
Write1Line($source, $tmpDir, "source.1");
Write1Line($target, $tmpDir, "target.1");
Write1Line($alignment, $tmpDir, "alignment.1");
# Training corpus: the full files minus the held-out line.
WriteCorpus1Holdout($lineNum, "source", $tmpDir, "source.corpus");
WriteCorpus1Holdout($lineNum, "target", $tmpDir, "target.corpus");
WriteCorpus1Holdout($lineNum, "alignment", $tmpDir, "alignment.corpus");
# train
if ($isHiero == 1) {
$cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract --GZOutput";
}
else {
# pb
# (the extra "7" argument is presumably the maximum phrase length — confirm
# against the extractor's usage)
$cmd = "$extractExec $tmpDir/target.corpus $tmpDir/source.corpus $tmpDir/alignment.corpus $tmpDir/extract 7 --GZOutput";
}
# The extract command line is passed as trailing arguments to extract-parallel.perl.
$cmd = "$MOSES_DIR/scripts/generic/extract-parallel.perl 1 $SPLIT_EXEC $SORT_EXEC $cmd";
print STDERR "Executing: $cmd\n";
`$cmd`;
# Score both directions, then consolidate the two half tables into $tmpDir/pt.
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.sorted.gz /dev/null $tmpDir/pt.half.gz $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/scripts/generic/score-parallel.perl 1 $SORT_EXEC $MOSES_DIR/bin/score $tmpDir/extract.inv.sorted.gz /dev/null $tmpDir/pt.half.inv.gz --Inverse $hieroFlag --NoLex 1";
`$cmd`;
$cmd = "$MOSES_DIR/bin/consolidate $tmpDir/pt.half.gz $tmpDir/pt.half.inv.gz $tmpDir/pt $hieroFlag --OnlyDirect";
`$cmd`;
# decode
$cmd = "$decoderExec -f $iniPath -feature-overwrite \"TranslationModel0 path=$tmpDir/pt\" -i $tmpDir/source.1 -feature-add \"ConstrainedDecoding path=$tmpDir/target.1\" -v 2";
print STDERR "Executing: $cmd\n";
`$cmd`;
# The work directory is removed after decoding.
`rm -rf $tmpDir`;
}
close(SOURCE);
close(TARGET);
close(ALIGNMENT);
######################
# Write $line, followed by a newline, to the file $fileName inside $tmpDir,
# replacing any previous content. Dies if the file cannot be written, so the
# steps that follow never run on a missing or stale file.
sub Write1Line
{
  my ($line, $tmpDir, $fileName) = @_;
  my $path = "$tmpDir/$fileName";

  # lexical handle: no shared global HANDLE that another sub could clobber
  open (my $handle, ">", $path) or die "Cannot write $path: $!\n";
  print $handle "$line\n";
  close ($handle) or die "Cannot close $path: $!\n";
}
# Copy $inFilePath to $tmpDir/$outFileName, leaving out the single line whose
# 0-based line number equals $holdoutLineNum. Dies if either file cannot be
# opened, so training never runs on an empty or stale corpus.
sub WriteCorpus1Holdout
{
  my ($holdoutLineNum, $inFilePath, $tmpDir, $outFileName) = @_;
  my $outPath = "$tmpDir/$outFileName";

  # lexical handles: no shared global INFILE/OUTFILE
  open (my $in, "<", $inFilePath) or die "Cannot read $inFilePath: $!\n";
  open (my $out, ">", $outPath) or die "Cannot write $outPath: $!\n";

  my $lineNum = 0;
  while (my $line = <$in>) {
    chomp($line);

    if ($lineNum != $holdoutLineNum) {
      print $out "$line\n";
    }

    ++$lineNum;
  }

  close ($out) or die "Cannot close $outPath: $!\n";
  close ($in);
}

View File

@ -0,0 +1,17 @@
#! /usr/bin/perl -w

# Split the line range of the file "source" (in the current directory) into
# chunks of $SPLIT_LINES lines and start one background reachable.perl job
# per chunk. Each job's output goes to out.reachable.<startLine>.

use strict;

my $iniPath = $ARGV[0];
die "usage: $0 moses.ini\n" unless defined($iniPath);

my $SPLIT_LINES = 200;

my $lineCount = `cat source | wc -l`;
chomp($lineCount);  # drop wc's trailing newline so the log line stays on one line
print STDERR "lineCount=$lineCount \n";

for (my $startLine = 0; $startLine < $lineCount; $startLine += $SPLIT_LINES) {
  my $endLine = $startLine + $SPLIT_LINES;

  # system() hands the string to /bin/sh. "&>" is a bash extension; a plain
  # POSIX sh parses "cmd &>file" as "cmd &" followed by an empty redirect, so
  # nothing would be captured. ">file 2>&1" redirects both streams portably.
  my $cmd = "../../scripts/reachable.perl $iniPath 1 moses_chart extract-rules tmp-reachable $startLine $endLine >out.reachable.$startLine 2>&1 &";
  print STDERR "Executing: $cmd \n";
  system($cmd);
}

View File

@ -0,0 +1,69 @@
/**
* pugixml parser - version 1.2
* --------------------------------------------------------
* Copyright (C) 2006-2012, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
* Report bugs and download new versions at http://pugixml.org/
*
* This library is distributed under the MIT License. See notice at the end
* of this file.
*
* This work is based on the pugxml parser, which is:
* Copyright (C) 2003, by Kristen Wegner (kristen@tima.net)
*/
#ifndef HEADER_PUGICONFIG_HPP
#define HEADER_PUGICONFIG_HPP
// Uncomment this to enable wchar_t mode
// #define PUGIXML_WCHAR_MODE
// Uncomment this to disable XPath
// #define PUGIXML_NO_XPATH
// Uncomment this to disable STL
// #define PUGIXML_NO_STL
// Uncomment this to disable exceptions
// #define PUGIXML_NO_EXCEPTIONS
// Set this to control attributes for public classes/functions, i.e.:
// #define PUGIXML_API __declspec(dllexport) // to export all public symbols from DLL
// #define PUGIXML_CLASS __declspec(dllimport) // to import all classes from DLL
// #define PUGIXML_FUNCTION __fastcall // to set calling conventions to all public functions to fastcall
// In absence of PUGIXML_CLASS/PUGIXML_FUNCTION definitions PUGIXML_API is used instead
// Uncomment this to switch to header-only version
// #define PUGIXML_HEADER_ONLY
// #include "pugixml.cpp"
// Tune these constants to adjust memory-related behavior
// #define PUGIXML_MEMORY_PAGE_SIZE 32768
// #define PUGIXML_MEMORY_OUTPUT_STACK 10240
// #define PUGIXML_MEMORY_XPATH_PAGE_SIZE 4096
#endif
/**
* Copyright (c) 2006-2012 Arseny Kapoulkine
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,110 +0,0 @@
// $Id: tables-core.cpp 3131 2010-04-13 16:29:55Z pjwilliams $
//#include "beammain.h"
//#include "SafeGetLine.h"
#include "tables-core.h"
#define TABLE_LINE_MAX_LENGTH 1000
#define UNKNOWNSTR "UNK"
// as in beamdecoder/tables.cpp
// Split a NUL-terminated string into tokens separated by runs of spaces
// and/or tabs. Leading and trailing separators produce no empty tokens.
vector<string> tokenize( const char* input ) {
  vector< string > tokens;
  const char *cursor = input;

  while (*cursor != '\0') {
    // skip the separator run in front of the next word
    while (*cursor == ' ' || *cursor == '\t')
      ++cursor;
    if (*cursor == '\0')
      break;

    // consume the word up to the next separator or the end of the input
    const char *wordBegin = cursor;
    while (*cursor != '\0' && *cursor != ' ' && *cursor != '\t')
      ++cursor;

    tokens.push_back( string( wordBegin, cursor - wordBegin ) );
  }
  return tokens;
}
// Return the id of word. An unknown word is appended to vocab first and
// receives the index it was stored at as its id.
WORD_ID Vocabulary::storeIfNew( const WORD& word ) {
  map<WORD, WORD_ID>::iterator known = lookup.find( word );
  if( known == lookup.end() ) {
    const WORD_ID freshId = vocab.size();
    vocab.push_back( word );
    lookup[ word ] = freshId;
    return freshId;
  }
  return known->second;
}
// Look up the id of word; returns 0 when the word is not in the vocabulary.
// Note that 0 is also the id given to the first stored word.
WORD_ID Vocabulary::getWordID( const WORD& word ) {
  map<WORD, WORD_ID>::iterator hit = lookup.find( word );
  return ( hit != lookup.end() ) ? hit->second : 0;
}
// Return the id of phrase. An unknown phrase is appended to phraseTable
// first and receives the index it was stored at as its id.
PHRASE_ID PhraseTable::storeIfNew( const PHRASE& phrase ) {
  map< PHRASE, PHRASE_ID >::iterator known = lookup.find( phrase );
  if( known == lookup.end() ) {
    const PHRASE_ID freshId = phraseTable.size();
    phraseTable.push_back( phrase );
    lookup[ phrase ] = freshId;
    return freshId;
  }
  return known->second;
}
// Look up the id of phrase; returns 0 when the phrase is not in the table.
// Note that 0 is also the id given to the first stored phrase.
PHRASE_ID PhraseTable::getPhraseID( const PHRASE& phrase ) {
  map< PHRASE, PHRASE_ID >::iterator hit = lookup.find( phrase );
  return ( hit != lookup.end() ) ? hit->second : 0;
}
// Forget every stored phrase: empties both the id-to-phrase vector and the
// phrase-to-id map.
void PhraseTable::clear() {
  phraseTable.clear();
  lookup.clear();
}
// Fill the table with the default value -|d| for every distortion d
// from -10 up to and including 9.
void DTable::init() {
  int distortion = -10;
  while (distortion < 10) {
    dtable[distortion] = -abs( distortion );
    ++distortion;
  }
}
/*
void DTable::load( const string& fileName ) {
ifstream inFile;
inFile.open(fileName.c_str());
istream *inFileP = &inFile;
char line[TABLE_LINE_MAX_LENGTH];
int i=0;
while(true) {
i++;
SAFE_GETLINE((*inFileP), line, TABLE_LINE_MAX_LENGTH, '\n', __FILE__);
if (inFileP->eof()) break;
vector<string> token = tokenize( line );
if (token.size() < 2) {
cerr << "line " << i << " in " << fileName << " too short, skipping\n";
continue;
}
int d = atoi( token[0].c_str() );
double prob = log( atof( token[1].c_str() ) );
dtable[ d ] = prob;
}
}
*/
// Return the stored value for distortion, or log(0.00001) when the table
// has no entry for it. The table itself is not modified.
double DTable::get( int distortion ) {
  map< int, double >::iterator entry = dtable.find( distortion );
  if (entry != dtable.end())
    return entry->second;
  return log( 0.00001 );
}

View File

@ -1,72 +0,0 @@
#pragma once
// $Id: tables-core.h 2416 2009-07-30 11:07:38Z hieuhoang1972 $
#include <iostream>
#include <fstream>
#include <assert.h>
#include <stdlib.h>
#include <string>
#include <queue>
#include <map>
#include <cmath>
// NOTE(review): a using-directive in a header leaks into every file that
// includes it; the declarations below rely on it (unqualified vector, string,
// map), so it cannot be removed without qualifying them.
using namespace std;
// NOTE(review): vector is used below but <vector> is not included directly in
// this header; presumably it arrives through <queue> — confirm.
#define TABLE_LINE_MAX_LENGTH 1000
#define UNKNOWNSTR "UNK"
// Takes a C string and returns a vector of strings; declared here only.
vector<string> tokenize( const char[] );
//! Call delete on every element of a collection and then empty the collection.
//! The elements themselves must be pointers, since each one is passed to delete.
template<class COLL>
void RemoveAllInColl(COLL &coll)
{
  typename COLL::const_iterator cursor = coll.begin();
  while (cursor != coll.end()) {
    delete (*cursor);
    ++cursor;
  }
  coll.clear();
}
typedef string WORD;
typedef unsigned int WORD_ID;

//! Two-way mapping between words and numeric ids: lookup maps a word to its
//! id and vocab holds the words indexed by id.
class Vocabulary {
public:
  map<WORD, WORD_ID> lookup;
  vector< WORD > vocab;

  WORD_ID storeIfNew( const WORD& word );
  WORD_ID getWordID( const WORD& word );

  //! Mutable access to the word stored under id (no bounds check), even
  //! though the method itself is const.
  inline WORD &getWord( WORD_ID id ) const {
    return const_cast<WORD&>( vocab[ id ] );
  }
};
typedef vector< WORD_ID > PHRASE;
typedef unsigned int PHRASE_ID;

//! Two-way mapping between phrases (sequences of word ids) and numeric ids:
//! lookup maps a phrase to its id and phraseTable holds the phrases by id.
class PhraseTable {
public:
  map< PHRASE, PHRASE_ID > lookup;
  vector< PHRASE > phraseTable;

  PHRASE_ID storeIfNew( const PHRASE& phrase );
  PHRASE_ID getPhraseID( const PHRASE& phrase );
  void clear();

  //! Mutable access to the phrase stored under id (no bounds check).
  inline PHRASE &getPhrase( const PHRASE_ID id ) {
    return phraseTable[ id ];
  }
};
typedef vector< pair< PHRASE_ID, double > > PHRASEPROBVEC;

//! Tables keyed by phrase id. ttable pairs each listed phrase id with one
//! double; ttableMulti pairs it with a vector of doubles.
class TTable {
public:
  map< PHRASE_ID, PHRASEPROBVEC > ttable;
  map< PHRASE_ID, vector< pair< PHRASE_ID, vector< double > > > > ttableMulti;
};
//! Table mapping an integer distortion to a double value.
class DTable {
public:
  map< int, double > dtable;

  void init();
  void load( const string& fileName );
  double get( int distortion );
};

View File

@ -1,135 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.499747849" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.798364121" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-ordering}/Debug" id="cdt.managedbuild.builder.gnu.cross.1976289814" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1699460827" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1324749613" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1750299246" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.719498215" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1317297964" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.251118848" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.99297656" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.106920816" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1327002489" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1844372739" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1178164658" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.1434184833" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
</option>
<option id="gnu.cpp.link.option.paths.974811544" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.904916320" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1005231499" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1318928675" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.604255673" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.818331963">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.818331963" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.818331963" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.818331963." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1489025499" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1052477856" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-ordering}/Release" id="cdt.managedbuild.builder.gnu.cross.33925527" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1505710417" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1884790737" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" useByScannerDiscovery="false" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.197048136" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.106898878" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.157115446" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1920378037" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.37950410" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" useByScannerDiscovery="false" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.683027595" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1197641703" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1356351201" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2053623412" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1988048517" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1494470963" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1553727957" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extract-ordering.cdt.managedbuild.target.gnu.cross.exe.1840421491" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.818331963;cdt.managedbuild.config.gnu.cross.exe.release.818331963.;cdt.managedbuild.tool.gnu.cross.c.compiler.1505710417;cdt.managedbuild.tool.gnu.c.compiler.input.106898878">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.818331963;cdt.managedbuild.config.gnu.cross.exe.release.818331963.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.157115446;cdt.managedbuild.tool.gnu.cpp.compiler.input.683027595">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127;cdt.managedbuild.config.gnu.cross.exe.debug.1624346127.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1317297964;cdt.managedbuild.tool.gnu.cpp.compiler.input.1327002489">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1624346127;cdt.managedbuild.config.gnu.cross.exe.debug.1624346127.;cdt.managedbuild.tool.gnu.cross.c.compiler.1699460827;cdt.managedbuild.tool.gnu.c.compiler.input.719498215">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/extract-ordering"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/extract-ordering"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -1,137 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.124769989" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.266544803" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-rules}/Debug" id="cdt.managedbuild.builder.gnu.cross.335858926" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.1376077469" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.947547329" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.426953885" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<option id="gnu.c.compiler.option.include.paths.1671695899" name="Include paths (-I)" superClass="gnu.c.compiler.option.include.paths"/>
<option id="gnu.c.compiler.option.include.files.1838960067" name="Include files (-include)" superClass="gnu.c.compiler.option.include.files"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.985831394" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.53480540" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1726371873" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.899893408" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1099087456" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.88958138" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1616232021" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1411857637" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.109133121" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
</option>
<option id="gnu.cpp.link.option.paths.1030374421" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.272393234" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.1391783790" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.2066621509" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.1945638157" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.1200693544." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1113964425" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1722595316" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract-rules}/Release" id="cdt.managedbuild.builder.gnu.cross.691589832" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.593530229" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.1320426973" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.947026588" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1217031668" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1401773863" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.1504181086" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1645775798" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1484987112" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1807515346" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.44234391" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1468234013" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.467923425" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1673313707" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.518252425" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extract-rules.cdt.managedbuild.target.gnu.cross.exe.1916763759" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292;cdt.managedbuild.config.gnu.cross.exe.debug.1438215292.;cdt.managedbuild.tool.gnu.cross.c.compiler.1376077469;cdt.managedbuild.tool.gnu.c.compiler.input.985831394">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.1438215292;cdt.managedbuild.config.gnu.cross.exe.debug.1438215292.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.53480540;cdt.managedbuild.tool.gnu.cpp.compiler.input.88958138">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1200693544;cdt.managedbuild.config.gnu.cross.exe.release.1200693544.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1401773863;cdt.managedbuild.tool.gnu.cpp.compiler.input.1484987112">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.1200693544;cdt.managedbuild.config.gnu.cross.exe.release.1200693544.;cdt.managedbuild.tool.gnu.cross.c.compiler.593530229;cdt.managedbuild.tool.gnu.c.compiler.input.1217031668">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/extract-rules"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/extract-rules"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -1 +0,0 @@
/Debug

View File

@ -25,26 +25,6 @@
<nature>org.eclipse.cdt.managedbuilder.core.ScannerConfigNature</nature>
</natures>
<linkedResources>
<link>
<name>ExtractedRule.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/ExtractedRule.h</locationURI>
</link>
<link>
<name>Hole.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/Hole.h</locationURI>
</link>
<link>
<name>HoleCollection.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.cpp</locationURI>
</link>
<link>
<name>HoleCollection.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/HoleCollection.h</locationURI>
</link>
<link>
<name>InputFileStream.cpp</name>
<type>1</type>
@ -65,11 +45,6 @@
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/OutputFileStream.h</locationURI>
</link>
<link>
<name>RuleExtractionOptions.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/RuleExtractionOptions.h</locationURI>
</link>
<link>
<name>SentenceAlignment.cpp</name>
<type>1</type>
@ -111,14 +86,9 @@
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/XmlTree.h</locationURI>
</link>
<link>
<name>extract-rules-main.cpp</name>
<name>extract-main.cpp</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-rules-main.cpp</locationURI>
</link>
<link>
<name>gzfilebuf.h</name>
<type>1</type>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/gzfilebuf.h</locationURI>
<locationURI>PARENT-3-PROJECT_LOC/phrase-extract/extract-main.cpp</locationURI>
</link>
<link>
<name>tables-core.cpp</name>

View File

@ -1,135 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689" name="Debug" parent="cdt.managedbuild.config.gnu.cross.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.debug.386290689." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.debug.671913278" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.debug">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.1231657738" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract}/Debug" id="cdt.managedbuild.builder.gnu.cross.571044108" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.332036857" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.option.optimization.level.1292572253" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.1873227592" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1165888615" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1342023600" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.698819695" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1451916947" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1702398011" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.579278848" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.1856691234" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.1699542791" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<option id="gnu.cpp.link.option.libs.1880730637" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="z"/>
<listOptionValue builtIn="false" value="boost_iostreams-mt"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="boost_filesystem-mt"/>
</option>
<option id="gnu.cpp.link.option.paths.298225069" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.1339210059" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.976825054" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1971927463" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.704926167" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.cross.exe.release.140124152">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.cross.exe.release.140124152" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.cross.exe.release.140124152" name="Release" parent="cdt.managedbuild.config.gnu.cross.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.cross.exe.release.140124152." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.cross.exe.release.1250240843" name="Cross GCC" superClass="cdt.managedbuild.toolchain.gnu.cross.exe.release">
<targetPlatform archList="all" binaryParser="org.eclipse.cdt.core.ELF" id="cdt.managedbuild.targetPlatform.gnu.cross.597335968" isAbstract="false" osList="all" superClass="cdt.managedbuild.targetPlatform.gnu.cross"/>
<builder buildPath="${workspace_loc:/extract}/Release" id="cdt.managedbuild.builder.gnu.cross.95066247" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.builder.gnu.cross"/>
<tool id="cdt.managedbuild.tool.gnu.cross.c.compiler.2096762162" name="Cross GCC Compiler" superClass="cdt.managedbuild.tool.gnu.cross.c.compiler">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.option.optimization.level.88795016" name="Optimization Level" superClass="gnu.c.compiler.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.option.debugging.level.383328020" name="Debug Level" superClass="gnu.c.compiler.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.681105644" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.compiler.1806684544" name="Cross G++ Compiler" superClass="cdt.managedbuild.tool.gnu.cross.cpp.compiler">
<option id="gnu.cpp.compiler.option.optimization.level.553394848" name="Optimization Level" superClass="gnu.cpp.compiler.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.debugging.level.1420596769" name="Debug Level" superClass="gnu.cpp.compiler.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1726759263" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.c.linker.234409052" name="Cross GCC Linker" superClass="cdt.managedbuild.tool.gnu.cross.c.linker"/>
<tool id="cdt.managedbuild.tool.gnu.cross.cpp.linker.320346578" name="Cross G++ Linker" superClass="cdt.managedbuild.tool.gnu.cross.cpp.linker">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.2045242811" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.cross.archiver.417132714" name="Cross GCC Archiver" superClass="cdt.managedbuild.tool.gnu.cross.archiver"/>
<tool id="cdt.managedbuild.tool.gnu.cross.assembler.1944597759" name="Cross GCC Assembler" superClass="cdt.managedbuild.tool.gnu.cross.assembler">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.203400619" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extract.cdt.managedbuild.target.gnu.cross.exe.1220534104" name="Executable" projectType="cdt.managedbuild.target.gnu.cross.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.140124152;cdt.managedbuild.config.gnu.cross.exe.release.140124152.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1806684544;cdt.managedbuild.tool.gnu.cpp.compiler.input.1726759263">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.release.140124152;cdt.managedbuild.config.gnu.cross.exe.release.140124152.;cdt.managedbuild.tool.gnu.cross.c.compiler.2096762162;cdt.managedbuild.tool.gnu.c.compiler.input.681105644">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.386290689;cdt.managedbuild.config.gnu.cross.exe.debug.386290689.;cdt.managedbuild.tool.gnu.cross.c.compiler.332036857;cdt.managedbuild.tool.gnu.c.compiler.input.1165888615">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.cross.exe.debug.386290689;cdt.managedbuild.config.gnu.cross.exe.debug.386290689.;cdt.managedbuild.tool.gnu.cross.cpp.compiler.1342023600;cdt.managedbuild.tool.gnu.cpp.compiler.input.579278848">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/extract"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/extract"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -1,137 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.exe.debug.1133345948">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.debug.1133345948" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.debug.1133345948" name="Debug" parent="cdt.managedbuild.config.gnu.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.exe.debug.1133345948." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.debug.1405862229" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.debug">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.debug.605722566" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.debug"/>
<builder buildPath="${workspace_loc:/extractor/Debug}" id="cdt.managedbuild.target.gnu.builder.exe.debug.238577912" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.1956867596" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1512268277" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug">
<option id="gnu.cpp.compiler.exe.debug.option.optimization.level.2143789149" name="Optimization Level" superClass="gnu.cpp.compiler.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.debug.option.debugging.level.285958391" name="Debug Level" superClass="gnu.cpp.compiler.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.966722418" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1839105433" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.debug.554846982" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.exe.debug.option.optimization.level.538786560" name="Optimization Level" superClass="gnu.c.compiler.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.debug.option.debugging.level.2125704556" name="Debug Level" superClass="gnu.c.compiler.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.100176353" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.debug.1048685119" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.debug"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug.1295498016" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.debug">
<option id="gnu.cpp.link.option.paths.338150127" name="Library search path (-L)" superClass="gnu.cpp.link.option.paths" valueType="libPaths">
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/../../boost/lib64&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/mert_lib/Debug&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc:}/util/Debug&quot;"/>
</option>
<option id="gnu.cpp.link.option.libs.585257079" name="Libraries (-l)" superClass="gnu.cpp.link.option.libs" valueType="libs">
<listOptionValue builtIn="false" value="mert_lib"/>
<listOptionValue builtIn="false" value="boost_system-mt"/>
<listOptionValue builtIn="false" value="util"/>
<listOptionValue builtIn="false" value="z"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.656319745" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.debug.1361889787" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.955209559" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.gnu.exe.release.1385955159">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.exe.release.1385955159" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.exe.release.1385955159" name="Release" parent="cdt.managedbuild.config.gnu.exe.release">
<folderInfo id="cdt.managedbuild.config.gnu.exe.release.1385955159." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.exe.release.887500021" name="Linux GCC" superClass="cdt.managedbuild.toolchain.gnu.exe.release">
<targetPlatform id="cdt.managedbuild.target.gnu.platform.exe.release.1965146498" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.exe.release"/>
<builder buildPath="${workspace_loc:/extractor/Release}" id="cdt.managedbuild.target.gnu.builder.exe.release.1583162909" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.archiver.base.141140356" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.2048722912" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.exe.release">
<option id="gnu.cpp.compiler.exe.release.option.optimization.level.1971624451" name="Optimization Level" superClass="gnu.cpp.compiler.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.exe.release.option.debugging.level.582466413" name="Debug Level" superClass="gnu.cpp.compiler.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1466533418" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.exe.release.328232610" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.exe.release.option.optimization.level.447164665" name="Optimization Level" superClass="gnu.c.compiler.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.exe.release.option.debugging.level.28848417" name="Debug Level" superClass="gnu.c.compiler.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1088446293" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.linker.exe.release.1134906841" name="GCC C Linker" superClass="cdt.managedbuild.tool.gnu.c.linker.exe.release"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.linker.exe.release.450257401" name="GCC C++ Linker" superClass="cdt.managedbuild.tool.gnu.cpp.linker.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.cpp.linker.input.813260151" superClass="cdt.managedbuild.tool.gnu.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.exe.release.205332755" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.337244768" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="extractor.cdt.managedbuild.target.gnu.exe.1336860963" name="Executable" projectType="cdt.managedbuild.target.gnu.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1385955159;cdt.managedbuild.config.gnu.exe.release.1385955159.;cdt.managedbuild.tool.gnu.c.compiler.exe.release.328232610;cdt.managedbuild.tool.gnu.c.compiler.input.1088446293">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1133345948;cdt.managedbuild.config.gnu.exe.debug.1133345948.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.debug.1512268277;cdt.managedbuild.tool.gnu.cpp.compiler.input.1839105433">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.release.1385955159;cdt.managedbuild.config.gnu.exe.release.1385955159.;cdt.managedbuild.tool.gnu.cpp.compiler.exe.release.2048722912;cdt.managedbuild.tool.gnu.cpp.compiler.input.1466533418">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.exe.debug.1133345948;cdt.managedbuild.config.gnu.exe.debug.1133345948.;cdt.managedbuild.tool.gnu.c.compiler.exe.debug.554846982;cdt.managedbuild.tool.gnu.c.compiler.input.100176353">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/extractor"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/extractor"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
</cproject>

View File

@ -4,6 +4,7 @@
<comment></comment>
<projects>
<project>mert_lib</project>
<project>util</project>
</projects>
<buildSpec>
<buildCommand>

View File

@ -1,144 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<?fileVersion 4.0.0?><cproject storage_type_id="org.eclipse.cdt.core.XmlProjectDescriptionStorage">
<storageModule moduleId="org.eclipse.cdt.core.settings">
<cconfiguration id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" moduleId="org.eclipse.cdt.core.settings" name="Debug">
<externalSettings>
<externalSetting>
<entry flags="VALUE_WORKSPACE_PATH" kind="includePath" name="/lm"/>
<entry flags="VALUE_WORKSPACE_PATH" kind="libraryPath" name="/lm/Debug"/>
<entry flags="RESOLVED" kind="libraryFile" name="lm" srcPrefixMapping="" srcRootPath=""/>
</externalSetting>
</externalSettings>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactExtension="a" artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.staticLib" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.debug,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.staticLib" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750" name="Debug" parent="cdt.managedbuild.config.gnu.macosx.exe.debug">
<folderInfo id="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.debug.640882096" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.debug">
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.debug.793478365" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.debug"/>
<builder buildPath="${workspace_loc:/lm/Debug}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.debug.36011795" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" parallelBuildOn="true" parallelizationNumber="optimal" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug.1252826468" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.debug"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug.1024598065" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.139111896" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug.62265891" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.debug">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.588438623" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.775866405" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug">
<option id="gnu.cpp.compilermacosx.exe.debug.option.optimization.level.586969644" name="Optimization Level" superClass="gnu.cpp.compilermacosx.exe.debug.option.optimization.level" value="gnu.cpp.compiler.optimization.level.none" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level.7139692" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.debug.option.debugging.level" value="gnu.cpp.compiler.debugging.level.max" valueType="enumerated"/>
<option id="gnu.cpp.compiler.option.include.paths.1988092227" name="Include paths (-I)" superClass="gnu.cpp.compiler.option.include.paths" valueType="includePath">
<listOptionValue builtIn="false" value="/opt/local/include"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../boost/include&quot;"/>
<listOptionValue builtIn="false" value="&quot;${workspace_loc}/../../&quot;"/>
</option>
<option id="gnu.cpp.compiler.option.preprocessor.def.1980966336" name="Defined symbols (-D)" superClass="gnu.cpp.compiler.option.preprocessor.def" valueType="definedSymbols">
<listOptionValue builtIn="false" value="HAVE_BOOST"/>
<listOptionValue builtIn="false" value="KENLM_MAX_ORDER=7"/>
<listOptionValue builtIn="false" value="MAX_NUM_FACTORS=4"/>
<listOptionValue builtIn="false" value="TRACE_ENABLE"/>
</option>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug">
<option defaultValue="gnu.c.optimization.level.none" id="gnu.c.compiler.macosx.exe.debug.option.optimization.level.934764060" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.debug.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.debug.option.debugging.level.2078705375" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.debug.option.debugging.level" value="gnu.c.debugging.level.max" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1028526865" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
<sourceEntries>
<entry excluding="left_test.cc|model_test.cc" flags="VALUE_WORKSPACE_PATH|RESOLVED" kind="sourcePath" name=""/>
</sourceEntries>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
<cconfiguration id="cdt.managedbuild.config.macosx.exe.release.203229648">
<storageModule buildSystemId="org.eclipse.cdt.managedbuilder.core.configurationDataProvider" id="cdt.managedbuild.config.macosx.exe.release.203229648" moduleId="org.eclipse.cdt.core.settings" name="Release">
<externalSettings/>
<extensions>
<extension id="org.eclipse.cdt.core.MachO64" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.ELF" point="org.eclipse.cdt.core.BinaryParser"/>
<extension id="org.eclipse.cdt.core.GmakeErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.CWDLocator" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GCCErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GASErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
<extension id="org.eclipse.cdt.core.GLDErrorParser" point="org.eclipse.cdt.core.ErrorParser"/>
</extensions>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<configuration artifactName="${ProjName}" buildArtefactType="org.eclipse.cdt.build.core.buildArtefactType.exe" buildProperties="org.eclipse.cdt.build.core.buildType=org.eclipse.cdt.build.core.buildType.release,org.eclipse.cdt.build.core.buildArtefactType=org.eclipse.cdt.build.core.buildArtefactType.exe" cleanCommand="rm -rf" description="" id="cdt.managedbuild.config.macosx.exe.release.203229648" name="Release" parent="cdt.managedbuild.config.macosx.exe.release">
<folderInfo id="cdt.managedbuild.config.macosx.exe.release.203229648." name="/" resourcePath="">
<toolChain id="cdt.managedbuild.toolchain.gnu.macosx.exe.release.1942852701" name="MacOSX GCC" superClass="cdt.managedbuild.toolchain.gnu.macosx.exe.release">
<targetPlatform binaryParser="org.eclipse.cdt.core.MachO64;org.eclipse.cdt.core.ELF" id="cdt.managedbuild.target.gnu.platform.macosx.exe.release.2107180060" name="Debug Platform" superClass="cdt.managedbuild.target.gnu.platform.macosx.exe.release"/>
<builder buildPath="${workspace_loc:/lm/Release}" id="cdt.managedbuild.target.gnu.builder.macosx.exe.release.127652112" keepEnvironmentInBuildfile="false" managedBuildOn="true" name="Gnu Make Builder" superClass="cdt.managedbuild.target.gnu.builder.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release.1668850519" name="MacOS X C Linker" superClass="cdt.managedbuild.tool.macosx.c.linker.macosx.exe.release"/>
<tool id="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release.934899611" name="MacOS X C++ Linker" superClass="cdt.managedbuild.tool.macosx.cpp.linker.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.macosx.cpp.linker.input.794276660" superClass="cdt.managedbuild.tool.macosx.cpp.linker.input">
<additionalInput kind="additionalinputdependency" paths="$(USER_OBJS)"/>
<additionalInput kind="additionalinput" paths="$(LIBS)"/>
</inputType>
</tool>
<tool id="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release.362272521" name="GCC Assembler" superClass="cdt.managedbuild.tool.gnu.assembler.macosx.exe.release">
<inputType id="cdt.managedbuild.tool.gnu.assembler.input.370659018" superClass="cdt.managedbuild.tool.gnu.assembler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.archiver.macosx.base.2103660404" name="GCC Archiver" superClass="cdt.managedbuild.tool.gnu.archiver.macosx.base"/>
<tool id="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795" name="GCC C++ Compiler" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release">
<option id="gnu.cpp.compiler.macosx.exe.release.option.optimization.level.1671568858" name="Optimization Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.optimization.level" value="gnu.cpp.compiler.optimization.level.most" valueType="enumerated"/>
<option id="gnu.cpp.compiler.macosx.exe.release.option.debugging.level.230723898" name="Debug Level" superClass="gnu.cpp.compiler.macosx.exe.release.option.debugging.level" value="gnu.cpp.compiler.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602" superClass="cdt.managedbuild.tool.gnu.cpp.compiler.input"/>
</tool>
<tool id="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990" name="GCC C Compiler" superClass="cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release">
<option defaultValue="gnu.c.optimization.level.most" id="gnu.c.compiler.macosx.exe.release.option.optimization.level.1934130159" name="Optimization Level" superClass="gnu.c.compiler.macosx.exe.release.option.optimization.level" valueType="enumerated"/>
<option id="gnu.c.compiler.macosx.exe.release.option.debugging.level.1848737807" name="Debug Level" superClass="gnu.c.compiler.macosx.exe.release.option.debugging.level" value="gnu.c.debugging.level.none" valueType="enumerated"/>
<inputType id="cdt.managedbuild.tool.gnu.c.compiler.input.1294441742" superClass="cdt.managedbuild.tool.gnu.c.compiler.input"/>
</tool>
</toolChain>
</folderInfo>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.core.externalSettings"/>
</cconfiguration>
</storageModule>
<storageModule moduleId="cdtBuildSystem" version="4.0.0">
<project id="lm.cdt.managedbuild.target.macosx.exe.1399596076" name="Executable" projectType="cdt.managedbuild.target.macosx.exe"/>
</storageModule>
<storageModule moduleId="scannerConfiguration">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId=""/>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750;cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.debug.1024092140;cdt.managedbuild.tool.gnu.cpp.compiler.input.20502600">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.203229648;cdt.managedbuild.config.macosx.exe.release.203229648.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.release.990116990;cdt.managedbuild.tool.gnu.c.compiler.input.1294441742">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.gnu.macosx.exe.debug.351042750;cdt.managedbuild.config.gnu.macosx.exe.debug.351042750.;cdt.managedbuild.tool.gnu.c.compiler.macosx.exe.debug.34201722;cdt.managedbuild.tool.gnu.c.compiler.input.1028526865">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileC"/>
</scannerConfigBuildInfo>
<scannerConfigBuildInfo instanceId="cdt.managedbuild.config.macosx.exe.release.203229648;cdt.managedbuild.config.macosx.exe.release.203229648.;cdt.managedbuild.tool.gnu.cpp.compiler.macosx.exe.release.2026817795;cdt.managedbuild.tool.gnu.cpp.compiler.input.1058671602">
<autodiscovery enabled="true" problemReportingEnabled="true" selectedProfileId="org.eclipse.cdt.managedbuilder.core.GCCManagedMakePerProjectProfileCPP"/>
</scannerConfigBuildInfo>
</storageModule>
<storageModule moduleId="refreshScope" versionNumber="2">
<configuration configurationName="Release">
<resource resourceType="PROJECT" workspacePath="/lm"/>
</configuration>
<configuration configurationName="Debug">
<resource resourceType="PROJECT" workspacePath="/lm"/>
</configuration>
</storageModule>
<storageModule moduleId="org.eclipse.cdt.make.core.buildtargets"/>
<storageModule moduleId="org.eclipse.cdt.core.LanguageSettingsProviders"/>
</cproject>

Some files were not shown because too many files have changed in this diff Show More