a lot of changes
20
Jamroot
@ -15,13 +15,15 @@
|
||||
#Note that, like language models, this is the --prefix where the library was
|
||||
#installed, not some executable within the library.
|
||||
#
|
||||
#Compact phrase table and compact lexical reordering table
|
||||
#--with-cmph=/path/to/cmph
|
||||
#
|
||||
#Thread-caching malloc (optional):
|
||||
#--with-tcmalloc
|
||||
#
|
||||
#REGRESSION TESTING
|
||||
#--with-regtest=/path/to/moses-reg-test-data
|
||||
#
|
||||
#
|
||||
#INSTALLATION
|
||||
#--prefix=/path/to/prefix sets the install prefix [default is source root].
|
||||
#--bindir=/path/to/prefix/bin sets the bin directory [PREFIX/bin]
|
||||
@ -29,6 +31,7 @@
|
||||
#--includedir=/path/to/prefix/include installs headers.
|
||||
# Does not install if missing. No argument defaults to PREFIX/include .
|
||||
#--install-scripts=/path/to/scripts copies scripts into a directory.
|
||||
# Does not install if missing. No argument defaults to PREFIX/scripts .
|
||||
#--git appends the git revision to the prefix directory.
|
||||
#
|
||||
#
|
||||
@ -41,7 +44,9 @@
|
||||
# variant=release|debug|profile builds optimized (default), for debug, or for
|
||||
# profiling
|
||||
#
|
||||
# link=static|shared controls linking (default static)
|
||||
# link=static|shared controls preferred linking (default static)
|
||||
# --static forces static linking (the default will fall
|
||||
# back to shared)
|
||||
#
|
||||
# debug-symbols=on|off include (default) or exclude debugging
|
||||
# information also known as -g
|
||||
@ -50,6 +55,9 @@
|
||||
#
|
||||
# --enable-boost-pool uses Boost pools for the memory SCFG table
|
||||
#
|
||||
# --without-libsegfault does not link with libSegFault
|
||||
#
|
||||
# --max-kenlm-order maximum ngram order that kenlm can process (default 6)
|
||||
#
|
||||
#CONTROLLING THE BUILD
|
||||
#-a to build from scratch
|
||||
@ -73,6 +81,10 @@ if [ option.get "with-tcmalloc" : : "yes" ] {
|
||||
requirements += [ option.get "notrace" : <define>TRACE_ENABLE=1 ] ;
|
||||
requirements += [ option.get "enable-boost-pool" : : <define>USE_BOOST_POOL ] ;
|
||||
|
||||
if [ option.get "with-cmph" ] {
|
||||
requirements += <define>HAVE_CMPH ;
|
||||
}
|
||||
|
||||
project : default-build
|
||||
<threading>multi
|
||||
<warnings>on
|
||||
@ -91,10 +103,10 @@ project : requirements
|
||||
#Add directories here if you want their incidental targets too (i.e. tests).
|
||||
build-projects util lm mert moses-cmd/src moses-chart-cmd/src scripts regression-testing ;
|
||||
|
||||
alias programs : lm//query lm//build_binary moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor ;
|
||||
alias programs : lm//query lm//build_binary lm//kenlm_max_order moses-chart-cmd/src//moses_chart moses-cmd/src//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt mert//programs contrib/server//mosesserver misc//programs symal phrase-extract phrase-extract//lexical-reordering phrase-extract//extract-ghkm phrase-extract//pcfg-extract phrase-extract//pcfg-score biconcor ;
|
||||
|
||||
install-bin-libs programs ;
|
||||
install-headers headers-base : [ glob-tree *.h *.hh : jam-files dist bin lib include kenlm moses ] : . ;
|
||||
install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ;
|
||||
install-headers headers-moses : moses/src//headers-to-install : moses/src ;
|
||||
|
||||
alias install : prefix-bin prefix-lib headers-base headers-moses ;
|
||||
|
3
NOTICE
Normal file
@ -0,0 +1,3 @@
|
||||
This code includes data from Daniel Naber's Language Tools (Czech abbreviations).
|
||||
|
||||
This code includes data from the Czech Wiktionary (also Czech abbreviations).
|
3
bjam
@ -4,8 +4,7 @@ if
|
||||
bjam="$(which bjam 2>/dev/null)" && #exists
|
||||
[ ${#bjam} != 0 ] && #paranoia about which printing nothing then returning true
|
||||
! grep UFIHGUFIHBDJKNCFZXAEVA "${bjam}" </dev/null >/dev/null && #bjam in path isn't this script
|
||||
"${bjam}" --help >/dev/null 2>/dev/null && #bjam in path isn't broken (i.e. has boost-build)
|
||||
"${bjam}" --version |grep "Boost.Build 201" >/dev/null 2>/dev/null #It's recent enough.
|
||||
"${bjam}" --sanity-test 2>/dev/null |grep Sane >/dev/null #The test in jam-files/sanity.jam passes
|
||||
then
|
||||
#Delegate to system bjam
|
||||
exec "${bjam}" "$@"
|
||||
|
16
contrib/fuzzy-match/Makefile
Normal file
@ -0,0 +1,16 @@
|
||||
# Build rules for the fuzzy-match tools.
.PHONY: all clean

all: suffix-test fuzzy-match fuzzy-match2

clean:
	rm -f *.o old/*.o

# Write every object to its own target path ($@). Without -o, an object built
# from a sub-directory source (old/fuzzy-match.cpp) is dropped into the current
# directory, so the old/fuzzy-match.o prerequisite never exists and the target
# is rebuilt on every run.
.cpp.o:
	g++ -O6 -g -c $< -o $@

suffix-test: Vocabulary.o SuffixArray.o suffix-test.o
	g++ Vocabulary.o SuffixArray.o suffix-test.o -o suffix-test

# Link exactly the object named in the prerequisite list.
fuzzy-match: Vocabulary.o SuffixArray.o old/fuzzy-match.o
	g++ Vocabulary.o SuffixArray.o old/fuzzy-match.o -o fuzzy-match

fuzzy-match2: Vocabulary.o SuffixArray.o fuzzy-match2.o Util.o
	g++ Vocabulary.o SuffixArray.o fuzzy-match2.o Util.o -o fuzzy-match2
|
29
contrib/fuzzy-match/Match.h
Normal file
@ -0,0 +1,29 @@
|
||||
//
|
||||
// Match.h
|
||||
// fuzzy-match
|
||||
//
|
||||
// Created by Hieu Hoang on 25/07/2012.
|
||||
// Copyright 2012 __MyCompanyName__. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef fuzzy_match_Match_h
|
||||
#define fuzzy_match_Match_h
|
||||
|
||||
/* data structure for n-gram match between input and corpus */
|
||||
|
||||
// Plain record describing one match: a span in the input, a span on the
// "tm" side, and three cost values. All fields are public and are set once
// by the constructor.
// NOTE(review): "tm" presumably means translation memory (the corpus side);
// confirm against the code that creates Match objects.
class Match {
public:
  int input_start;    // first position of the span in the input
  int input_end;      // last position of the span in the input
  int tm_start;       // first position of the span on the tm side
  int tm_end;         // last position of the span on the tm side
  int min_cost;       // lower cost value supplied by the creator
  int max_cost;       // upper cost value supplied by the creator
  int internal_cost;  // cost value attributed to the match itself

  // Stores the seven arguments in the fields, in declaration order.
  Match( int inputFrom, int inputTo,
         int tmFrom, int tmTo,
         int lowestCost, int highestCost,
         int ownCost )
    : input_start(inputFrom)
    , input_end(inputTo)
    , tm_start(tmFrom)
    , tm_end(tmTo)
    , min_cost(lowestCost)
    , max_cost(highestCost)
    , internal_cost(ownCost)
  {
  }
};
|
||||
|
||||
|
||||
#endif
|
48
contrib/fuzzy-match/SentenceAlignment.h
Normal file
@ -0,0 +1,48 @@
|
||||
//
|
||||
// SentenceAlignment.h
|
||||
// fuzzy-match
|
||||
//
|
||||
// Created by Hieu Hoang on 25/07/2012.
|
||||
// Copyright 2012 __MyCompanyName__. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef fuzzy_match_SentenceAlignment_h
|
||||
#define fuzzy_match_SentenceAlignment_h
|
||||
|
||||
#include <sstream>
|
||||
#include "Vocabulary.h"
|
||||
|
||||
extern Vocabulary vocabulary;
|
||||
|
||||
struct SentenceAlignment
|
||||
{
|
||||
int count;
|
||||
vector< WORD_ID > target;
|
||||
vector< pair<int,int> > alignment;
|
||||
|
||||
SentenceAlignment()
|
||||
{}
|
||||
|
||||
string getTargetString() const
|
||||
{
|
||||
stringstream strme;
|
||||
for (size_t i = 0; i < target.size(); ++i) {
|
||||
const WORD &word = vocabulary.GetWord(target[i]);
|
||||
strme << word << " ";
|
||||
}
|
||||
return strme.str();
|
||||
}
|
||||
|
||||
string getAlignmentString() const
|
||||
{
|
||||
stringstream strme;
|
||||
for (size_t i = 0; i < alignment.size(); ++i) {
|
||||
const pair<int,int> &alignPair = alignment[i];
|
||||
strme << alignPair.first << "-" << alignPair.second << " ";
|
||||
}
|
||||
return strme.str();
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
#endif
|
244
contrib/fuzzy-match/SuffixArray.cpp
Normal file
@ -0,0 +1,244 @@
|
||||
#include "SuffixArray.h"
|
||||
#include <string>
|
||||
#include <stdlib.h>
|
||||
#include <cstring>
|
||||
|
||||
using namespace std;
|
||||
|
||||
// Builds the suffix array for the corpus stored in fileName (one sentence per
// line, tokenized by Vocabulary::Tokenize).
//
// The file is read twice: once to count the words so that the arrays can be
// sized, once to fill them. Every sentence is followed by the end-of-sentence
// id "<s>". Afterwards the suffix start positions in m_index are sorted.
//
// Exits the process with status 1 if the file cannot be opened or memory
// cannot be allocated (same convention as the loaders in Util.cpp).
//
// Notes:
//  - a final line that lacks a trailing newline is not counted, because the
//    loops stop as soon as eof is reached;
//  - m_wordInSentence and m_sentenceLength are char arrays, so positions and
//    lengths beyond the range of char do not fit.
SuffixArray::SuffixArray( string fileName )
{
  m_vcb.StoreIfNew( "<uNk>" );
  m_endOfSentence = m_vcb.StoreIfNew( "<s>" );

  ifstream extractFile;
  char line[LINE_MAX_LENGTH];

  // first pass: count the number of words
  extractFile.open(fileName.c_str());
  if (!extractFile) {
    // without this check a missing file makes the loop below spin forever:
    // SAFE_GETLINE clears the fail bit and eof is never reached
    cerr << "file not found: " << fileName << endl;
    exit(1);
  }
  istream *fileP = &extractFile;
  m_size = 0;
  size_t sentenceCount = 0;
  while(!fileP->eof()) {
    SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
    if (fileP->eof()) break;
    vector< WORD_ID > words = m_vcb.Tokenize( line );
    m_size += words.size() + 1;   // +1 for the sentence boundary
    sentenceCount++;
  }
  extractFile.close();
  // the first pass leaves eofbit/failbit set; older standard libraries do not
  // reset them in open(), which would make the second pass read nothing
  extractFile.clear();
  cerr << m_size << " words (incl. sentence boundaries)" << endl;

  // allocate memory (calloc takes the element count first, then the size)
  m_array = (WORD_ID*) calloc( m_size, sizeof( WORD_ID ) );
  m_index = (INDEX*) calloc( m_size, sizeof( INDEX ) );
  m_wordInSentence = (char*) calloc( m_size, sizeof( char ) );
  m_sentence = (size_t*) calloc( m_size, sizeof( size_t ) );
  m_sentenceLength = (char*) calloc( sentenceCount, sizeof( char ) );
  m_buffer = (INDEX*) calloc( m_size, sizeof( INDEX ) );
  // calloc may legitimately return NULL for a zero-sized request
  const bool wordArraysMissing =
    m_size > 0 && (!m_array || !m_index || !m_wordInSentence || !m_sentence || !m_buffer);
  const bool sentenceArrayMissing = sentenceCount > 0 && !m_sentenceLength;
  if (wordArraysMissing || sentenceArrayMissing) {
    cerr << "out of memory while building suffix array for " << fileName << endl;
    exit(1);
  }

  // second pass: fill the arrays
  INDEX wordIndex = 0;
  size_t sentenceId = 0;
  extractFile.open(fileName.c_str());
  if (!extractFile) {
    cerr << "file not found: " << fileName << endl;
    exit(1);
  }
  fileP = &extractFile;
  while(!fileP->eof()) {
    SAFE_GETLINE((*fileP), line, LINE_MAX_LENGTH, '\n');
    if (fileP->eof()) break;
    vector< WORD_ID > words = m_vcb.Tokenize( line );

    for (size_t w = 0; w < words.size(); ++w) {
      m_index[ wordIndex ] = wordIndex;
      m_sentence[ wordIndex ] = sentenceId;
      m_wordInSentence[ wordIndex ] = w;
      m_array[ wordIndex++ ] = words[w];
    }
    // sentence boundary; its m_sentence / m_wordInSentence slots stay zero
    m_index[ wordIndex ] = wordIndex;
    m_array[ wordIndex++ ] = m_endOfSentence;
    m_sentenceLength[ sentenceId++ ] = words.size();
  }
  extractFile.close();
  cerr << "done reading " << wordIndex << " words, " << sentenceId << " sentences." << endl;

  // sort; for an empty corpus m_size-1 would wrap around to the largest
  // INDEX value, so there is nothing to sort and Sort must not be called
  if (m_size > 0) {
    Sort( 0, m_size-1 );
  }
  free( m_buffer );
  m_buffer = NULL;   // scratch space is only valid while sorting
  cerr << "done sorting" << endl;
}
|
||||
|
||||
// good ol' quick sort
|
||||
void SuffixArray::Sort(INDEX start, INDEX end) {
|
||||
if (start == end) return;
|
||||
INDEX mid = (start+end+1)/2;
|
||||
Sort( start, mid-1 );
|
||||
Sort( mid, end );
|
||||
|
||||
// merge
|
||||
int i = start;
|
||||
int j = mid;
|
||||
int k = 0;
|
||||
int length = end-start+1;
|
||||
while( k<length )
|
||||
{
|
||||
if (i == mid )
|
||||
{
|
||||
m_buffer[ k++ ] = m_index[ j++ ];
|
||||
}
|
||||
else if (j > end )
|
||||
{
|
||||
m_buffer[ k++ ] = m_index[ i++ ];
|
||||
}
|
||||
else {
|
||||
if (CompareIndex( m_index[i], m_index[j] ) < 0)
|
||||
{
|
||||
m_buffer[ k++ ] = m_index[ i++ ];
|
||||
}
|
||||
else
|
||||
{
|
||||
m_buffer[ k++ ] = m_index[ j++ ];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
memcpy( ((char*)m_index) + sizeof( INDEX ) * start,
|
||||
((char*)m_buffer), sizeof( INDEX ) * (end-start+1) );
|
||||
}
|
||||
|
||||
// Releases every array the constructor allocated with calloc.
// m_buffer is not touched here: the constructor already frees it right
// after sorting.
SuffixArray::~SuffixArray()
{
  free(m_index);
  free(m_array);
  // these three were allocated in the constructor but never released
  free(m_wordInSentence);
  free(m_sentence);
  free(m_sentenceLength);
}
|
||||
|
||||
// Orders the corpus suffixes starting at positions a and b.
// Walks both suffixes while their word ids agree. If suffix a reaches the end
// of the corpus at that point the result is -1; otherwise, if b does, the
// result is 1; otherwise the first differing words decide via CompareWord.
int SuffixArray::CompareIndex( INDEX a, INDEX b ) const
{
  INDEX shared = 0;   // number of leading words the two suffixes have in common
  for (;;) {
    const INDEX posA = a + shared;
    const INDEX posB = b + shared;
    if (posA >= m_size || posB >= m_size) break;
    if (m_array[ posA ] != m_array[ posB ]) break;
    ++shared;
  }

  if (a + shared == m_size) {
    return -1;
  }
  if (b + shared == m_size) {
    return 1;
  }
  return CompareWord( m_array[ a + shared ], m_array[ b + shared ] );
}
|
||||
|
||||
// Compares two words given by id using the string comparison of their
// surface forms; the sign of the result follows string::compare
// (negative, zero or positive).
inline int SuffixArray::CompareWord( WORD_ID a, WORD_ID b ) const
{
  const WORD &left = m_vcb.GetWord(a);
  const WORD &right = m_vcb.GetWord(b);
  return left.compare( right );
}
|
||||
|
||||
int SuffixArray::Count( const vector< WORD > &phrase )
|
||||
{
|
||||
INDEX dummy;
|
||||
return LimitedCount( phrase, m_size, dummy, dummy, 0, m_size-1 );
|
||||
}
|
||||
|
||||
bool SuffixArray::MinCount( const vector< WORD > &phrase, INDEX min )
|
||||
{
|
||||
INDEX dummy;
|
||||
return LimitedCount( phrase, min, dummy, dummy, 0, m_size-1 ) >= min;
|
||||
}
|
||||
|
||||
bool SuffixArray::Exists( const vector< WORD > &phrase )
|
||||
{
|
||||
INDEX dummy;
|
||||
return LimitedCount( phrase, 1, dummy, dummy, 0, m_size-1 ) == 1;
|
||||
}
|
||||
|
||||
int SuffixArray::FindMatches( const vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
|
||||
{
|
||||
return LimitedCount( phrase, m_size, firstMatch, lastMatch, search_start, search_end );
|
||||
}
|
||||
|
||||
int SuffixArray::LimitedCount( const vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start, INDEX search_end )
|
||||
{
|
||||
// cerr << "FindFirst\n";
|
||||
INDEX start = search_start;
|
||||
INDEX end = (search_end == -1) ? (m_size-1) : search_end;
|
||||
INDEX mid = FindFirst( phrase, start, end );
|
||||
// cerr << "done\n";
|
||||
if (mid == m_size) return 0; // no matches
|
||||
if (min == 1) return 1; // only existance check
|
||||
|
||||
int matchCount = 1;
|
||||
|
||||
//cerr << "before...\n";
|
||||
firstMatch = FindLast( phrase, mid, start, -1 );
|
||||
matchCount += mid - firstMatch;
|
||||
|
||||
//cerr << "after...\n";
|
||||
lastMatch = FindLast( phrase, mid, end, 1 );
|
||||
matchCount += lastMatch - mid;
|
||||
|
||||
return matchCount;
|
||||
}
|
||||
|
||||
// Bisects for the boundary of a run of matching slots.
// `start` is expected to be a slot that matches phrase; `end` is first moved
// one step further in `direction` (+1 or -1). Returns the slot whose suffix
// matches phrase while its neighbour in `direction` does not.
SuffixArray::INDEX SuffixArray::FindLast( const vector< WORD > &phrase, INDEX start, INDEX end, int direction )
{
  // when searching downwards the midpoint is rounded up, otherwise down
  const int rounding = (direction > 0) ? 0 : 1;

  INDEX inside = start;             // always a matching slot
  INDEX outside = end + direction;  // limit on the far side

  for (;;)
  {
    const INDEX probe = ( inside + outside + rounding ) / 2;

    const int here = Match( phrase, probe );
    const int beyond = Match( phrase, probe + direction );

    if (here == 0)
    {
      if (beyond != 0) {
        return probe;   // match whose neighbour no longer matches
      }
      inside = probe;   // probe is a match, continue further out
    }
    else
    {
      outside = probe;  // probe is no match, continue further in
    }
  }
}
|
||||
|
||||
// Binary search for any slot in [start, end] whose suffix matches phrase.
// Returns that slot, or m_size if the range is exhausted without a match.
// start and end are references and are narrowed as the search proceeds, so
// the caller sees the tightened bounds afterwards.
SuffixArray::INDEX SuffixArray::FindFirst( const vector< WORD > &phrase, INDEX &start, INDEX &end )
{
  for (;;)
  {
    const INDEX probe = ( start + end + 1 ) / 2;
    const int order = Match( phrase, probe );

    if (order == 0) {
      return probe;
    }
    // from here on the probe is known not to match
    if (start >= end) {
      return m_size;
    }

    if (order > 0) {
      start = probe + 1;   // phrase sorts after the probed suffix
    } else {
      end = probe - 1;     // phrase sorts before the probed suffix
    }
  }
}
|
||||
|
||||
int SuffixArray::Match( const vector< WORD > &phrase, INDEX index )
|
||||
{
|
||||
INDEX pos = m_index[ index ];
|
||||
for(INDEX i=0; i<phrase.size() && i+pos<m_size; i++)
|
||||
{
|
||||
int match = CompareWord( m_vcb.GetWordID( phrase[i] ), m_array[ pos+i ] );
|
||||
// cerr << "{" << index << "+" << i << "," << pos+i << ":" << match << "}" << endl;
|
||||
if (match != 0)
|
||||
return match;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Debug helper: for every index slot in [start, end] writes up to five words
// of the corresponding suffix to cout, each preceded by a single space.
// No line breaks are written.
void SuffixArray::List(INDEX start, INDEX end)
{
  for (INDEX slot = start; slot <= end; ++slot)
  {
    const INDEX suffixStart = m_index[ slot ];
    for (INDEX shown = 0; shown < 5 && shown + suffixStart < m_size; ++shown)
    {
      const WORD_ID wordId = m_array[ suffixStart + shown ];
      cout << " " << m_vcb.GetWord( wordId );
    }
  }
}
|
45
contrib/fuzzy-match/SuffixArray.h
Normal file
@ -0,0 +1,45 @@
|
||||
#include "Vocabulary.h"
|
||||
|
||||
#pragma once
|
||||
|
||||
#define LINE_MAX_LENGTH 10000
|
||||
|
||||
|
||||
class SuffixArray
|
||||
{
|
||||
public:
|
||||
typedef unsigned int INDEX;
|
||||
|
||||
private:
|
||||
WORD_ID *m_array;
|
||||
INDEX *m_index;
|
||||
INDEX *m_buffer;
|
||||
char *m_wordInSentence;
|
||||
size_t *m_sentence;
|
||||
char *m_sentenceLength;
|
||||
WORD_ID m_endOfSentence;
|
||||
Vocabulary m_vcb;
|
||||
INDEX m_size;
|
||||
|
||||
public:
|
||||
SuffixArray( string fileName );
|
||||
~SuffixArray();
|
||||
|
||||
void Sort(INDEX start, INDEX end);
|
||||
int CompareIndex( INDEX a, INDEX b ) const;
|
||||
inline int CompareWord( WORD_ID a, WORD_ID b ) const;
|
||||
int Count( const vector< WORD > &phrase );
|
||||
bool MinCount( const vector< WORD > &phrase, INDEX min );
|
||||
bool Exists( const vector< WORD > &phrase );
|
||||
int FindMatches( const vector< WORD > &phrase, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = 0, INDEX search_end = -1 );
|
||||
int LimitedCount( const vector< WORD > &phrase, INDEX min, INDEX &firstMatch, INDEX &lastMatch, INDEX search_start = -1, INDEX search_end = 0 );
|
||||
INDEX FindFirst( const vector< WORD > &phrase, INDEX &start, INDEX &end );
|
||||
INDEX FindLast( const vector< WORD > &phrase, INDEX start, INDEX end, int direction );
|
||||
int Match( const vector< WORD > &phrase, INDEX index );
|
||||
void List( INDEX start, INDEX end );
|
||||
inline INDEX GetPosition( INDEX index ) { return m_index[ index ]; }
|
||||
inline size_t GetSentence( INDEX position ) { return m_sentence[position]; }
|
||||
inline char GetWordInSentence( INDEX position ) { return m_wordInSentence[position]; }
|
||||
inline char GetSentenceLength( size_t sentenceId ) { return m_sentenceLength[sentenceId]; }
|
||||
inline INDEX GetSize() { return m_size; }
|
||||
};
|
147
contrib/fuzzy-match/Util.cpp
Normal file
@ -0,0 +1,147 @@
|
||||
//
|
||||
// Util.cpp
|
||||
// fuzzy-match
|
||||
//
|
||||
// Created by Hieu Hoang on 26/07/2012.
|
||||
// Copyright 2012 __MyCompanyName__. All rights reserved.
|
||||
//
|
||||
|
||||
#include <iostream>
|
||||
#include <stdio.h>
|
||||
#include "Util.h"
|
||||
#include "SentenceAlignment.h"
|
||||
#include "SuffixArray.h"
|
||||
|
||||
void load_corpus( const char* fileName, vector< vector< WORD_ID > > &corpus )
|
||||
{ // source
|
||||
ifstream fileStream;
|
||||
fileStream.open(fileName);
|
||||
if (!fileStream) {
|
||||
cerr << "file not found: " << fileName << endl;
|
||||
exit(1);
|
||||
}
|
||||
cerr << "loading " << fileName << endl;
|
||||
|
||||
istream *fileStreamP = &fileStream;
|
||||
|
||||
char line[LINE_MAX_LENGTH];
|
||||
while(true)
|
||||
{
|
||||
SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
|
||||
if (fileStreamP->eof()) break;
|
||||
corpus.push_back( vocabulary.Tokenize( line ) );
|
||||
}
|
||||
}
|
||||
|
||||
void load_target( const char* fileName, vector< vector< SentenceAlignment > > &corpus)
|
||||
{
|
||||
ifstream fileStream;
|
||||
fileStream.open(fileName);
|
||||
if (!fileStream) {
|
||||
cerr << "file not found: " << fileName << endl;
|
||||
exit(1);
|
||||
}
|
||||
cerr << "loading " << fileName << endl;
|
||||
|
||||
istream *fileStreamP = &fileStream;
|
||||
|
||||
WORD_ID delimiter = vocabulary.StoreIfNew("|||");
|
||||
|
||||
int lineNum = 0;
|
||||
char line[LINE_MAX_LENGTH];
|
||||
while(true)
|
||||
{
|
||||
SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
|
||||
if (fileStreamP->eof()) break;
|
||||
|
||||
vector<WORD_ID> toks = vocabulary.Tokenize( line );
|
||||
|
||||
corpus.push_back(vector< SentenceAlignment >());
|
||||
vector< SentenceAlignment > &vec = corpus.back();
|
||||
|
||||
vec.push_back(SentenceAlignment());
|
||||
SentenceAlignment *sentence = &vec.back();
|
||||
|
||||
const WORD &countStr = vocabulary.GetWord(toks[0]);
|
||||
sentence->count = atoi(countStr.c_str());
|
||||
|
||||
for (size_t i = 1; i < toks.size(); ++i) {
|
||||
WORD_ID wordId = toks[i];
|
||||
|
||||
if (wordId == delimiter) {
|
||||
// target and alignments can have multiple sentences.
|
||||
vec.push_back(SentenceAlignment());
|
||||
sentence = &vec.back();
|
||||
|
||||
// count
|
||||
++i;
|
||||
|
||||
const WORD &countStr = vocabulary.GetWord(toks[i]);
|
||||
sentence->count = atoi(countStr.c_str());
|
||||
}
|
||||
else {
|
||||
// just a normal word, add
|
||||
sentence->target.push_back(wordId);
|
||||
}
|
||||
}
|
||||
|
||||
++lineNum;
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void load_alignment( const char* fileName, vector< vector< SentenceAlignment > > &corpus )
|
||||
{
|
||||
ifstream fileStream;
|
||||
fileStream.open(fileName);
|
||||
if (!fileStream) {
|
||||
cerr << "file not found: " << fileName << endl;
|
||||
exit(1);
|
||||
}
|
||||
cerr << "loading " << fileName << endl;
|
||||
|
||||
istream *fileStreamP = &fileStream;
|
||||
|
||||
string delimiter = "|||";
|
||||
|
||||
int lineNum = 0;
|
||||
char line[LINE_MAX_LENGTH];
|
||||
while(true)
|
||||
{
|
||||
SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
|
||||
if (fileStreamP->eof()) break;
|
||||
|
||||
vector< SentenceAlignment > &vec = corpus[lineNum];
|
||||
size_t targetInd = 0;
|
||||
SentenceAlignment *sentence = &vec[targetInd];
|
||||
|
||||
vector<string> toks = Tokenize(line);
|
||||
|
||||
for (size_t i = 0; i < toks.size(); ++i) {
|
||||
string &tok = toks[i];
|
||||
|
||||
if (tok == delimiter) {
|
||||
// target and alignments can have multiple sentences.
|
||||
++targetInd;
|
||||
sentence = &vec[targetInd];
|
||||
|
||||
++i;
|
||||
}
|
||||
else {
|
||||
// just a normal alignment, add
|
||||
vector<int> alignPoint = Tokenize<int>(tok, "-");
|
||||
assert(alignPoint.size() == 2);
|
||||
sentence->alignment.push_back(pair<int,int>(alignPoint[0], alignPoint[1]));
|
||||
}
|
||||
}
|
||||
|
||||
++lineNum;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
87
contrib/fuzzy-match/Util.h
Normal file
@ -0,0 +1,87 @@
|
||||
//
|
||||
// Util.h
|
||||
// fuzzy-match
|
||||
//
|
||||
// Created by Hieu Hoang on 25/07/2012.
|
||||
// Copyright 2012 __MyCompanyName__. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef fuzzy_match_Util_h
|
||||
#define fuzzy_match_Util_h
|
||||
|
||||
#include <vector>
|
||||
#include <sstream>
|
||||
#include "Vocabulary.h"
|
||||
|
||||
class SentenceAlignment;
|
||||
|
||||
void load_corpus( const char* fileName, std::vector< std::vector< WORD_ID > > &corpus );
|
||||
void load_target( const char* fileName, std::vector< std::vector< SentenceAlignment > > &corpus);
|
||||
void load_alignment( const char* fileName, std::vector< std::vector< SentenceAlignment > > &corpus );
|
||||
|
||||
/**
 * Concatenates the items of a vector into one string, writing each item with
 * operator<< and placing the delimiter between neighbouring items.
 * An empty vector gives an empty string.
 */
template <typename T>
std::string Join(const std::string& delimiter, const std::vector<T>& items)
{
  typename std::vector<T>::const_iterator item = items.begin();
  if (item == items.end()) {
    return "";
  }

  std::ostringstream joined;
  joined << *item;
  for (++item; item != items.end(); ++item) {
    joined << delimiter << *item;
  }
  return joined.str();
}
|
||||
|
||||
//! Convert a string to a value of type T using stream extraction.
//! Used for reading floats, ints etc. from files. If nothing can be
//! extracted (for example from an empty string) the result is a
//! value-initialized T - 0 for the arithmetic types - rather than an
//! uninitialized variable.
template<typename T>
inline T Scan(const std::string &input)
{
  std::stringstream stream(input);
  T ret = T();
  stream >> ret;
  return ret;
}

//! Convert a vector of strings to a vector of T, element by element, using
//! the single-string Scan above. The result has the same length as the input.
template<typename T>
inline std::vector<T> Scan(const std::vector< std::string > &input)
{
  std::vector<T> output(input.size());
  for (size_t i = 0 ; i < input.size() ; i++) {
    output[i] = Scan<T>( input[i] );
  }
  return output;
}
|
||||
|
||||
//! Split a string into tokens at any of the characters in `delimiters`
//! (space and tab by default). Runs of delimiters count as one separator and
//! leading / trailing delimiters are ignored, so no empty tokens are produced.
inline std::vector<std::string> Tokenize(const std::string& str,
    const std::string& delimiters = " \t")
{
  std::vector<std::string> tokens;

  // start of the first token (npos if the string holds only delimiters)
  std::string::size_type tokenBegin = str.find_first_not_of(delimiters);
  while (tokenBegin != std::string::npos) {
    // one past the last character of the token; npos if it runs to the end,
    // in which case substr simply takes the rest of the string
    const std::string::size_type tokenEnd = str.find_first_of(delimiters, tokenBegin);
    tokens.push_back(str.substr(tokenBegin, tokenEnd - tokenBegin));

    // jump over the separator run to the next token
    tokenBegin = str.find_first_not_of(delimiters, tokenEnd);
  }

  return tokens;
}
|
||||
|
||||
template<typename T>
|
||||
inline std::vector<T> Tokenize( const std::string &input
|
||||
, const std::string& delimiters = " \t")
|
||||
{
|
||||
std::vector<std::string> stringVector = Tokenize(input, delimiters);
|
||||
return Scan<T>( stringVector );
|
||||
}
|
||||
|
||||
|
||||
#endif
|
45
contrib/fuzzy-match/Vocabulary.cpp
Normal file
@ -0,0 +1,45 @@
|
||||
// $Id: Vocabulary.cpp 1565 2008-02-22 14:42:01Z bojar $
|
||||
#include "Vocabulary.h"
|
||||
|
||||
// as in beamdecoder/tables.cpp
|
||||
vector<WORD_ID> Vocabulary::Tokenize( const char input[] ) {
|
||||
vector< WORD_ID > token;
|
||||
bool betweenWords = true;
|
||||
int start=0;
|
||||
int i=0;
|
||||
for(; input[i] != '\0'; i++) {
|
||||
bool isSpace = (input[i] == ' ' || input[i] == '\t');
|
||||
|
||||
if (!isSpace && betweenWords) {
|
||||
start = i;
|
||||
betweenWords = false;
|
||||
}
|
||||
else if (isSpace && !betweenWords) {
|
||||
token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
|
||||
betweenWords = true;
|
||||
}
|
||||
}
|
||||
if (!betweenWords)
|
||||
token.push_back( StoreIfNew ( string( input+start, i-start ) ) );
|
||||
return token;
|
||||
}
|
||||
|
||||
// Returns the id of `word`. A word that is not known yet is appended to
// `vocab`, registered in `lookup`, and gets the next free id (its position
// in `vocab`).
WORD_ID Vocabulary::StoreIfNew( const WORD& word ) {
  map<WORD, WORD_ID>::iterator known = lookup.find( word );
  if( known == lookup.end() ) {
    const WORD_ID freshId = vocab.size();
    vocab.push_back( word );
    lookup.insert( make_pair( word, freshId ) );
    return freshId;
  }
  return known->second;
}
|
||||
|
||||
// Looks up the id of `word` without modifying the vocabulary.
// Words that are not in `lookup` map to id 0.
WORD_ID Vocabulary::GetWordID( const WORD &word ) {
  map<WORD, WORD_ID>::iterator found = lookup.find( word );
  return ( found != lookup.end() ) ? found->second : 0;
}
|
40
contrib/fuzzy-match/Vocabulary.h
Normal file
@ -0,0 +1,40 @@
|
||||
// $Id: tables-core.h 1470 2007-10-02 21:43:54Z redpony $
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string>
|
||||
#include <queue>
|
||||
#include <map>
|
||||
#include <cmath>
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define MAX_LENGTH 10000
|
||||
|
||||
// Reads one DELIM_-terminated line from stream IS_ into the buffer LINE_ of
// SIZE_ chars. A fail state that is neither bad nor eof is cleared so reading
// can continue. If the read filled the buffer (gcount() == SIZE_-1) an error
// is printed and the process exits with status 1.
//
// The body is wrapped in do { } while (0) so the macro acts as one statement
// (safe in an unbraced if / else), and every argument is parenthesized.
// Always follow the invocation with a semicolon.
#define SAFE_GETLINE(IS_, LINE_, SIZE_, DELIM_) do { \
    (IS_).getline((LINE_), (SIZE_), (DELIM_)); \
    if ((IS_).fail() && !(IS_).bad() && !(IS_).eof()) (IS_).clear(); \
    if ((IS_).gcount() == (SIZE_)-1) { \
      std::cerr << "Line too long! Buffer overflow. Delete lines >=" \
                << (SIZE_) << " chars or raise MAX_LENGTH in phrase-extract/tables-core.cpp" \
                << std::endl; \
      exit(1); \
    } \
  } while (0)
|
||||
|
||||
typedef string WORD;
|
||||
typedef unsigned int WORD_ID;
|
||||
|
||||
class Vocabulary {
|
||||
public:
|
||||
map<WORD, WORD_ID> lookup;
|
||||
vector< WORD > vocab;
|
||||
WORD_ID StoreIfNew( const WORD& );
|
||||
WORD_ID GetWordID( const WORD& );
|
||||
vector<WORD_ID> Tokenize( const char[] );
|
||||
inline WORD &GetWord( WORD_ID id ) const { WORD &i = (WORD&) vocab[ id ]; return i; }
|
||||
};
|
460
contrib/fuzzy-match/fuzzy-match2.cpp
Normal file
@ -0,0 +1,460 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <getopt.h>
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <time.h>
|
||||
#include <fstream>
|
||||
#include "SentenceAlignment.h"
|
||||
#include "fuzzy-match2.h"
|
||||
#include "SuffixArray.h"
|
||||
|
||||
/** This implementation is explained in
|
||||
Koehn and Senellart: "Fast Approximate String Matching
|
||||
with Suffix Arrays and A* Parsing" (AMTA 2010) ***/
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
vector< vector< WORD_ID > > source, input;
|
||||
vector< vector< SentenceAlignment > > targetAndAlignment;
|
||||
|
||||
|
||||
while(1) {
|
||||
static struct option long_options[] = {
|
||||
{"basic", no_argument, &basic_flag, 1},
|
||||
{"word", no_argument, &lsed_flag, 0},
|
||||
{"unrefined", no_argument, &refined_flag, 0},
|
||||
{"nolengthfilter", no_argument, &length_filter_flag, 0},
|
||||
{"noparse", no_argument, &parse_flag, 0},
|
||||
{"multiple", no_argument, &multiple_flag, 1},
|
||||
{"minmatch", required_argument, 0, 'm'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
int option_index = 0;
|
||||
int c = getopt_long (argc, argv, "m:", long_options, &option_index);
|
||||
if (c == -1) break;
|
||||
switch (c) {
|
||||
case 0:
|
||||
// if (long_options[option_index].flag != 0)
|
||||
// break;
|
||||
// printf ("option %s", long_options[option_index].name);
|
||||
// if (optarg)
|
||||
// printf (" with arg %s", optarg);
|
||||
// printf ("\n");
|
||||
break;
|
||||
case 'm':
|
||||
min_match = atoi(optarg);
|
||||
if (min_match < 1 || min_match > 100) {
|
||||
cerr << "error: --minmatch must have value in range 1..100\n";
|
||||
exit(1);
|
||||
}
|
||||
cerr << "setting min match to " << min_match << endl;
|
||||
break;
|
||||
default:
|
||||
cerr << "usage: syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (lsed_flag) { cerr << "lsed\n"; }
|
||||
if (basic_flag) { cerr << "basic\n"; }
|
||||
if (refined_flag) { cerr << "refined\n"; }
|
||||
if (length_filter_flag) { cerr << "length filter\n"; }
|
||||
if (parse_flag) { cerr << "parse\n"; }
|
||||
// exit(1);
|
||||
|
||||
|
||||
if (optind+4 != argc) {
|
||||
cerr << "syntax: ./fuzzy-match input source target alignment [--basic] [--word] [--minmatch 1..100]\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
load_corpus(argv[optind], input);
|
||||
load_corpus(argv[optind+1], source);
|
||||
load_target(argv[optind+2], targetAndAlignment);
|
||||
load_alignment(argv[optind+3], targetAndAlignment);
|
||||
|
||||
// ./fuzzy-match input corpus [-basic]
|
||||
|
||||
// load_corpus("../corpus/tm.truecased.4.en", source);
|
||||
// load_corpus("../corpus/tm.truecased.4.it", target);
|
||||
// load_corpus("../evaluation/test.input.tc.4", input);
|
||||
|
||||
// load_corpus("../../acquis-truecase/corpus/acquis.truecased.190.en", source);
|
||||
// load_corpus("../../acquis-truecase/evaluation/ac-test.input.tc.190", input);
|
||||
|
||||
// load_corpus("../corpus/tm.truecased.16.en", source);
|
||||
// load_corpus("../evaluation/test.input.tc.16", input);
|
||||
|
||||
if (basic_flag) {
|
||||
cerr << "using basic method\n";
|
||||
clock_t start_main_clock2 = clock();
|
||||
basic_fuzzy_match( source, input );
|
||||
cerr << "total: " << (1000 * (clock()-start_main_clock2) / CLOCKS_PER_SEC) << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cerr << "number of input sentences " << input.size() << endl;
|
||||
|
||||
cerr << "creating suffix array...\n";
|
||||
// SuffixArray suffixArray( "../corpus/tm.truecased.4.en" );
|
||||
// SuffixArray suffixArray( "../../acquis-truecase/corpus/acquis.truecased.190.en" );
|
||||
SuffixArray suffixArray( argv[optind+1] );
|
||||
|
||||
clock_t start_main_clock = clock();
|
||||
|
||||
// looping through all input sentences...
|
||||
cerr << "looping...\n";
|
||||
for(unsigned int sentenceInd = 0; sentenceInd < input.size(); sentenceInd++)
|
||||
{
|
||||
clock_t start_clock = clock();
|
||||
// if (i % 10 == 0) cerr << ".";
|
||||
|
||||
// establish some basic statistics
|
||||
|
||||
// int input_length = compute_length( input[i] );
|
||||
int input_length = input[sentenceInd].size();
|
||||
int best_cost = input_length * (100-min_match) / 100 + 1;
|
||||
|
||||
int match_count = 0; // how many substring matches to be considered
|
||||
//cerr << endl << "sentence " << i << ", length " << input_length << ", best_cost " << best_cost << endl;
|
||||
|
||||
// find match ranges in suffix array
|
||||
vector< vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > > match_range;
|
||||
for(size_t start=0;start<input[sentenceInd].size();start++)
|
||||
{
|
||||
SuffixArray::INDEX prior_first_match = 0;
|
||||
SuffixArray::INDEX prior_last_match = suffixArray.GetSize()-1;
|
||||
vector< string > substring;
|
||||
bool stillMatched = true;
|
||||
vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > matchedAtThisStart;
|
||||
//cerr << "start: " << start;
|
||||
for(int word=start; stillMatched && word<input[sentenceInd].size(); word++)
|
||||
{
|
||||
substring.push_back( vocabulary.GetWord( input[sentenceInd][word] ) );
|
||||
|
||||
// only look up, if needed (i.e. no unnecessary short gram lookups)
|
||||
// if (! word-start+1 <= short_match_max_length( input_length ) )
|
||||
// {
|
||||
SuffixArray::INDEX first_match, last_match;
|
||||
stillMatched = false;
|
||||
if (suffixArray.FindMatches( substring, first_match, last_match, prior_first_match, prior_last_match ) )
|
||||
{
|
||||
stillMatched = true;
|
||||
matchedAtThisStart.push_back( make_pair( first_match, last_match ) );
|
||||
//cerr << " (" << first_match << "," << last_match << ")";
|
||||
//cerr << " " << ( last_match - first_match + 1 );
|
||||
prior_first_match = first_match;
|
||||
prior_last_match = last_match;
|
||||
}
|
||||
//}
|
||||
}
|
||||
//cerr << endl;
|
||||
match_range.push_back( matchedAtThisStart );
|
||||
}
|
||||
|
||||
clock_t clock_range = clock();
|
||||
|
||||
map< int, vector< Match > > sentence_match;
|
||||
map< int, int > sentence_match_word_count;
|
||||
|
||||
// go through all matches, longest first
|
||||
for(int length = input[sentenceInd].size(); length >= 1; length--)
|
||||
{
|
||||
// do not create matches, if these are handled by the short match function
|
||||
if (length <= short_match_max_length( input_length ) )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned int count = 0;
|
||||
for(int start = 0; start <= input[sentenceInd].size() - length; start++)
|
||||
{
|
||||
if (match_range[start].size() >= length)
|
||||
{
|
||||
pair< SuffixArray::INDEX, SuffixArray::INDEX > &range = match_range[start][length-1];
|
||||
// cerr << " (" << range.first << "," << range.second << ")";
|
||||
count += range.second - range.first + 1;
|
||||
|
||||
for(SuffixArray::INDEX i=range.first; i<=range.second; i++)
|
||||
{
|
||||
int position = suffixArray.GetPosition( i );
|
||||
|
||||
// sentence length mismatch
|
||||
size_t sentence_id = suffixArray.GetSentence( position );
|
||||
int sentence_length = suffixArray.GetSentenceLength( sentence_id );
|
||||
int diff = abs( (int)sentence_length - (int)input_length );
|
||||
// cerr << endl << i << "\tsentence " << sentence_id << ", length " << sentence_length;
|
||||
//if (length <= 2 && input_length>=5 &&
|
||||
// sentence_match.find( sentence_id ) == sentence_match.end())
|
||||
// continue;
|
||||
|
||||
if (diff > best_cost)
|
||||
continue;
|
||||
|
||||
// compute minimal cost
|
||||
int start_pos = suffixArray.GetWordInSentence( position );
|
||||
int end_pos = start_pos + length-1;
|
||||
// cerr << endl << "\t" << start_pos << "-" << end_pos << " (" << sentence_length << ") vs. "
|
||||
// << start << "-" << (start+length-1) << " (" << input_length << ")";
|
||||
// different number of prior words -> cost is at least diff
|
||||
int min_cost = abs( start - start_pos );
|
||||
|
||||
// same number of words, but not sent. start -> cost is at least 1
|
||||
if (start == start_pos && start>0)
|
||||
min_cost++;
|
||||
|
||||
// different number of remaining words -> cost is at least diff
|
||||
min_cost += abs( ( sentence_length-1 - end_pos ) -
|
||||
( input_length-1 - (start+length-1) ) );
|
||||
|
||||
// same number of words, but not sent. end -> cost is at least 1
|
||||
if ( sentence_length-1 - end_pos ==
|
||||
input_length-1 - (start+length-1)
|
||||
&& end_pos != sentence_length-1 )
|
||||
min_cost++;
|
||||
|
||||
// cerr << " -> min_cost " << min_cost;
|
||||
if (min_cost > best_cost)
|
||||
continue;
|
||||
|
||||
// valid match
|
||||
match_count++;
|
||||
|
||||
// compute maximal cost
|
||||
int max_cost = max( start, start_pos )
|
||||
+ max( sentence_length-1 - end_pos,
|
||||
input_length-1 - (start+length-1) );
|
||||
// cerr << ", max_cost " << max_cost;
|
||||
|
||||
Match m = Match( start, start+length-1,
|
||||
start_pos, start_pos+length-1,
|
||||
min_cost, max_cost, 0);
|
||||
sentence_match[ sentence_id ].push_back( m );
|
||||
sentence_match_word_count[ sentence_id ] += length;
|
||||
|
||||
if (max_cost < best_cost)
|
||||
{
|
||||
best_cost = max_cost;
|
||||
if (best_cost == 0) break;
|
||||
}
|
||||
//if (match_count >= MAX_MATCH_COUNT) break;
|
||||
}
|
||||
}
|
||||
// cerr << endl;
|
||||
if (best_cost == 0) break;
|
||||
//if (match_count >= MAX_MATCH_COUNT) break;
|
||||
}
|
||||
// cerr << count << " matches at length " << length << " in " << sentence_match.size() << " tm." << endl;
|
||||
|
||||
if (best_cost == 0) break;
|
||||
//if (match_count >= MAX_MATCH_COUNT) break;
|
||||
}
|
||||
cerr << match_count << " matches in " << sentence_match.size() << " sentences." << endl;
|
||||
|
||||
clock_t clock_matches = clock();
|
||||
|
||||
// consider each sentence for which we have matches
|
||||
int old_best_cost = best_cost;
|
||||
int tm_count_word_match = 0;
|
||||
int tm_count_word_match2 = 0;
|
||||
int pruned_match_count = 0;
|
||||
if (short_match_max_length( input_length ))
|
||||
{
|
||||
init_short_matches( input[sentenceInd] );
|
||||
}
|
||||
vector< int > best_tm;
|
||||
typedef map< int, vector< Match > >::iterator I;
|
||||
|
||||
clock_t clock_validation_sum = 0;
|
||||
|
||||
for(I tm=sentence_match.begin(); tm!=sentence_match.end(); tm++)
|
||||
{
|
||||
int tmID = tm->first;
|
||||
int tm_length = suffixArray.GetSentenceLength(tmID);
|
||||
vector< Match > &match = tm->second;
|
||||
add_short_matches( match, source[tmID], input_length, best_cost );
|
||||
|
||||
//cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;
|
||||
|
||||
// quick look: how many words are matched
|
||||
int words_matched = 0;
|
||||
for(int m=0;m<match.size();m++) {
|
||||
|
||||
if (match[m].min_cost <= best_cost) // makes no difference
|
||||
words_matched += match[m].input_end - match[m].input_start + 1;
|
||||
}
|
||||
if (max(input_length,tm_length) - words_matched > best_cost)
|
||||
{
|
||||
if (length_filter_flag) continue;
|
||||
}
|
||||
tm_count_word_match++;
|
||||
|
||||
// prune, check again how many words are matched
|
||||
vector< Match > pruned = prune_matches( match, best_cost );
|
||||
words_matched = 0;
|
||||
for(int p=0;p<pruned.size();p++) {
|
||||
words_matched += pruned[p].input_end - pruned[p].input_start + 1;
|
||||
}
|
||||
if (max(input_length,tm_length) - words_matched > best_cost)
|
||||
{
|
||||
if (length_filter_flag) continue;
|
||||
}
|
||||
tm_count_word_match2++;
|
||||
|
||||
pruned_match_count += pruned.size();
|
||||
int prior_best_cost = best_cost;
|
||||
int cost;
|
||||
|
||||
clock_t clock_validation_start = clock();
|
||||
if (! parse_flag ||
|
||||
pruned.size()>=10) // to prevent worst cases
|
||||
{
|
||||
string path;
|
||||
cost = sed( input[sentenceInd], source[tmID], path, false );
|
||||
if (cost < best_cost)
|
||||
{
|
||||
best_cost = cost;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
cost = parse_matches( pruned, input_length, tm_length, best_cost );
|
||||
if (prior_best_cost != best_cost)
|
||||
{
|
||||
best_tm.clear();
|
||||
}
|
||||
}
|
||||
clock_validation_sum += clock() - clock_validation_start;
|
||||
if (cost == best_cost)
|
||||
{
|
||||
best_tm.push_back( tmID );
|
||||
}
|
||||
}
|
||||
cerr << "reduced best cost from " << old_best_cost << " to " << best_cost << endl;
|
||||
cerr << "tm considered: " << sentence_match.size()
|
||||
<< " word-matched: " << tm_count_word_match
|
||||
<< " word-matched2: " << tm_count_word_match2
|
||||
<< " best: " << best_tm.size() << endl;
|
||||
|
||||
cerr << "pruned matches: " << ((float)pruned_match_count/(float)tm_count_word_match2) << endl;
|
||||
|
||||
// create xml and extract files
|
||||
string inputStr, sourceStr;
|
||||
for (size_t pos = 0; pos < input_length; ++pos) {
|
||||
inputStr += vocabulary.GetWord(input[sentenceInd][pos]) + " ";
|
||||
}
|
||||
|
||||
// do not try to find the best ... report multiple matches
|
||||
if (multiple_flag) {
|
||||
int input_letter_length = compute_length( input[sentenceInd] );
|
||||
for(int si=0; si<best_tm.size(); si++) {
|
||||
int s = best_tm[si];
|
||||
string path;
|
||||
unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
|
||||
// do not report multiple identical sentences, but just their count
|
||||
cout << sentenceInd << " "; // sentence number
|
||||
cout << letter_cost << "/" << input_letter_length << " ";
|
||||
cout << "(" << best_cost <<"/" << input_length <<") ";
|
||||
cout << "||| " << s << " ||| " << path << endl;
|
||||
|
||||
vector<WORD_ID> &sourceSentence = source[s];
|
||||
vector<SentenceAlignment> &targets = targetAndAlignment[s];
|
||||
create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, path);
|
||||
|
||||
}
|
||||
} // if (multiple_flag)
|
||||
else {
|
||||
|
||||
// find the best matches according to letter sed
|
||||
string best_path = "";
|
||||
int best_match = -1;
|
||||
int best_letter_cost;
|
||||
if (lsed_flag) {
|
||||
best_letter_cost = compute_length( input[sentenceInd] ) * min_match / 100 + 1;
|
||||
for(int si=0; si<best_tm.size(); si++)
|
||||
{
|
||||
int s = best_tm[si];
|
||||
string path;
|
||||
unsigned int letter_cost = sed( input[sentenceInd], source[s], path, true );
|
||||
if (letter_cost < best_letter_cost)
|
||||
{
|
||||
best_letter_cost = letter_cost;
|
||||
best_path = path;
|
||||
best_match = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
// if letter sed turned off, just compute path for first match
|
||||
else {
|
||||
if (best_tm.size() > 0) {
|
||||
string path;
|
||||
sed( input[sentenceInd], source[best_tm[0]], path, false );
|
||||
best_path = path;
|
||||
best_match = best_tm[0];
|
||||
}
|
||||
}
|
||||
cerr << "elapsed: " << (1000 * (clock()-start_clock) / CLOCKS_PER_SEC)
|
||||
<< " ( range: " << (1000 * (clock_range-start_clock) / CLOCKS_PER_SEC)
|
||||
<< " match: " << (1000 * (clock_matches-clock_range) / CLOCKS_PER_SEC)
|
||||
<< " tm: " << (1000 * (clock()-clock_matches) / CLOCKS_PER_SEC)
|
||||
<< " (validation: " << (1000 * (clock_validation_sum) / CLOCKS_PER_SEC) << ")"
|
||||
<< " )" << endl;
|
||||
if (lsed_flag) {
|
||||
cout << best_letter_cost << "/" << compute_length( input[sentenceInd] ) << " (";
|
||||
}
|
||||
cout << best_cost <<"/" << input_length;
|
||||
if (lsed_flag) cout << ")";
|
||||
cout << " ||| " << best_match << " ||| " << best_path << endl;
|
||||
|
||||
// creat xml & extracts
|
||||
vector<WORD_ID> &sourceSentence = source[best_match];
|
||||
vector<SentenceAlignment> &targets = targetAndAlignment[best_match];
|
||||
create_extract(sentenceInd, best_cost, sourceSentence, targets, inputStr, best_path);
|
||||
|
||||
} // else if (multiple_flag)
|
||||
|
||||
|
||||
}
|
||||
cerr << "total: " << (1000 * (clock()-start_main_clock) / CLOCKS_PER_SEC) << endl;
|
||||
|
||||
}
|
||||
|
||||
void create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path)
|
||||
{
|
||||
string sourceStr;
|
||||
for (size_t pos = 0; pos < sourceSentence.size(); ++pos) {
|
||||
WORD_ID wordId = sourceSentence[pos];
|
||||
sourceStr += vocabulary.GetWord(wordId) + " ";
|
||||
}
|
||||
|
||||
char *inputFileName = tmpnam(NULL);
|
||||
ofstream inputFile(inputFileName);
|
||||
|
||||
for (size_t targetInd = 0; targetInd < targets.size(); ++targetInd) {
|
||||
const SentenceAlignment &sentenceAlignment = targets[targetInd];
|
||||
string targetStr = sentenceAlignment.getTargetString();
|
||||
string alignStr = sentenceAlignment.getAlignmentString();
|
||||
|
||||
inputFile
|
||||
<< sentenceInd << endl
|
||||
<< cost << endl
|
||||
<< sourceStr << endl
|
||||
<< inputStr << endl
|
||||
<< targetStr << endl
|
||||
<< alignStr << endl
|
||||
<< path << endl
|
||||
<< sentenceAlignment.count << endl;
|
||||
|
||||
}
|
||||
|
||||
string cmd = string("perl create_xml.perl < ") + inputFileName;
|
||||
cerr << cmd << endl;
|
||||
inputFile.close();
|
||||
|
||||
}
|
561
contrib/fuzzy-match/fuzzy-match2.h
Normal file
@ -0,0 +1,561 @@
|
||||
//
|
||||
// fuzzy-match2.h
|
||||
// fuzzy-match
|
||||
//
|
||||
// Created by Hieu Hoang on 25/07/2012.
|
||||
// Copyright 2012 __MyCompanyName__. All rights reserved.
|
||||
//
|
||||
|
||||
#ifndef fuzzy_match_fuzzy_match2_h
|
||||
#define fuzzy_match_fuzzy_match2_h
|
||||
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
#include <vector>
|
||||
#include "Vocabulary.h"
|
||||
#include "SuffixArray.h"
|
||||
#include "Util.h"
|
||||
#include "Match.h"
|
||||
|
||||
// Upper bound on the number of substring matches per input sentence.
// In the code visible here it is only referenced by commented-out
// early-exit checks in the matching loop.
#define MAX_MATCH_COUNT 10000000
|
||||
|
||||
// Shared vocabulary; GetWord() turns a WORD_ID into its surface string.
Vocabulary vocabulary;
|
||||
|
||||
// When set, main() runs basic_fuzzy_match() (brute force) and exits.
int basic_flag = false;
|
||||
// When set, the final match is picked among the best candidates by
// letter-level edit distance.
int lsed_flag = true;
|
||||
// When cleared, short_match_max_length() returns 0, i.e. no n-gram length
// is treated as a "short match".
int refined_flag = true;
|
||||
// Enables skipping of candidates based on length / matched-word counts.
int length_filter_flag = true;
|
||||
// When set, candidates are validated with parse_matches(); otherwise (or
// when a candidate has 10 or more pruned matches) the full sed() is used.
int parse_flag = true;
|
||||
// Match threshold in percent; the initial cost ceiling for an input of
// length n is derived from it as n*(100-min_match)/100 plus a constant.
int min_match = 70;
|
||||
// When set, every best-scoring TM sentence is reported, not just one.
int multiple_flag = false;
|
||||
// NOTE(review): not referenced in the code visible here -- confirm use.
int multiple_slack = 0;
|
||||
// NOTE(review): not referenced in the code visible here -- confirm use.
int multiple_max = 100;
|
||||
// Input word -> positions of that word in the current input sentence;
// rebuilt by init_short_matches(), read by add_short_matches().
map< WORD_ID,vector< int > > single_word_index;
|
||||
// global cache for word pairs
// (letter edit distance per (WORD_ID, WORD_ID), filled by letter_sed())
|
||||
map< pair< WORD_ID, WORD_ID >, unsigned int > lsed;
|
||||
|
||||
void create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path);
|
||||
|
||||
|
||||
|
||||
/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2 */
|
||||
|
||||
unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx )
|
||||
{
|
||||
// check if already computed -> lookup in cache
|
||||
pair< WORD_ID, WORD_ID > pIdx = make_pair( aIdx, bIdx );
|
||||
map< pair< WORD_ID, WORD_ID >, unsigned int >::const_iterator lookup = lsed.find( pIdx );
|
||||
if (lookup != lsed.end())
|
||||
{
|
||||
return (lookup->second);
|
||||
}
|
||||
|
||||
// get surface strings for word indices
|
||||
const string &a = vocabulary.GetWord( aIdx );
|
||||
const string &b = vocabulary.GetWord( bIdx );
|
||||
|
||||
// initialize cost matrix
|
||||
unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
|
||||
for( unsigned int i=0; i<=a.size(); i++ ) {
|
||||
cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
|
||||
cost[i][0] = i;
|
||||
}
|
||||
for( unsigned int j=0; j<=b.size(); j++ ) {
|
||||
cost[0][j] = j;
|
||||
}
|
||||
|
||||
// core string edit distance loop
|
||||
for( unsigned int i=1; i<=a.size(); i++ ) {
|
||||
for( unsigned int j=1; j<=b.size(); j++ ) {
|
||||
|
||||
unsigned int ins = cost[i-1][j] + 1;
|
||||
unsigned int del = cost[i][j-1] + 1;
|
||||
bool match = (a.substr(i-1,1).compare( b.substr(j-1,1) ) == 0);
|
||||
unsigned int diag = cost[i-1][j-1] + (match ? 0 : 1);
|
||||
|
||||
unsigned int min = (ins < del) ? ins : del;
|
||||
min = (diag < min) ? diag : min;
|
||||
|
||||
cost[i][j] = min;
|
||||
}
|
||||
}
|
||||
|
||||
// clear out memory
|
||||
unsigned int final = cost[a.size()][b.size()];
|
||||
for( unsigned int i=0; i<=a.size(); i++ ) {
|
||||
free( cost[i] );
|
||||
}
|
||||
free( cost );
|
||||
|
||||
// cache and return result
|
||||
lsed[ pIdx ] = final;
|
||||
return final;
|
||||
}
|
||||
|
||||
/* string edit distance implementation */
|
||||
|
||||
unsigned int sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed ) {
|
||||
|
||||
// initialize cost and path matrices
|
||||
unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
|
||||
char **path = (char**) calloc( sizeof( char* ), a.size()+1 );
|
||||
|
||||
for( unsigned int i=0; i<=a.size(); i++ ) {
|
||||
cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
|
||||
path[i] = (char*) calloc( sizeof(char), b.size()+1 );
|
||||
if (i>0)
|
||||
{
|
||||
cost[i][0] = cost[i-1][0];
|
||||
if (use_letter_sed)
|
||||
{
|
||||
cost[i][0] += vocabulary.GetWord( a[i-1] ).size();
|
||||
}
|
||||
else
|
||||
{
|
||||
cost[i][0]++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cost[i][0] = 0;
|
||||
}
|
||||
path[i][0] = 'I';
|
||||
}
|
||||
|
||||
for( unsigned int j=0; j<=b.size(); j++ ) {
|
||||
if (j>0)
|
||||
{
|
||||
cost[0][j] = cost[0][j-1];
|
||||
if (use_letter_sed)
|
||||
{
|
||||
cost[0][j] += vocabulary.GetWord( b[j-1] ).size();
|
||||
}
|
||||
else
|
||||
{
|
||||
cost[0][j]++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cost[0][j] = 0;
|
||||
}
|
||||
path[0][j] = 'D';
|
||||
}
|
||||
|
||||
// core string edit distance algorithm
|
||||
for( unsigned int i=1; i<=a.size(); i++ ) {
|
||||
for( unsigned int j=1; j<=b.size(); j++ ) {
|
||||
unsigned int ins = cost[i-1][j];
|
||||
unsigned int del = cost[i][j-1];
|
||||
unsigned int match;
|
||||
if (use_letter_sed)
|
||||
{
|
||||
ins += vocabulary.GetWord( a[i-1] ).size();
|
||||
del += vocabulary.GetWord( b[j-1] ).size();
|
||||
match = letter_sed( a[i-1], b[j-1] );
|
||||
}
|
||||
else
|
||||
{
|
||||
ins++;
|
||||
del++;
|
||||
match = ( a[i-1] == b[j-1] ) ? 0 : 1;
|
||||
}
|
||||
unsigned int diag = cost[i-1][j-1] + match;
|
||||
|
||||
char action = (ins < del) ? 'I' : 'D';
|
||||
unsigned int min = (ins < del) ? ins : del;
|
||||
if (diag < min)
|
||||
{
|
||||
action = (match>0) ? 'S' : 'M';
|
||||
min = diag;
|
||||
}
|
||||
|
||||
cost[i][j] = min;
|
||||
path[i][j] = action;
|
||||
}
|
||||
}
|
||||
|
||||
// construct string for best path
|
||||
unsigned int i = a.size();
|
||||
unsigned int j = b.size();
|
||||
best_path = "";
|
||||
while( i>0 || j>0 )
|
||||
{
|
||||
best_path = path[i][j] + best_path;
|
||||
if (path[i][j] == 'I')
|
||||
{
|
||||
i--;
|
||||
}
|
||||
else if (path[i][j] == 'D')
|
||||
{
|
||||
j--;
|
||||
}
|
||||
else
|
||||
{
|
||||
i--;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// clear out memory
|
||||
unsigned int final = cost[a.size()][b.size()];
|
||||
|
||||
for( unsigned int i=0; i<=a.size(); i++ ) {
|
||||
free( cost[i] );
|
||||
free( path[i] );
|
||||
}
|
||||
free( cost );
|
||||
free( path );
|
||||
|
||||
// return result
|
||||
return final;
|
||||
}
|
||||
|
||||
/* utlility function: compute length of sentence in characters
|
||||
(spaces do not count) */
|
||||
|
||||
unsigned int compute_length( const vector< WORD_ID > &sentence )
|
||||
{
|
||||
unsigned int length = 0; for( unsigned int i=0; i<sentence.size(); i++ )
|
||||
{
|
||||
length += vocabulary.GetWord( sentence[i] ).size();
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
/* brute force method: compare input to all corpus sentences */
|
||||
|
||||
int basic_fuzzy_match( vector< vector< WORD_ID > > source,
|
||||
vector< vector< WORD_ID > > input )
|
||||
{
|
||||
// go through input set...
|
||||
for(unsigned int i=0;i<input.size();i++)
|
||||
{
|
||||
bool use_letter_sed = false;
|
||||
|
||||
// compute sentence length and worst allowed cost
|
||||
unsigned int input_length;
|
||||
if (use_letter_sed)
|
||||
{
|
||||
input_length = compute_length( input[i] );
|
||||
}
|
||||
else
|
||||
{
|
||||
input_length = input[i].size();
|
||||
}
|
||||
unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
|
||||
string best_path = "";
|
||||
int best_match = -1;
|
||||
|
||||
// go through all corpus sentences
|
||||
for(unsigned int s=0;s<source.size();s++)
|
||||
{
|
||||
int source_length;
|
||||
if (use_letter_sed)
|
||||
{
|
||||
source_length = compute_length( source[s] );
|
||||
}
|
||||
else
|
||||
{
|
||||
source_length = source[s].size();
|
||||
}
|
||||
int diff = abs((int)source_length - (int)input_length);
|
||||
if (length_filter_flag && (diff >= best_cost))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// compute string edit distance
|
||||
string path;
|
||||
unsigned int cost = sed( input[i], source[s], path, use_letter_sed );
|
||||
|
||||
// update if new best
|
||||
if (cost < best_cost)
|
||||
{
|
||||
best_cost = cost;
|
||||
best_path = path;
|
||||
best_match = s;
|
||||
}
|
||||
}
|
||||
cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
|
||||
}
|
||||
}
|
||||
|
||||
/* definition of short matches
|
||||
very short n-gram matches (1-grams) will not be looked up in
|
||||
the suffix array, since there are too many matches
|
||||
and for longer sentences, at least one 2-gram match must occur */
|
||||
|
||||
inline int short_match_max_length( int input_length )
|
||||
{
|
||||
if ( ! refined_flag )
|
||||
return 0;
|
||||
if ( input_length >= 5 )
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* if we have non-short matches in a sentence, we need to
|
||||
take a closer look at it.
|
||||
this function creates a hash map for all input words and their positions
|
||||
(to be used by the next function)
|
||||
(done here, because this has be done only once for an input sentence) */
|
||||
|
||||
void init_short_matches( const vector< WORD_ID > &input )
|
||||
{
|
||||
int max_length = short_match_max_length( input.size() );
|
||||
if (max_length == 0)
|
||||
return;
|
||||
|
||||
single_word_index.clear();
|
||||
|
||||
// store input words and their positions in hash map
|
||||
for(int i=0; i<input.size(); i++)
|
||||
{
|
||||
if (single_word_index.find( input[i] ) == single_word_index.end())
|
||||
{
|
||||
vector< int > position_vector;
|
||||
single_word_index[ input[i] ] = position_vector;
|
||||
}
|
||||
single_word_index[ input[i] ].push_back( i );
|
||||
}
|
||||
}
|
||||
|
||||
/* add all short matches to list of matches for a sentence */
|
||||
|
||||
void add_short_matches( vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
|
||||
{
|
||||
int max_length = short_match_max_length( input_length );
|
||||
if (max_length == 0)
|
||||
return;
|
||||
|
||||
int tm_length = tm.size();
|
||||
map< WORD_ID,vector< int > >::iterator input_word_hit;
|
||||
for(int t_pos=0; t_pos<tm.size(); t_pos++)
|
||||
{
|
||||
input_word_hit = single_word_index.find( tm[t_pos] );
|
||||
if (input_word_hit != single_word_index.end())
|
||||
{
|
||||
vector< int > &position_vector = input_word_hit->second;
|
||||
for(int j=0; j<position_vector.size(); j++)
|
||||
{
|
||||
int &i_pos = position_vector[j];
|
||||
|
||||
// before match
|
||||
int max_cost = max( i_pos , t_pos );
|
||||
int min_cost = abs( i_pos - t_pos );
|
||||
if ( i_pos>0 && i_pos == t_pos )
|
||||
min_cost++;
|
||||
|
||||
// after match
|
||||
max_cost += max( (input_length-i_pos) , (tm_length-t_pos));
|
||||
min_cost += abs( (input_length-i_pos) - (tm_length-t_pos));
|
||||
if ( i_pos != input_length-1 && (input_length-i_pos) == (tm_length-t_pos))
|
||||
min_cost++;
|
||||
|
||||
if (min_cost <= best_cost)
|
||||
{
|
||||
Match new_match( i_pos,i_pos, t_pos,t_pos, min_cost,max_cost,0 );
|
||||
match.push_back( new_match );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* remove matches that are subsumed by a larger match */
|
||||
|
||||
vector< Match > prune_matches( const vector< Match > &match, int best_cost )
|
||||
{
|
||||
//cerr << "\tpruning";
|
||||
vector< Match > pruned;
|
||||
for(int i=match.size()-1; i>=0; i--)
|
||||
{
|
||||
//cerr << " (" << match[i].input_start << "," << match[i].input_end
|
||||
// << " ; " << match[i].tm_start << "," << match[i].tm_end
|
||||
// << " * " << match[i].min_cost << ")";
|
||||
|
||||
//if (match[i].min_cost > best_cost)
|
||||
// continue;
|
||||
|
||||
bool subsumed = false;
|
||||
for(int j=match.size()-1; j>=0; j--)
|
||||
{
|
||||
if (i!=j // do not compare match with itself
|
||||
&& ( match[i].input_end - match[i].input_start <=
|
||||
match[j].input_end - match[j].input_start ) // i shorter than j
|
||||
&& ((match[i].input_start == match[j].input_start &&
|
||||
match[i].tm_start == match[j].tm_start ) ||
|
||||
(match[i].input_end == match[j].input_end &&
|
||||
match[i].tm_end == match[j].tm_end) ) )
|
||||
{
|
||||
subsumed = true;
|
||||
}
|
||||
}
|
||||
if (! subsumed && match[i].min_cost <= best_cost)
|
||||
{
|
||||
//cerr << "*";
|
||||
pruned.push_back( match[i] );
|
||||
}
|
||||
}
|
||||
//cerr << endl;
|
||||
return pruned;
|
||||
}
|
||||
|
||||
/* A* parsing method to compute string edit distance */
|
||||
|
||||
int parse_matches( vector< Match > &match, int input_length, int tm_length, int &best_cost )
|
||||
{
|
||||
// cerr << "sentence has " << match.size() << " matches, best cost: " << best_cost << ", lengths input: " << input_length << " tm: " << tm_length << endl;
|
||||
|
||||
if (match.size() == 1)
|
||||
return match[0].max_cost;
|
||||
if (match.size() == 0)
|
||||
return input_length+tm_length;
|
||||
|
||||
int this_best_cost = input_length + tm_length;
|
||||
for(int i=0;i<match.size();i++)
|
||||
{
|
||||
this_best_cost = min( this_best_cost, match[i].max_cost );
|
||||
}
|
||||
// cerr << "\tthis best cost: " << this_best_cost << endl;
|
||||
|
||||
// bottom up combination of spans
|
||||
vector< vector< Match > > multi_match;
|
||||
multi_match.push_back( match );
|
||||
|
||||
int match_level = 1;
|
||||
while(multi_match[ match_level-1 ].size()>0)
|
||||
{
|
||||
// init vector
|
||||
vector< Match > empty;
|
||||
multi_match.push_back( empty );
|
||||
|
||||
for(int first_level = 0; first_level <= (match_level-1)/2; first_level++)
|
||||
{
|
||||
int second_level = match_level - first_level -1;
|
||||
//cerr << "\tcombining level " << first_level << " and " << second_level << endl;
|
||||
|
||||
vector< Match > &first_match = multi_match[ first_level ];
|
||||
vector< Match > &second_match = multi_match[ second_level ];
|
||||
|
||||
for(int i1 = 0; i1 < first_match.size(); i1++) {
|
||||
for(int i2 = 0; i2 < second_match.size(); i2++) {
|
||||
|
||||
// do not combine the same pair twice
|
||||
if (first_level == second_level && i2 <= i1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// get sorted matches (first is before second)
|
||||
Match *first, *second;
|
||||
if (first_match[i1].input_start < second_match[i2].input_start )
|
||||
{
|
||||
first = &first_match[i1];
|
||||
second = &second_match[i2];
|
||||
}
|
||||
else
|
||||
{
|
||||
second = &first_match[i1];
|
||||
first = &second_match[i2];
|
||||
}
|
||||
|
||||
//cerr << "\tcombining "
|
||||
// << "(" << first->input_start << "," << first->input_end << "), "
|
||||
// << first->tm_start << " [" << first->internal_cost << "]"
|
||||
// << " with "
|
||||
// << "(" << second->input_start << "," << second->input_end << "), "
|
||||
// << second->tm_start<< " [" << second->internal_cost << "]"
|
||||
// << endl;
|
||||
|
||||
// do not process overlapping matches
|
||||
if (first->input_end >= second->input_start)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// no overlap / mismatch in tm
|
||||
if (first->tm_end >= second->tm_start)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// compute cost
|
||||
int min_cost = 0;
|
||||
int max_cost = 0;
|
||||
|
||||
// initial
|
||||
min_cost += abs( first->input_start - first->tm_start );
|
||||
max_cost += max( first->input_start, first->tm_start );
|
||||
|
||||
// same number of words, but not sent. start -> cost is at least 1
|
||||
if (first->input_start == first->tm_start && first->input_start > 0)
|
||||
{
|
||||
min_cost++;
|
||||
}
|
||||
|
||||
// in-between
|
||||
int skipped_words = second->input_start - first->input_end -1;
|
||||
int skipped_words_tm = second->tm_start - first->tm_end -1;
|
||||
int internal_cost = max( skipped_words, skipped_words_tm );
|
||||
internal_cost += first->internal_cost + second->internal_cost;
|
||||
min_cost += internal_cost;
|
||||
max_cost += internal_cost;
|
||||
|
||||
// final
|
||||
min_cost += abs( (tm_length-1 - second->tm_end) -
|
||||
(input_length-1 - second->input_end) );
|
||||
max_cost += max( (tm_length-1 - second->tm_end),
|
||||
(input_length-1 - second->input_end) );
|
||||
|
||||
// same number of words, but not sent. end -> cost is at least 1
|
||||
if ( ( input_length-1 - second->input_end
|
||||
== tm_length-1 - second->tm_end )
|
||||
&& input_length-1 != second->input_end )
|
||||
{
|
||||
min_cost++;
|
||||
}
|
||||
|
||||
// cerr << "\tcost: " << min_cost << "-" << max_cost << endl;
|
||||
|
||||
// if worst than best cost, forget it
|
||||
if (min_cost > best_cost)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// add match
|
||||
Match new_match( first->input_start,
|
||||
second->input_end,
|
||||
first->tm_start,
|
||||
second->tm_end,
|
||||
min_cost,
|
||||
max_cost,
|
||||
internal_cost);
|
||||
multi_match[ match_level ].push_back( new_match );
|
||||
// cerr << "\tstored\n";
|
||||
|
||||
// possibly updating this_best_cost
|
||||
if (max_cost < this_best_cost)
|
||||
{
|
||||
// cerr << "\tupdating this best cost to " << max_cost << "\n";
|
||||
this_best_cost = max_cost;
|
||||
|
||||
// possibly updating best_cost
|
||||
if (max_cost < best_cost)
|
||||
{
|
||||
// cerr << "\tupdating best cost to " << max_cost << "\n";
|
||||
best_cost = max_cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
match_level++;
|
||||
}
|
||||
return this_best_cost;
|
||||
}
|
||||
|
||||
#endif
|
214
contrib/fuzzy-match/make-xml-from-match.perl
Normal file
@ -0,0 +1,214 @@
|
||||
#!/usr/bin/perl -w

# Builds an XML-annotated input file from fuzzy-match results.
#
# For every input sentence one line is read from the match file
# ("score ||| sentence-id ||| edit-path").  If the line names a valid
# translation-memory sentence, create_xml() turns the matched source/target
# pair into a frame; otherwise the input sentence is copied unchanged.

use strict;

my $DEBUG = 1;

my $match_file = "tm/BEST.acquis-xml-escaped.4.uniq";
my $source_file = "data/acquis.truecased.4.en.uniq";
my $target_file = "data/acquis.truecased.4.fr.uniq.most-frequent";
my $alignment_file = "data/acquis.truecased.4.align.uniq.most-frequent";
my $out_file = "data/ac-test.input.xml.4.uniq";
my $in_file = "evaluation/ac-test.input.tc.4";

#my $match_file = "tm/BEST.acquis-xml-escaped.4";
#my $source_file = "corpus/acquis.truecased.4.en";
#my $target_file = "corpus/acquis.truecased.4.fr";
#my $alignment_file = "model/aligned.4.grow-diag-final-and";
#my $out_file = "data/ac-test.input.xml.4";
#my $in_file = "evaluation/ac-test.input.tc.4";

#my $match_file = "tm/BEST.acquis.with";
#my $source_file = "../acquis-truecase/corpus/acquis.truecased.190.en";
#my $target_file = "../acquis-truecase/corpus/acquis.truecased.190.fr";
#my $alignment_file = "../acquis-truecase/model/aligned.190.grow-diag-final-and";
#my $out_file = "data/ac-test.input.xml";
#my $in_file = "evaluation/ac-test.input.tc.1";

# Read a text file into a list of lines with the line terminator removed.
# chomp (not chop) is used so a final line without a newline keeps its
# last character.
sub read_lines {
  my ($file) = @_;
  open(my $fh, '<', $file) or die "cannot open $file: $!";
  my @lines = <$fh>;
  close($fh);
  chomp(@lines);
  return @lines;
}

my @INPUT = read_lines($in_file);
my @SOURCE = read_lines($source_file);
my @TARGET = read_lines($target_file);
my @ALIGNMENT = read_lines($alignment_file);

open(MATCH, '<', $match_file) or die "cannot open $match_file: $!";
open(FRAME, '>', $out_file) or die "cannot open $out_file: $!";

# one match line per input sentence; stop early if the match file is shorter
for(my $i=0; $i<scalar(@INPUT); $i++) {

  # get match data
  my $match = <MATCH>;
  last unless defined $match;
  chomp($match);
  my ($score,$sentence,$path) = split(/ \|\|\| /,$match);

  # construct frame
  if (defined $sentence && $sentence < 1e9 && $sentence >= 0) {
    my $frame = &create_xml($SOURCE[$sentence],
                            $INPUT[$i],
                            $TARGET[$sentence],
                            $ALIGNMENT[$sentence],
                            $path);
    print FRAME $frame."\n";
  }

  # no frame -> output source
  else {
    print FRAME $INPUT[$i]."\n";
  }
}
close(FRAME);
close(MATCH);
|
||||
|
||||
# Build the XML frame for one input sentence.
#
#   $source     translation-memory source sentence (space separated)
#   $input      input sentence (space separated)
#   $target     translation-memory target sentence (space separated)
#   $alignment  word alignment line, passed to create_alignment()
#   $path       edit path, one letter per step; the code treats "M" as a
#               match, "I" as a step that advances only the input position
#               and "D" as a step that advances only the source position
#
# Returns the frame: kept target spans wrapped in <xml translation="..."> tags
# with the mismatching input words inserted between them.  Progress is traced
# on STDOUT.
sub create_xml {
  my ($source,$input,$target,$alignment,$path) = @_;

  my @input_words = split(/ /,$input);
  my @source_words = split(/ /,$source);
  my @target_words = split(/ /,$target);
  my %align = &create_alignment($alignment);
  my $aligned_to_s = $align{'s'};

  # target position -> input words to be inserted there
  my %insert_at;
  # one flag per target word: 1 = keep the translation-memory word
  my @keep = (1) x scalar(@target_words);

  ### STEP 1: FIND MISMATCHES

  my ($s,$i) = (0,0);
  my $currently_matching = 0;
  my ($start_s,$start_i) = (0,0);

  $path .= "X"; # indicate end
  print "$input\n$source\n$target\n$path\n";
  foreach my $action (split(//,$path)) {
    my $is_match_or_end = ($action eq "M" || $action eq "X");

    # beginning of a mismatch
    if ($currently_matching && !$is_match_or_end) {
      $start_i = $i;
      $start_s = $s;
      $currently_matching = 0;
    }

    # end of a mismatch
    elsif (!$currently_matching && $is_match_or_end) {

      # remove use of affected target words
      foreach my $ss ($start_s .. $s-1) {
        foreach my $tt (keys %{ $aligned_to_s->[$ss] }) {
          $keep[$tt] = 0;
        }
        # also remove enclosed unaligned words?
      }

      # are there input words that need to be inserted ?
      print "($start_i<$i)?\n";
      if ($start_i<$i) {

        # take note of input words to be inserted
        my $insertion = "";
        foreach my $ii ($start_i .. $i-1) {
          $insertion .= $input_words[$ii]." ";
        }

        # find position for inserted input words

        # find first removed target word (1000 = none found)
        my $start_t = 1000;
        foreach my $ss ($start_s .. $s-1) {
          foreach my $tt (keys %{ $aligned_to_s->[$ss] }) {
            $start_t = $tt if $tt < $start_t;
          }
        }

        # end of sentence? add to end
        if ($start_t == 1000 && $i > $#input_words) {
          $start_t = $#target_words;
        }

        # backtrack to previous words if unaligned
        if ($start_t == 1000) {
          $start_t = -1;
          for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
            foreach my $tt (keys %{ $aligned_to_s->[$ss] }) {
              $start_t = $tt if $tt > $start_t;
            }
          }
        }
        $insert_at{$start_t} .= $insertion;
      }

      $currently_matching = 1;
    }

    # trace the current step
    print "$action $s $i ($start_s $start_i) $currently_matching";
    if ($action ne "I") {
      print " ->";
      foreach my $tt (keys %{ $aligned_to_s->[$s] }) {
        print " ".$tt;
      }
    }
    print "\n";
    $s++ unless $action eq "I";
    $i++ unless $action eq "D";
  }

  # trace the outcome of step 1
  print $target."\n";
  foreach (@keep) { print $_; } print "\n";
  foreach (sort keys %insert_at) {
    print "$_: $insert_at{$_}\n";
  }

  ### STEP 2: BUILD FRAME

  # insertions recorded for position -1 go in front of everything
  my $frame = "";
  $frame = $insert_at{-1} if defined $insert_at{-1};

  my $currently_included = 0;
  my $start_t = -1;
  push @keep,0; # indicate end

  for(my $t=0;$t<=scalar(@target_words);$t++) {

    # beginning of tm target inclusion
    if (!$currently_included && $keep[$t]) {
      $start_t = $t;
      $currently_included = 1;
    }

    # end of tm target inclusion (not included word or inserted input)
    elsif ($currently_included &&
           (!$keep[$t] || defined($insert_at{$t}))) {
      # add xml (unless change is at the beginning of the sentence)
      if ($start_t >= 0) {
        my $span = "";
        print "for(tt=$start_t;tt<$t+$keep[$t]);\n";
        # a still-kept word at $t belongs to the span being closed
        for(my $tt=$start_t;$tt<$t+$keep[$t];$tt++) {
          $span .= $target_words[$tt] . " ";
        }
        chop($span);
        $frame .= "<xml translation=\"$span\"> x </xml> ";
      }
      $currently_included = 0;
    }

    $frame .= $insert_at{$t} if defined $insert_at{$t};
    print "$keep[$t] $t ($start_t) $currently_included\n";
  }

  print $frame."\n-------------------------------------\n";
  return $frame;
}
|
||||
|
||||
# Parse an alignment line of space-separated "s-t" points.
# Returns a hash with two array references:
#   's' : indexed by source position, each entry a hash of aligned target positions
#   't' : indexed by target position, each entry a hash of aligned source positions
sub create_alignment {
  my ($line) = @_;
  my @by_source;
  my @by_target;
  for my $link (split(/ /,$line)) {
    my ($src_pos,$tgt_pos) = split(/\-/,$link);
    $by_source[$src_pos]{$tgt_pos} += 1;
    $by_target[$tgt_pos]{$src_pos} += 1;
  }
  my %both = ( 's' => \@by_source, 't' => \@by_target );
  return %both;
}
|
982
contrib/fuzzy-match/old/fuzzy-match.cpp
Normal file
@ -0,0 +1,982 @@
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <getopt.h>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <cstring>
|
||||
#include <time.h>
|
||||
|
||||
#include "Vocabulary.h"
|
||||
#include "SuffixArray.h"
|
||||
|
||||
/** This implementation is explained in
|
||||
Koehn and Senellart: "Fast Approximate String Matching
|
||||
with Suffix Arrays and A* Parsing" (AMTA 2010) ***/
|
||||
|
||||
using namespace std;
|
||||
|
||||
Vocabulary vocabulary;
|
||||
|
||||
// Command line settings. The *_flag variables are ints because main()
// passes their addresses to getopt_long() as flag targets.
int basic_flag = 0;          // --basic: main() runs basic_fuzzy_match() and exits
int lsed_flag = 1;           // cleared by --word
int refined_flag = 1;        // cleared by --unrefined; enables the short-match handling
int length_filter_flag = 1;  // cleared by --nolengthfilter
int parse_flag = 1;          // cleared by --noparse
int min_match = 70;          // --minmatch / -m, accepted range 1..100
int multiple_flag = 0;       // set by --multiple
int multiple_slack = 0;      // NOTE(review): no option in the visible table sets this
int multiple_max = 100;      // NOTE(review): no option in the visible table sets this
|
||||
|
||||
void load_corpus( char* fileName, vector< vector< WORD_ID > > &corpus )
|
||||
{
|
||||
ifstream fileStream;
|
||||
fileStream.open(fileName);
|
||||
if (!fileStream) {
|
||||
cerr << "file not found: " << fileName << endl;
|
||||
exit(1);
|
||||
}
|
||||
istream *fileStreamP = &fileStream;
|
||||
|
||||
char line[LINE_MAX_LENGTH];
|
||||
while(true)
|
||||
{
|
||||
SAFE_GETLINE((*fileStreamP), line, LINE_MAX_LENGTH, '\n');
|
||||
if (fileStreamP->eof()) break;
|
||||
corpus.push_back( vocabulary.Tokenize( line ) );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* Letter string edit distance, e.g. sub 'their' to 'there' costs 2 */
|
||||
|
||||
// global cache for word pairs
|
||||
map< pair< WORD_ID, WORD_ID >, unsigned int > lsed;
|
||||
|
||||
/* Letter-level edit distance between two vocabulary words.
   Insertion, deletion and substitution of a letter each cost 1.
   Results are memoized in the global cache lsed, keyed by the ordered
   pair (aIdx, bIdx). */
unsigned int letter_sed( WORD_ID aIdx, WORD_ID bIdx )
{
  // check if already computed -> lookup in cache
  const pair< WORD_ID, WORD_ID > pIdx = make_pair( aIdx, bIdx );
  map< pair< WORD_ID, WORD_ID >, unsigned int >::const_iterator lookup = lsed.find( pIdx );
  if (lookup != lsed.end()) {
    return lookup->second;
  }

  // get surface strings for word indices
  const string &a = vocabulary.GetWord( aIdx );
  const string &b = vocabulary.GetWord( bIdx );

  // cost matrix owned by vectors: no unchecked calloc, no manual free
  vector< vector< unsigned int > > cost( a.size()+1, vector< unsigned int >( b.size()+1, 0 ) );
  for( unsigned int i=0; i<=a.size(); i++ ) {
    cost[i][0] = i;
  }
  for( unsigned int j=0; j<=b.size(); j++ ) {
    cost[0][j] = j;
  }

  // core string edit distance loop
  for( unsigned int i=1; i<=a.size(); i++ ) {
    for( unsigned int j=1; j<=b.size(); j++ ) {
      const unsigned int ins = cost[i-1][j] + 1;
      const unsigned int del = cost[i][j-1] + 1;
      // compare the letters directly instead of building one-letter substrings
      const unsigned int diag = cost[i-1][j-1] + ( (a[i-1] == b[j-1]) ? 0 : 1 );

      unsigned int best = (ins < del) ? ins : del;
      if (diag < best) {
        best = diag;
      }
      cost[i][j] = best;
    }
  }

  // cache and return result
  const unsigned int final = cost[a.size()][b.size()];
  lsed[ pIdx ] = final;
  return final;
}
|
||||
|
||||
/* string edit distance implementation */
|
||||
|
||||
unsigned int sed( const vector< WORD_ID > &a, const vector< WORD_ID > &b, string &best_path, bool use_letter_sed ) {
|
||||
|
||||
// initialize cost and path matrices
|
||||
unsigned int **cost = (unsigned int**) calloc( sizeof( unsigned int* ), a.size()+1 );
|
||||
char **path = (char**) calloc( sizeof( char* ), a.size()+1 );
|
||||
|
||||
for( unsigned int i=0; i<=a.size(); i++ ) {
|
||||
cost[i] = (unsigned int*) calloc( sizeof(unsigned int), b.size()+1 );
|
||||
path[i] = (char*) calloc( sizeof(char), b.size()+1 );
|
||||
if (i>0)
|
||||
{
|
||||
cost[i][0] = cost[i-1][0];
|
||||
if (use_letter_sed)
|
||||
{
|
||||
cost[i][0] += vocabulary.GetWord( a[i-1] ).size();
|
||||
}
|
||||
else
|
||||
{
|
||||
cost[i][0]++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cost[i][0] = 0;
|
||||
}
|
||||
path[i][0] = 'I';
|
||||
}
|
||||
|
||||
for( unsigned int j=0; j<=b.size(); j++ ) {
|
||||
if (j>0)
|
||||
{
|
||||
cost[0][j] = cost[0][j-1];
|
||||
if (use_letter_sed)
|
||||
{
|
||||
cost[0][j] += vocabulary.GetWord( b[j-1] ).size();
|
||||
}
|
||||
else
|
||||
{
|
||||
cost[0][j]++;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cost[0][j] = 0;
|
||||
}
|
||||
path[0][j] = 'D';
|
||||
}
|
||||
|
||||
// core string edit distance algorithm
|
||||
for( unsigned int i=1; i<=a.size(); i++ ) {
|
||||
for( unsigned int j=1; j<=b.size(); j++ ) {
|
||||
unsigned int ins = cost[i-1][j];
|
||||
unsigned int del = cost[i][j-1];
|
||||
unsigned int match;
|
||||
if (use_letter_sed)
|
||||
{
|
||||
ins += vocabulary.GetWord( a[i-1] ).size();
|
||||
del += vocabulary.GetWord( b[j-1] ).size();
|
||||
match = letter_sed( a[i-1], b[j-1] );
|
||||
}
|
||||
else
|
||||
{
|
||||
ins++;
|
||||
del++;
|
||||
match = ( a[i-1] == b[j-1] ) ? 0 : 1;
|
||||
}
|
||||
unsigned int diag = cost[i-1][j-1] + match;
|
||||
|
||||
char action = (ins < del) ? 'I' : 'D';
|
||||
unsigned int min = (ins < del) ? ins : del;
|
||||
if (diag < min)
|
||||
{
|
||||
action = (match>0) ? 'S' : 'M';
|
||||
min = diag;
|
||||
}
|
||||
|
||||
cost[i][j] = min;
|
||||
path[i][j] = action;
|
||||
}
|
||||
}
|
||||
|
||||
// construct string for best path
|
||||
unsigned int i = a.size();
|
||||
unsigned int j = b.size();
|
||||
best_path = "";
|
||||
while( i>0 || j>0 )
|
||||
{
|
||||
best_path = path[i][j] + best_path;
|
||||
if (path[i][j] == 'I')
|
||||
{
|
||||
i--;
|
||||
}
|
||||
else if (path[i][j] == 'D')
|
||||
{
|
||||
j--;
|
||||
}
|
||||
else
|
||||
{
|
||||
i--;
|
||||
j--;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// clear out memory
|
||||
unsigned int final = cost[a.size()][b.size()];
|
||||
|
||||
for( unsigned int i=0; i<=a.size(); i++ ) {
|
||||
free( cost[i] );
|
||||
free( path[i] );
|
||||
}
|
||||
free( cost );
|
||||
free( path );
|
||||
|
||||
// return result
|
||||
return final;
|
||||
}
|
||||
|
||||
/* utility function: compute length of sentence in characters
|
||||
(spaces do not count) */
|
||||
|
||||
unsigned int compute_length( const vector< WORD_ID > &sentence )
|
||||
{
|
||||
unsigned int length = 0; for( unsigned int i=0; i<sentence.size(); i++ )
|
||||
{
|
||||
length += vocabulary.GetWord( sentence[i] ).size();
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
/* brute force method: compare input to all corpus sentences */
|
||||
|
||||
int basic_fuzzy_match( vector< vector< WORD_ID > > source,
|
||||
vector< vector< WORD_ID > > input )
|
||||
{
|
||||
// go through input set...
|
||||
for(unsigned int i=0;i<input.size();i++)
|
||||
{
|
||||
bool use_letter_sed = false;
|
||||
|
||||
// compute sentence length and worst allowed cost
|
||||
unsigned int input_length;
|
||||
if (use_letter_sed)
|
||||
{
|
||||
input_length = compute_length( input[i] );
|
||||
}
|
||||
else
|
||||
{
|
||||
input_length = input[i].size();
|
||||
}
|
||||
unsigned int best_cost = input_length * (100-min_match) / 100 + 2;
|
||||
string best_path = "";
|
||||
int best_match = -1;
|
||||
|
||||
// go through all corpus sentences
|
||||
for(unsigned int s=0;s<source.size();s++)
|
||||
{
|
||||
int source_length;
|
||||
if (use_letter_sed)
|
||||
{
|
||||
source_length = compute_length( source[s] );
|
||||
}
|
||||
else
|
||||
{
|
||||
source_length = source[s].size();
|
||||
}
|
||||
int diff = abs((int)source_length - (int)input_length);
|
||||
if (length_filter_flag && (diff >= best_cost))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// compute string edit distance
|
||||
string path;
|
||||
unsigned int cost = sed( input[i], source[s], path, use_letter_sed );
|
||||
|
||||
// update if new best
|
||||
if (cost < best_cost)
|
||||
{
|
||||
best_cost = cost;
|
||||
best_path = path;
|
||||
best_match = s;
|
||||
}
|
||||
}
|
||||
cout << best_cost << " ||| " << best_match << " ||| " << best_path << endl;
|
||||
}
|
||||
}
|
||||
|
||||
#define MAX_MATCH_COUNT 10000000
|
||||
|
||||
/* data structure for n-gram match between input and corpus */
|
||||
|
||||
/* One n-gram match between the input sentence and a translation memory
   sentence. Positions are word indices; the *_end members are inclusive
   (a one-word match has start == end). */
class Match {
public:
  int input_start;    // first matched word in the input
  int input_end;      // last matched word in the input
  int tm_start;       // first matched word in the tm sentence
  int tm_end;         // last matched word in the tm sentence
  int min_cost;       // lower bound on the cost of a sentence match using this match
  int max_cost;       // upper bound on that cost
  int internal_cost;  // cost of the gaps inside a combined match

  Match( int inputStart, int inputEnd, int tmStart, int tmEnd,
         int minCost, int maxCost, int internalCost )
    : input_start( inputStart ),
      input_end( inputEnd ),
      tm_start( tmStart ),
      tm_end( tmEnd ),
      min_cost( minCost ),
      max_cost( maxCost ),
      internal_cost( internalCost )
  {}
};
|
||||
|
||||
map< WORD_ID,vector< int > > single_word_index;
|
||||
|
||||
/* definition of short matches
|
||||
very short n-gram matches (1-grams) will not be looked up in
|
||||
the suffix array, since there are too many matches
|
||||
and for longer sentences, at least one 2-gram match must occur */
|
||||
|
||||
inline int short_match_max_length( int input_length )
|
||||
{
|
||||
if ( ! refined_flag )
|
||||
return 0;
|
||||
if ( input_length >= 5 )
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* if we have non-short matches in a sentence, we need to
|
||||
take a closer look at it.
|
||||
this function creates a hash map for all input words and their positions
|
||||
(to be used by the next function)
|
||||
(done here, because this has to be done only once for an input sentence) */
|
||||
|
||||
void init_short_matches( const vector< WORD_ID > &input )
|
||||
{
|
||||
int max_length = short_match_max_length( input.size() );
|
||||
if (max_length == 0)
|
||||
return;
|
||||
|
||||
single_word_index.clear();
|
||||
|
||||
// store input words and their positions in hash map
|
||||
for(int i=0; i<input.size(); i++)
|
||||
{
|
||||
if (single_word_index.find( input[i] ) == single_word_index.end())
|
||||
{
|
||||
vector< int > position_vector;
|
||||
single_word_index[ input[i] ] = position_vector;
|
||||
}
|
||||
single_word_index[ input[i] ].push_back( i );
|
||||
}
|
||||
}
|
||||
|
||||
/* add all short matches to list of matches for a sentence */
|
||||
|
||||
void add_short_matches( vector< Match > &match, const vector< WORD_ID > &tm, int input_length, int best_cost )
|
||||
{
|
||||
int max_length = short_match_max_length( input_length );
|
||||
if (max_length == 0)
|
||||
return;
|
||||
|
||||
int tm_length = tm.size();
|
||||
map< WORD_ID,vector< int > >::iterator input_word_hit;
|
||||
for(int t_pos=0; t_pos<tm.size(); t_pos++)
|
||||
{
|
||||
input_word_hit = single_word_index.find( tm[t_pos] );
|
||||
if (input_word_hit != single_word_index.end())
|
||||
{
|
||||
vector< int > &position_vector = input_word_hit->second;
|
||||
for(int j=0; j<position_vector.size(); j++)
|
||||
{
|
||||
int &i_pos = position_vector[j];
|
||||
|
||||
// before match
|
||||
int max_cost = max( i_pos , t_pos );
|
||||
int min_cost = abs( i_pos - t_pos );
|
||||
if ( i_pos>0 && i_pos == t_pos )
|
||||
min_cost++;
|
||||
|
||||
// after match
|
||||
max_cost += max( (input_length-i_pos) , (tm_length-t_pos));
|
||||
min_cost += abs( (input_length-i_pos) - (tm_length-t_pos));
|
||||
if ( i_pos != input_length-1 && (input_length-i_pos) == (tm_length-t_pos))
|
||||
min_cost++;
|
||||
|
||||
if (min_cost <= best_cost)
|
||||
{
|
||||
Match new_match( i_pos,i_pos, t_pos,t_pos, min_cost,max_cost,0 );
|
||||
match.push_back( new_match );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* remove matches that are subsumed by a larger match */
|
||||
|
||||
vector< Match > prune_matches( const vector< Match > &match, int best_cost )
|
||||
{
|
||||
//cerr << "\tpruning";
|
||||
vector< Match > pruned;
|
||||
for(int i=match.size()-1; i>=0; i--)
|
||||
{
|
||||
//cerr << " (" << match[i].input_start << "," << match[i].input_end
|
||||
// << " ; " << match[i].tm_start << "," << match[i].tm_end
|
||||
// << " * " << match[i].min_cost << ")";
|
||||
|
||||
//if (match[i].min_cost > best_cost)
|
||||
// continue;
|
||||
|
||||
bool subsumed = false;
|
||||
for(int j=match.size()-1; j>=0; j--)
|
||||
{
|
||||
if (i!=j // do not compare match with itself
|
||||
&& ( match[i].input_end - match[i].input_start <=
|
||||
match[j].input_end - match[j].input_start ) // i shorter than j
|
||||
&& ((match[i].input_start == match[j].input_start &&
|
||||
match[i].tm_start == match[j].tm_start ) ||
|
||||
(match[i].input_end == match[j].input_end &&
|
||||
match[i].tm_end == match[j].tm_end) ) )
|
||||
{
|
||||
subsumed = true;
|
||||
}
|
||||
}
|
||||
if (! subsumed && match[i].min_cost <= best_cost)
|
||||
{
|
||||
//cerr << "*";
|
||||
pruned.push_back( match[i] );
|
||||
}
|
||||
}
|
||||
//cerr << endl;
|
||||
return pruned;
|
||||
}
|
||||
|
||||
/* A* parsing method to compute string edit distance */
|
||||
|
||||
int parse_matches( vector< Match > &match, int input_length, int tm_length, int &best_cost )
|
||||
{
|
||||
// cerr << "sentence has " << match.size() << " matches, best cost: " << best_cost << ", lengths input: " << input_length << " tm: " << tm_length << endl;
|
||||
|
||||
if (match.size() == 1)
|
||||
return match[0].max_cost;
|
||||
if (match.size() == 0)
|
||||
return input_length+tm_length;
|
||||
|
||||
int this_best_cost = input_length + tm_length;
|
||||
for(int i=0;i<match.size();i++)
|
||||
{
|
||||
this_best_cost = min( this_best_cost, match[i].max_cost );
|
||||
}
|
||||
// cerr << "\tthis best cost: " << this_best_cost << endl;
|
||||
|
||||
// bottom up combination of spans
|
||||
vector< vector< Match > > multi_match;
|
||||
multi_match.push_back( match );
|
||||
|
||||
int match_level = 1;
|
||||
while(multi_match[ match_level-1 ].size()>0)
|
||||
{
|
||||
// init vector
|
||||
vector< Match > empty;
|
||||
multi_match.push_back( empty );
|
||||
|
||||
for(int first_level = 0; first_level <= (match_level-1)/2; first_level++)
|
||||
{
|
||||
int second_level = match_level - first_level -1;
|
||||
//cerr << "\tcombining level " << first_level << " and " << second_level << endl;
|
||||
|
||||
vector< Match > &first_match = multi_match[ first_level ];
|
||||
vector< Match > &second_match = multi_match[ second_level ];
|
||||
|
||||
for(int i1 = 0; i1 < first_match.size(); i1++) {
|
||||
for(int i2 = 0; i2 < second_match.size(); i2++) {
|
||||
|
||||
// do not combine the same pair twice
|
||||
if (first_level == second_level && i2 <= i1)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// get sorted matches (first is before second)
|
||||
Match *first, *second;
|
||||
if (first_match[i1].input_start < second_match[i2].input_start )
|
||||
{
|
||||
first = &first_match[i1];
|
||||
second = &second_match[i2];
|
||||
}
|
||||
else
|
||||
{
|
||||
second = &first_match[i1];
|
||||
first = &second_match[i2];
|
||||
}
|
||||
|
||||
//cerr << "\tcombining "
|
||||
// << "(" << first->input_start << "," << first->input_end << "), "
|
||||
// << first->tm_start << " [" << first->internal_cost << "]"
|
||||
// << " with "
|
||||
// << "(" << second->input_start << "," << second->input_end << "), "
|
||||
// << second->tm_start<< " [" << second->internal_cost << "]"
|
||||
// << endl;
|
||||
|
||||
// do not process overlapping matches
|
||||
if (first->input_end >= second->input_start)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// no overlap / mismatch in tm
|
||||
if (first->tm_end >= second->tm_start)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// compute cost
|
||||
int min_cost = 0;
|
||||
int max_cost = 0;
|
||||
|
||||
// initial
|
||||
min_cost += abs( first->input_start - first->tm_start );
|
||||
max_cost += max( first->input_start, first->tm_start );
|
||||
|
||||
// same number of words, but not sent. start -> cost is at least 1
|
||||
if (first->input_start == first->tm_start && first->input_start > 0)
|
||||
{
|
||||
min_cost++;
|
||||
}
|
||||
|
||||
// in-between
|
||||
int skipped_words = second->input_start - first->input_end -1;
|
||||
int skipped_words_tm = second->tm_start - first->tm_end -1;
|
||||
int internal_cost = max( skipped_words, skipped_words_tm );
|
||||
internal_cost += first->internal_cost + second->internal_cost;
|
||||
min_cost += internal_cost;
|
||||
max_cost += internal_cost;
|
||||
|
||||
// final
|
||||
min_cost += abs( (tm_length-1 - second->tm_end) -
|
||||
(input_length-1 - second->input_end) );
|
||||
max_cost += max( (tm_length-1 - second->tm_end),
|
||||
(input_length-1 - second->input_end) );
|
||||
|
||||
// same number of words, but not sent. end -> cost is at least 1
|
||||
if ( ( input_length-1 - second->input_end
|
||||
== tm_length-1 - second->tm_end )
|
||||
&& input_length-1 != second->input_end )
|
||||
{
|
||||
min_cost++;
|
||||
}
|
||||
|
||||
// cerr << "\tcost: " << min_cost << "-" << max_cost << endl;
|
||||
|
||||
// if worst than best cost, forget it
|
||||
if (min_cost > best_cost)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
// add match
|
||||
Match new_match( first->input_start,
|
||||
second->input_end,
|
||||
first->tm_start,
|
||||
second->tm_end,
|
||||
min_cost,
|
||||
max_cost,
|
||||
internal_cost);
|
||||
multi_match[ match_level ].push_back( new_match );
|
||||
// cerr << "\tstored\n";
|
||||
|
||||
// possibly updating this_best_cost
|
||||
if (max_cost < this_best_cost)
|
||||
{
|
||||
// cerr << "\tupdating this best cost to " << max_cost << "\n";
|
||||
this_best_cost = max_cost;
|
||||
|
||||
// possibly updating best_cost
|
||||
if (max_cost < best_cost)
|
||||
{
|
||||
// cerr << "\tupdating best cost to " << max_cost << "\n";
|
||||
best_cost = max_cost;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
match_level++;
|
||||
}
|
||||
return this_best_cost;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
vector< vector< WORD_ID > > source, input;
|
||||
|
||||
while(1) {
|
||||
static struct option long_options[] = {
|
||||
{"basic", no_argument, &basic_flag, 1},
|
||||
{"word", no_argument, &lsed_flag, 0},
|
||||
{"unrefined", no_argument, &refined_flag, 0},
|
||||
{"nolengthfilter", no_argument, &length_filter_flag, 0},
|
||||
{"noparse", no_argument, &parse_flag, 0},
|
||||
{"multiple", no_argument, &multiple_flag, 1},
|
||||
{"minmatch", required_argument, 0, 'm'},
|
||||
{0, 0, 0, 0}
|
||||
};
|
||||
int option_index = 0;
|
||||
int c = getopt_long (argc, argv, "m:", long_options, &option_index);
|
||||
if (c == -1) break;
|
||||
switch (c) {
|
||||
case 0:
|
||||
// if (long_options[option_index].flag != 0)
|
||||
// break;
|
||||
// printf ("option %s", long_options[option_index].name);
|
||||
// if (optarg)
|
||||
// printf (" with arg %s", optarg);
|
||||
// printf ("\n");
|
||||
break;
|
||||
case 'm':
|
||||
min_match = atoi(optarg);
|
||||
if (min_match < 1 || min_match > 100) {
|
||||
cerr << "error: --minmatch must have value in range 1..100\n";
|
||||
exit(1);
|
||||
}
|
||||
cerr << "setting min match to " << min_match << endl;
|
||||
break;
|
||||
default:
|
||||
cerr << "usage: syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
if (lsed_flag) { cerr << "lsed\n"; }
|
||||
if (basic_flag) { cerr << "basic\n"; }
|
||||
if (refined_flag) { cerr << "refined\n"; }
|
||||
if (length_filter_flag) { cerr << "length filter\n"; }
|
||||
if (parse_flag) { cerr << "parse\n"; }
|
||||
// exit(1);
|
||||
|
||||
|
||||
if (optind+2 != argc) {
|
||||
cerr << "syntax: ./fuzzy-match input corpus [--basic] [--word] [--minmatch 1..100]\n";
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cerr << "loading corpus...\n";
|
||||
|
||||
load_corpus(argv[optind], input);
|
||||
load_corpus(argv[optind+1], source);
|
||||
|
||||
// ./fuzzy-match input corpus [-basic]
|
||||
|
||||
// load_corpus("../corpus/tm.truecased.4.en", source);
|
||||
// load_corpus("../corpus/tm.truecased.4.it", target);
|
||||
// load_corpus("../evaluation/test.input.tc.4", input);
|
||||
|
||||
// load_corpus("../../acquis-truecase/corpus/acquis.truecased.190.en", source);
|
||||
// load_corpus("../../acquis-truecase/evaluation/ac-test.input.tc.190", input);
|
||||
|
||||
// load_corpus("../corpus/tm.truecased.16.en", source);
|
||||
// load_corpus("../evaluation/test.input.tc.16", input);
|
||||
|
||||
if (basic_flag) {
|
||||
cerr << "using basic method\n";
|
||||
clock_t start_main_clock2 = clock();
|
||||
basic_fuzzy_match( source, input );
|
||||
cerr << "total: " << (1000 * (clock()-start_main_clock2) / CLOCKS_PER_SEC) << endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cerr << "number of input sentences " << input.size() << endl;
|
||||
|
||||
cerr << "creating suffix array...\n";
|
||||
// SuffixArray suffixArray( "../corpus/tm.truecased.4.en" );
|
||||
// SuffixArray suffixArray( "../../acquis-truecase/corpus/acquis.truecased.190.en" );
|
||||
SuffixArray suffixArray( argv[optind+1] );
|
||||
|
||||
clock_t start_main_clock = clock();
|
||||
|
||||
// looping through all input sentences...
|
||||
cerr << "looping...\n";
|
||||
for(unsigned int i=0;i<input.size();i++)
|
||||
{
|
||||
clock_t start_clock = clock();
|
||||
// if (i % 10 == 0) cerr << ".";
|
||||
int input_id = i; // clean up this mess!
|
||||
|
||||
// establish some basic statistics
|
||||
|
||||
// int input_length = compute_length( input[i] );
|
||||
int input_length = input[i].size();
|
||||
int best_cost = input_length * (100-min_match) / 100 + 1;
|
||||
|
||||
int match_count = 0; // how many substring matches to be considered
|
||||
//cerr << endl << "sentence " << i << ", length " << input_length << ", best_cost " << best_cost << endl;
|
||||
|
||||
// find match ranges in suffix array
|
||||
vector< vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > > match_range;
|
||||
for(size_t start=0;start<input[i].size();start++)
|
||||
{
|
||||
SuffixArray::INDEX prior_first_match = 0;
|
||||
SuffixArray::INDEX prior_last_match = suffixArray.GetSize()-1;
|
||||
vector< string > substring;
|
||||
bool stillMatched = true;
|
||||
vector< pair< SuffixArray::INDEX, SuffixArray::INDEX > > matchedAtThisStart;
|
||||
//cerr << "start: " << start;
|
||||
for(int word=start; stillMatched && word<input[i].size(); word++)
|
||||
{
|
||||
substring.push_back( vocabulary.GetWord( input[i][word] ) );
|
||||
|
||||
// only look up, if needed (i.e. no unnecessary short gram lookups)
|
||||
// if (! word-start+1 <= short_match_max_length( input_length ) )
|
||||
// {
|
||||
SuffixArray::INDEX first_match, last_match;
|
||||
stillMatched = false;
|
||||
if (suffixArray.FindMatches( substring, first_match, last_match, prior_first_match, prior_last_match ) )
|
||||
{
|
||||
stillMatched = true;
|
||||
matchedAtThisStart.push_back( make_pair( first_match, last_match ) );
|
||||
//cerr << " (" << first_match << "," << last_match << ")";
|
||||
//cerr << " " << ( last_match - first_match + 1 );
|
||||
prior_first_match = first_match;
|
||||
prior_last_match = last_match;
|
||||
}
|
||||
//}
|
||||
}
|
||||
//cerr << endl;
|
||||
match_range.push_back( matchedAtThisStart );
|
||||
}
|
||||
|
||||
clock_t clock_range = clock();
|
||||
|
||||
map< int, vector< Match > > sentence_match;
|
||||
map< int, int > sentence_match_word_count;
|
||||
|
||||
// go through all matches, longest first
|
||||
for(int length = input[i].size(); length >= 1; length--)
|
||||
{
|
||||
// do not create matches, if these are handled by the short match function
|
||||
if (length <= short_match_max_length( input_length ) )
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
unsigned int count = 0;
|
||||
for(int start = 0; start <= input[i].size() - length; start++)
|
||||
{
|
||||
if (match_range[start].size() >= length)
|
||||
{
|
||||
pair< SuffixArray::INDEX, SuffixArray::INDEX > &range = match_range[start][length-1];
|
||||
// cerr << " (" << range.first << "," << range.second << ")";
|
||||
count += range.second - range.first + 1;
|
||||
|
||||
for(SuffixArray::INDEX i=range.first; i<=range.second; i++)
|
||||
{
|
||||
int position = suffixArray.GetPosition( i );
|
||||
|
||||
// sentence length mismatch
|
||||
size_t sentence_id = suffixArray.GetSentence( position );
|
||||
int sentence_length = suffixArray.GetSentenceLength( sentence_id );
|
||||
int diff = abs( (int)sentence_length - (int)input_length );
|
||||
// cerr << endl << i << "\tsentence " << sentence_id << ", length " << sentence_length;
|
||||
//if (length <= 2 && input_length>=5 &&
|
||||
// sentence_match.find( sentence_id ) == sentence_match.end())
|
||||
// continue;
|
||||
|
||||
if (diff > best_cost)
|
||||
continue;
|
||||
|
||||
// compute minimal cost
|
||||
int start_pos = suffixArray.GetWordInSentence( position );
|
||||
int end_pos = start_pos + length-1;
|
||||
// cerr << endl << "\t" << start_pos << "-" << end_pos << " (" << sentence_length << ") vs. "
|
||||
// << start << "-" << (start+length-1) << " (" << input_length << ")";
|
||||
// different number of prior words -> cost is at least diff
|
||||
int min_cost = abs( start - start_pos );
|
||||
|
||||
// same number of words, but not sent. start -> cost is at least 1
|
||||
if (start == start_pos && start>0)
|
||||
min_cost++;
|
||||
|
||||
// different number of remaining words -> cost is at least diff
|
||||
min_cost += abs( ( sentence_length-1 - end_pos ) -
|
||||
( input_length-1 - (start+length-1) ) );
|
||||
|
||||
// same number of words, but not sent. end -> cost is at least 1
|
||||
if ( sentence_length-1 - end_pos ==
|
||||
input_length-1 - (start+length-1)
|
||||
&& end_pos != sentence_length-1 )
|
||||
min_cost++;
|
||||
|
||||
// cerr << " -> min_cost " << min_cost;
|
||||
if (min_cost > best_cost)
|
||||
continue;
|
||||
|
||||
// valid match
|
||||
match_count++;
|
||||
|
||||
// compute maximal cost
|
||||
int max_cost = max( start, start_pos )
|
||||
+ max( sentence_length-1 - end_pos,
|
||||
input_length-1 - (start+length-1) );
|
||||
// cerr << ", max_cost " << max_cost;
|
||||
|
||||
Match m = Match( start, start+length-1,
|
||||
start_pos, start_pos+length-1,
|
||||
min_cost, max_cost, 0);
|
||||
sentence_match[ sentence_id ].push_back( m );
|
||||
sentence_match_word_count[ sentence_id ] += length;
|
||||
|
||||
if (max_cost < best_cost)
|
||||
{
|
||||
best_cost = max_cost;
|
||||
if (best_cost == 0) break;
|
||||
}
|
||||
//if (match_count >= MAX_MATCH_COUNT) break;
|
||||
}
|
||||
}
|
||||
// cerr << endl;
|
||||
if (best_cost == 0) break;
|
||||
//if (match_count >= MAX_MATCH_COUNT) break;
|
||||
}
|
||||
// cerr << count << " matches at length " << length << " in " << sentence_match.size() << " tm." << endl;
|
||||
|
||||
if (best_cost == 0) break;
|
||||
//if (match_count >= MAX_MATCH_COUNT) break;
|
||||
}
|
||||
cerr << match_count << " matches in " << sentence_match.size() << " sentences." << endl;
|
||||
|
||||
clock_t clock_matches = clock();
|
||||
|
||||
// consider each sentence for which we have matches
|
||||
int old_best_cost = best_cost;
|
||||
int tm_count_word_match = 0;
|
||||
int tm_count_word_match2 = 0;
|
||||
int pruned_match_count = 0;
|
||||
if (short_match_max_length( input_length ))
|
||||
{
|
||||
init_short_matches( input[i] );
|
||||
}
|
||||
vector< int > best_tm;
|
||||
typedef map< int, vector< Match > >::iterator I;
|
||||
|
||||
clock_t clock_validation_sum = 0;
|
||||
|
||||
for(I tm=sentence_match.begin(); tm!=sentence_match.end(); tm++)
|
||||
{
|
||||
int tmID = tm->first;
|
||||
int tm_length = suffixArray.GetSentenceLength(tmID);
|
||||
vector< Match > &match = tm->second;
|
||||
add_short_matches( match, source[tmID], input_length, best_cost );
|
||||
|
||||
//cerr << "match in sentence " << tmID << ": " << match.size() << " [" << tm_length << "]" << endl;
|
||||
|
||||
// quick look: how many words are matched
|
||||
int words_matched = 0;
|
||||
for(int m=0;m<match.size();m++) {
|
||||
|
||||
if (match[m].min_cost <= best_cost) // makes no difference
|
||||
words_matched += match[m].input_end - match[m].input_start + 1;
|
||||
}
|
||||
if (max(input_length,tm_length) - words_matched > best_cost)
|
||||
{
|
||||
if (length_filter_flag) continue;
|
||||
}
|
||||
tm_count_word_match++;
|
||||
|
||||
// prune, check again how many words are matched
|
||||
vector< Match > pruned = prune_matches( match, best_cost );
|
||||
words_matched = 0;
|
||||
for(int p=0;p<pruned.size();p++) {
|
||||
words_matched += pruned[p].input_end - pruned[p].input_start + 1;
|
||||
}
|
||||
if (max(input_length,tm_length) - words_matched > best_cost)
|
||||
{
|
||||
if (length_filter_flag) continue;
|
||||
}
|
||||
tm_count_word_match2++;
|
||||
|
||||
pruned_match_count += pruned.size();
|
||||
int prior_best_cost = best_cost;
|
||||
int cost;
|
||||
|
||||
clock_t clock_validation_start = clock();
|
||||
if (! parse_flag ||
|
||||
pruned.size()>=10) // to prevent worst cases
|
||||
{
|
||||
string path;
|
||||
cost = sed( input[input_id], source[tmID], path, false );
|
||||
if (cost < best_cost)
|
||||
{
|
||||
best_cost = cost;
|
||||
}
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
cost = parse_matches( pruned, input_length, tm_length, best_cost );
|
||||
if (prior_best_cost != best_cost)
|
||||
{
|
||||
best_tm.clear();
|
||||
}
|
||||
}
|
||||
clock_validation_sum += clock() - clock_validation_start;
|
||||
if (cost == best_cost)
|
||||
{
|
||||
best_tm.push_back( tmID );
|
||||
}
|
||||
}
|
||||
cerr << "reduced best cost from " << old_best_cost << " to " << best_cost << endl;
|
||||
cerr << "tm considered: " << sentence_match.size()
|
||||
<< " word-matched: " << tm_count_word_match
|
||||
<< " word-matched2: " << tm_count_word_match2
|
||||
<< " best: " << best_tm.size() << endl;
|
||||
|
||||
cerr << "pruned matches: " << ((float)pruned_match_count/(float)tm_count_word_match2) << endl;
|
||||
|
||||
// do not try to find the best ... report multiple matches
|
||||
if (multiple_flag) {
|
||||
int input_letter_length = compute_length( input[input_id] );
|
||||
for(int si=0; si<best_tm.size(); si++) {
|
||||
int s = best_tm[si];
|
||||
string path;
|
||||
unsigned int letter_cost = sed( input[input_id], source[s], path, true );
|
||||
// do not report multiple identical sentences, but just their count
|
||||
cout << i << " "; // sentence number
|
||||
cout << letter_cost << "/" << input_letter_length << " ";
|
||||
cout << "(" << best_cost <<"/" << input_length <<") ";
|
||||
cout << "||| " << s << " ||| " << path << endl;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// find the best matches according to letter sed
|
||||
string best_path = "";
|
||||
int best_match = -1;
|
||||
int best_letter_cost;
|
||||
if (lsed_flag) {
|
||||
best_letter_cost = compute_length( input[input_id] ) * min_match / 100 + 1;
|
||||
for(int si=0; si<best_tm.size(); si++)
|
||||
{
|
||||
int s = best_tm[si];
|
||||
string path;
|
||||
unsigned int letter_cost = sed( input[input_id], source[s], path, true );
|
||||
if (letter_cost < best_letter_cost)
|
||||
{
|
||||
best_letter_cost = letter_cost;
|
||||
best_path = path;
|
||||
best_match = s;
|
||||
}
|
||||
}
|
||||
}
|
||||
// if letter sed turned off, just compute path for first match
|
||||
else {
|
||||
if (best_tm.size() > 0) {
|
||||
string path;
|
||||
sed( input[input_id], source[best_tm[0]], path, false );
|
||||
best_path = path;
|
||||
best_match = best_tm[0];
|
||||
}
|
||||
}
|
||||
cerr << "elapsed: " << (1000 * (clock()-start_clock) / CLOCKS_PER_SEC)
|
||||
<< " ( range: " << (1000 * (clock_range-start_clock) / CLOCKS_PER_SEC)
|
||||
<< " match: " << (1000 * (clock_matches-clock_range) / CLOCKS_PER_SEC)
|
||||
<< " tm: " << (1000 * (clock()-clock_matches) / CLOCKS_PER_SEC)
|
||||
<< " (validation: " << (1000 * (clock_validation_sum) / CLOCKS_PER_SEC) << ")"
|
||||
<< " )" << endl;
|
||||
if (lsed_flag) {
|
||||
cout << best_letter_cost << "/" << compute_length( input[input_id] ) << " (";
|
||||
}
|
||||
cout << best_cost <<"/" << input_length;
|
||||
if (lsed_flag) cout << ")";
|
||||
cout << " ||| " << best_match << " ||| " << best_path << endl;
|
||||
}
|
||||
cerr << "total: " << (1000 * (clock()-start_main_clock) / CLOCKS_PER_SEC) << endl;
|
||||
|
||||
|
||||
}
|
@ -0,0 +1,58 @@
|
||||
#!/usr/bin/perl -w

# Collapses a sentence-aligned parallel corpus so that every distinct source
# sentence occurs once.  For each distinct source sentence the script writes:
#   - the source sentence itself,
#   - all of its target translations as "COUNT TARGET" joined by " ||| ",
#   - the word alignment of each translation, joined by " ||| " in the same order,
#   - the most frequent translation and its alignment (the "most-frequent" files).
# All five output files are line-parallel.

use strict;

my $src_in   = "corpus/acquis.truecased.4.en";
my $tgt_in   = "corpus/acquis.truecased.4.fr";
my $align_in = "model/aligned.4.grow-diag-final-and";

my $src_out   = "data/acquis.truecased.4.en.uniq";
my $tgt_out   = "data/acquis.truecased.4.fr.uniq";
my $tgt_mf    = "data/acquis.truecased.4.fr.uniq.most-frequent";
my $align_out = "data/acquis.truecased.4.align.uniq";
my $align_mf  = "data/acquis.truecased.4.align.uniq.most-frequent";

# %TRANS: source sentence -> target sentence -> number of occurrences
# %ALIGN: source sentence -> target sentence -> alignment (last one seen wins)
my (%TRANS,%ALIGN);

open(my $SRC,   '<', $src_in)   or die "Cannot read $src_in: $!\n";
open(my $TGT,   '<', $tgt_in)   or die "Cannot read $tgt_in: $!\n";
open(my $ALIGN, '<', $align_in) or die "Cannot read $align_in: $!\n";
while(my $src = <$SRC>) {
  my $tgt   = <$TGT>;
  my $align = <$ALIGN>;
  # the three inputs must be line-parallel; stop instead of silently
  # recording undefined targets/alignments
  defined($tgt)   or die "$tgt_in has fewer lines than $src_in (line $.)\n";
  defined($align) or die "$align_in has fewer lines than $src_in (line $.)\n";
  # chomp (not chop): only a trailing newline is removed, so a final line
  # without a newline does not lose its last character
  chomp($src);
  chomp($tgt);
  chomp($align);
  $TRANS{$src}{$tgt}++;
  $ALIGN{$src}{$tgt} = $align;
}
close($SRC);
close($TGT);
close($ALIGN);

open(my $SRC_OUT,   '>', $src_out)   or die "Cannot write $src_out: $!\n";
open(my $TGT_OUT,   '>', $tgt_out)   or die "Cannot write $tgt_out: $!\n";
open(my $TGT_MF,    '>', $tgt_mf)    or die "Cannot write $tgt_mf: $!\n";
open(my $ALIGN_OUT, '>', $align_out) or die "Cannot write $align_out: $!\n";
open(my $ALIGN_MF,  '>', $align_mf)  or die "Cannot write $align_mf: $!\n";
foreach my $src (keys %TRANS) {
  print {$SRC_OUT} $src."\n";
  my $first = 1;
  my ($max,$best) = (0);
  foreach my $tgt (keys %{$TRANS{$src}}) {
    print {$TGT_OUT} " ||| " unless $first;
    print {$TGT_OUT} $TRANS{$src}{$tgt}." ".$tgt;
    print {$ALIGN_OUT} " ||| " unless $first;
    print {$ALIGN_OUT} $ALIGN{$src}{$tgt};
    # remember the translation with the highest count (first one wins on ties)
    if ($TRANS{$src}{$tgt} > $max) {
      $max = $TRANS{$src}{$tgt};
      $best = $tgt;
    }
    $first = 0;
  }
  print {$TGT_OUT} "\n";
  print {$ALIGN_OUT} "\n";
  print {$TGT_MF} $best."\n";
  print {$ALIGN_MF} $ALIGN{$src}{$best}."\n";
}
# closing a write handle flushes it; report failures such as a full disk
close($SRC_OUT)   or die "Cannot close $src_out: $!\n";
close($TGT_OUT)   or die "Cannot close $tgt_out: $!\n";
close($TGT_MF)    or die "Cannot close $tgt_mf: $!\n";
close($ALIGN_OUT) or die "Cannot close $align_out: $!\n";
close($ALIGN_MF)  or die "Cannot close $align_mf: $!\n";
|
||||
|
308
contrib/fuzzy-match/old/make-pt-from-tm.perl
Executable file
@ -0,0 +1,308 @@
|
||||
#!/usr/bin/perl -w

# Builds a hierarchical phrase table from translation-memory matches.
#
# Usage:
#   make-pt-from-tm.perl INPUT TM_SOURCE TM_TARGET TM_ALIGNMENT LEX_FILE PT_FILE
#
# Steps:
#   1. run fuzzy-match (expected next to this script) on INPUT against TM_SOURCE,
#   2. turn every match into an XML frame and a hierarchical rule (create_xml),
#   3. sort the extracted rules and call train-model.perl (step 6) to score them.
# Intermediate files are written to a tmp.PID directory next to PT_FILE.

use strict;
use FindBin qw($RealBin);
use File::Basename;
use File::Path qw(make_path);

my $DEBUG = 1;
my $OUTPUT_RULES = 1;

die "usage: $0 in-file source-file target-file alignment-file lex-file pt-file\n"
  unless scalar(@ARGV) >= 6;

#my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
my $in_file = $ARGV[0]; #"$data_root/in/ac-test.input.tc.4";
my $source_file = $ARGV[1]; #"$data_root/in/acquis.truecased.4.en.uniq";
my $target_file = $ARGV[2]; #"$data_root/in/acquis.truecased.4.fr.uniq";
my $alignment_file = $ARGV[3]; #"$data_root/in/acquis.truecased.4.align.uniq";
my $lex_file = $ARGV[4]; #$data_root/in/lex.4;
my $pt_file = $ARGV[5]; #"$data_root/out/pt";

# Quotes one argument for /bin/sh so paths with spaces or metacharacters
# survive interpolation into a command line.
sub shell_quote {
  my ($arg) = @_;
  $arg =~ s/'/'\\''/g;
  return "'$arg'";
}

# Runs a shell command and dies if it cannot be started or exits non-zero.
sub run_or_die {
  my ($command) = @_;
  system($command) == 0 or die "Command failed (status $?): $command\n";
}

# Reads a whole text file and returns its lines without trailing newlines.
sub read_lines {
  my ($file) = @_;
  open(my $fh, '<', $file) or die "Cannot read $file: $!\n";
  my @lines = <$fh>;
  close($fh);
  # chomp (not chop) so a last line without newline keeps its final character
  chomp(@lines);
  return @lines;
}

my $cmd;

my $TMPDIR=dirname($pt_file) ."/tmp.$$";
make_path($TMPDIR);
-d $TMPDIR or die "Cannot create $TMPDIR\n";

my $match_file = "$TMPDIR/match";

# suffix array creation and extraction
$cmd = join(" ", shell_quote("$RealBin/fuzzy-match"), "--multiple",
            shell_quote($in_file), shell_quote($source_file),
            ">", shell_quote($match_file));
print STDERR "$cmd \n";
run_or_die($cmd);

# make into xml and pt
my $out_file = "$TMPDIR/ac-test.input.xml.4.uniq.multi.tuning";

my @INPUT = read_lines($in_file);
my @ALL_SOURCE = read_lines($source_file);
my @ALL_TARGET = read_lines($target_file);
my @ALL_ALIGNMENT = read_lines($alignment_file);

open(my $MATCH, '<', $match_file) or die "Cannot read $match_file: $!\n";
open(my $FRAME, '>', $out_file) or die "Cannot write $out_file: $!\n";
my ($RULE,$RULE_INV);
if ($OUTPUT_RULES) {
  open($RULE, '>', "$out_file.extract")
    or die "Cannot write $out_file.extract: $!\n";
  open($RULE_INV, '>', "$out_file.extract.inv")
    or die "Cannot write $out_file.extract.inv: $!\n";
}
open(my $INFO, '>', "$out_file.info") or die "Cannot write $out_file.info: $!\n";
while( my $match = <$MATCH> ) {
  chomp($match);
  # match line: "INPUT_ID SCORE ||| TM_SENTENCE_ID ||| EDIT_PATH"
  my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
  # split drops a trailing empty field, so an empty edit path arrives as undef
  $path = "" unless defined($path);

  $score =~ /^(\d+) (.+)/ || die "Malformed match line $.: $match\n";
  my ($i,$match_score) = ($1,$2);
  print STDERR "i=$i match_score=$match_score\n";

  # construct frame
  if ($sentence < 1e9 && $sentence >= 0) {
    my $SOURCE = $ALL_SOURCE[$sentence];
    # target and alignment lines hold parallel " ||| "-separated alternatives
    my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
    my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);

    for(my $j=0;$j<scalar(@TARGET);$j++) {
      # each alternative is "COUNT TARGET SENTENCE"
      $TARGET[$j] =~ /^(\d+) (.+)$/
        || die "Malformed target entry for TM sentence $sentence: $TARGET[$j]\n";
      my ($target_count,$target) = ($1,$2);
      my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) =
        &create_xml($SOURCE,
                    $INPUT[$i],
                    $target,
                    $ALIGNMENT[$j],
                    $path);
      print {$FRAME} $frame."\n";
      print {$RULE} "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
      print {$RULE_INV} "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
      print {$INFO} "$i ||| $match_score ||| $target_count\n";
    }
  }
}
close($FRAME) or die "Cannot close $out_file: $!\n";
close($MATCH);
close($INFO) or die "Cannot close $out_file.info: $!\n";

if ($OUTPUT_RULES)
{
  close($RULE) or die "Cannot close $out_file.extract: $!\n";
  close($RULE_INV) or die "Cannot close $out_file.extract.inv: $!\n";

  # the extract files only exist when rules were written, so sort them here
  run_or_die("LC_ALL=C sort ".shell_quote("$out_file.extract")
             ." | gzip -c > ".shell_quote("$out_file.extract.sorted.gz"));
  run_or_die("LC_ALL=C sort ".shell_quote("$out_file.extract.inv")
             ." | gzip -c > ".shell_quote("$out_file.extract.inv.sorted.gz"));

  $cmd = join(" ", shell_quote("$RealBin/../../scripts/training/train-model.perl"),
              "-dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical",
              "-extract-file", shell_quote("$out_file.extract"),
              "-lexical-file", shell_quote($lex_file),
              "-phrase-translation-table", shell_quote($pt_file));
  print STDERR "Executing: $cmd \n";
  run_or_die($cmd);
}

#$cmd = "rm -rf $TMPDIR";
#`$cmd`;
|
||||
|
||||
#######################################################
|
||||
# Builds the XML frame and the hierarchical rule for one translation-memory
# match.
#
# Arguments:
#   $source    - TM source sentence (space-separated tokens)
#   $input     - input sentence (space-separated tokens)
#   $target    - TM target sentence (space-separated tokens)
#   $alignment - source/target word alignment as space-separated "s-t" points
#   $path      - edit path, one letter per step: "I" advances only the input
#                position, "D" advances only the source position, every other
#                letter advances both; "M" marks a matching input word
#
# Returns the list ($frame, $rule_s, $rule_t, $rule_alignment,
# $rule_alignment_inv): the XML frame, the source and target sides of the
# rule (mismatches replaced by "[X][X]"), the rule-internal alignment and the
# same alignment with both sides swapped.
#
# Debug traces are written to STDERR.
sub create_xml {
  my ($source,$input,$target,$alignment,$path) = @_;

  print STDERR " HIEU \n $source \n $input \n $target \n $alignment \n $path \n";

  my @INPUT = split(/ /,$input);
  my @TARGET = split(/ /,$target);
  my %ALIGN = &create_alignment($alignment);

  # %FRAME_INPUT: target position -> input words to insert after it (-1 = front)
  my %FRAME_INPUT;
  my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
  # every target word starts as "kept"; mismatches clear entries below
  foreach (@TARGET) { push @TARGET_BITMAP,1 }

  ### STEP 1: FIND MISMATCHES

  my ($s,$i) = (0,0);
  my $currently_matching = 0;
  my ($start_s,$start_i) = (0,0);

  $path .= "X"; # indicate end
  print STDERR "$input\n$source\n$target\n$path\n";
  for(my $p=0;$p<length($path);$p++) {
    my $action = substr($path,$p,1);

    # beginning of a mismatch
    if ($currently_matching && $action ne "M" && $action ne "X") {
      $start_i = $i;
      $start_s = $s;
      $currently_matching = 0;
    }

    # end of a mismatch
    elsif (!$currently_matching &&
           ($action eq "M" || $action eq "X")) {

      # remove use of affected target words
      for(my $ss = $start_s; $ss<$s; $ss++) {
        foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
          $TARGET_BITMAP[$tt] = 0;
        }

        # also remove enclosed unaligned words?
      }

      # are there input words that need to be inserted ?
      print STDERR "($start_i<$i)?\n";
      if ($start_i<$i) {

        # take note of input words to be inserted
        my $insertion = "";
        for(my $ii = $start_i; $ii<$i; $ii++) {
          $insertion .= $INPUT[$ii]." ";
        }

        # find position for inserted input words

        # find first removed target word; stays undefined when none of the
        # mismatched source words is aligned (no magic upper bound, so target
        # positions of any size are handled)
        my $start_t;
        for(my $ss = $start_s; $ss<$s; $ss++) {
          foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
            $start_t = $tt if !defined($start_t) || $tt < $start_t;
          }
        }

        # end of sentence? add to end
        if (!defined($start_t) && $i > $#INPUT) {
          $start_t = $#TARGET;
        }

        # backtrack to previous words if unaligned
        if (!defined($start_t)) {
          $start_t = -1;
          for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
            foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
              $start_t = $tt if $tt > $start_t;
            }
          }
        }
        $FRAME_INPUT{$start_t} .= $insertion;
        my %NT = ("start_t" => $start_t,
                  "start_i" => $start_i );
        push @NT,\%NT;
      }
      $currently_matching = 1;
    }

    print STDERR "$action $s $i ($start_s $start_i) $currently_matching";
    if ($action ne "I") {
      print STDERR " ->";
      foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
        print STDERR " ".$tt;
      }
    }
    print STDERR "\n";
    $s++ unless $action eq "I";
    $i++ unless $action eq "D";
    $ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
    push @INPUT_BITMAP, 1 if $action eq "M";
    push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
  }


  print STDERR $target."\n";
  foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
  foreach (sort keys %FRAME_INPUT) {
    print STDERR "$_: $FRAME_INPUT{$_}\n";
  }

  ### STEP 2: BUILD RULE AND FRAME

  # hierarchical rule
  my $rule_s = "";
  my $rule_pos_s = 0;
  my %RULE_ALIGNMENT_S;
  for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
    if ($INPUT_BITMAP[$i]) {
      $rule_s .= $INPUT[$i]." ";
      $RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
    }
    foreach my $NT (@NT) {
      if ($i == $$NT{"start_i"}) {
        $rule_s .= "[X][X] ";
        $$NT{"rule_pos_s"} = $rule_pos_s++;
      }
    }
  }

  my $rule_t = "";
  my $rule_pos_t = 0;
  my %RULE_ALIGNMENT_T;
  # starts at -1 so non-terminals anchored before the first target word are emitted
  for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
    if ($t>=0 && $TARGET_BITMAP[$t]) {
      $rule_t .= $TARGET[$t]." ";
      $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
    }
    foreach my $NT (@NT) {
      if ($t == $$NT{"start_t"}) {
        $rule_t .= "[X][X] ";
        $$NT{"rule_pos_t"} = $rule_pos_t++;
      }
    }
  }

  my $rule_alignment = "";
  foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
    foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
      next unless defined($RULE_ALIGNMENT_T{$t});
      $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
    }
  }
  foreach my $NT (@NT) {
    $rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
  }

  # drop the trailing space of each string
  chop($rule_s);
  chop($rule_t);
  chop($rule_alignment);

  my $rule_alignment_inv = "";
  foreach (split(/ /,$rule_alignment)) {
    # skip anything that is not a complete "s-t" point; without the check a
    # failed match would reuse the captures of the previous successful match
    /^(\d+)\-(\d+)$/ || next;
    $rule_alignment_inv .= "$2-$1 ";
  }
  chop($rule_alignment_inv);

  # frame
  my $frame = "";
  $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};

  my $currently_included = 0;
  my $start_t = -1;
  push @TARGET_BITMAP,0; # indicate end

  for(my $t=0;$t<=scalar(@TARGET);$t++) {
    # beginning of tm target inclusion
    if (!$currently_included && $TARGET_BITMAP[$t]) {
      $start_t = $t;
      $currently_included = 1;
    }

    # end of tm target inclusion (not included word or inserted input)
    elsif ($currently_included &&
           (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
      # add xml (unless change is at the beginning of the sentence
      if ($start_t >= 0) {
        my $target = "";
        print STDERR "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
        # when word $t is itself kept (bitmap 1) it is still part of this span
        for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
          $target .= $TARGET[$tt] . " ";
        }
        chop($target);
        $frame .= "<xml translation=\"$target\"> x </xml> ";
      }
      $currently_included = 0;
    }

    $frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
    print STDERR "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
  }

  print STDERR $frame."\n-------------------------------------\n";
  return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
}
|
||||
|
||||
# Parses a word-alignment line of space-separated "s-t" points.
#
# Returns a hash with two entries:
#   's' => array ref indexed by source position; each element is a hash whose
#          keys are the aligned target positions
#   't' => the same mapping in the opposite direction
# An undefined or empty line gives two empty arrays.  Tokens that are not of
# the form DIGITS-DIGITS (for example the empty token produced by a double
# space) are ignored; previously they were recorded as an alignment at
# position 0 with an empty key.
sub create_alignment {
  my ($line) = @_;
  my (@ALIGNED_TO_S,@ALIGNED_TO_T);
  $line = "" unless defined($line);
  foreach my $point (split(/ /,$line)) {
    next unless $point =~ /^(\d+)\-(\d+)$/;
    my ($s,$t) = ($1,$2);
    $ALIGNED_TO_S[$s]{$t}++;
    $ALIGNED_TO_T[$t]{$s}++;
  }
  my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
  return %ALIGNMENT;
}
|
300
contrib/fuzzy-match/old/make-pt-from-tm2.perl
Executable file
@ -0,0 +1,300 @@
|
||||
#!/usr/bin/perl -w

# Builds a hierarchical phrase table from translation-memory matches.
#
# Usage:
#   make-pt-from-tm2.perl INPUT TM_SOURCE TM_TARGET TM_ALIGNMENT LEX_FILE PT_FILE
#
# Steps:
#   1. run fuzzy-match (expected next to this script) on INPUT against TM_SOURCE,
#   2. turn every match into an XML frame and a hierarchical rule (create_xml),
#   3. sort the extracted rules and call train-model.perl (step 6) to score them.
# Intermediate files are written to /tmp/tmp.PID.
#
# Changes from the earlier revision: the "-d" switch (which started the Perl
# debugger) is gone from the shebang line, the temporary directory is no
# longer overwritten with a developer-specific absolute path, and the input /
# TM files are actually loaded (the arrays were used but never declared, so
# the script did not compile under "use strict").

use strict;
use FindBin qw($RealBin);
use File::Basename;
use File::Path qw(make_path);

my $DEBUG = 1;
my $OUTPUT_RULES = 1;

die "usage: $0 in-file source-file target-file alignment-file lex-file pt-file\n"
  unless scalar(@ARGV) >= 6;

#my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
my $in_file = $ARGV[0]; #"$data_root/in/ac-test.input.tc.4";
my $source_file = $ARGV[1]; #"$data_root/in/acquis.truecased.4.en.uniq";
my $target_file = $ARGV[2]; #"$data_root/in/acquis.truecased.4.fr.uniq";
my $alignment_file = $ARGV[3]; #"$data_root/in/acquis.truecased.4.align.uniq";
my $lex_file = $ARGV[4]; #$data_root/in/lex.4;
my $pt_file = $ARGV[5]; #"$data_root/out/pt";

# Quotes one argument for /bin/sh so paths with spaces or metacharacters
# survive interpolation into a command line.
sub shell_quote {
  my ($arg) = @_;
  $arg =~ s/'/'\\''/g;
  return "'$arg'";
}

# Runs a shell command and dies if it cannot be started or exits non-zero.
sub run_or_die {
  my ($command) = @_;
  system($command) == 0 or die "Command failed (status $?): $command\n";
}

# Reads a whole text file and returns its lines without trailing newlines.
sub read_lines {
  my ($file) = @_;
  open(my $fh, '<', $file) or die "Cannot read $file: $!\n";
  my @lines = <$fh>;
  close($fh);
  chomp(@lines);
  return @lines;
}

my $cmd;

my $TMPDIR= "/tmp/tmp.$$";
make_path($TMPDIR);
-d $TMPDIR or die "Cannot create $TMPDIR\n";

my $match_file = "$TMPDIR/match";

# suffix array creation and extraction
$cmd = join(" ", shell_quote("$RealBin/fuzzy-match"), "--multiple",
            shell_quote($in_file), shell_quote($source_file),
            ">", shell_quote($match_file));
run_or_die($cmd);

# make into xml and pt
my $out_file = "$TMPDIR/ac-test.input.xml.4.uniq.multi.tuning";

my @INPUT = read_lines($in_file);
my @ALL_SOURCE = read_lines($source_file);
my @ALL_TARGET = read_lines($target_file);
my @ALL_ALIGNMENT = read_lines($alignment_file);

open(my $MATCH, '<', $match_file) or die "Cannot read $match_file: $!\n";
open(my $FRAME, '>', $out_file) or die "Cannot write $out_file: $!\n";
my ($RULE,$RULE_INV);
if ($OUTPUT_RULES) {
  open($RULE, '>', "$out_file.extract")
    or die "Cannot write $out_file.extract: $!\n";
  open($RULE_INV, '>', "$out_file.extract.inv")
    or die "Cannot write $out_file.extract.inv: $!\n";
}
open(my $INFO, '>', "$out_file.info") or die "Cannot write $out_file.info: $!\n";
while( my $match = <$MATCH> ) {
  # chomp (not chop) so a last line without newline keeps its final character
  chomp($match);
  # match line: "INPUT_ID SCORE ||| TM_SENTENCE_ID ||| EDIT_PATH"
  my ($score,$sentence,$path) = split(/ \|\|\| /,$match);
  # split drops a trailing empty field, so an empty edit path arrives as undef
  $path = "" unless defined($path);

  $score =~ /^(\d+) (.+)/ || die "Malformed match line $.: $match\n";
  my ($i,$match_score) = ($1,$2);

  # construct frame
  if ($sentence < 1e9 && $sentence >= 0) {
    my $SOURCE = $ALL_SOURCE[$sentence];
    # target and alignment lines hold parallel " ||| "-separated alternatives
    my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
    my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);

    for(my $j=0;$j<scalar(@TARGET);$j++) {
      # each alternative is "COUNT TARGET SENTENCE"
      $TARGET[$j] =~ /^(\d+) (.+)$/
        || die "Malformed target entry for TM sentence $sentence: $TARGET[$j]\n";
      my ($target_count,$target) = ($1,$2);
      my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) =
        &create_xml($SOURCE,
                    $INPUT[$i],
                    $target,
                    $ALIGNMENT[$j],
                    $path);
      print {$FRAME} $frame."\n";
      print {$RULE} "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
      print {$RULE_INV} "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
      print {$INFO} "$i ||| $match_score ||| $target_count\n";
    }
  }
}
close($FRAME) or die "Cannot close $out_file: $!\n";
close($MATCH);
close($INFO) or die "Cannot close $out_file.info: $!\n";

if ($OUTPUT_RULES)
{
  close($RULE) or die "Cannot close $out_file.extract: $!\n";
  close($RULE_INV) or die "Cannot close $out_file.extract.inv: $!\n";

  # the extract files only exist when rules were written, so sort them here
  run_or_die("LC_ALL=C sort ".shell_quote("$out_file.extract")
             ." | gzip -c > ".shell_quote("$out_file.extract.sorted.gz"));
  run_or_die("LC_ALL=C sort ".shell_quote("$out_file.extract.inv")
             ." | gzip -c > ".shell_quote("$out_file.extract.inv.sorted.gz"));

  $cmd = join(" ", shell_quote("$RealBin/../../scripts/training/train-model.perl"),
              "-dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical",
              "-extract-file", shell_quote("$out_file.extract"),
              "-lexical-file", shell_quote($lex_file),
              "-phrase-translation-table", shell_quote($pt_file));
  print STDERR "Executing: $cmd \n";
  run_or_die($cmd);
}

#$cmd = "rm -rf $TMPDIR";
#`$cmd`;
|
||||
|
||||
#######################################################
|
||||
# Builds the XML frame and the hierarchical rule for one translation-memory
# match.
#
# Arguments:
#   $source    - TM source sentence (space-separated tokens)
#   $input     - input sentence (space-separated tokens)
#   $target    - TM target sentence (space-separated tokens)
#   $alignment - source/target word alignment as space-separated "s-t" points
#   $path      - edit path, one letter per step: "I" advances only the input
#                position, "D" advances only the source position, every other
#                letter advances both; "M" marks a matching input word
#
# Returns the list ($frame, $rule_s, $rule_t, $rule_alignment,
# $rule_alignment_inv): the XML frame, the source and target sides of the
# rule (mismatches replaced by "[X][X]"), the rule-internal alignment and the
# same alignment with both sides swapped.
#
# Debug traces are written to STDERR.
sub create_xml {
  my ($source,$input,$target,$alignment,$path) = @_;

  my @INPUT = split(/ /,$input);
  my @TARGET = split(/ /,$target);
  my %ALIGN = &create_alignment($alignment);

  # %FRAME_INPUT: target position -> input words to insert after it (-1 = front)
  my %FRAME_INPUT;
  my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
  # every target word starts as "kept"; mismatches clear entries below
  foreach (@TARGET) { push @TARGET_BITMAP,1 }

  ### STEP 1: FIND MISMATCHES

  my ($s,$i) = (0,0);
  my $currently_matching = 0;
  my ($start_s,$start_i) = (0,0);

  $path .= "X"; # indicate end
  print STDERR "$input\n$source\n$target\n$path\n";
  for(my $p=0;$p<length($path);$p++) {
    my $action = substr($path,$p,1);

    # beginning of a mismatch
    if ($currently_matching && $action ne "M" && $action ne "X") {
      $start_i = $i;
      $start_s = $s;
      $currently_matching = 0;
    }

    # end of a mismatch
    elsif (!$currently_matching &&
           ($action eq "M" || $action eq "X")) {

      # remove use of affected target words
      for(my $ss = $start_s; $ss<$s; $ss++) {
        foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
          $TARGET_BITMAP[$tt] = 0;
        }

        # also remove enclosed unaligned words?
      }

      # are there input words that need to be inserted ?
      print STDERR "($start_i<$i)?\n";
      if ($start_i<$i) {

        # take note of input words to be inserted
        my $insertion = "";
        for(my $ii = $start_i; $ii<$i; $ii++) {
          $insertion .= $INPUT[$ii]." ";
        }

        # find position for inserted input words

        # find first removed target word; stays undefined when none of the
        # mismatched source words is aligned (no magic upper bound, so target
        # positions of any size are handled)
        my $start_t;
        for(my $ss = $start_s; $ss<$s; $ss++) {
          foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
            $start_t = $tt if !defined($start_t) || $tt < $start_t;
          }
        }

        # end of sentence? add to end
        if (!defined($start_t) && $i > $#INPUT) {
          $start_t = $#TARGET;
        }

        # backtrack to previous words if unaligned
        if (!defined($start_t)) {
          $start_t = -1;
          for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
            foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
              $start_t = $tt if $tt > $start_t;
            }
          }
        }
        $FRAME_INPUT{$start_t} .= $insertion;
        my %NT = ("start_t" => $start_t,
                  "start_i" => $start_i );
        push @NT,\%NT;
      }
      $currently_matching = 1;
    }

    print STDERR "$action $s $i ($start_s $start_i) $currently_matching";
    if ($action ne "I") {
      print STDERR " ->";
      foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
        print STDERR " ".$tt;
      }
    }
    print STDERR "\n";
    $s++ unless $action eq "I";
    $i++ unless $action eq "D";
    $ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
    push @INPUT_BITMAP, 1 if $action eq "M";
    push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
  }


  print STDERR $target."\n";
  foreach (@TARGET_BITMAP) { print STDERR $_; } print STDERR "\n";
  foreach (sort keys %FRAME_INPUT) {
    print STDERR "$_: $FRAME_INPUT{$_}\n";
  }

  ### STEP 2: BUILD RULE AND FRAME

  # hierarchical rule
  my $rule_s = "";
  my $rule_pos_s = 0;
  my %RULE_ALIGNMENT_S;
  for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
    if ($INPUT_BITMAP[$i]) {
      $rule_s .= $INPUT[$i]." ";
      $RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
    }
    foreach my $NT (@NT) {
      if ($i == $$NT{"start_i"}) {
        $rule_s .= "[X][X] ";
        $$NT{"rule_pos_s"} = $rule_pos_s++;
      }
    }
  }

  my $rule_t = "";
  my $rule_pos_t = 0;
  my %RULE_ALIGNMENT_T;
  # starts at -1 so non-terminals anchored before the first target word are emitted
  for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
    if ($t>=0 && $TARGET_BITMAP[$t]) {
      $rule_t .= $TARGET[$t]." ";
      $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
    }
    foreach my $NT (@NT) {
      if ($t == $$NT{"start_t"}) {
        $rule_t .= "[X][X] ";
        $$NT{"rule_pos_t"} = $rule_pos_t++;
      }
    }
  }

  my $rule_alignment = "";
  foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
    foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
      next unless defined($RULE_ALIGNMENT_T{$t});
      $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
    }
  }
  foreach my $NT (@NT) {
    $rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
  }

  # drop the trailing space of each string
  chop($rule_s);
  chop($rule_t);
  chop($rule_alignment);

  my $rule_alignment_inv = "";
  foreach (split(/ /,$rule_alignment)) {
    # skip anything that is not a complete "s-t" point; without the check a
    # failed match would reuse the captures of the previous successful match
    /^(\d+)\-(\d+)$/ || next;
    $rule_alignment_inv .= "$2-$1 ";
  }
  chop($rule_alignment_inv);

  # frame
  my $frame = "";
  $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};

  my $currently_included = 0;
  my $start_t = -1;
  push @TARGET_BITMAP,0; # indicate end

  for(my $t=0;$t<=scalar(@TARGET);$t++) {
    # beginning of tm target inclusion
    if (!$currently_included && $TARGET_BITMAP[$t]) {
      $start_t = $t;
      $currently_included = 1;
    }

    # end of tm target inclusion (not included word or inserted input)
    elsif ($currently_included &&
           (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
      # add xml (unless change is at the beginning of the sentence
      if ($start_t >= 0) {
        my $target = "";
        print STDERR "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n";
        # when word $t is itself kept (bitmap 1) it is still part of this span
        for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
          $target .= $TARGET[$tt] . " ";
        }
        chop($target);
        $frame .= "<xml translation=\"$target\"> x </xml> ";
      }
      $currently_included = 0;
    }

    $frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
    print STDERR "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n";
  }

  print STDERR $frame."\n-------------------------------------\n";
  return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
}
|
||||
|
||||
# Parses a word-alignment line of space-separated "s-t" points.
#
# Returns a hash with two entries:
#   's' => array ref indexed by source position; each element is a hash whose
#          keys are the aligned target positions
#   't' => the same mapping in the opposite direction
# An undefined or empty line gives two empty arrays.  Tokens that are not of
# the form DIGITS-DIGITS (for example the empty token produced by a double
# space) are ignored; previously they were recorded as an alignment at
# position 0 with an empty key.
sub create_alignment {
  my ($line) = @_;
  my (@ALIGNED_TO_S,@ALIGNED_TO_T);
  $line = "" unless defined($line);
  foreach my $point (split(/ /,$line)) {
    next unless $point =~ /^(\d+)\-(\d+)$/;
    my ($s,$t) = ($1,$2);
    $ALIGNED_TO_S[$s]{$t}++;
    $ALIGNED_TO_T[$t]{$s}++;
  }
  my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
  return %ALIGNMENT;
}
|
288
contrib/fuzzy-match/old/make-xml-from-match-multiple.perl
Executable file
@ -0,0 +1,288 @@
|
||||
#!/usr/bin/perl -w

# Turns fuzzy-match results into decoder input and hierarchical rules.
#
# For every line of the match file ("<input index> <match score> |||
# <TM sentence index> ||| <edit path>") the matched translation-memory
# sentence is looked up, and for each of its target variants create_xml()
# builds an XML-annotated input frame plus one hierarchical rule.
#
# Outputs (all derived from $out_file):
#   $out_file               one frame per match/target variant
#   $out_file.extract       rules, source -> target     (if $OUTPUT_RULES)
#   $out_file.extract.inv   rules, target -> source     (if $OUTPUT_RULES)
#   $out_file.info          "input index ||| match score ||| target count"
# When $OUTPUT_RULES is set the extract files are also sorted/gzipped and
# train-model.perl step 6 is run to score them into a phrase table.

use strict;

my $DEBUG = 1;
my $OUTPUT_RULES = 1;

my $scripts_root_dir = "/Users/hieuhoang/workspace/github/hieuhoang/scripts";

my $data_root = "/Users/hieuhoang/workspace/experiment/data/tm-mt-integration/";
#my $match_file  = "$data_root/in/BEST.acquis-xml-escaped.4.uniq.multi.tuning";
my $match_file  = "$data_root/out/BEST";
my $source_file = "$data_root/in/acquis.truecased.4.en.uniq";
my $target_file = "$data_root/in/acquis.truecased.4.fr.uniq";
my $alignment_file = "$data_root/in/acquis.truecased.4.align.uniq";
my $out_file = "$data_root/out/ac-test.input.xml.4.uniq.multi.tuning";
my $in_file = "$data_root/in/ac-test.input.tc.4";

#my $match_file  = "tm/BEST.acquis-xml-escaped.4.uniq.multi";
#my $source_file = "data/acquis.truecased.4.en.uniq";
#my $target_file = "data/acquis.truecased.4.fr.uniq";
#my $alignment_file = "data/acquis.truecased.4.align.uniq";
#my $out_file = "data/ac-test.input.xml.4.uniq.multi.xxx";
#my $in_file = "evaluation/ac-test.input.tc.4";

# Read a whole file into a list of lines with the line terminator removed.
# chomp (not chop) is used so a final line without a newline stays intact.
sub read_lines {
  my ($file) = @_;
  open(my $fh, "<", $file) or die "cannot read $file: $!";
  my @lines = <$fh>;
  close($fh);
  chomp(@lines);
  return @lines;
}

my @INPUT = read_lines($in_file);
my @ALL_SOURCE = read_lines($source_file);
my @ALL_TARGET = read_lines($target_file);
my @ALL_ALIGNMENT = read_lines($alignment_file);

open(MATCH, "<", $match_file) or die "cannot read $match_file: $!";
open(FRAME, ">", $out_file) or die "cannot write $out_file: $!";
if ($OUTPUT_RULES) {
  open(RULE, ">", "$out_file.extract")
    or die "cannot write $out_file.extract: $!";
  open(RULE_INV, ">", "$out_file.extract.inv")
    or die "cannot write $out_file.extract.inv: $!";
}
open(INFO, ">", "$out_file.info") or die "cannot write $out_file.info: $!";

while( my $match = <MATCH> ) {
  chomp($match);
  my ($score,$sentence,$path) = split(/ \|\|\| /,$match);

  # first field holds the input sentence index followed by the match score
  $score =~ /^(\d+) (.+)/ || die "malformed match line: $match";
  my ($i,$match_score) = ($1,$2);

  # construct frame
  if ($sentence < 1e9 && $sentence >= 0) {
    my $SOURCE = $ALL_SOURCE[$sentence];
    # a TM source sentence may have several targets (and one alignment
    # per target), separated by " ||| "
    my @ALIGNMENT = split(/ \|\|\| /,$ALL_ALIGNMENT[$sentence]);
    my @TARGET = split(/ \|\|\| /,$ALL_TARGET[$sentence]);

    for(my $j=0;$j<scalar(@TARGET);$j++) {
      # each target variant is prefixed by its count
      $TARGET[$j] =~ /^(\d+) (.+)$/ || die "malformed target entry: $TARGET[$j]";
      my ($target_count,$target) = ($1,$2);
      my ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv) =
        &create_xml($SOURCE,
                    $INPUT[$i],
                    $target,
                    $ALIGNMENT[$j],
                    $path);
      print FRAME $frame."\n";
      print RULE "$rule_s [X] ||| $rule_t [X] ||| $rule_alignment ||| $target_count\n" if $OUTPUT_RULES;
      print RULE_INV "$rule_t [X] ||| $rule_s [X] ||| $rule_alignment_inv ||| $target_count\n" if $OUTPUT_RULES;
      print INFO "$i ||| $match_score ||| $target_count\n";
    }
  }
}
close(FRAME);
close(MATCH);
close(RULE) if $OUTPUT_RULES;
close(RULE_INV) if $OUTPUT_RULES;
close(INFO);

# The extract files only exist when rules were written, so all of the
# post-processing is conditional on $OUTPUT_RULES.
if ($OUTPUT_RULES) {
  `LC_ALL=C sort $out_file.extract | gzip -c > $out_file.extract.sorted.gz`;
  `LC_ALL=C sort $out_file.extract.inv | gzip -c > $out_file.extract.inv.sorted.gz`;

  `$scripts_root_dir/training/train-model.perl -dont-zip -first-step 6 -last-step 6 -f en -e fr -hierarchical -extract-file $out_file.extract -lexical-file $data_root/in/lex.4 -phrase-translation-table $out_file.phrase-table`;
}
|
||||
|
||||
# Build an XML input frame and a hierarchical rule from one fuzzy match.
#
# Arguments:
#   $source    - translation-memory source sentence (not used in the body;
#                printed only in debug output)
#   $input     - input sentence to be translated, space-separated tokens
#   $target    - translation-memory target sentence, space-separated tokens
#   $alignment - source-target word alignment, parsed by create_alignment()
#   $path      - edit path, one character per step. As the code reads it:
#                "M" marks a matching word, "I" advances only the input
#                position, "D" advances only the source position, and any
#                other action (e.g. "S") advances both.
# Returns the list
#   ($frame, $rule_s, $rule_t, $rule_alignment, $rule_alignment_inv)
# where $frame is the input with reusable target spans wrapped in
# <xml translation="..."> markup, $rule_s/$rule_t are the rule sides with
# "[X][X]" placeholders for mismatched spans, and the two alignment strings
# hold "a-b" position pairs in rule coordinates (forward and inverted).
#
# Debug tracing goes to STDOUT and is emitted only while the file-level
# $DEBUG flag is true.
sub create_xml {
  my ($source,$input,$target,$alignment,$path) = @_;

  my @INPUT = split(/ /,$input);
  my @TARGET = split(/ /,$target);
  my %ALIGN = &create_alignment($alignment);

  my %FRAME_INPUT;   # target position => input words to insert there
  my (@NT,@INPUT_BITMAP,@TARGET_BITMAP,%ALIGNMENT_I_TO_S);
  # every target word starts out as usable
  foreach (@TARGET) { push @TARGET_BITMAP,1 }

  ### STEP 1: FIND MISMATCHES

  my ($s,$i) = (0,0);
  my $currently_matching = 0;
  my ($start_s,$start_i) = (0,0);

  $path .= "X"; # indicate end
  print "$input\n$source\n$target\n$path\n" if $DEBUG;
  for(my $p=0;$p<length($path);$p++) {
    my $action = substr($path,$p,1);

    # beginning of a mismatch
    if ($currently_matching && $action ne "M" && $action ne "X") {
      $start_i = $i;
      $start_s = $s;
      $currently_matching = 0;
    }

    # end of a mismatch
    elsif (!$currently_matching &&
           ($action eq "M" || $action eq "X")) {

      # remove use of affected target words
      for(my $ss = $start_s; $ss<$s; $ss++) {
        foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
          $TARGET_BITMAP[$tt] = 0;
        }

        # also remove enclosed unaligned words?
      }

      # are there input words that need to be inserted ?
      print "($start_i<$i)?\n" if $DEBUG;
      if ($start_i<$i) {

        # take note of input words to be inserted
        my $insertion = "";
        for(my $ii = $start_i; $ii<$i; $ii++) {
          $insertion .= $INPUT[$ii]." ";
        }

        # find position for inserted input words

        # find first removed target word; stays undefined when none of the
        # mismatched source words is aligned (no magic "1000" sentinel, so
        # long sentences are handled as well)
        my $start_t;
        for(my $ss = $start_s; $ss<$s; $ss++) {
          foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
            $start_t = $tt if !defined($start_t) || $tt < $start_t;
          }
        }

        # end of sentence? add to end
        if (!defined($start_t) && $i > $#INPUT) {
          $start_t = $#TARGET;
        }

        # backtrack to previous words if unaligned
        if (!defined($start_t)) {
          $start_t = -1;
          for(my $ss = $s-1; $start_t==-1 && $ss>=0; $ss--) {
            foreach my $tt (keys %{${$ALIGN{'s'}}[$ss]}) {
              $start_t = $tt if $tt > $start_t;
            }
          }
        }
        $FRAME_INPUT{$start_t} .= $insertion;
        my %NT = ("start_t" => $start_t,
                  "start_i" => $start_i );
        push @NT,\%NT;
      }
      $currently_matching = 1;
    }

    if ($DEBUG) {
      print "$action $s $i ($start_s $start_i) $currently_matching";
      if ($action ne "I") {
        print " ->";
        foreach my $tt (keys %{${$ALIGN{'s'}}[$s]}) {
          print " ".$tt;
        }
      }
      print "\n";
    }
    $s++ unless $action eq "I";
    $i++ unless $action eq "D";
    # NOTE(review): this is recorded after both counters were advanced, so
    # the entry for input position $i holds the source position that
    # follows the previous non-"D" step -- confirm this is intended when
    # "D" steps precede a match.
    $ALIGNMENT_I_TO_S{$i} = $s unless $action eq "D";
    push @INPUT_BITMAP, 1 if $action eq "M";
    push @INPUT_BITMAP, 0 if $action eq "I" || $action eq "S";
  }

  if ($DEBUG) {
    print $target."\n";
    foreach (@TARGET_BITMAP) { print $_; } print "\n";
    foreach (sort keys %FRAME_INPUT) {
      print "$_: $FRAME_INPUT{$_}\n";
    }
  }

  ### STEP 2: BUILD RULE AND FRAME

  # hierarchical rule, source side: matched input words plus one
  # non-terminal at the start of every recorded mismatch
  my $rule_s = "";
  my $rule_pos_s = 0;
  my %RULE_ALIGNMENT_S;
  for(my $i=0;$i<scalar(@INPUT_BITMAP);$i++) {
    if ($INPUT_BITMAP[$i]) {
      $rule_s .= $INPUT[$i]." ";
      $RULE_ALIGNMENT_S{$ALIGNMENT_I_TO_S{$i}} = $rule_pos_s++;
    }
    foreach my $NT (@NT) {
      if ($i == $$NT{"start_i"}) {
        $rule_s .= "[X][X] ";
        $$NT{"rule_pos_s"} = $rule_pos_s++;
      }
    }
  }

  # target side: kept target words plus the non-terminals; starts at -1
  # because an insertion may be placed before the first target word
  my $rule_t = "";
  my $rule_pos_t = 0;
  my %RULE_ALIGNMENT_T;
  for(my $t=-1;$t<scalar(@TARGET_BITMAP);$t++) {
    if ($t>=0 && $TARGET_BITMAP[$t]) {
      $rule_t .= $TARGET[$t]." ";
      $RULE_ALIGNMENT_T{$t} = $rule_pos_t++;
    }
    foreach my $NT (@NT) {
      if ($t == $$NT{"start_t"}) {
        $rule_t .= "[X][X] ";
        $$NT{"rule_pos_t"} = $rule_pos_t++;
      }
    }
  }

  # word alignment in rule coordinates, followed by the non-terminal links
  my $rule_alignment = "";
  foreach my $s (sort { $a <=> $b} keys %RULE_ALIGNMENT_S) {
    foreach my $t (keys %{$ALIGN{"s"}[$s]}) {
      next unless defined($RULE_ALIGNMENT_T{$t});
      $rule_alignment .= $RULE_ALIGNMENT_S{$s}."-".$RULE_ALIGNMENT_T{$t}." ";
    }
  }
  foreach my $NT (@NT) {
    $rule_alignment .= $$NT{"rule_pos_s"}."-".$$NT{"rule_pos_t"}." ";
  }

  # drop the trailing separator space (no-op on empty strings)
  chop($rule_s);
  chop($rule_t);
  chop($rule_alignment);

  my $rule_alignment_inv = "";
  foreach (split(/ /,$rule_alignment)) {
    # skip anything that is not a well-formed pair instead of re-using the
    # captures left over from an earlier successful match
    next unless /^(\d+)\-(\d+)$/;
    $rule_alignment_inv .= "$2-$1 ";
  }
  chop($rule_alignment_inv);

  # frame
  my $frame = "";
  $frame = $FRAME_INPUT{-1} if defined $FRAME_INPUT{-1};

  my $currently_included = 0;
  my $start_t = -1;
  push @TARGET_BITMAP,0; # indicate end

  for(my $t=0;$t<=scalar(@TARGET);$t++) {
    # beginning of tm target inclusion
    if (!$currently_included && $TARGET_BITMAP[$t]) {
      $start_t = $t;
      $currently_included = 1;
    }

    # end of tm target inclusion (not included word or inserted input)
    elsif ($currently_included &&
           (!$TARGET_BITMAP[$t] || defined($FRAME_INPUT{$t}))) {
      # add xml (unless change is at the beginning of the sentence)
      if ($start_t >= 0) {
        my $target = "";
        print "for(tt=$start_t;tt<$t+$TARGET_BITMAP[$t]);\n" if $DEBUG;
        # the span also covers word $t itself when that word is still
        # usable, i.e. the span was closed by an insertion at $t
        for(my $tt=$start_t;$tt<$t+$TARGET_BITMAP[$t];$tt++) {
          $target .= $TARGET[$tt] . " ";
        }
        chop($target);
        $frame .= "<xml translation=\"$target\"> x </xml> ";
      }
      $currently_included = 0;
    }

    $frame .= $FRAME_INPUT{$t} if defined $FRAME_INPUT{$t};
    print "$TARGET_BITMAP[$t] $t ($start_t) $currently_included\n" if $DEBUG;
  }

  print $frame."\n-------------------------------------\n" if $DEBUG;
  return ($frame,$rule_s,$rule_t,$rule_alignment,$rule_alignment_inv);
}
|
||||
|
||||
# Parse a word-alignment line into lookup tables for both directions.
#
# Argument:
#   $line - alignment points separated by single spaces, each written as
#           "SOURCE_INDEX-TARGET_INDEX" (e.g. "0-0 1-2 2-1").
# Returns a hash (as a flat list) with two entries:
#   's' => array ref indexed by source position; each defined element is a
#          hash whose keys are the target positions aligned to that word
#   't' => the same structure in the target-to-source direction
# Positions that occur in no alignment point are left undefined.
sub create_alignment {
  my ($line) = @_;
  my (@ALIGNED_TO_S,@ALIGNED_TO_T);

  # a missing alignment line simply means "no alignment points"
  $line = "" unless defined $line;

  foreach my $point (split(/ /,$line)) {
    # Skip empty tokens (doubled or leading spaces) and malformed points.
    # Without this check they would be stored as a bogus point at source
    # index 0 with an empty target key.
    next unless $point =~ /^(\d+)\-(\d+)\s*$/;
    my ($s,$t) = ($1,$2);
    $ALIGNED_TO_S[$s]{$t}++;
    $ALIGNED_TO_T[$t]{$s}++;
  }
  my %ALIGNMENT = ( 's' => \@ALIGNED_TO_S, 't' => \@ALIGNED_TO_T );
  return %ALIGNMENT;
}
|
27
contrib/fuzzy-match/suffix-test.cpp
Normal file
@ -0,0 +1,27 @@
|
||||
#include "SuffixArray.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
SuffixArray suffixArray( "/home/pkoehn/syntax/grammars/wmt09-de-en/corpus.1k.de" );
|
||||
//suffixArray.List(10,20);
|
||||
vector< string > der;
|
||||
der.push_back("der");
|
||||
vector< string > inDer;
|
||||
inDer.push_back("in");
|
||||
inDer.push_back("der");
|
||||
vector< string > zzz;
|
||||
zzz.push_back("zzz");
|
||||
vector< string > derDer;
|
||||
derDer.push_back("der");
|
||||
derDer.push_back("der");
|
||||
|
||||
cout << "count of 'der' " << suffixArray.Count( der ) << endl;
|
||||
cout << "limited count of 'der' " << suffixArray.MinCount( der, 2 ) << endl;
|
||||
cout << "count of 'in der' " << suffixArray.Count( inDer ) << endl;
|
||||
cout << "count of 'der der' " << suffixArray.Count( derDer ) << endl;
|
||||
cout << "limited count of 'der der' " << suffixArray.MinCount( derDer, 1 ) << endl;
|
||||
// cout << "count of 'zzz' " << suffixArray.Count( zzz ) << endl;
|
||||
// cout << "limited count of 'zzz' " << suffixArray.LimitedCount( zzz, 1 ) << endl;
|
||||
}
|
129
contrib/iSenWeb/index.html
Executable file
@ -0,0 +1,129 @@
|
||||
<!DOCTYPE html>
|
||||
<HTML>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>Moses Translation System</title>
|
||||
<script type="text/javascript" src="jquery-1.7.2.js"></script>
|
||||
<link href="./themes/styles/common.css" rel="stylesheet" type="text/css" />
|
||||
<link href="./themes/styles/search.css" rel="stylesheet" type="text/css"/>
|
||||
<link href="./themes/styles/fanyi.css" rel="stylesheet" type="text/css" />
|
||||
</head>
|
||||
<script type="text/javascript">
// Submit the translation form in the background: the source text is POSTed
// to trans_result.php and the returned HTML fragment replaces the contents
// of #outputText, so the page itself is never reloaded.
$(document).ready(function()
{
	var targetDiv = $("#outputText");
	var input = $("#inputText");

	$("#transForm").submit(function()
	{
		$.ajax(
		{
			type: "POST",
			url: 'trans_result.php',
			data: {input1: input.val()},
			// always hand the reply over as raw markup, never auto-parse it
			dataType: "html",
			success: function(html)
			{
				targetDiv.html(html);
			},
			// a failed request must not dump an error page (or nothing at
			// all) into the result box
			error: function(xhr, status)
			{
				targetDiv.text("Translation request failed (" + status + ").");
			}
		});
		// suppress the browser's own form submission
		return false;
	});
});
</script>
|
||||
<body>
|
||||
<div class="topWrap">
|
||||
<div class="top">
|
||||
<div class="logo"><a href="/" title="English Chinese Translation Based on Moses">Home</a></div>
|
||||
|
||||
</div>
|
||||
<!-- top end -->
|
||||
</div>
|
||||
<div class="ConBox">
|
||||
<div class="hd">
|
||||
<div id="inputMod" class="column fl">
|
||||
<div class="wrapper">
|
||||
<!--
|
||||
<form action="trans_result.php" method="post" id="transForm" name="transForm">-->
|
||||
<form action="" method="post" id="transForm" name="transForm">
|
||||
<div class="row desc">
|
||||
Source Text:
|
||||
<input type="reset" name="clear" value="Clear"/>
|
||||
</div>
|
||||
<div class="row border content">
|
||||
<textarea id="inputText" class="text" dir="ltr" tabindex="1" wrap="SOFT" name="inputText"></textarea>
|
||||
|
||||
</div>
|
||||
<div class="row">
|
||||
<select>
|
||||
<option value ="en-cn">English >> Chinese </option>
|
||||
</select>
|
||||
<input type="submit" value="Translation"/>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
<!-- end of wrapper -->
|
||||
</div>
|
||||
<!-- end of div inputMod -->
|
||||
<div id="outputMod" class="column fr">
|
||||
<div class="wrapper">
|
||||
<div id="translated" style="display: block;">
|
||||
<div class="row desc"><span id="outputLang">en->ch</span></div>
|
||||
<div class="row">
|
||||
<div id="outputText" class="row">
|
||||
<div class="translated_result">
|
||||
|
||||
|
||||
</div>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
|
||||
</div>
|
||||
|
||||
<!-- end of entryList -->
|
||||
<!-- end translated -->
|
||||
</div>
|
||||
<!-- end of wrapper -->
|
||||
|
||||
|
||||
<div class="row cf" id="addons">
|
||||
<a id="feedback_link" target="_blank" href="#" class="fr">Feedback</a>
|
||||
<span id="suggestYou">
|
||||
选择<a data-pos="web.o.leftbottom" class="clog-js" data-clog="FUFEI_CLICK" href="http://nlp2ct.sftw.umac.mo/" target="_blank">人工翻译服务</a>,获得更专业的翻译结果。
|
||||
</span>
|
||||
</div>
|
||||
</div>
|
||||
<div id="errorHolder"><span class="error_text"></span></div>
|
||||
</div>
|
||||
<div style="clear:both"></div>
|
||||
<script type="text/javascript">
|
||||
var global = {};
|
||||
global.sessionFrom = "http://dict.youdao.com/";
|
||||
</script>
|
||||
<script type="text/javascript" src="http://impservice.dictweb.youdao.com/imp/dict_req_web_1.0.js"></script>
|
||||
<script data-main="fanyi" type="text/javascript" src="./themes/fanyi/v2.1.3.1/scripts/fanyi.js"></script>
|
||||
<div id="transBtnTip">
|
||||
<div id="transBtnTipInner">
|
||||
点击翻译按钮继续,查看网页翻译结果。
|
||||
<p class="ar">
|
||||
<a href="#" id="transBtnTipOK">I have known</a>
|
||||
</p>
|
||||
<b id="transBtnTipArrow"></b>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="Feedback"><a href="http://nlp2ct.sftw.umac.mo/" target="_blank">反馈信息给我们</a></div>
|
||||
|
||||
|
||||
<div class="footer" style="clear:both">
|
||||
<p><a href="http://nlp2ct.sftw.umac.mo/" target="_blank">Connect with us</a> <span>|</span>
|
||||
<a href="http://nlp2ct.sftw.umac.mo/" target="_blank">Moses Translation System</a> <span>|</span>
|
||||
Copyright© 2012-2012 NLP2CT All Right to Moses Group
|
||||
</p>
|
||||
<p>More</p>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</HTML>
|
9405
contrib/iSenWeb/jquery-1.7.2.js
vendored
Executable file
59
contrib/iSenWeb/moses.pl
Executable file
@ -0,0 +1,59 @@
|
||||
#!/usr/bin/perl -w
use warnings;
use strict;
$|++;

# file: daemon.pl

# Herve Saint-Amand
# Universitaet des Saarlandes
# Tue May 13 19:45:31 2008

# This script starts Moses to run in the background, so that it can be used by
# the CGI script. It spawns the Moses process, then binds itself to listen on
# some port, and when it gets a connection, reads it line by line, feeds those
# to Moses, and sends back the translation.

# You can either run one instance of this on your Web server, or, if you have
# the hardware setup for it, run several instances of this, then configure
# translate.cgi to connect to these.

#------------------------------------------------------------------------------
# includes

use IO::Socket::INET;
use IPC::Open2;

#------------------------------------------------------------------------------
# constants, global vars, config

my $MOSES = '/home/tianliang/research/moses-smt/scripts/training/model/moses';
my $MOSES_INI = '/home/tianliang/research/moses-smt/scripts/training/model/moses.ini';

die "usage: daemon.pl <hostname> <port>" unless (@ARGV == 2);
my $LISTEN_HOST = shift;
my $LISTEN_PORT = shift;

#------------------------------------------------------------------------------
# main

# spawn moses; we write source sentences to $MOSES_IN and read the
# translations back from $MOSES_OUT
my ($MOSES_IN, $MOSES_OUT);
my $pid = open2 ($MOSES_OUT, $MOSES_IN, $MOSES, '-f', $MOSES_INI);

# open server socket (direct method call instead of indirect-object "new";
# $@ carries the reason IO::Socket::INET gives for a failure)
my $server_sock = IO::Socket::INET->new
    (LocalAddr => $LISTEN_HOST, LocalPort => $LISTEN_PORT, Listen => 1)
    or die "Can't bind server socket: $@";

# serve one client at a time: one input line in, one translated line out
while (my $client_sock = $server_sock->accept) {
    while (my $line = <$client_sock>) {
        print $MOSES_IN $line;
        $MOSES_IN->flush ();

        # an undefined read means Moses has closed its output; stop here
        # rather than sending nothing and leaving the client waiting
        my $translation = <$MOSES_OUT>;
        die "Moses closed its output stream" unless defined $translation;
        print $client_sock $translation;
    }

    $client_sock->close ();
}
|
||||
|
||||
#------------------------------------------------------------------------------
|
BIN
contrib/iSenWeb/themes/images/common/Logo (1000x300).png
Executable file
After Width: | Height: | Size: 53 KiB |
BIN
contrib/iSenWeb/themes/images/common/Logo (2000x2000).png
Executable file
After Width: | Height: | Size: 266 KiB |
BIN
contrib/iSenWeb/themes/images/common/Logo (250x250).png
Executable file
After Width: | Height: | Size: 23 KiB |
BIN
contrib/iSenWeb/themes/images/common/Logo (500x500).png
Executable file
After Width: | Height: | Size: 55 KiB |
BIN
contrib/iSenWeb/themes/images/common/Logo.png
Executable file
After Width: | Height: | Size: 53 KiB |
BIN
contrib/iSenWeb/themes/images/common/Logo_lab.png
Executable file
After Width: | Height: | Size: 24 KiB |
BIN
contrib/iSenWeb/themes/images/common/header_bg.png
Executable file
After Width: | Height: | Size: 3.6 KiB |
BIN
contrib/iSenWeb/themes/images/common/ico_cor10.png
Executable file
After Width: | Height: | Size: 958 B |
BIN
contrib/iSenWeb/themes/images/common/icon_feedback.png
Executable file
After Width: | Height: | Size: 6.1 KiB |
BIN
contrib/iSenWeb/themes/images/common/logo_christmas.png
Executable file
After Width: | Height: | Size: 28 KiB |
BIN
contrib/iSenWeb/themes/images/common/logo_christmas1.png
Executable file
After Width: | Height: | Size: 6.2 KiB |
BIN
contrib/iSenWeb/themes/images/common/logo_christmas2.png
Executable file
After Width: | Height: | Size: 10 KiB |
BIN
contrib/iSenWeb/themes/images/common/logo_christmas3.png
Executable file
After Width: | Height: | Size: 34 KiB |
BIN
contrib/iSenWeb/themes/images/common/nav_bgn.png
Executable file
After Width: | Height: | Size: 2.9 KiB |
BIN
contrib/iSenWeb/themes/images/common/sidebar_bg.png
Executable file
After Width: | Height: | Size: 10 KiB |
BIN
contrib/iSenWeb/themes/images/fanyi/fanyi_sprite.png
Executable file
After Width: | Height: | Size: 8.5 KiB |
BIN
contrib/iSenWeb/themes/images/fanyi/inputTextBg.png
Executable file
After Width: | Height: | Size: 501 B |
BIN
contrib/iSenWeb/themes/images/search/s.png
Executable file
After Width: | Height: | Size: 4.3 KiB |
288
contrib/iSenWeb/themes/styles/common.css
Executable file
@ -0,0 +1,288 @@
|
||||
@charset "utf-8";
|
||||
|
||||
html,body,div,span,applet,object,iframe,table,caption,tbody,tfoot,thead,tr,th,td,del,dfn,em,font,img,ins,kbd,q,s,samp,small,strike,tt,var,h1,h2,h3,h4,h5,h6,p,blockquote,pre,a,abbr,acronym,address,big,cite,code,dl,dt,dd,ol,ul,li,fieldset,form,label,legend {
|
||||
outline:0;
|
||||
padding:0;
|
||||
margin:0;
|
||||
border:0;
|
||||
text-align:left;
|
||||
font-style:normal;
|
||||
word-wrap:break-word;
|
||||
}
|
||||
:focus {
|
||||
outline:0;
|
||||
}
|
||||
/* Base typography. "sans-serif" is left unquoted so that it is the generic
   fallback family; a quoted value would be looked up as a font name. */
body {
	font-family:"Microsoft Yahei","\534E\6587\9ED1\4F53","Arial","Verdana","Helvetica",sans-serif;
	color:#999;
	font-size:12px;
}
|
||||
ol,ul,li {
|
||||
list-style:none;
|
||||
}
|
||||
table {
|
||||
border-collapse:collapse;
|
||||
border-spacing:0;
|
||||
width:100%;
|
||||
}
|
||||
caption,th,td {
|
||||
font-weight:normal;
|
||||
text-align:left;
|
||||
vertical-align:top;
|
||||
}
|
||||
a:link,a:visited {
|
||||
font-family:"Microsoft Yahei";
|
||||
color:#568d99;
|
||||
text-decoration:none;
|
||||
}
|
||||
a:hover {
|
||||
font-family:"Microsoft Yahei";
|
||||
color:#568d99;
|
||||
text-decoration:underline;
|
||||
}
|
||||
input.txt {
|
||||
border-top:1px solid #cdcdcd;
|
||||
border-left:1px solid #a4a4a4;
|
||||
border-bottom:1px solid #e8e8e8;
|
||||
border-right:1px solid #d9d9d9;
|
||||
font-family:Arial,Helvetica,sans-serif;
|
||||
color:#666;
|
||||
font-size:14px
|
||||
}
|
||||
body {
|
||||
background:#eeefef;
|
||||
}
|
||||
.topWrap {
|
||||
height:200px;
|
||||
background:url(../images/common/header_bg.png) repeat-x center top;
|
||||
}
|
||||
.topW {
|
||||
width:940px;
|
||||
position:relative;
|
||||
margin:0 auto;
|
||||
}
|
||||
.top {
|
||||
width:900px;
|
||||
margin:0 auto;
|
||||
height:90px;
|
||||
z-index:100;
|
||||
}
|
||||
.top .logo {
|
||||
width:20px;
|
||||
height:300px;
|
||||
background:url(../images/common/Logo.png) no-repeat;
|
||||
_background:url(../images/common/logo.gif) no-repeat;
|
||||
float:left;
|
||||
margin:0px 0 0 0;
|
||||
}
|
||||
.top .logNoLogin {
|
||||
width:159px;
|
||||
overflow:hidden
|
||||
}
|
||||
.top .logo a {
|
||||
width:165px;
|
||||
height:55px;
|
||||
float:left;
|
||||
text-indent:-9999px;
|
||||
}
|
||||
.top .nav {
|
||||
float:right;
|
||||
margin-top:37px;
|
||||
font-size:16px;
|
||||
position:relative;
|
||||
width:542px;
|
||||
}
|
||||
.top .nav a {
|
||||
height:35px;
|
||||
line-height:23px;
|
||||
margin-left:10px;
|
||||
padding:0 10px;
|
||||
float:left;
|
||||
display:block;
|
||||
overflow:hidden;
|
||||
text-decoration:none;
|
||||
text-align:center;
|
||||
color:#3c6770;
|
||||
}
|
||||
.top .nav a:hover {
|
||||
background:url(../images/common/nav_bgn.png) no-repeat 0 -40px;
|
||||
_background:url(../images/common/nav_bgn.gif) no-repeat 0 -40px;
|
||||
color:#3c6770;
|
||||
}
|
||||
.top .nav a.current {
|
||||
background:url(../images/common/nav_bgn.png) no-repeat center 0;
|
||||
_background:url(../images/common/nav_bgn.gif) no-repeat center 0;
|
||||
}
|
||||
.top .nav .uname {
|
||||
float:right
|
||||
}
|
||||
.top .nav a.username {
|
||||
height:26px;
|
||||
max-width:96px;
|
||||
padding-right:4px;
|
||||
cursor:pointer;
|
||||
display:inline-block;
|
||||
vertical-align:middle
|
||||
}
|
||||
.top .nav a.username:hover {
|
||||
background:none;
|
||||
}
|
||||
.top .nav .uname .cor {
|
||||
display:inline-block;
|
||||
width:12px;
|
||||
height:12px;
|
||||
background:url(../images/common/ico_cor10.png) 0 0 no-repeat;
|
||||
cursor:pointer;
|
||||
vertical-align:middle;
|
||||
overflow:hidden
|
||||
}
|
||||
.noLogin .nav {
|
||||
width:auto;
|
||||
margin-right:48px;
|
||||
}
|
||||
|
||||
|
||||
/* Main content card: centred, fixed width, rounded corners, soft shadow.
   Vendor-prefixed declarations come first and the standard property last,
   so the standard one wins wherever it is supported. */
.ConBox {
	width:900px;
	min-height:600px;
	margin:15px auto 50px auto;
	padding-bottom:8px;
	-webkit-box-shadow:0 0 5px 0 #aeaeae;
	-moz-box-shadow:0 0 5px 0 #aeaeae;
	box-shadow:0 0 5px 0 #aeaeae;
	-webkit-border-radius:8px;
	-moz-border-radius:8px;
	border-radius:8px;
	background:#fff
}
|
||||
.ConBox .hd {
|
||||
padding:30px 30px 10px;
|
||||
}
|
||||
.ConBox .hd_left {
|
||||
float:left;
|
||||
width:560px;
|
||||
|
||||
}
|
||||
.ConBox .hd_right {
|
||||
float:right;
|
||||
width:260px;
|
||||
|
||||
}
|
||||
|
||||
|
||||
.ConBox .bd {
|
||||
padding:0px;
|
||||
float:right;
|
||||
}
|
||||
|
||||
.ConBox .rank-index {
|
||||
background-color: #E0EEF7;
|
||||
padding: 10px;
|
||||
}
|
||||
|
||||
.ConBox .right-panel-title {
|
||||
color: #035168;
|
||||
font: bolder 16px/ 18px "Microsoft Yahei";
|
||||
margin: 0 0 5px 0;
|
||||
}
|
||||
|
||||
.searchbar {
|
||||
width:900px;
|
||||
margin:10px auto 0;
|
||||
overflow:hidden;
|
||||
*zoom:1;
|
||||
}
|
||||
.searchbar .bd {
|
||||
float:right;
|
||||
border:1px solid #CBCBCD;
|
||||
height:28px;
|
||||
position:relative;
|
||||
width:181px;
|
||||
}
|
||||
.searchbar .bd input.ipt {
|
||||
background:url(../images/common/sidebar_bg.png) no-repeat 0 -300px;
|
||||
border:0 none;
|
||||
color:#cfcfcf;
|
||||
font-family:"Microsoft Yahei",arial;
|
||||
font-size:14px;
|
||||
height:28px;
|
||||
*height:27px;
|
||||
line-height:28px;
|
||||
margin:0;
|
||||
padding:0 33px 0 9px;
|
||||
width:119px;
|
||||
_background:url(../images/common/sidebar_bg.gif) 0 -300px no-repeat;
|
||||
*background-position:0 -301px;
|
||||
}
|
||||
.searchbar .bd input.btn {
|
||||
background:url(../images/common/sidebar_bg.png) no-repeat -188px -343px;
|
||||
border:0 none;
|
||||
cursor:pointer;
|
||||
height:28px;
|
||||
position:absolute;
|
||||
right:0;
|
||||
top:0;
|
||||
width:30px;
|
||||
_background:url(../images/common/sidebar_bg.gif) -188px -343px no-repeat;
|
||||
}
|
||||
.searchbar .inpt_focus {
|
||||
border:1px solid #649C9C;
|
||||
}
|
||||
.searchbar .inpt_focus input.btn {
|
||||
background-position:-188px -473px;
|
||||
}
|
||||
.searchbar .inpt_focus input.ipt {
|
||||
color:#333;
|
||||
}
|
||||
.wrap {
|
||||
clear:both;
|
||||
}
|
||||
.container {
|
||||
width:960px;
|
||||
margin:60px auto 0;
|
||||
}
|
||||
.content .bd {
|
||||
clear:both;
|
||||
}
|
||||
.footer {
|
||||
width:960px;
|
||||
height:66px;
|
||||
padding-top:20px;
|
||||
color:#b9b8b8;
|
||||
text-align:center;
|
||||
}
|
||||
.footer p {
|
||||
text-align:center;
|
||||
line-height:23px;
|
||||
}
|
||||
.footer a,.footer a:link {
|
||||
color:#b9b8b8;
|
||||
text-decoration:none;
|
||||
}
|
||||
.footer a:hover {
|
||||
color:#b9b8b8;
|
||||
text-decoration:underline;
|
||||
}
|
||||
.top .logo {
|
||||
height:200px;
|
||||
width:550px;
|
||||
background:url(../images/common/Logo_lab.png) no-repeat;
|
||||
_background:url(../images/common/logo_christmas_ie6.png) no-repeat
|
||||
}
|
||||
|
||||
|
||||
.Feedback {
|
||||
right: 0;
|
||||
position: fixed;
|
||||
top: 40%;
|
||||
_position: absolute;
|
||||
z-index: 85;
|
||||
}
|
||||
.Feedback a {
|
||||
display: block;
|
||||
width: 41px;
|
||||
height: 127px;
|
||||
background: url(../images/common/icon_feedback.png) no-repeat;
|
||||
text-indent: -9999px;
|
||||
overflow: hidden;
|
||||
}
|
583
contrib/iSenWeb/themes/styles/fanyi.css
Executable file
@ -0,0 +1,583 @@
|
||||
|
||||
.column {
|
||||
width:50%;
|
||||
}
|
||||
.fl .wrapper {
|
||||
padding-right:20px;
|
||||
_padding-right:10px;
|
||||
}
|
||||
h2 {
|
||||
height:20px;
|
||||
font-size:1.2em;
|
||||
}
|
||||
.column .row {
|
||||
padding-top:.5em;
|
||||
}
|
||||
#transForm .user-research {
|
||||
float:right;
|
||||
}
|
||||
#transForm .user-research a {
|
||||
font-family:"宋体";
|
||||
}
|
||||
#transForm .desc {
|
||||
zoom:1;
|
||||
}
|
||||
.column .desc {
|
||||
position:relative;
|
||||
color:#333333;
|
||||
font-size:14px;
|
||||
}
|
||||
.text {
|
||||
width:100%;
|
||||
padding:0;
|
||||
background:#fff;
|
||||
}
|
||||
input.text {
|
||||
padding:3px 0;
|
||||
}
|
||||
.button {
|
||||
width:5em;
|
||||
*height:23px;
|
||||
*padding-top:2px;
|
||||
}
|
||||
.actions a {
|
||||
display:none;
|
||||
}
|
||||
#inputText {
|
||||
display:block;
|
||||
border-width:0 1px 1px 0;
|
||||
border-color:#E5E5E5;
|
||||
border-style:solid;
|
||||
background:url("../images/fanyi/inputTextBg.png") no-repeat 0 0;
|
||||
_background-attachment:fixed;
|
||||
font-size:14px;
|
||||
line-height:140%;
|
||||
padding:10px 0 10px 10px;
|
||||
height:187px;
|
||||
resize:none;
|
||||
outline:none;
|
||||
font-family:arial,sans-serif;
|
||||
}
|
||||
*+html #inputText {
|
||||
background:none;
|
||||
border-width:2px 1px 1px 2px;
|
||||
height:185px;
|
||||
}
|
||||
@-moz-document url-prefix() {
|
||||
#inputText {
|
||||
padding:3px 0 1px 10px;
|
||||
height:204px;
|
||||
}
|
||||
}#customSelectBtn {
|
||||
position:relative;
|
||||
*float:left;
|
||||
display:inline-block;
|
||||
width:85px;
|
||||
height:22px;
|
||||
padding:1px 20px 1px 5px;
|
||||
margin-right:5px;
|
||||
line-height:22px;
|
||||
border:1px solid #9fc7e3;
|
||||
vertical-align:bottom;
|
||||
cursor:pointer;
|
||||
color:#000000;
|
||||
}
|
||||
#customSelectBtn .btn_arrow {
|
||||
position:absolute;
|
||||
top:10px;
|
||||
right:5px;
|
||||
border-width:5px;
|
||||
border-style:solid dashed dashed dashed;
|
||||
border-color:#9fc7e3 transparent transparent transparent;
|
||||
line-height:0;
|
||||
font-size:0;
|
||||
width:0;
|
||||
height:0;
|
||||
}
|
||||
#customSelectBtn.focus .btn_arrow {
|
||||
top:4px;
|
||||
border-style:dashed dashed solid dashed;
|
||||
border-color:transparent transparent #9fc7e3 transparent;
|
||||
}
|
||||
#customSelectOption {
|
||||
width:110px;
|
||||
padding:0;
|
||||
margin:1px 0 0;
|
||||
list-style:none;
|
||||
font-size:12px;
|
||||
border:1px solid #9fc7e3;
|
||||
background:#fff;
|
||||
position:absolute;
|
||||
z-index:9999;
|
||||
left:-1px;
|
||||
top:23px;
|
||||
display:none;
|
||||
}
|
||||
#customSelectOption a {
|
||||
display:block;
|
||||
height:22px;
|
||||
padding:0 5px;
|
||||
line-height:22px;
|
||||
text-decoration:none;
|
||||
color:#2a2a2a;
|
||||
}
|
||||
#customSelectOption a:hover,#customSelectOption .on a {
|
||||
background:#9fc7e3;
|
||||
}
|
||||
#translateBtn {
|
||||
width:74px;
|
||||
height:26px;
|
||||
text-indent:-999em;
|
||||
overflow:hidden;
|
||||
background:#fff url(../images/fanyi/fanyi_sprite.png) left -42px;
|
||||
cursor:pointer;
|
||||
outline:none;
|
||||
display:inline-block;
|
||||
vertical-align:top;
|
||||
}
|
||||
#translateBtn:hover {
|
||||
background-position:-74px -42px;
|
||||
}
|
||||
#translateBtn:active {
|
||||
background-position:-148px -42px;
|
||||
}
|
||||
#outputMod {
|
||||
position:relative;
|
||||
}
|
||||
#speech {
|
||||
display:inline-block;
|
||||
width:16px;
|
||||
height:0;
|
||||
padding-top:13px;
|
||||
margin:0 5px -2px;
|
||||
overflow:hidden;
|
||||
background:url(../images/fanyi/fanyi_sprite.png) no-repeat -168px top;
|
||||
}
|
||||
#speech:hover,#speech.on {
|
||||
background-position:-168px -13px;
|
||||
}
|
||||
#outputMod .desc {
|
||||
position:relative;
|
||||
zoom:1;
|
||||
height:14px;
|
||||
}
|
||||
#entryList {
|
||||
padding:40px 0 0;
|
||||
margin:0 0 0 18px;
|
||||
list-style:none;
|
||||
}
|
||||
#entryList li {
|
||||
position:relative;
|
||||
height:42px;
|
||||
line-height:42px;
|
||||
padding-left:40px;
|
||||
margin-bottom:5px;
|
||||
white-space:nowrap;
|
||||
color:#666;
|
||||
}
|
||||
#entryList .sp {
|
||||
position:absolute;
|
||||
left:0;
|
||||
top:0;
|
||||
width:36px;
|
||||
padding-top:42px;
|
||||
background:url(../images/fanyi/fanyi_sprite.png) no-repeat right top;
|
||||
}
|
||||
#translated {
|
||||
display:none;
|
||||
zoom:1;
|
||||
}
|
||||
#copyit {
|
||||
vertical-align:middle;
|
||||
margin-top:-2px;
|
||||
}
|
||||
#outputText {
|
||||
padding:15px 20px 0;
|
||||
line-height:140%;
|
||||
word-wrap:break-word;
|
||||
overflow-y:auto;
|
||||
background-color:#fafafa;
|
||||
height:193px;
|
||||
font-family:arial,sans-serif;
|
||||
}
|
||||
#translated .small_font .translated_result .tgt {
|
||||
font-size:14px;
|
||||
font-weight:normal;
|
||||
margin-bottom:.4em;
|
||||
}
|
||||
#translated .small_font {
|
||||
padding:10px 12px;
|
||||
height:188px;
|
||||
}
|
||||
#outputText .src {
|
||||
color:#787878;
|
||||
font-size:1em;
|
||||
margin-bottom:2px;
|
||||
}
|
||||
#outputText .tgt {
|
||||
margin-bottom:10px;
|
||||
font-size:1.5em;
|
||||
font-weight:bold;
|
||||
line-height:150%;
|
||||
}
|
||||
#outputText .selected {
|
||||
background-color:#316ac5;
|
||||
color:#fff;
|
||||
}
|
||||
.smart_result {
|
||||
padding:.5em .8em 0 0;
|
||||
border-top:1px solid #e0e0e0;
|
||||
color:#000;
|
||||
}
|
||||
.smart_src_title {
|
||||
color:#777;
|
||||
font-size:1.2em;
|
||||
margin-bottom:.6em;
|
||||
}
|
||||
.smart_result p {
|
||||
margin:5px 0 5px 0;
|
||||
line-height:125%;
|
||||
}
|
||||
.smart_result p a {
|
||||
float:right;
|
||||
margin-left:6px;
|
||||
}
|
||||
.smart_result p span {
|
||||
overflow:hidden;
|
||||
zoom:1;
|
||||
display:block;
|
||||
}
|
||||
.smartresult_more {
|
||||
font-size:12px;
|
||||
margin-top:5px;
|
||||
font-family:"宋体";
|
||||
}
|
||||
.compare-mode {
|
||||
font-weight:bold;
|
||||
}
|
||||
#modeWrapper {
|
||||
margin-top:-3px;
|
||||
padding:3px 0;
|
||||
*padding:0;
|
||||
}
|
||||
.read-mode {
|
||||
float:right;
|
||||
display:none;
|
||||
}
|
||||
.read-mode .title {
|
||||
background:url("../images/fanyi/fanyi_sprite.png") no-repeat -168px -28px;
|
||||
padding-left:18px;
|
||||
outline:none;
|
||||
}
|
||||
.compare-mode input {
|
||||
vertical-align:top;
|
||||
*vertical-align:middle;
|
||||
margin:0 3px 0 0;
|
||||
border:0;
|
||||
padding:0;
|
||||
}
|
||||
#errorHolder {
|
||||
display:none;
|
||||
position:absolute;
|
||||
z-index:9999;
|
||||
top:-25px;
|
||||
left:50%;
|
||||
text-align:center;
|
||||
font-size:12px;
|
||||
}
|
||||
#errorHolder.nullError {
|
||||
left:20%;
|
||||
top:120px;
|
||||
}
|
||||
#errorHolder .error_text {
|
||||
background:#3b7fc2;
|
||||
display:inline-block;
|
||||
padding:5px 10px;
|
||||
height:15px;
|
||||
line-height:15px;
|
||||
color:#fff;
|
||||
}
|
||||
#errorHolder .error_text a {
|
||||
text-decoration:underline;
|
||||
}
|
||||
#errorHolder.nullError .error_text {
|
||||
width:72px;
|
||||
text-align:center;
|
||||
}
|
||||
#errorHolder .add-fav {
|
||||
color:white;
|
||||
}
|
||||
#errorHolder #closeit {
|
||||
margin-left:8px;
|
||||
}
|
||||
.tip-close {
|
||||
cursor:pointer;
|
||||
}
|
||||
#addons {
|
||||
display:none;
|
||||
}
|
||||
#transBtnTip {
|
||||
display:none;
|
||||
position:absolute;
|
||||
z-index:999;
|
||||
left:100px;
|
||||
top:100px;
|
||||
font-size:12px;
|
||||
*background:#4570e0;
|
||||
}
|
||||
#transBtnTipInner {
|
||||
position:relative;
|
||||
padding:10px 15px;
|
||||
*margin:-1px 1px;
|
||||
color:#fff;
|
||||
background:#4570e0;
|
||||
-moz-border-radius:7px;
|
||||
-khtml-border-radius:7px;
|
||||
-webkit-border-radius:7px;
|
||||
border-radius:7px;
|
||||
}
|
||||
#transBtnTip .ar {
|
||||
margin-top:10px;
|
||||
}
|
||||
#transBtnTipOK {
|
||||
font-weight:bold;
|
||||
color:#fff;
|
||||
}
|
||||
#transBtnTipArrow {
|
||||
position:absolute;
|
||||
left:50px;
|
||||
top:100%;
|
||||
display:block;
|
||||
border-color:transparent transparent transparent #4570e0;
|
||||
border-width:0 0 20px 20px;
|
||||
border-style:dashed dashed dashed solid;
|
||||
font-size:0;
|
||||
}
|
||||
#sponsor {
|
||||
padding:1em 0 0;
|
||||
clear:both;
|
||||
}
|
||||
#sponsor .desc {
|
||||
white-space:normal;
|
||||
zoom:1;
|
||||
}
|
||||
#sponsor .fr {
|
||||
overflow:hidden;
|
||||
}
|
||||
#sponsor .more-services {
|
||||
background-color:#eff7fd;
|
||||
padding-left:10px;
|
||||
height:26px;
|
||||
line-height:26px;
|
||||
text-align:left;
|
||||
}
|
||||
#sponsor .more-services-list {
|
||||
margin-bottom:1em;
|
||||
border:1px #eff7fd solid;
|
||||
padding:5px 12px 4px 22px;
|
||||
}
|
||||
#sponsor .more-services-icon-sprite {
|
||||
background:url("../images/fanyi/fanyi_sprite.png") no-repeat 0 0;
|
||||
float:left;
|
||||
padding-left:40px;
|
||||
padding-top:40px;
|
||||
line-height:0;
|
||||
font-size:0;
|
||||
}
|
||||
#sponsor .icon1 {
|
||||
background-position:0 0;
|
||||
}
|
||||
#sponsor .icon2 {
|
||||
background-position:-40px 0;
|
||||
}
|
||||
#sponsor .icon3 {
|
||||
background-position:-80px 0;
|
||||
}
|
||||
#sponsor .icon4 {
|
||||
background-position:-120px 0;
|
||||
}
|
||||
#trans_tools {
|
||||
width:100%;
|
||||
}
|
||||
#trans_tools td {
|
||||
margin:0;
|
||||
padding:0;
|
||||
width:25%;
|
||||
}
|
||||
#trans_tools h3 {
|
||||
float:left;
|
||||
margin-left:10px;
|
||||
padding:0 10px 0 0;
|
||||
line-height:40px;
|
||||
font-size:1.2em;
|
||||
}
|
||||
#trans_tools p {
|
||||
padding:5px 10px 0 0;
|
||||
color:#777;
|
||||
font-size:1.2em;
|
||||
}
|
||||
#suggestYou {
|
||||
color:#777;
|
||||
font-family:"宋体";
|
||||
}
|
||||
#feedback_link {
|
||||
font-family:"宋体";
|
||||
}
|
||||
.new {
|
||||
color:#e60012;
|
||||
font-size:12px;
|
||||
}
|
||||
.close-reading-mode {
|
||||
display:none;
|
||||
}
|
||||
.open-reading-mode {
|
||||
display:none;
|
||||
}
|
||||
.for-close {
|
||||
display:none;
|
||||
}
|
||||
.show-reading-mode .open-reading-mode {
|
||||
display:inline-block;
|
||||
}
|
||||
.reading-mode #inputMod {
|
||||
display:none;
|
||||
}
|
||||
.reading-mode #outputMod {
|
||||
margin:0 auto;
|
||||
float:none;
|
||||
}
|
||||
.reading-mode .column {
|
||||
width:65%;
|
||||
}
|
||||
.reading-mode #outputMod #addons {
|
||||
display:none;
|
||||
}
|
||||
.reading-mode #outputMod #outputText {
|
||||
background-color:transparent;
|
||||
border-top:1px solid #e5e5e5;
|
||||
border-bottom:1px solid #e5e5e5;
|
||||
}
|
||||
.reading-mode #sponsor {
|
||||
display:none;
|
||||
}
|
||||
.reading-mode #translated .small_font {
|
||||
height:auto;
|
||||
padding:10px 0;
|
||||
}
|
||||
.reading-mode .for-close {
|
||||
display:block;
|
||||
}
|
||||
.reading-mode .close-reading-mode {
|
||||
display:inline-block;
|
||||
}
|
||||
.reading-mode .open-reading-mode {
|
||||
display:none;
|
||||
}
|
||||
.reading-mode #translated .small_font .translated_result .tgt {
|
||||
margin-bottom:.6em;
|
||||
padding-bottom:.6em;
|
||||
}
|
||||
#selectorSwitcher {
|
||||
float:right;
|
||||
margin-top:-3px;
|
||||
height:20px;
|
||||
line-height:20px;
|
||||
cursor:pointer;
|
||||
}
|
||||
#selectorStatus {
|
||||
margin-left:21px;
|
||||
margin-right:6px;
|
||||
color:#1e50a2;
|
||||
}
|
||||
.selector-sprite {
|
||||
background:url("../p/switcher.png") no-repeat 0 0;
|
||||
}
|
||||
.selector-enable {
|
||||
background-position:-51px -22px;
|
||||
}
|
||||
.selector-enable.hover {
|
||||
background-position:0 -22px;
|
||||
}
|
||||
.selector-disable {
|
||||
background-position:-51px 0;
|
||||
}
|
||||
.selector-disable.hover {
|
||||
background-position:0 0;
|
||||
}
|
||||
.show-translate #addons {
|
||||
display:block;
|
||||
}
|
||||
/* #b: no visible top border, width kept between 500px and 960px.
   The underscore-prefixed _width is a legacy-IE-only fallback declaration. */
#b {
    border-top:0 solid;
    max-width:960px;
    min-width:500px;
    _width:960px;
    /* Families must be comma-separated; without the comma the value is read as a
       single family called "arial sans-serif" and the generic fallback is lost. */
    font-family:arial,sans-serif;
}
|
||||
#transForm .content {
|
||||
position:relative;
|
||||
zoom:1;
|
||||
}
|
||||
/* .typo-suggest: hidden by default; when shown it is absolutely positioned
   10px from the bottom and 12px from the left of its positioned ancestor. */
.typo-suggest {
    display:none;
    position:absolute;
    bottom:10px;
    left:12px;
    font-size:1.2em;
    /* "sans-serif" is the generic family keyword; the previous "sens-serif"
       spelling was treated as an unknown font name, so no fallback applied. */
    font-family:verdana,sans-serif;
    color:#dc143c;
}
|
||||
.typo-suggest a.spell-corrected {
|
||||
text-decoration:underline;
|
||||
}
|
||||
.typo-suggest b {
|
||||
font-style:italic;
|
||||
font-weight:bold;
|
||||
}
|
||||
.ads {
|
||||
background-color:#FEFEEE;
|
||||
}
|
||||
#outputMod .wrapper {
|
||||
_padding-right:15px;
|
||||
}
|
||||
#addons {
|
||||
_padding-right:15px;
|
||||
}
|
||||
#microBlog {
|
||||
float:right;
|
||||
padding-right:5px;
|
||||
}
|
||||
#microBlog dd,#microBlog dt {
|
||||
float:left;
|
||||
padding-top:4px;
|
||||
height:20px;
|
||||
line-height:20px;
|
||||
}
|
||||
#microBlog dd {
|
||||
padding-top:4px;
|
||||
height:20px;
|
||||
}
|
||||
/* #microBlog .blog: 20x20 inline-block icon drawn from the shared sprite sheet;
   the per-service a.<name> rules only shift background-position. */
#microBlog .blog {
    display:inline-block;
    /* Every other rule in this stylesheet references fanyi_sprite.png; the
       previous "anyi_sprite.png" looks like a dropped leading "f".
       NOTE(review): confirm against the files in ../images/fanyi/. */
    background:url('../images/fanyi/fanyi_sprite.png') no-repeat;
    width:20px;
    height:20px;
}
|
||||
#microBlog a.netease {
|
||||
background-position:-110px -69px;
|
||||
}
|
||||
#microBlog a.sina {
|
||||
background-position:-132px -69px;
|
||||
}
|
||||
#microBlog a.tencent {
|
||||
background-position:-155px -69px;
|
||||
}
|
||||
#microBlog a.kaixin001 {
|
||||
background-position:-177px -69px;
|
||||
}
|
||||
.fl {
|
||||
float:left;
|
||||
}
|
||||
.fr {
|
||||
float:right;
|
||||
}
|
31
contrib/iSenWeb/themes/styles/search.css
Executable file
@ -0,0 +1,31 @@
|
||||
/* TOP SEARCH */
|
||||
#ts{position:relative;float: right; font-size:10px;}
|
||||
/* query form */
|
||||
.fc,.aca,.qb,.rqb{background:url(/MosesServer-cgi/themes/images/search/s.png) no-repeat}
|
||||
.fc{position:relative;width:415px;height:33px;padding:2px 0 2px 2px;background-position:-3px -3px}
|
||||
.fc input{font-family:Arial,sans-serif;border:none}
|
||||
.qc{position:relative;float:left;width:325px;padding:3px 2px;border-right:1px solid #6a8aae}
|
||||
.q{width:294px;height:23px;padding:3px 0 0 2px;*margin:-1px 0;font-size:1.6em;background:transparent;*border:1px solid #fff;outline:none}
|
||||
.aca{position:absolute;right:2px;top:3px;width:26px;height:0;padding-top:26px;overflow:hidden;text-indent:-9999em;background-position:-415px -3px;cursor:pointer}
|
||||
.qb{width:81px;height:33px;padding:0 0 2px 1px;*padding:2px 0 0 1px;margin:0;_margin-left:-3px;font-weight:bold;font-size:1.4em;word-spacing:4px;color:#fff;background-position:right -50px;background-color:transparent;cursor:pointer}
|
||||
.no-suggest .q{width:320px}
|
||||
/* BOTTOM SEARCH */
|
||||
#bs{margin:15px 0 20px;font-size:10px;}
|
||||
#bs .q{width:320px}
|
||||
input.rqb{position:absolute;right:-110px;top:2px;width:102px;height:32px;padding-top:32px;overflow:hidden;text-indent:-9999em;background-color:transparent;background-position:left -50px;cursor:pointer}
|
||||
|
||||
|
||||
/* suggest */
|
||||
.sw{font-size:1.4em;border:1px solid #8cbbdd}
|
||||
.sw table{background:#fff;border-collapse:collapse}
|
||||
.remindtt75,.jstxlan{padding-left: .2em;font-size: 14px;height: 23px;line-height: 23px;}
|
||||
.remindtt752{padding:.2em;color:#808080;font-size:14px}
|
||||
.jstxlan{color:#808080;font-size:13px;cursor:pointer; float:right}
|
||||
.jstxhuitiaoyou{margin:-1px 0;border-top:1px solid #dbeffe;background:#eaf1fd}
|
||||
.aa_highlight{color:#fff;background:#3971bf}
|
||||
/* MODULES */
|
||||
.pm{display:none;width:70px;border:1px solid;font-size:13px;border-color:#8cbbdd;background:#fff}
|
||||
.pm ul{padding:0;margin:0;list-style:none}
|
||||
.pm a{display:block;padding:4px 3px;text-decoration:none;zoom:1}
|
||||
.pm a:hover{color:#fff;background:#3971bf}
|
||||
.pm .sl{height:0;margin:0 1px;*margin-top:-10px;font-size:0;border-bottom:1px solid #8cbbdd}
|
10
contrib/iSenWeb/trans_result.php
Executable file
@ -0,0 +1,10 @@
|
||||
<?php
/*
 * Sends each line of the POSTed 'input1' field to the service listening on
 * 161.64.89.129:1986 (through `nc`) and prints every reply followed by <br>.
 *
 * The input comes straight from the browser, so it is treated as untrusted:
 * it is shell-quoted before reaching the command line and the reply is
 * HTML-escaped before being written to the page.
 */
$result = "";

// A request without the field yields a single empty line instead of an
// undefined-index notice.
$Content = isset($_POST['input1']) ? $_POST['input1'] : '';

// Browsers submit textarea content with CRLF line endings; accept both forms
// so no stray "\r" is sent along with each line.
$ereg = '/\r?\n/';
$arr_str = preg_split($ereg, $Content);

foreach ($arr_str as $value) {
    // escapeshellarg() wraps the line in single quotes so characters such as
    // ; | ` $( ) cannot start another command. printf '%s\n' is used instead
    // of echo so a line beginning with "-n"/"-e" is not taken as an option.
    // NOTE(review): escapeshellarg() drops bytes that are invalid in the current
    // LC_CTYPE locale; make sure the server runs with a UTF-8 locale if
    // non-ASCII input is expected.
    $command = "printf '%s\\n' " . escapeshellarg($value) . ' | nc 161.64.89.129 1986';
    $result = shell_exec($command);

    // shell_exec() returns null when the command produces no output; cast so
    // htmlspecialchars() always receives a string. Escaping stops markup that
    // the service passes through from being interpreted by the browser.
    echo htmlspecialchars((string) $result, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . '<br>';
}
?>
|
@ -43,11 +43,13 @@
|
||||
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
|
||||
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
|
||||
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
|
||||
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
|
||||
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
@ -55,35 +57,37 @@
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
|
||||
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>zdll.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
<AdditionalLibraryDirectories>C:\boost\boost_1_47\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>zdll.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)/$(Configuration)/moses.lib;$(SolutionDir)/$(Configuration)/kenlm.lib;$(SolutionDir)/$(Configuration)/OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
<AdditionalLibraryDirectories>C:\boost\boost_1_47\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
|
@ -69,7 +69,7 @@
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
@ -77,21 +77,21 @@
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
|
||||
<AdditionalIncludeDirectories>$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
<AdditionalIncludeDirectories>$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
|
292
contrib/other-builds/fuzzy-match.xcodeproj/project.pbxproj
Normal file
@ -0,0 +1,292 @@
|
||||
// !$*UTF8*$!
|
||||
{
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 46;
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
1E42EFB615BEFAEB00E937EB /* fuzzy-match2.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E42EFA515BEFABD00E937EB /* fuzzy-match2.cpp */; };
|
||||
1E42EFB715BEFAEB00E937EB /* SuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */; };
|
||||
1E42EFB815BEFAEB00E937EB /* SuffixArray.h in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DD015BED3D4001914A2 /* SuffixArray.h */; };
|
||||
1E42EFB915BEFAEB00E937EB /* Vocabulary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */; };
|
||||
1E42EFBA15BEFAEB00E937EB /* Vocabulary.h in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCB15BED3AC001914A2 /* Vocabulary.h */; };
|
||||
1E806DCC15BED3AC001914A2 /* Vocabulary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */; };
|
||||
1E806DD115BED3D4001914A2 /* SuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */; };
|
||||
1ECD60A815C15E28004172A4 /* Util.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1ECD60A515C15D3A004172A4 /* Util.cpp */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXCopyFilesBuildPhase section */
|
||||
1E42EFAA15BEFAD300E937EB /* CopyFiles */ = {
|
||||
isa = PBXCopyFilesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
dstPath = /usr/share/man/man1/;
|
||||
dstSubfolderSpec = 0;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 1;
|
||||
};
|
||||
1ED87EEB15BED331003E47AA /* CopyFiles */ = {
|
||||
isa = PBXCopyFilesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
dstPath = /usr/share/man/man1/;
|
||||
dstSubfolderSpec = 0;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 1;
|
||||
};
|
||||
/* End PBXCopyFilesBuildPhase section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
1E42EFA515BEFABD00E937EB /* fuzzy-match2.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = "fuzzy-match2.cpp"; path = "../tm-mt-integration/fuzzy-match2.cpp"; sourceTree = "<group>"; };
|
||||
1E42EFAC15BEFAD300E937EB /* fuzzy-match2 */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "fuzzy-match2"; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1E42EFD115C00AC100E937EB /* fuzzy-match2.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = "fuzzy-match2.h"; path = "../tm-mt-integration/fuzzy-match2.h"; sourceTree = "<group>"; };
|
||||
1E42EFD215C00BAE00E937EB /* Util.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Util.h; path = "../tm-mt-integration/Util.h"; sourceTree = "<group>"; };
|
||||
1E42EFD315C00C0A00E937EB /* SentenceAlignment.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SentenceAlignment.h; path = "../tm-mt-integration/SentenceAlignment.h"; sourceTree = "<group>"; };
|
||||
1E42EFD715C00D6300E937EB /* Match.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Match.h; path = "../tm-mt-integration/Match.h"; sourceTree = "<group>"; };
|
||||
1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Vocabulary.cpp; path = "../tm-mt-integration/Vocabulary.cpp"; sourceTree = "<group>"; };
|
||||
1E806DCB15BED3AC001914A2 /* Vocabulary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Vocabulary.h; path = "../tm-mt-integration/Vocabulary.h"; sourceTree = "<group>"; };
|
||||
1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SuffixArray.cpp; path = "../tm-mt-integration/SuffixArray.cpp"; sourceTree = "<group>"; };
|
||||
1E806DD015BED3D4001914A2 /* SuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SuffixArray.h; path = "../tm-mt-integration/SuffixArray.h"; sourceTree = "<group>"; };
|
||||
1ECD60A515C15D3A004172A4 /* Util.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Util.cpp; path = "../tm-mt-integration/Util.cpp"; sourceTree = "<group>"; };
|
||||
1ED87EED15BED331003E47AA /* fuzzy-match */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "fuzzy-match"; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
1E42EFA915BEFAD300E937EB /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
1ED87EEA15BED331003E47AA /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXFrameworksBuildPhase section */
|
||||
|
||||
/* Begin PBXGroup section */
|
||||
1ED87EE215BED32F003E47AA = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1E42EFD715C00D6300E937EB /* Match.h */,
|
||||
1E42EFD315C00C0A00E937EB /* SentenceAlignment.h */,
|
||||
1E42EFD215C00BAE00E937EB /* Util.h */,
|
||||
1ECD60A515C15D3A004172A4 /* Util.cpp */,
|
||||
1E806DCF15BED3D4001914A2 /* SuffixArray.cpp */,
|
||||
1E806DD015BED3D4001914A2 /* SuffixArray.h */,
|
||||
1E42EFD115C00AC100E937EB /* fuzzy-match2.h */,
|
||||
1E42EFA515BEFABD00E937EB /* fuzzy-match2.cpp */,
|
||||
1E806DCA15BED3AC001914A2 /* Vocabulary.cpp */,
|
||||
1E806DCB15BED3AC001914A2 /* Vocabulary.h */,
|
||||
1ED87EEE15BED331003E47AA /* Products */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1ED87EEE15BED331003E47AA /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1ED87EED15BED331003E47AA /* fuzzy-match */,
|
||||
1E42EFAC15BEFAD300E937EB /* fuzzy-match2 */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
1E42EFAB15BEFAD300E937EB /* fuzzy-match2 */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 1E42EFB315BEFAD300E937EB /* Build configuration list for PBXNativeTarget "fuzzy-match2" */;
|
||||
buildPhases = (
|
||||
1E42EFA815BEFAD300E937EB /* Sources */,
|
||||
1E42EFA915BEFAD300E937EB /* Frameworks */,
|
||||
1E42EFAA15BEFAD300E937EB /* CopyFiles */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = "fuzzy-match2";
|
||||
productName = "fuzzy-match2";
|
||||
productReference = 1E42EFAC15BEFAD300E937EB /* fuzzy-match2 */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
1ED87EEC15BED331003E47AA /* fuzzy-match */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 1ED87EF715BED331003E47AA /* Build configuration list for PBXNativeTarget "fuzzy-match" */;
|
||||
buildPhases = (
|
||||
1ED87EE915BED331003E47AA /* Sources */,
|
||||
1ED87EEA15BED331003E47AA /* Frameworks */,
|
||||
1ED87EEB15BED331003E47AA /* CopyFiles */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = "fuzzy-match";
|
||||
productName = "fuzzy-match";
|
||||
productReference = 1ED87EED15BED331003E47AA /* fuzzy-match */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
||||
/* Begin PBXProject section */
|
||||
1ED87EE415BED32F003E47AA /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
buildConfigurationList = 1ED87EE715BED32F003E47AA /* Build configuration list for PBXProject "fuzzy-match" */;
|
||||
compatibilityVersion = "Xcode 3.2";
|
||||
developmentRegion = English;
|
||||
hasScannedForEncodings = 0;
|
||||
knownRegions = (
|
||||
en,
|
||||
);
|
||||
mainGroup = 1ED87EE215BED32F003E47AA;
|
||||
productRefGroup = 1ED87EEE15BED331003E47AA /* Products */;
|
||||
projectDirPath = "";
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
1ED87EEC15BED331003E47AA /* fuzzy-match */,
|
||||
1E42EFAB15BEFAD300E937EB /* fuzzy-match2 */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
||||
/* Begin PBXSourcesBuildPhase section */
|
||||
1E42EFA815BEFAD300E937EB /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
1ECD60A815C15E28004172A4 /* Util.cpp in Sources */,
|
||||
1E42EFB615BEFAEB00E937EB /* fuzzy-match2.cpp in Sources */,
|
||||
1E42EFB715BEFAEB00E937EB /* SuffixArray.cpp in Sources */,
|
||||
1E42EFB815BEFAEB00E937EB /* SuffixArray.h in Sources */,
|
||||
1E42EFB915BEFAEB00E937EB /* Vocabulary.cpp in Sources */,
|
||||
1E42EFBA15BEFAEB00E937EB /* Vocabulary.h in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
1ED87EE915BED331003E47AA /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
1E806DCC15BED3AC001914A2 /* Vocabulary.cpp in Sources */,
|
||||
1E806DD115BED3D4001914A2 /* SuffixArray.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXSourcesBuildPhase section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
1E42EFB415BEFAD300E937EB /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1E42EFB515BEFAD300E937EB /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
1ED87EF515BED331003E47AA /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
|
||||
COPY_PHASE_STRIP = NO;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu99;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
"DEBUG=1",
|
||||
"$(inherited)",
|
||||
);
|
||||
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
|
||||
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
MACOSX_DEPLOYMENT_TARGET = 10.7;
|
||||
ONLY_ACTIVE_ARCH = YES;
|
||||
SDKROOT = macosx;
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1ED87EF615BED331003E47AA /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
|
||||
COPY_PHASE_STRIP = YES;
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
GCC_C_LANGUAGE_STANDARD = gnu99;
|
||||
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
|
||||
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
MACOSX_DEPLOYMENT_TARGET = 10.7;
|
||||
SDKROOT = macosx;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
1ED87EF815BED331003E47AA /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1ED87EF915BED331003E47AA /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
1E42EFB315BEFAD300E937EB /* Build configuration list for PBXNativeTarget "fuzzy-match2" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1E42EFB415BEFAD300E937EB /* Debug */,
|
||||
1E42EFB515BEFAD300E937EB /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
1ED87EE715BED32F003E47AA /* Build configuration list for PBXProject "fuzzy-match" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1ED87EF515BED331003E47AA /* Debug */,
|
||||
1ED87EF615BED331003E47AA /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
1ED87EF715BED331003E47AA /* Build configuration list for PBXNativeTarget "fuzzy-match" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1ED87EF815BED331003E47AA /* Debug */,
|
||||
1ED87EF915BED331003E47AA /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
};
|
||||
rootObject = 1ED87EE415BED32F003E47AA /* Project object */;
|
||||
}
|
@ -0,0 +1,21 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Bucket
|
||||
type = "1"
|
||||
version = "1.0">
|
||||
<FileBreakpoints>
|
||||
<FileBreakpoint
|
||||
shouldBeEnabled = "Yes"
|
||||
ignoreCount = "0"
|
||||
continueAfterRunningActions = "No"
|
||||
isPathRelative = "0"
|
||||
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/contrib/tm-mt-integration/fuzzy-match2.cpp"
|
||||
timestampString = "364996019.762643"
|
||||
startingColumnNumber = "9223372036854775807"
|
||||
endingColumnNumber = "9223372036854775807"
|
||||
startingLineNumber = "456"
|
||||
endingLineNumber = "456"
|
||||
landmarkName = "create_extract(int sentenceInd, int cost, const vector< WORD_ID > &sourceSentence, const vector<SentenceAlignment> &targets, const string &inputStr, const string &path)"
|
||||
landmarkType = "7">
|
||||
</FileBreakpoint>
|
||||
</FileBreakpoints>
|
||||
</Bucket>
|
@ -0,0 +1,78 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Scheme
|
||||
version = "1.3">
|
||||
<BuildAction
|
||||
parallelizeBuildables = "YES"
|
||||
buildImplicitDependencies = "YES">
|
||||
<BuildActionEntries>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1ED87EEC15BED331003E47AA"
|
||||
BuildableName = "fuzzy-match"
|
||||
BlueprintName = "fuzzy-match"
|
||||
ReferencedContainer = "container:fuzzy-match.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
buildConfiguration = "Debug">
|
||||
<Testables>
|
||||
</Testables>
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
launchStyle = "0"
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Debug">
|
||||
<BuildableProductRunnable>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1ED87EEC15BED331003E47AA"
|
||||
BuildableName = "fuzzy-match"
|
||||
BlueprintName = "fuzzy-match"
|
||||
ReferencedContainer = "container:fuzzy-match.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
<CommandLineArguments>
|
||||
<CommandLineArgument
|
||||
argument = "--multiple /Users/hieuhoang/workspace/experiment/data/tm-mt-integration//in/ac-test.input.tc.4 /Users/hieuhoang/workspace/experiment/data/tm-mt-integration//in/acquis.truecased.4.en.uniq"
|
||||
isEnabled = "YES">
|
||||
</CommandLineArgument>
|
||||
</CommandLineArguments>
|
||||
<AdditionalOptions>
|
||||
</AdditionalOptions>
|
||||
</LaunchAction>
|
||||
<ProfileAction
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
savedToolIdentifier = ""
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Release">
|
||||
<BuildableProductRunnable>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1ED87EEC15BED331003E47AA"
|
||||
BuildableName = "fuzzy-match"
|
||||
BlueprintName = "fuzzy-match"
|
||||
ReferencedContainer = "container:fuzzy-match.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
</ProfileAction>
|
||||
<AnalyzeAction
|
||||
buildConfiguration = "Debug">
|
||||
</AnalyzeAction>
|
||||
<ArchiveAction
|
||||
buildConfiguration = "Release"
|
||||
revealArchiveInOrganizer = "YES">
|
||||
</ArchiveAction>
|
||||
</Scheme>
|
@ -0,0 +1,79 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Scheme
|
||||
version = "1.3">
|
||||
<BuildAction
|
||||
parallelizeBuildables = "YES"
|
||||
buildImplicitDependencies = "YES">
|
||||
<BuildActionEntries>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1E42EFAB15BEFAD300E937EB"
|
||||
BuildableName = "fuzzy-match2"
|
||||
BlueprintName = "fuzzy-match2"
|
||||
ReferencedContainer = "container:fuzzy-match.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
buildConfiguration = "Debug">
|
||||
<Testables>
|
||||
</Testables>
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
launchStyle = "0"
|
||||
useCustomWorkingDirectory = "YES"
|
||||
customWorkingDirectory = "/Users/hieuhoang/unison/workspace/experiment/data/tm-mt-integration/in"
|
||||
buildConfiguration = "Debug">
|
||||
<BuildableProductRunnable>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1E42EFAB15BEFAD300E937EB"
|
||||
BuildableName = "fuzzy-match2"
|
||||
BlueprintName = "fuzzy-match2"
|
||||
ReferencedContainer = "container:fuzzy-match.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
<CommandLineArguments>
|
||||
<CommandLineArgument
|
||||
argument = "--multiple ac-test.input.tc.4 acquis.truecased.4.en.uniq acquis.truecased.4.fr.uniq acquis.truecased.4.align.uniq"
|
||||
isEnabled = "YES">
|
||||
</CommandLineArgument>
|
||||
</CommandLineArguments>
|
||||
<AdditionalOptions>
|
||||
</AdditionalOptions>
|
||||
</LaunchAction>
|
||||
<ProfileAction
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
savedToolIdentifier = ""
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Release">
|
||||
<BuildableProductRunnable>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1E42EFAB15BEFAD300E937EB"
|
||||
BuildableName = "fuzzy-match2"
|
||||
BlueprintName = "fuzzy-match2"
|
||||
ReferencedContainer = "container:fuzzy-match.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
</ProfileAction>
|
||||
<AnalyzeAction
|
||||
buildConfiguration = "Debug">
|
||||
</AnalyzeAction>
|
||||
<ArchiveAction
|
||||
buildConfiguration = "Release"
|
||||
revealArchiveInOrganizer = "YES">
|
||||
</ArchiveAction>
|
||||
</Scheme>
|
@ -0,0 +1,32 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>SchemeUserState</key>
|
||||
<dict>
|
||||
<key>fuzzy-match.xcscheme</key>
|
||||
<dict>
|
||||
<key>orderHint</key>
|
||||
<integer>0</integer>
|
||||
</dict>
|
||||
<key>fuzzy-match2.xcscheme</key>
|
||||
<dict>
|
||||
<key>orderHint</key>
|
||||
<integer>1</integer>
|
||||
</dict>
|
||||
</dict>
|
||||
<key>SuppressBuildableAutocreation</key>
|
||||
<dict>
|
||||
<key>1E42EFAB15BEFAD300E937EB</key>
|
||||
<dict>
|
||||
<key>primary</key>
|
||||
<true/>
|
||||
</dict>
|
||||
<key>1ED87EEC15BED331003E47AA</key>
|
||||
<dict>
|
||||
<key>primary</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -1,4 +1,4 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
@ -28,14 +28,12 @@
|
||||
<None Include="..\..\lm\max_order.hh" />
|
||||
<None Include="..\..\lm\model.hh" />
|
||||
<None Include="..\..\lm\model_type.hh" />
|
||||
<None Include="..\..\lm\ngram_query.hh" />
|
||||
<None Include="..\..\lm\quantize.hh" />
|
||||
<None Include="..\..\lm\README" />
|
||||
<None Include="..\..\lm\read_arpa.hh" />
|
||||
<None Include="..\..\lm\return.hh" />
|
||||
<None Include="..\..\lm\search_hashed.hh" />
|
||||
<None Include="..\..\lm\search_trie.hh" />
|
||||
<None Include="..\..\lm\state.hh" />
|
||||
<None Include="..\..\lm\test.arpa" />
|
||||
<None Include="..\..\lm\test.sh" />
|
||||
<None Include="..\..\lm\test_nounk.arpa" />
|
||||
@ -49,6 +47,8 @@
|
||||
<None Include="..\..\lm\word_index.hh" />
|
||||
<None Include="..\..\util\bit_packing.hh" />
|
||||
<None Include="..\..\util\check.hh" />
|
||||
<None Include="..\..\util\COPYING" />
|
||||
<None Include="..\..\util\COPYING.LESSER" />
|
||||
<None Include="..\..\util\ersatz_progress.hh" />
|
||||
<None Include="..\..\util\exception.hh" />
|
||||
<None Include="..\..\util\file.hh" />
|
||||
@ -68,7 +68,6 @@
|
||||
<None Include="..\..\util\sorted_uniform.hh" />
|
||||
<None Include="..\..\util\string_piece.hh" />
|
||||
<None Include="..\..\util\tokenize_piece.hh" />
|
||||
<None Include="..\..\util\usage.hh" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\lm\bhiksha.cc" />
|
||||
@ -78,7 +77,6 @@
|
||||
<ClCompile Include="..\..\lm\left_test.cc" />
|
||||
<ClCompile Include="..\..\lm\lm_exception.cc" />
|
||||
<ClCompile Include="..\..\lm\model.cc" />
|
||||
<ClCompile Include="..\..\lm\model_test.cc" />
|
||||
<ClCompile Include="..\..\lm\ngram_query.cc" />
|
||||
<ClCompile Include="..\..\lm\quantize.cc" />
|
||||
<ClCompile Include="..\..\lm\read_arpa.cc" />
|
||||
@ -97,7 +95,6 @@
|
||||
<ClCompile Include="..\..\util\getopt.c" />
|
||||
<ClCompile Include="..\..\util\mmap.cc" />
|
||||
<ClCompile Include="..\..\util\murmur_hash.cc" />
|
||||
<ClCompile Include="..\..\util\usage.cc" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}</ProjectGuid>
|
||||
@ -126,15 +123,20 @@
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<IncludePath>C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<IncludePath>C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
@ -149,8 +151,9 @@
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_LIB;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../..</AdditionalIncludeDirectories>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Windows</SubSystem>
|
||||
|
@ -539,6 +539,7 @@
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
EXECUTABLE_PREFIX = lib;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = "KENLM_MAX_ORDER=7";
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"\"$(SRCROOT)/../../lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
|
||||
@ -556,6 +557,7 @@
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
EXECUTABLE_PREFIX = lib;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = "KENLM_MAX_ORDER=7";
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
"$(inherited)",
|
||||
"\"$(SRCROOT)/../../lm/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
|
||||
|
@ -0,0 +1,54 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Scheme
|
||||
version = "1.3">
|
||||
<BuildAction
|
||||
parallelizeBuildables = "YES"
|
||||
buildImplicitDependencies = "YES">
|
||||
<BuildActionEntries>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1EE8C2E81476A48E002496F2"
|
||||
BuildableName = "liblm.a"
|
||||
BlueprintName = "lm"
|
||||
ReferencedContainer = "container:lm.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
buildConfiguration = "Debug">
|
||||
<Testables>
|
||||
</Testables>
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
launchStyle = "0"
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Debug">
|
||||
<AdditionalOptions>
|
||||
</AdditionalOptions>
|
||||
</LaunchAction>
|
||||
<ProfileAction
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
savedToolIdentifier = ""
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Release">
|
||||
</ProfileAction>
|
||||
<AnalyzeAction
|
||||
buildConfiguration = "Debug">
|
||||
</AnalyzeAction>
|
||||
<ArchiveAction
|
||||
buildConfiguration = "Release"
|
||||
revealArchiveInOrganizer = "YES">
|
||||
</ArchiveAction>
|
||||
</Scheme>
|
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>SchemeUserState</key>
|
||||
<dict>
|
||||
<key>lm.xcscheme</key>
|
||||
<dict>
|
||||
<key>orderHint</key>
|
||||
<integer>0</integer>
|
||||
</dict>
|
||||
</dict>
|
||||
<key>SuppressBuildableAutocreation</key>
|
||||
<dict>
|
||||
<key>1EE8C2E81476A48E002496F2</key>
|
||||
<dict>
|
||||
<key>primary</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -7,6 +7,8 @@
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
1E1D826915AC641600FE42E9 /* extractor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E1D825915AC63ED00FE42E9 /* extractor.cpp */; };
|
||||
1E1D826A15AC642B00FE42E9 /* libmert_lib.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E2B6B141593A6F30028137E /* libmert_lib.a */; };
|
||||
1E2B6ADE1593A5500028137E /* mert.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E2B6ADD1593A5500028137E /* mert.cpp */; };
|
||||
1E2B6B1F1593CA8A0028137E /* libmert_lib.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1E2B6B141593A6F30028137E /* libmert_lib.a */; };
|
||||
/* End PBXBuildFile section */
|
||||
@ -22,6 +24,15 @@
|
||||
/* End PBXContainerItemProxy section */
|
||||
|
||||
/* Begin PBXCopyFilesBuildPhase section */
|
||||
1E1D825D15AC640800FE42E9 /* CopyFiles */ = {
|
||||
isa = PBXCopyFilesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
dstPath = /usr/share/man/man1/;
|
||||
dstSubfolderSpec = 0;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 1;
|
||||
};
|
||||
1EB0AF031593A2180007E2A4 /* CopyFiles */ = {
|
||||
isa = PBXCopyFilesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
@ -34,12 +45,22 @@
|
||||
/* End PBXCopyFilesBuildPhase section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
1E1D825915AC63ED00FE42E9 /* extractor.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = extractor.cpp; path = ../../mert/extractor.cpp; sourceTree = "<group>"; };
|
||||
1E1D825F15AC640800FE42E9 /* extractor */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = extractor; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1E2B6ADD1593A5500028137E /* mert.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = mert.cpp; path = ../../mert/mert.cpp; sourceTree = "<group>"; };
|
||||
1E2B6B0F1593A6F30028137E /* mert_lib.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = mert_lib.xcodeproj; sourceTree = "<group>"; };
|
||||
1EB0AF051593A2180007E2A4 /* mert */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = mert; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
1E1D825C15AC640800FE42E9 /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
1E1D826A15AC642B00FE42E9 /* libmert_lib.a in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
1EB0AF021593A2180007E2A4 /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
@ -64,6 +85,7 @@
|
||||
children = (
|
||||
1E2B6B0F1593A6F30028137E /* mert_lib.xcodeproj */,
|
||||
1E2B6ADD1593A5500028137E /* mert.cpp */,
|
||||
1E1D825915AC63ED00FE42E9 /* extractor.cpp */,
|
||||
1EB0AF061593A2180007E2A4 /* Products */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
@ -72,6 +94,7 @@
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1EB0AF051593A2180007E2A4 /* mert */,
|
||||
1E1D825F15AC640800FE42E9 /* extractor */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
@ -79,6 +102,23 @@
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
1E1D825E15AC640800FE42E9 /* extractor */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 1E1D826615AC640800FE42E9 /* Build configuration list for PBXNativeTarget "extractor" */;
|
||||
buildPhases = (
|
||||
1E1D825B15AC640800FE42E9 /* Sources */,
|
||||
1E1D825C15AC640800FE42E9 /* Frameworks */,
|
||||
1E1D825D15AC640800FE42E9 /* CopyFiles */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
);
|
||||
name = extractor;
|
||||
productName = extractor;
|
||||
productReference = 1E1D825F15AC640800FE42E9 /* extractor */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
1EB0AF041593A2180007E2A4 /* mert */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 1EB0AF0F1593A2180007E2A4 /* Build configuration list for PBXNativeTarget "mert" */;
|
||||
@ -120,6 +160,7 @@
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
1EB0AF041593A2180007E2A4 /* mert */,
|
||||
1E1D825E15AC640800FE42E9 /* extractor */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
@ -135,6 +176,14 @@
|
||||
/* End PBXReferenceProxy section */
|
||||
|
||||
/* Begin PBXSourcesBuildPhase section */
|
||||
1E1D825B15AC640800FE42E9 /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
1E1D826915AC641600FE42E9 /* extractor.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
1EB0AF011593A2180007E2A4 /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
@ -146,6 +195,28 @@
|
||||
/* End PBXSourcesBuildPhase section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
1E1D826715AC640800FE42E9 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
HEADER_SEARCH_PATHS = (
|
||||
../..,
|
||||
/opt/local/include,
|
||||
);
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1E1D826815AC640800FE42E9 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
HEADER_SEARCH_PATHS = (
|
||||
../..,
|
||||
/opt/local/include,
|
||||
);
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
1EB0AF0D1593A2180007E2A4 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
@ -234,6 +305,15 @@
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
1E1D826615AC640800FE42E9 /* Build configuration list for PBXNativeTarget "extractor" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1E1D826715AC640800FE42E9 /* Debug */,
|
||||
1E1D826815AC640800FE42E9 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
1EB0AEFF1593A2180007E2A4 /* Build configuration list for PBXProject "mert" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
|
7
contrib/other-builds/mert.xcodeproj/project.xcworkspace/contents.xcworkspacedata
generated
Normal file
@ -0,0 +1,7 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Workspace
|
||||
version = "1.0">
|
||||
<FileRef
|
||||
location = "self:mert.xcodeproj">
|
||||
</FileRef>
|
||||
</Workspace>
|
@ -0,0 +1,35 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Bucket
|
||||
type = "1"
|
||||
version = "1.0">
|
||||
<FileBreakpoints>
|
||||
<FileBreakpoint
|
||||
shouldBeEnabled = "Yes"
|
||||
ignoreCount = "0"
|
||||
continueAfterRunningActions = "No"
|
||||
isPathRelative = "0"
|
||||
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/mert.cpp"
|
||||
timestampString = "363625029.073606"
|
||||
startingColumnNumber = "9223372036854775807"
|
||||
endingColumnNumber = "9223372036854775807"
|
||||
startingLineNumber = "316"
|
||||
endingLineNumber = "316"
|
||||
landmarkName = "main(int argc, char **argv)"
|
||||
landmarkType = "7">
|
||||
</FileBreakpoint>
|
||||
<FileBreakpoint
|
||||
shouldBeEnabled = "Yes"
|
||||
ignoreCount = "0"
|
||||
continueAfterRunningActions = "No"
|
||||
isPathRelative = "0"
|
||||
filePath = "/Users/hieuhoang/unison/workspace/github/hieuhoang/mert/mert.cpp"
|
||||
timestampString = "363625081.848519"
|
||||
startingColumnNumber = "9223372036854775807"
|
||||
endingColumnNumber = "9223372036854775807"
|
||||
startingLineNumber = "326"
|
||||
endingLineNumber = "326"
|
||||
landmarkName = "main(int argc, char **argv)"
|
||||
landmarkType = "7">
|
||||
</FileBreakpoint>
|
||||
</FileBreakpoints>
|
||||
</Bucket>
|
@ -0,0 +1,72 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Scheme
|
||||
version = "1.3">
|
||||
<BuildAction
|
||||
parallelizeBuildables = "YES"
|
||||
buildImplicitDependencies = "YES">
|
||||
<BuildActionEntries>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1E1D825E15AC640800FE42E9"
|
||||
BuildableName = "extractor"
|
||||
BlueprintName = "extractor"
|
||||
ReferencedContainer = "container:mert.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
buildConfiguration = "Debug">
|
||||
<Testables>
|
||||
</Testables>
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
launchStyle = "0"
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Debug">
|
||||
<BuildableProductRunnable>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1E1D825E15AC640800FE42E9"
|
||||
BuildableName = "extractor"
|
||||
BlueprintName = "extractor"
|
||||
ReferencedContainer = "container:mert.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
<AdditionalOptions>
|
||||
</AdditionalOptions>
|
||||
</LaunchAction>
|
||||
<ProfileAction
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
savedToolIdentifier = ""
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Release">
|
||||
<BuildableProductRunnable>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1E1D825E15AC640800FE42E9"
|
||||
BuildableName = "extractor"
|
||||
BlueprintName = "extractor"
|
||||
ReferencedContainer = "container:mert.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
</ProfileAction>
|
||||
<AnalyzeAction
|
||||
buildConfiguration = "Debug">
|
||||
</AnalyzeAction>
|
||||
<ArchiveAction
|
||||
buildConfiguration = "Release"
|
||||
revealArchiveInOrganizer = "YES">
|
||||
</ArchiveAction>
|
||||
</Scheme>
|
@ -0,0 +1,72 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Scheme
|
||||
version = "1.3">
|
||||
<BuildAction
|
||||
parallelizeBuildables = "YES"
|
||||
buildImplicitDependencies = "YES">
|
||||
<BuildActionEntries>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1EB0AF041593A2180007E2A4"
|
||||
BuildableName = "mert"
|
||||
BlueprintName = "mert"
|
||||
ReferencedContainer = "container:mert.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
buildConfiguration = "Debug">
|
||||
<Testables>
|
||||
</Testables>
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
launchStyle = "0"
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Debug">
|
||||
<BuildableProductRunnable>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1EB0AF041593A2180007E2A4"
|
||||
BuildableName = "mert"
|
||||
BlueprintName = "mert"
|
||||
ReferencedContainer = "container:mert.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
<AdditionalOptions>
|
||||
</AdditionalOptions>
|
||||
</LaunchAction>
|
||||
<ProfileAction
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
savedToolIdentifier = ""
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Release">
|
||||
<BuildableProductRunnable>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1EB0AF041593A2180007E2A4"
|
||||
BuildableName = "mert"
|
||||
BlueprintName = "mert"
|
||||
ReferencedContainer = "container:mert.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
</ProfileAction>
|
||||
<AnalyzeAction
|
||||
buildConfiguration = "Debug">
|
||||
</AnalyzeAction>
|
||||
<ArchiveAction
|
||||
buildConfiguration = "Release"
|
||||
revealArchiveInOrganizer = "YES">
|
||||
</ArchiveAction>
|
||||
</Scheme>
|
@ -0,0 +1,32 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>SchemeUserState</key>
|
||||
<dict>
|
||||
<key>extractor.xcscheme</key>
|
||||
<dict>
|
||||
<key>orderHint</key>
|
||||
<integer>1</integer>
|
||||
</dict>
|
||||
<key>mert.xcscheme</key>
|
||||
<dict>
|
||||
<key>orderHint</key>
|
||||
<integer>2</integer>
|
||||
</dict>
|
||||
</dict>
|
||||
<key>SuppressBuildableAutocreation</key>
|
||||
<dict>
|
||||
<key>1E1D825E15AC640800FE42E9</key>
|
||||
<dict>
|
||||
<key>primary</key>
|
||||
<true/>
|
||||
</dict>
|
||||
<key>1EB0AF041593A2180007E2A4</key>
|
||||
<dict>
|
||||
<key>primary</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -0,0 +1,54 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Scheme
|
||||
version = "1.3">
|
||||
<BuildAction
|
||||
parallelizeBuildables = "YES"
|
||||
buildImplicitDependencies = "YES">
|
||||
<BuildActionEntries>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "1E2CCF3215939E2D00D858D1"
|
||||
BuildableName = "libmert_lib.a"
|
||||
BlueprintName = "mert_lib"
|
||||
ReferencedContainer = "container:mert_lib.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
buildConfiguration = "Debug">
|
||||
<Testables>
|
||||
</Testables>
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
launchStyle = "0"
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Debug">
|
||||
<AdditionalOptions>
|
||||
</AdditionalOptions>
|
||||
</LaunchAction>
|
||||
<ProfileAction
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
savedToolIdentifier = ""
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Release">
|
||||
</ProfileAction>
|
||||
<AnalyzeAction
|
||||
buildConfiguration = "Debug">
|
||||
</AnalyzeAction>
|
||||
<ArchiveAction
|
||||
buildConfiguration = "Release"
|
||||
revealArchiveInOrganizer = "YES">
|
||||
</ArchiveAction>
|
||||
</Scheme>
|
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>SchemeUserState</key>
|
||||
<dict>
|
||||
<key>mert_lib.xcscheme</key>
|
||||
<dict>
|
||||
<key>orderHint</key>
|
||||
<integer>0</integer>
|
||||
</dict>
|
||||
</dict>
|
||||
<key>SuppressBuildableAutocreation</key>
|
||||
<dict>
|
||||
<key>1E2CCF3215939E2D00D858D1</key>
|
||||
<dict>
|
||||
<key>primary</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -308,6 +308,7 @@
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
/opt/local/lib,
|
||||
../../cmph/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lz",
|
||||
@ -318,6 +319,9 @@
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lboost_thread-mt",
|
||||
"-lboost_filesystem-mt",
|
||||
"-lboost_system-mt",
|
||||
"-lcmph",
|
||||
);
|
||||
PRODUCT_NAME = "moses-chart-cmd";
|
||||
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
|
||||
@ -341,6 +345,7 @@
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
/opt/local/lib,
|
||||
../../cmph/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lz",
|
||||
@ -351,6 +356,9 @@
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lboost_thread-mt",
|
||||
"-lboost_filesystem-mt",
|
||||
"-lboost_system-mt",
|
||||
"-lcmph",
|
||||
);
|
||||
PRODUCT_NAME = "moses-chart-cmd";
|
||||
USER_HEADER_SEARCH_PATHS = "../../ ../../moses/src";
|
||||
|
@ -43,12 +43,16 @@
|
||||
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
|
||||
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
|
||||
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
|
||||
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
|
||||
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
|
||||
<LibraryPath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47\lib;$(LibraryPath)</LibraryPath>
|
||||
<LibraryPath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47\lib;$(LibraryPath)</LibraryPath>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
@ -58,19 +62,20 @@
|
||||
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention>
|
||||
</DataExecutionPrevention>
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
<AdditionalLibraryDirectories>C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
@ -78,7 +83,7 @@
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>zdll.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies>C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
@ -87,6 +92,7 @@
|
||||
<DataExecutionPrevention>
|
||||
</DataExecutionPrevention>
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
<AdditionalLibraryDirectories>C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
|
@ -326,15 +326,20 @@
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
/opt/local/lib,
|
||||
../../cmph/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lflm",
|
||||
"-lmisc",
|
||||
"-loolm",
|
||||
"-ldstruct",
|
||||
"-lz",
|
||||
"-lirstlm",
|
||||
"-lmisc",
|
||||
"-ldstruct",
|
||||
"-loolm",
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lboost_thread-mt",
|
||||
"-lboost_filesystem-mt",
|
||||
"-lboost_system-mt",
|
||||
"-lcmph",
|
||||
);
|
||||
PREBINDING = NO;
|
||||
PRODUCT_NAME = "moses-cmd";
|
||||
@ -369,15 +374,20 @@
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
/opt/local/lib,
|
||||
../../cmph/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lflm",
|
||||
"-lmisc",
|
||||
"-loolm",
|
||||
"-ldstruct",
|
||||
"-lz",
|
||||
"-lirstlm",
|
||||
"-lmisc",
|
||||
"-ldstruct",
|
||||
"-loolm",
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lboost_thread-mt",
|
||||
"-lboost_filesystem-mt",
|
||||
"-lboost_system-mt",
|
||||
"-lcmph",
|
||||
);
|
||||
PREBINDING = NO;
|
||||
PRODUCT_NAME = "moses-cmd";
|
||||
@ -409,15 +419,20 @@
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
/opt/local/lib,
|
||||
../../cmph/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lflm",
|
||||
"-lmisc",
|
||||
"-loolm",
|
||||
"-ldstruct",
|
||||
"-lz",
|
||||
"-lirstlm",
|
||||
"-lmisc",
|
||||
"-ldstruct",
|
||||
"-loolm",
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lboost_thread-mt",
|
||||
"-lboost_filesystem-mt",
|
||||
"-lboost_system-mt",
|
||||
"-lcmph",
|
||||
);
|
||||
PREBINDING = NO;
|
||||
PRODUCT_NAME = "moses-cmd";
|
||||
|
@ -0,0 +1,72 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Scheme
|
||||
version = "1.3">
|
||||
<BuildAction
|
||||
parallelizeBuildables = "YES"
|
||||
buildImplicitDependencies = "YES">
|
||||
<BuildActionEntries>
|
||||
<BuildActionEntry
|
||||
buildForTesting = "YES"
|
||||
buildForRunning = "YES"
|
||||
buildForProfiling = "YES"
|
||||
buildForArchiving = "YES"
|
||||
buildForAnalyzing = "YES">
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "8DD76F620486A84900D96B5E"
|
||||
BuildableName = "moses-cmd"
|
||||
BlueprintName = "moses-cmd"
|
||||
ReferencedContainer = "container:moses-cmd.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildActionEntry>
|
||||
</BuildActionEntries>
|
||||
</BuildAction>
|
||||
<TestAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
buildConfiguration = "Debug">
|
||||
<Testables>
|
||||
</Testables>
|
||||
</TestAction>
|
||||
<LaunchAction
|
||||
selectedDebuggerIdentifier = "Xcode.DebuggerFoundation.Debugger.GDB"
|
||||
selectedLauncherIdentifier = "Xcode.DebuggerFoundation.Launcher.GDB"
|
||||
launchStyle = "0"
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Debug">
|
||||
<BuildableProductRunnable>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "8DD76F620486A84900D96B5E"
|
||||
BuildableName = "moses-cmd"
|
||||
BlueprintName = "moses-cmd"
|
||||
ReferencedContainer = "container:moses-cmd.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
<AdditionalOptions>
|
||||
</AdditionalOptions>
|
||||
</LaunchAction>
|
||||
<ProfileAction
|
||||
shouldUseLaunchSchemeArgsEnv = "YES"
|
||||
savedToolIdentifier = ""
|
||||
useCustomWorkingDirectory = "NO"
|
||||
buildConfiguration = "Release">
|
||||
<BuildableProductRunnable>
|
||||
<BuildableReference
|
||||
BuildableIdentifier = "primary"
|
||||
BlueprintIdentifier = "8DD76F620486A84900D96B5E"
|
||||
BuildableName = "moses-cmd"
|
||||
BlueprintName = "moses-cmd"
|
||||
ReferencedContainer = "container:moses-cmd.xcodeproj">
|
||||
</BuildableReference>
|
||||
</BuildableProductRunnable>
|
||||
</ProfileAction>
|
||||
<AnalyzeAction
|
||||
buildConfiguration = "Debug">
|
||||
</AnalyzeAction>
|
||||
<ArchiveAction
|
||||
buildConfiguration = "Release"
|
||||
revealArchiveInOrganizer = "YES">
|
||||
</ArchiveAction>
|
||||
</Scheme>
|
@ -0,0 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
|
||||
<plist version="1.0">
|
||||
<dict>
|
||||
<key>SchemeUserState</key>
|
||||
<dict>
|
||||
<key>moses-cmd.xcscheme</key>
|
||||
<dict>
|
||||
<key>orderHint</key>
|
||||
<integer>2</integer>
|
||||
</dict>
|
||||
</dict>
|
||||
<key>SuppressBuildableAutocreation</key>
|
||||
<dict>
|
||||
<key>8DD76F620486A84900D96B5E</key>
|
||||
<dict>
|
||||
<key>primary</key>
|
||||
<true/>
|
||||
</dict>
|
||||
</dict>
|
||||
</dict>
|
||||
</plist>
|
@ -20,6 +20,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "CreateOnDisk", "CreateOnDis
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "kenlm", "kenlm.vcxproj", "{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mosesserver", "mosesserver.vcxproj", "{85811FDF-8AD1-4490-A545-B2F51931A18C}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
@ -46,6 +48,10 @@ Global
|
||||
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{A5402E0B-6ED7-465C-9669-E4124A0CDDCB}.Release|Win32.Build.0 = Release|Win32
|
||||
{85811FDF-8AD1-4490-A545-B2F51931A18C}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{85811FDF-8AD1-4490-A545-B2F51931A18C}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{85811FDF-8AD1-4490-A545-B2F51931A18C}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{85811FDF-8AD1-4490-A545-B2F51931A18C}.Release|Win32.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
@ -13,6 +13,7 @@
|
||||
<ItemGroup>
|
||||
<ClInclude Include="..\..\moses\src\AlignmentInfo.h" />
|
||||
<ClInclude Include="..\..\moses\src\AlignmentInfoCollection.h" />
|
||||
<ClInclude Include="..\..\moses\src\BilingualDynSuffixArray.h" />
|
||||
<ClInclude Include="..\..\moses\src\BitmapContainer.h" />
|
||||
<ClInclude Include="..\..\moses\src\CellCollection.h" />
|
||||
<ClInclude Include="..\..\moses\src\ChartCell.h" />
|
||||
@ -22,7 +23,6 @@
|
||||
<ClInclude Include="..\..\moses\src\ChartHypothesis.h" />
|
||||
<ClInclude Include="..\..\moses\src\ChartHypothesisCollection.h" />
|
||||
<ClInclude Include="..\..\moses\src\ChartManager.h" />
|
||||
<ClInclude Include="..\..\moses\src\ChartRuleLookupManager.h" />
|
||||
<ClInclude Include="..\..\moses\src\ChartTranslationOption.h" />
|
||||
<ClInclude Include="..\..\moses\src\ChartTranslationOptionCollection.h" />
|
||||
<ClInclude Include="..\..\moses\src\ChartTranslationOptionList.h" />
|
||||
@ -32,6 +32,12 @@
|
||||
<ClInclude Include="..\..\moses\src\ChartTrellisPath.h" />
|
||||
<ClInclude Include="..\..\moses\src\ChartTrellisPathList.h" />
|
||||
<ClInclude Include="..\..\moses\src\ConfusionNet.h" />
|
||||
<ClInclude Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerCYKPlus.h" />
|
||||
<ClInclude Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerMemory.h" />
|
||||
<ClInclude Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerOnDisk.h" />
|
||||
<ClInclude Include="..\..\moses\src\CYKPlusParser\DotChart.h" />
|
||||
<ClInclude Include="..\..\moses\src\CYKPlusParser\DotChartInMemory.h" />
|
||||
<ClInclude Include="..\..\moses\src\CYKPlusParser\DotChartOnDisk.h" />
|
||||
<ClInclude Include="..\..\moses\src\DecodeFeature.h" />
|
||||
<ClInclude Include="..\..\moses\src\DecodeGraph.h" />
|
||||
<ClInclude Include="..\..\moses\src\DecodeStep.h" />
|
||||
@ -39,6 +45,11 @@
|
||||
<ClInclude Include="..\..\moses\src\DecodeStepTranslation.h" />
|
||||
<ClInclude Include="..\..\moses\src\Dictionary.h" />
|
||||
<ClInclude Include="..\..\moses\src\DummyScoreProducers.h" />
|
||||
<ClInclude Include="..\..\moses\src\DynSAInclude\file.h" />
|
||||
<ClInclude Include="..\..\moses\src\DynSAInclude\FileHandler.h" />
|
||||
<ClInclude Include="..\..\moses\src\DynSAInclude\onlineRLM.h" />
|
||||
<ClInclude Include="..\..\moses\src\DynSAInclude\quantizer.h" />
|
||||
<ClInclude Include="..\..\moses\src\DynSAInclude\vocab.h" />
|
||||
<ClInclude Include="..\..\moses\src\DynSuffixArray.h" />
|
||||
<ClInclude Include="..\..\moses\src\Factor.h" />
|
||||
<ClInclude Include="..\..\moses\src\FactorCollection.h" />
|
||||
@ -68,6 +79,7 @@
|
||||
<ClInclude Include="..\..\moses\src\LM\Joint.h" />
|
||||
<ClInclude Include="..\..\moses\src\LM\Ken.h" />
|
||||
<ClInclude Include="..\..\moses\src\LM\MultiFactor.h" />
|
||||
<ClInclude Include="..\..\moses\src\LM\ORLM.h" />
|
||||
<ClInclude Include="..\..\moses\src\LM\SingleFactor.h" />
|
||||
<ClInclude Include="..\..\moses\src\LVoc.h" />
|
||||
<ClInclude Include="..\..\moses\src\Manager.h" />
|
||||
@ -97,13 +109,29 @@
|
||||
<ClInclude Include="..\..\moses\src\RuleTable\LoaderFactory.h" />
|
||||
<ClInclude Include="..\..\moses\src\RuleTable\LoaderHiero.h" />
|
||||
<ClInclude Include="..\..\moses\src\RuleTable\LoaderStandard.h" />
|
||||
<ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionaryALSuffixArray.h" />
|
||||
<ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionaryNodeSCFG.h" />
|
||||
<ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionaryOnDisk.h" />
|
||||
<ClInclude Include="..\..\moses\src\RuleTable\PhraseDictionarySCFG.h" />
|
||||
<ClInclude Include="..\..\moses\src\RuleTable\Trie.h" />
|
||||
<ClInclude Include="..\..\moses\src\RuleTable\UTrie.h" />
|
||||
<ClInclude Include="..\..\moses\src\RuleTable\UTrieNode.h" />
|
||||
<ClInclude Include="..\..\moses\src\Scope3Parser\ApplicableRuleTrie.h" />
|
||||
<ClInclude Include="..\..\moses\src\Scope3Parser\IntermediateVarSpanNode.h" />
|
||||
<ClInclude Include="..\..\moses\src\Scope3Parser\Parser.h" />
|
||||
<ClInclude Include="..\..\moses\src\Scope3Parser\SentenceMap.h" />
|
||||
<ClInclude Include="..\..\moses\src\Scope3Parser\StackLattice.h" />
|
||||
<ClInclude Include="..\..\moses\src\Scope3Parser\StackLatticeBuilder.h" />
|
||||
<ClInclude Include="..\..\moses\src\Scope3Parser\StackLatticeSearcher.h" />
|
||||
<ClInclude Include="..\..\moses\src\Scope3Parser\VarSpanNode.h" />
|
||||
<ClInclude Include="..\..\moses\src\Scope3Parser\VarSpanTrieBuilder.h" />
|
||||
<ClInclude Include="..\..\moses\src\ScoreComponentCollection.h" />
|
||||
<ClInclude Include="..\..\moses\src\ScoreIndexManager.h" />
|
||||
<ClInclude Include="..\..\moses\src\ScoreProducer.h" />
|
||||
<ClInclude Include="..\..\moses\src\Search.h" />
|
||||
<ClInclude Include="..\..\moses\src\SearchCubePruning.h" />
|
||||
<ClInclude Include="..\..\moses\src\SearchNormal.h" />
|
||||
<ClInclude Include="..\..\moses\src\SearchNormalBatch.h" />
|
||||
<ClInclude Include="..\..\moses\src\Sentence.h" />
|
||||
<ClInclude Include="..\..\moses\src\SentenceStats.h" />
|
||||
<ClInclude Include="..\..\moses\src\SquareMatrix.h" />
|
||||
@ -135,6 +163,7 @@
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\..\moses\src\AlignmentInfo.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\AlignmentInfoCollection.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\BilingualDynSuffixArray.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\BitmapContainer.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\ChartCell.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\ChartCellCollection.cpp" />
|
||||
@ -149,6 +178,11 @@
|
||||
<ClCompile Include="..\..\moses\src\ChartTrellisNode.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\ChartTrellisPath.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\ConfusionNet.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerCYKPlus.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerMemory.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\CYKPlusParser\ChartRuleLookupManagerOnDisk.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\CYKPlusParser\DotChartInMemory.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\CYKPlusParser\DotChartOnDisk.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\DecodeFeature.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\DecodeGraph.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\DecodeStep.cpp" />
|
||||
@ -156,6 +190,8 @@
|
||||
<ClCompile Include="..\..\moses\src\DecodeStepTranslation.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\Dictionary.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\DummyScoreProducers.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\DynSAInclude\FileHandler.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\DynSAInclude\vocab.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\DynSuffixArray.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\Factor.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\FactorCollection.cpp" />
|
||||
@ -183,6 +219,7 @@
|
||||
<ClCompile Include="..\..\moses\src\LM\Joint.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\LM\Ken.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\LM\MultiFactor.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\LM\ORLM.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\LM\SingleFactor.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\LVoc.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\Manager.cpp" />
|
||||
@ -207,13 +244,24 @@
|
||||
<ClCompile Include="..\..\moses\src\RuleTable\LoaderFactory.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\RuleTable\LoaderHiero.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\RuleTable\LoaderStandard.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionaryALSuffixArray.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionaryNodeSCFG.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionaryOnDisk.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\RuleTable\PhraseDictionarySCFG.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\RuleTable\Trie.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\RuleTable\UTrie.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\RuleTable\UTrieNode.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\Scope3Parser\ApplicableRuleTrie.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\Scope3Parser\Parser.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\Scope3Parser\StackLatticeBuilder.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\Scope3Parser\VarSpanTrieBuilder.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\ScoreComponentCollection.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\ScoreIndexManager.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\ScoreProducer.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\Search.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\SearchCubePruning.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\SearchNormal.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\SearchNormalBatch.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\Sentence.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\SentenceStats.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\SquareMatrix.cpp" />
|
||||
@ -239,6 +287,9 @@
|
||||
<ClCompile Include="..\..\moses\src\WordsRange.cpp" />
|
||||
<ClCompile Include="..\..\moses\src\XmlOption.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="..\..\util\file.hh" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{8122157A-0DE5-44FF-8E5B-024ED6ACE7AF}</ProjectGuid>
|
||||
<RootNamespace>moses</RootNamespace>
|
||||
@ -270,17 +321,17 @@
|
||||
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
|
||||
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
|
||||
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
|
||||
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
|
||||
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
|
||||
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;C:\GnuWin32\include;$(IncludePath)</IncludePath>
|
||||
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;C:\GnuWin32\include;$(IncludePath)</IncludePath>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebug</RuntimeLibrary>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
@ -295,9 +346,9 @@
|
||||
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
|
||||
<AdditionalIncludeDirectories>C:\Program Files\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
|
||||
<AdditionalIncludeDirectories>C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../../;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;LM_INTERNAL;TRACE_ENABLE;_CRT_SECURE_NO_DEPRECATE;_SCL_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
|
@ -7,6 +7,36 @@
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
1E0BA41815B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E0BA41615B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp */; };
|
||||
1E0BA41915B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E0BA41715B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h */; };
|
||||
1E1D824015AC29BB00FE42E9 /* FileHandler.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */; };
|
||||
1E1D824115AC29BB00FE42E9 /* FileHandler.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E1D823F15AC29BB00FE42E9 /* FileHandler.h */; };
|
||||
1E619EA115B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E619E9F15B8713600C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp */; };
|
||||
1E619EA215B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E619EA015B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h */; };
|
||||
1E6D9FD615D027560064D436 /* BlockHashIndex.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FBD15D027560064D436 /* BlockHashIndex.cpp */; };
|
||||
1E6D9FD715D027560064D436 /* BlockHashIndex.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FBE15D027560064D436 /* BlockHashIndex.h */; };
|
||||
1E6D9FD815D027560064D436 /* CanonicalHuffman.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FBF15D027560064D436 /* CanonicalHuffman.h */; };
|
||||
1E6D9FD915D027560064D436 /* CmphStringVectorAdapter.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FC015D027560064D436 /* CmphStringVectorAdapter.cpp */; };
|
||||
1E6D9FDA15D027560064D436 /* CmphStringVectorAdapter.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC115D027560064D436 /* CmphStringVectorAdapter.h */; };
|
||||
1E6D9FDB15D027560064D436 /* ConsistantPhrases.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC215D027560064D436 /* ConsistantPhrases.h */; };
|
||||
1E6D9FDD15D027560064D436 /* LexicalReorderingTableCompact.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FC415D027560064D436 /* LexicalReorderingTableCompact.cpp */; };
|
||||
1E6D9FDE15D027560064D436 /* LexicalReorderingTableCompact.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC515D027560064D436 /* LexicalReorderingTableCompact.h */; };
|
||||
1E6D9FDF15D027560064D436 /* LexicalReorderingTableCreator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FC615D027560064D436 /* LexicalReorderingTableCreator.cpp */; };
|
||||
1E6D9FE015D027560064D436 /* LexicalReorderingTableCreator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC715D027560064D436 /* LexicalReorderingTableCreator.h */; };
|
||||
1E6D9FE115D027560064D436 /* ListCoders.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC815D027560064D436 /* ListCoders.h */; };
|
||||
1E6D9FE215D027560064D436 /* MmapAllocator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FC915D027560064D436 /* MmapAllocator.h */; };
|
||||
1E6D9FE315D027560064D436 /* MonotonicVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCA15D027560064D436 /* MonotonicVector.h */; };
|
||||
1E6D9FE415D027560064D436 /* MurmurHash3.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FCB15D027560064D436 /* MurmurHash3.cpp */; };
|
||||
1E6D9FE515D027560064D436 /* MurmurHash3.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCC15D027560064D436 /* MurmurHash3.h */; };
|
||||
1E6D9FE615D027560064D436 /* PackedArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCD15D027560064D436 /* PackedArray.h */; };
|
||||
1E6D9FE715D027560064D436 /* PhraseDecoder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FCE15D027560064D436 /* PhraseDecoder.cpp */; };
|
||||
1E6D9FE815D027560064D436 /* PhraseDecoder.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FCF15D027560064D436 /* PhraseDecoder.h */; };
|
||||
1E6D9FE915D027560064D436 /* PhraseDictionaryCompact.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FD015D027560064D436 /* PhraseDictionaryCompact.cpp */; };
|
||||
1E6D9FEA15D027560064D436 /* PhraseDictionaryCompact.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD115D027560064D436 /* PhraseDictionaryCompact.h */; };
|
||||
1E6D9FEB15D027560064D436 /* PhraseTableCreator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E6D9FD215D027560064D436 /* PhraseTableCreator.cpp */; };
|
||||
1E6D9FEC15D027560064D436 /* PhraseTableCreator.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD315D027560064D436 /* PhraseTableCreator.h */; };
|
||||
1E6D9FED15D027560064D436 /* StringVector.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD415D027560064D436 /* StringVector.h */; };
|
||||
1E6D9FEE15D027560064D436 /* TargetPhraseCollectionCache.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E6D9FD515D027560064D436 /* TargetPhraseCollectionCache.h */; };
|
||||
1E879EA715A346F90051F346 /* SearchNormalBatch.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */; };
|
||||
1E879EA815A346F90051F346 /* SearchNormalBatch.h in Headers */ = {isa = PBXBuildFile; fileRef = 1E879EA615A346F90051F346 /* SearchNormalBatch.h */; };
|
||||
1EAC363514CDC79300DF97C3 /* Loader.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC362C14CDC79300DF97C3 /* Loader.h */; };
|
||||
@ -18,6 +48,8 @@
|
||||
1EAC363B14CDC79300DF97C3 /* LoaderHiero.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC363214CDC79300DF97C3 /* LoaderHiero.h */; };
|
||||
1EAC363C14CDC79300DF97C3 /* LoaderStandard.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EAC363314CDC79300DF97C3 /* LoaderStandard.cpp */; };
|
||||
1EAC363D14CDC79300DF97C3 /* LoaderStandard.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EAC363414CDC79300DF97C3 /* LoaderStandard.h */; };
|
||||
1EC32DB815D2D90700A313B1 /* ThrowingFwrite.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC32DB615D2D90700A313B1 /* ThrowingFwrite.cpp */; };
|
||||
1EC32DB915D2D90700A313B1 /* ThrowingFwrite.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC32DB715D2D90700A313B1 /* ThrowingFwrite.h */; };
|
||||
1EC7374614B977AB00238410 /* AlignmentInfo.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735D314B977AA00238410 /* AlignmentInfo.cpp */; };
|
||||
1EC7374714B977AB00238410 /* AlignmentInfo.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735D414B977AA00238410 /* AlignmentInfo.h */; };
|
||||
1EC7374814B977AB00238410 /* AlignmentInfoCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735D514B977AA00238410 /* AlignmentInfoCollection.cpp */; };
|
||||
@ -26,7 +58,6 @@
|
||||
1EC7374B14B977AB00238410 /* BilingualDynSuffixArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735D814B977AA00238410 /* BilingualDynSuffixArray.h */; };
|
||||
1EC7374C14B977AB00238410 /* BitmapContainer.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735D914B977AA00238410 /* BitmapContainer.cpp */; };
|
||||
1EC7374D14B977AB00238410 /* BitmapContainer.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735DA14B977AA00238410 /* BitmapContainer.h */; };
|
||||
1EC7374E14B977AB00238410 /* CellCollection.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735DB14B977AA00238410 /* CellCollection.h */; };
|
||||
1EC7374F14B977AB00238410 /* ChartCell.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735DC14B977AA00238410 /* ChartCell.cpp */; };
|
||||
1EC7375014B977AB00238410 /* ChartCell.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC735DD14B977AA00238410 /* ChartCell.h */; };
|
||||
1EC7375114B977AB00238410 /* ChartCellCollection.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC735DE14B977AA00238410 /* ChartCellCollection.cpp */; };
|
||||
@ -72,8 +103,6 @@
|
||||
1EC7378414B977AB00238410 /* DummyScoreProducers.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7361114B977AA00238410 /* DummyScoreProducers.cpp */; };
|
||||
1EC7378514B977AB00238410 /* DummyScoreProducers.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361214B977AA00238410 /* DummyScoreProducers.h */; };
|
||||
1EC7378614B977AB00238410 /* fdstream.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361414B977AA00238410 /* fdstream.h */; };
|
||||
1EC7378714B977AB00238410 /* file.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7361514B977AA00238410 /* file.cpp */; };
|
||||
1EC7378814B977AB00238410 /* file.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361614B977AA00238410 /* file.h */; };
|
||||
1EC7378914B977AB00238410 /* hash.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361714B977AA00238410 /* hash.h */; };
|
||||
1EC7378A14B977AB00238410 /* onlineRLM.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EC7361914B977AA00238410 /* onlineRLM.h */; };
|
||||
1EC7378B14B977AB00238410 /* params.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EC7361B14B977AA00238410 /* params.cpp */; };
|
||||
@ -295,12 +324,51 @@
|
||||
1EDA809114D19FBF003D2191 /* UTrie.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EDA808314D19FBF003D2191 /* UTrie.h */; };
|
||||
1EDA809214D19FBF003D2191 /* UTrieNode.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EDA808414D19FBF003D2191 /* UTrieNode.cpp */; };
|
||||
1EDA809314D19FBF003D2191 /* UTrieNode.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EDA808514D19FBF003D2191 /* UTrieNode.h */; };
|
||||
1EE418ED15C7FDCB0028F9AB /* Match.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418E415C7FDCB0028F9AB /* Match.h */; };
|
||||
1EE418EE15C7FDCB0028F9AB /* SentenceAlignment.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418E515C7FDCB0028F9AB /* SentenceAlignment.cpp */; };
|
||||
1EE418EF15C7FDCB0028F9AB /* SentenceAlignment.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418E615C7FDCB0028F9AB /* SentenceAlignment.h */; };
|
||||
1EE418F015C7FDCB0028F9AB /* SuffixArray.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418E715C7FDCB0028F9AB /* SuffixArray.cpp */; };
|
||||
1EE418F115C7FDCB0028F9AB /* SuffixArray.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418E815C7FDCB0028F9AB /* SuffixArray.h */; };
|
||||
1EE418F215C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418E915C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp */; };
|
||||
1EE418F315C7FDCB0028F9AB /* FuzzyMatchWrapper.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418EA15C7FDCB0028F9AB /* FuzzyMatchWrapper.h */; };
|
||||
1EE418F415C7FDCB0028F9AB /* Vocabulary.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EE418EB15C7FDCB0028F9AB /* Vocabulary.cpp */; };
|
||||
1EE418F515C7FDCB0028F9AB /* Vocabulary.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EE418EC15C7FDCB0028F9AB /* Vocabulary.h */; };
|
||||
1EF0709314B9EFCC0052152A /* ParallelBackoff.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF0709114B9EFCC0052152A /* ParallelBackoff.cpp */; };
|
||||
1EF0709414B9EFCC0052152A /* ParallelBackoff.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EF0709214B9EFCC0052152A /* ParallelBackoff.h */; };
|
||||
1EF8F2C4159A61970047B613 /* HypoList.h in Headers */ = {isa = PBXBuildFile; fileRef = 1EF8F2C3159A61970047B613 /* HypoList.h */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
1E0BA41615B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryFuzzyMatch.cpp; path = ../../moses/src/RuleTable/PhraseDictionaryFuzzyMatch.cpp; sourceTree = "<group>"; };
|
||||
1E0BA41715B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryFuzzyMatch.h; path = ../../moses/src/RuleTable/PhraseDictionaryFuzzyMatch.h; sourceTree = "<group>"; };
|
||||
1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = FileHandler.cpp; sourceTree = "<group>"; };
|
||||
1E1D823F15AC29BB00FE42E9 /* FileHandler.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = FileHandler.h; sourceTree = "<group>"; };
|
||||
1E619E9F15B8713600C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartRuleLookupManagerMemoryPerSentence.cpp; path = ../../moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.cpp; sourceTree = "<group>"; };
|
||||
1E619EA015B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartRuleLookupManagerMemoryPerSentence.h; path = ../../moses/src/CYKPlusParser/ChartRuleLookupManagerMemoryPerSentence.h; sourceTree = "<group>"; };
|
||||
1E6D9FBD15D027560064D436 /* BlockHashIndex.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = BlockHashIndex.cpp; path = ../../moses/src/CompactPT/BlockHashIndex.cpp; sourceTree = "<group>"; };
|
||||
1E6D9FBE15D027560064D436 /* BlockHashIndex.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = BlockHashIndex.h; path = ../../moses/src/CompactPT/BlockHashIndex.h; sourceTree = "<group>"; };
|
||||
1E6D9FBF15D027560064D436 /* CanonicalHuffman.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CanonicalHuffman.h; path = ../../moses/src/CompactPT/CanonicalHuffman.h; sourceTree = "<group>"; };
|
||||
1E6D9FC015D027560064D436 /* CmphStringVectorAdapter.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = CmphStringVectorAdapter.cpp; path = ../../moses/src/CompactPT/CmphStringVectorAdapter.cpp; sourceTree = "<group>"; };
|
||||
1E6D9FC115D027560064D436 /* CmphStringVectorAdapter.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CmphStringVectorAdapter.h; path = ../../moses/src/CompactPT/CmphStringVectorAdapter.h; sourceTree = "<group>"; };
|
||||
1E6D9FC215D027560064D436 /* ConsistantPhrases.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ConsistantPhrases.h; path = ../../moses/src/CompactPT/ConsistantPhrases.h; sourceTree = "<group>"; };
|
||||
1E6D9FC415D027560064D436 /* LexicalReorderingTableCompact.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexicalReorderingTableCompact.cpp; path = ../../moses/src/CompactPT/LexicalReorderingTableCompact.cpp; sourceTree = "<group>"; };
|
||||
1E6D9FC515D027560064D436 /* LexicalReorderingTableCompact.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LexicalReorderingTableCompact.h; path = ../../moses/src/CompactPT/LexicalReorderingTableCompact.h; sourceTree = "<group>"; };
|
||||
1E6D9FC615D027560064D436 /* LexicalReorderingTableCreator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LexicalReorderingTableCreator.cpp; path = ../../moses/src/CompactPT/LexicalReorderingTableCreator.cpp; sourceTree = "<group>"; };
|
||||
1E6D9FC715D027560064D436 /* LexicalReorderingTableCreator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LexicalReorderingTableCreator.h; path = ../../moses/src/CompactPT/LexicalReorderingTableCreator.h; sourceTree = "<group>"; };
|
||||
1E6D9FC815D027560064D436 /* ListCoders.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ListCoders.h; path = ../../moses/src/CompactPT/ListCoders.h; sourceTree = "<group>"; };
|
||||
1E6D9FC915D027560064D436 /* MmapAllocator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MmapAllocator.h; path = ../../moses/src/CompactPT/MmapAllocator.h; sourceTree = "<group>"; };
|
||||
1E6D9FCA15D027560064D436 /* MonotonicVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MonotonicVector.h; path = ../../moses/src/CompactPT/MonotonicVector.h; sourceTree = "<group>"; };
|
||||
1E6D9FCB15D027560064D436 /* MurmurHash3.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = MurmurHash3.cpp; path = ../../moses/src/CompactPT/MurmurHash3.cpp; sourceTree = "<group>"; };
|
||||
1E6D9FCC15D027560064D436 /* MurmurHash3.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = MurmurHash3.h; path = ../../moses/src/CompactPT/MurmurHash3.h; sourceTree = "<group>"; };
|
||||
1E6D9FCD15D027560064D436 /* PackedArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PackedArray.h; path = ../../moses/src/CompactPT/PackedArray.h; sourceTree = "<group>"; };
|
||||
1E6D9FCE15D027560064D436 /* PhraseDecoder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDecoder.cpp; path = ../../moses/src/CompactPT/PhraseDecoder.cpp; sourceTree = "<group>"; };
|
||||
1E6D9FCF15D027560064D436 /* PhraseDecoder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDecoder.h; path = ../../moses/src/CompactPT/PhraseDecoder.h; sourceTree = "<group>"; };
|
||||
1E6D9FD015D027560064D436 /* PhraseDictionaryCompact.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseDictionaryCompact.cpp; path = ../../moses/src/CompactPT/PhraseDictionaryCompact.cpp; sourceTree = "<group>"; };
|
||||
1E6D9FD115D027560064D436 /* PhraseDictionaryCompact.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseDictionaryCompact.h; path = ../../moses/src/CompactPT/PhraseDictionaryCompact.h; sourceTree = "<group>"; };
|
||||
1E6D9FD215D027560064D436 /* PhraseTableCreator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = PhraseTableCreator.cpp; path = ../../moses/src/CompactPT/PhraseTableCreator.cpp; sourceTree = "<group>"; };
|
||||
1E6D9FD315D027560064D436 /* PhraseTableCreator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = PhraseTableCreator.h; path = ../../moses/src/CompactPT/PhraseTableCreator.h; sourceTree = "<group>"; };
|
||||
1E6D9FD415D027560064D436 /* StringVector.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = StringVector.h; path = ../../moses/src/CompactPT/StringVector.h; sourceTree = "<group>"; };
|
||||
1E6D9FD515D027560064D436 /* TargetPhraseCollectionCache.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = TargetPhraseCollectionCache.h; path = ../../moses/src/CompactPT/TargetPhraseCollectionCache.h; sourceTree = "<group>"; };
|
||||
1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SearchNormalBatch.cpp; path = ../../moses/src/SearchNormalBatch.cpp; sourceTree = "<group>"; };
|
||||
1E879EA615A346F90051F346 /* SearchNormalBatch.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SearchNormalBatch.h; path = ../../moses/src/SearchNormalBatch.h; sourceTree = "<group>"; };
|
||||
1EAC362C14CDC79300DF97C3 /* Loader.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Loader.h; path = ../../moses/src/RuleTable/Loader.h; sourceTree = "<group>"; };
|
||||
@ -312,6 +380,8 @@
|
||||
1EAC363214CDC79300DF97C3 /* LoaderHiero.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LoaderHiero.h; path = ../../moses/src/RuleTable/LoaderHiero.h; sourceTree = "<group>"; };
|
||||
1EAC363314CDC79300DF97C3 /* LoaderStandard.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = LoaderStandard.cpp; path = ../../moses/src/RuleTable/LoaderStandard.cpp; sourceTree = "<group>"; };
|
||||
1EAC363414CDC79300DF97C3 /* LoaderStandard.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = LoaderStandard.h; path = ../../moses/src/RuleTable/LoaderStandard.h; sourceTree = "<group>"; };
|
||||
1EC32DB615D2D90700A313B1 /* ThrowingFwrite.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ThrowingFwrite.cpp; path = ../../moses/src/CompactPT/ThrowingFwrite.cpp; sourceTree = "<group>"; };
|
||||
1EC32DB715D2D90700A313B1 /* ThrowingFwrite.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ThrowingFwrite.h; path = ../../moses/src/CompactPT/ThrowingFwrite.h; sourceTree = "<group>"; };
|
||||
1EC735D314B977AA00238410 /* AlignmentInfo.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AlignmentInfo.cpp; path = ../../moses/src/AlignmentInfo.cpp; sourceTree = "<group>"; };
|
||||
1EC735D414B977AA00238410 /* AlignmentInfo.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = AlignmentInfo.h; path = ../../moses/src/AlignmentInfo.h; sourceTree = "<group>"; };
|
||||
1EC735D514B977AA00238410 /* AlignmentInfoCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = AlignmentInfoCollection.cpp; path = ../../moses/src/AlignmentInfoCollection.cpp; sourceTree = "<group>"; };
|
||||
@ -320,7 +390,6 @@
|
||||
1EC735D814B977AA00238410 /* BilingualDynSuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = BilingualDynSuffixArray.h; path = ../../moses/src/BilingualDynSuffixArray.h; sourceTree = "<group>"; };
|
||||
1EC735D914B977AA00238410 /* BitmapContainer.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = BitmapContainer.cpp; path = ../../moses/src/BitmapContainer.cpp; sourceTree = "<group>"; };
|
||||
1EC735DA14B977AA00238410 /* BitmapContainer.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = BitmapContainer.h; path = ../../moses/src/BitmapContainer.h; sourceTree = "<group>"; };
|
||||
1EC735DB14B977AA00238410 /* CellCollection.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = CellCollection.h; path = ../../moses/src/CellCollection.h; sourceTree = "<group>"; };
|
||||
1EC735DC14B977AA00238410 /* ChartCell.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartCell.cpp; path = ../../moses/src/ChartCell.cpp; sourceTree = "<group>"; };
|
||||
1EC735DD14B977AA00238410 /* ChartCell.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = ChartCell.h; path = ../../moses/src/ChartCell.h; sourceTree = "<group>"; };
|
||||
1EC735DE14B977AA00238410 /* ChartCellCollection.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = ChartCellCollection.cpp; path = ../../moses/src/ChartCellCollection.cpp; sourceTree = "<group>"; };
|
||||
@ -366,8 +435,6 @@
|
||||
1EC7361114B977AA00238410 /* DummyScoreProducers.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = DummyScoreProducers.cpp; path = ../../moses/src/DummyScoreProducers.cpp; sourceTree = "<group>"; };
|
||||
1EC7361214B977AA00238410 /* DummyScoreProducers.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = DummyScoreProducers.h; path = ../../moses/src/DummyScoreProducers.h; sourceTree = "<group>"; };
|
||||
1EC7361414B977AA00238410 /* fdstream.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = fdstream.h; sourceTree = "<group>"; };
|
||||
1EC7361514B977AA00238410 /* file.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = file.cpp; sourceTree = "<group>"; };
|
||||
1EC7361614B977AA00238410 /* file.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = file.h; sourceTree = "<group>"; };
|
||||
1EC7361714B977AA00238410 /* hash.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = hash.h; sourceTree = "<group>"; };
|
||||
1EC7361914B977AA00238410 /* onlineRLM.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = onlineRLM.h; sourceTree = "<group>"; };
|
||||
1EC7361B14B977AA00238410 /* params.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = params.cpp; sourceTree = "<group>"; };
|
||||
@ -591,6 +658,15 @@
|
||||
1EDA808314D19FBF003D2191 /* UTrie.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = UTrie.h; path = ../../moses/src/RuleTable/UTrie.h; sourceTree = "<group>"; };
|
||||
1EDA808414D19FBF003D2191 /* UTrieNode.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = UTrieNode.cpp; path = ../../moses/src/RuleTable/UTrieNode.cpp; sourceTree = "<group>"; };
|
||||
1EDA808514D19FBF003D2191 /* UTrieNode.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = UTrieNode.h; path = ../../moses/src/RuleTable/UTrieNode.h; sourceTree = "<group>"; };
|
||||
1EE418E415C7FDCB0028F9AB /* Match.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Match.h; path = "../../moses/src/fuzzy-match/Match.h"; sourceTree = "<group>"; };
|
||||
1EE418E515C7FDCB0028F9AB /* SentenceAlignment.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SentenceAlignment.cpp; path = "../../moses/src/fuzzy-match/SentenceAlignment.cpp"; sourceTree = "<group>"; };
|
||||
1EE418E615C7FDCB0028F9AB /* SentenceAlignment.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SentenceAlignment.h; path = "../../moses/src/fuzzy-match/SentenceAlignment.h"; sourceTree = "<group>"; };
|
||||
1EE418E715C7FDCB0028F9AB /* SuffixArray.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = SuffixArray.cpp; path = "../../moses/src/fuzzy-match/SuffixArray.cpp"; sourceTree = "<group>"; };
|
||||
1EE418E815C7FDCB0028F9AB /* SuffixArray.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = SuffixArray.h; path = "../../moses/src/fuzzy-match/SuffixArray.h"; sourceTree = "<group>"; };
|
||||
1EE418E915C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = FuzzyMatchWrapper.cpp; path = "../../moses/src/fuzzy-match/FuzzyMatchWrapper.cpp"; sourceTree = "<group>"; };
|
||||
1EE418EA15C7FDCB0028F9AB /* FuzzyMatchWrapper.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = FuzzyMatchWrapper.h; path = "../../moses/src/fuzzy-match/FuzzyMatchWrapper.h"; sourceTree = "<group>"; };
|
||||
1EE418EB15C7FDCB0028F9AB /* Vocabulary.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = Vocabulary.cpp; path = "../../moses/src/fuzzy-match/Vocabulary.cpp"; sourceTree = "<group>"; };
|
||||
1EE418EC15C7FDCB0028F9AB /* Vocabulary.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = Vocabulary.h; path = "../../moses/src/fuzzy-match/Vocabulary.h"; sourceTree = "<group>"; };
|
||||
1EF0709114B9EFCC0052152A /* ParallelBackoff.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = ParallelBackoff.cpp; sourceTree = "<group>"; };
|
||||
1EF0709214B9EFCC0052152A /* ParallelBackoff.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = ParallelBackoff.h; sourceTree = "<group>"; };
|
||||
1EF8F2C3159A61970047B613 /* HypoList.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = HypoList.h; path = ../../moses/src/HypoList.h; sourceTree = "<group>"; };
|
||||
@ -621,8 +697,8 @@
|
||||
08FB7795FE84155DC02AAC07 /* Source */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */,
|
||||
1E879EA615A346F90051F346 /* SearchNormalBatch.h */,
|
||||
1E6D9FF015D027680064D436 /* CompactPT */,
|
||||
1ECF13DE15C1A82400EA1DCE /* fuzzy-match */,
|
||||
1EDA803514D19ECD003D2191 /* Scope3Parser */,
|
||||
1EDA803414D19EB8003D2191 /* CYKPlusParser */,
|
||||
1EC7365B14B977AA00238410 /* LM */,
|
||||
@ -636,7 +712,6 @@
|
||||
1EC735D814B977AA00238410 /* BilingualDynSuffixArray.h */,
|
||||
1EC735D914B977AA00238410 /* BitmapContainer.cpp */,
|
||||
1EC735DA14B977AA00238410 /* BitmapContainer.h */,
|
||||
1EC735DB14B977AA00238410 /* CellCollection.h */,
|
||||
1EC735DC14B977AA00238410 /* ChartCell.cpp */,
|
||||
1EC735DD14B977AA00238410 /* ChartCell.h */,
|
||||
1EC735DE14B977AA00238410 /* ChartCellCollection.cpp */,
|
||||
@ -782,6 +857,8 @@
|
||||
1EC736F414B977AB00238410 /* SearchCubePruning.h */,
|
||||
1EC736F514B977AB00238410 /* SearchNormal.cpp */,
|
||||
1EC736F614B977AB00238410 /* SearchNormal.h */,
|
||||
1E879EA515A346F90051F346 /* SearchNormalBatch.cpp */,
|
||||
1E879EA615A346F90051F346 /* SearchNormalBatch.h */,
|
||||
1EC736F714B977AB00238410 /* Sentence.cpp */,
|
||||
1EC736F814B977AB00238410 /* Sentence.h */,
|
||||
1EC736F914B977AB00238410 /* SentenceStats.cpp */,
|
||||
@ -845,6 +922,39 @@
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1E6D9FF015D027680064D436 /* CompactPT */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1EC32DB615D2D90700A313B1 /* ThrowingFwrite.cpp */,
|
||||
1EC32DB715D2D90700A313B1 /* ThrowingFwrite.h */,
|
||||
1E6D9FBD15D027560064D436 /* BlockHashIndex.cpp */,
|
||||
1E6D9FBE15D027560064D436 /* BlockHashIndex.h */,
|
||||
1E6D9FBF15D027560064D436 /* CanonicalHuffman.h */,
|
||||
1E6D9FC015D027560064D436 /* CmphStringVectorAdapter.cpp */,
|
||||
1E6D9FC115D027560064D436 /* CmphStringVectorAdapter.h */,
|
||||
1E6D9FC215D027560064D436 /* ConsistantPhrases.h */,
|
||||
1E6D9FC415D027560064D436 /* LexicalReorderingTableCompact.cpp */,
|
||||
1E6D9FC515D027560064D436 /* LexicalReorderingTableCompact.h */,
|
||||
1E6D9FC615D027560064D436 /* LexicalReorderingTableCreator.cpp */,
|
||||
1E6D9FC715D027560064D436 /* LexicalReorderingTableCreator.h */,
|
||||
1E6D9FC815D027560064D436 /* ListCoders.h */,
|
||||
1E6D9FC915D027560064D436 /* MmapAllocator.h */,
|
||||
1E6D9FCA15D027560064D436 /* MonotonicVector.h */,
|
||||
1E6D9FCB15D027560064D436 /* MurmurHash3.cpp */,
|
||||
1E6D9FCC15D027560064D436 /* MurmurHash3.h */,
|
||||
1E6D9FCD15D027560064D436 /* PackedArray.h */,
|
||||
1E6D9FCE15D027560064D436 /* PhraseDecoder.cpp */,
|
||||
1E6D9FCF15D027560064D436 /* PhraseDecoder.h */,
|
||||
1E6D9FD015D027560064D436 /* PhraseDictionaryCompact.cpp */,
|
||||
1E6D9FD115D027560064D436 /* PhraseDictionaryCompact.h */,
|
||||
1E6D9FD215D027560064D436 /* PhraseTableCreator.cpp */,
|
||||
1E6D9FD315D027560064D436 /* PhraseTableCreator.h */,
|
||||
1E6D9FD415D027560064D436 /* StringVector.h */,
|
||||
1E6D9FD515D027560064D436 /* TargetPhraseCollectionCache.h */,
|
||||
);
|
||||
name = CompactPT;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1EAC362B14CDC76200DF97C3 /* RuleTable */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
@ -856,6 +966,8 @@
|
||||
1EDA807D14D19FBF003D2191 /* PhraseDictionaryOnDisk.h */,
|
||||
1EDA807E14D19FBF003D2191 /* PhraseDictionarySCFG.cpp */,
|
||||
1EDA807F14D19FBF003D2191 /* PhraseDictionarySCFG.h */,
|
||||
1E0BA41615B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp */,
|
||||
1E0BA41715B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h */,
|
||||
1EDA808014D19FBF003D2191 /* Trie.cpp */,
|
||||
1EDA808114D19FBF003D2191 /* Trie.h */,
|
||||
1EDA808214D19FBF003D2191 /* UTrie.cpp */,
|
||||
@ -878,9 +990,9 @@
|
||||
1EC7361314B977AA00238410 /* DynSAInclude */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1E1D823E15AC29BB00FE42E9 /* FileHandler.cpp */,
|
||||
1E1D823F15AC29BB00FE42E9 /* FileHandler.h */,
|
||||
1EC7361414B977AA00238410 /* fdstream.h */,
|
||||
1EC7361514B977AA00238410 /* file.cpp */,
|
||||
1EC7361614B977AA00238410 /* file.h */,
|
||||
1EC7361714B977AA00238410 /* hash.h */,
|
||||
1EC7361914B977AA00238410 /* onlineRLM.h */,
|
||||
1EC7361B14B977AA00238410 /* params.cpp */,
|
||||
@ -930,9 +1042,27 @@
|
||||
path = ../../moses/src/LM;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1ECF13DE15C1A82400EA1DCE /* fuzzy-match */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1EE418E415C7FDCB0028F9AB /* Match.h */,
|
||||
1EE418E515C7FDCB0028F9AB /* SentenceAlignment.cpp */,
|
||||
1EE418E615C7FDCB0028F9AB /* SentenceAlignment.h */,
|
||||
1EE418E715C7FDCB0028F9AB /* SuffixArray.cpp */,
|
||||
1EE418E815C7FDCB0028F9AB /* SuffixArray.h */,
|
||||
1EE418E915C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp */,
|
||||
1EE418EA15C7FDCB0028F9AB /* FuzzyMatchWrapper.h */,
|
||||
1EE418EB15C7FDCB0028F9AB /* Vocabulary.cpp */,
|
||||
1EE418EC15C7FDCB0028F9AB /* Vocabulary.h */,
|
||||
);
|
||||
name = "fuzzy-match";
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1EDA803414D19EB8003D2191 /* CYKPlusParser */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1E619E9F15B8713600C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp */,
|
||||
1E619EA015B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h */,
|
||||
1EDA806214D19F12003D2191 /* ChartRuleLookupManagerCYKPlus.cpp */,
|
||||
1EDA806314D19F12003D2191 /* ChartRuleLookupManagerCYKPlus.h */,
|
||||
1EDA806414D19F12003D2191 /* ChartRuleLookupManagerMemory.cpp */,
|
||||
@ -986,7 +1116,6 @@
|
||||
1EC7374914B977AB00238410 /* AlignmentInfoCollection.h in Headers */,
|
||||
1EC7374B14B977AB00238410 /* BilingualDynSuffixArray.h in Headers */,
|
||||
1EC7374D14B977AB00238410 /* BitmapContainer.h in Headers */,
|
||||
1EC7374E14B977AB00238410 /* CellCollection.h in Headers */,
|
||||
1EC7375014B977AB00238410 /* ChartCell.h in Headers */,
|
||||
1EC7375214B977AB00238410 /* ChartCellCollection.h in Headers */,
|
||||
1EC7375314B977AB00238410 /* ChartCellLabel.h in Headers */,
|
||||
@ -1012,7 +1141,6 @@
|
||||
1EC7377D14B977AB00238410 /* Dictionary.h in Headers */,
|
||||
1EC7378514B977AB00238410 /* DummyScoreProducers.h in Headers */,
|
||||
1EC7378614B977AB00238410 /* fdstream.h in Headers */,
|
||||
1EC7378814B977AB00238410 /* file.h in Headers */,
|
||||
1EC7378914B977AB00238410 /* hash.h in Headers */,
|
||||
1EC7378A14B977AB00238410 /* onlineRLM.h in Headers */,
|
||||
1EC7378C14B977AB00238410 /* params.h in Headers */,
|
||||
@ -1143,6 +1271,31 @@
|
||||
1EDA809314D19FBF003D2191 /* UTrieNode.h in Headers */,
|
||||
1EF8F2C4159A61970047B613 /* HypoList.h in Headers */,
|
||||
1E879EA815A346F90051F346 /* SearchNormalBatch.h in Headers */,
|
||||
1E1D824115AC29BB00FE42E9 /* FileHandler.h in Headers */,
|
||||
1E0BA41915B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.h in Headers */,
|
||||
1E619EA215B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.h in Headers */,
|
||||
1EE418ED15C7FDCB0028F9AB /* Match.h in Headers */,
|
||||
1EE418EF15C7FDCB0028F9AB /* SentenceAlignment.h in Headers */,
|
||||
1EE418F115C7FDCB0028F9AB /* SuffixArray.h in Headers */,
|
||||
1EE418F315C7FDCB0028F9AB /* FuzzyMatchWrapper.h in Headers */,
|
||||
1EE418F515C7FDCB0028F9AB /* Vocabulary.h in Headers */,
|
||||
1E6D9FD715D027560064D436 /* BlockHashIndex.h in Headers */,
|
||||
1E6D9FD815D027560064D436 /* CanonicalHuffman.h in Headers */,
|
||||
1E6D9FDA15D027560064D436 /* CmphStringVectorAdapter.h in Headers */,
|
||||
1E6D9FDB15D027560064D436 /* ConsistantPhrases.h in Headers */,
|
||||
1E6D9FDE15D027560064D436 /* LexicalReorderingTableCompact.h in Headers */,
|
||||
1E6D9FE015D027560064D436 /* LexicalReorderingTableCreator.h in Headers */,
|
||||
1E6D9FE115D027560064D436 /* ListCoders.h in Headers */,
|
||||
1E6D9FE215D027560064D436 /* MmapAllocator.h in Headers */,
|
||||
1E6D9FE315D027560064D436 /* MonotonicVector.h in Headers */,
|
||||
1E6D9FE515D027560064D436 /* MurmurHash3.h in Headers */,
|
||||
1E6D9FE615D027560064D436 /* PackedArray.h in Headers */,
|
||||
1E6D9FE815D027560064D436 /* PhraseDecoder.h in Headers */,
|
||||
1E6D9FEA15D027560064D436 /* PhraseDictionaryCompact.h in Headers */,
|
||||
1E6D9FEC15D027560064D436 /* PhraseTableCreator.h in Headers */,
|
||||
1E6D9FED15D027560064D436 /* StringVector.h in Headers */,
|
||||
1E6D9FEE15D027560064D436 /* TargetPhraseCollectionCache.h in Headers */,
|
||||
1EC32DB915D2D90700A313B1 /* ThrowingFwrite.h in Headers */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@ -1222,7 +1375,6 @@
|
||||
1EC7377A14B977AB00238410 /* DecodeStepTranslation.cpp in Sources */,
|
||||
1EC7377C14B977AB00238410 /* Dictionary.cpp in Sources */,
|
||||
1EC7378414B977AB00238410 /* DummyScoreProducers.cpp in Sources */,
|
||||
1EC7378714B977AB00238410 /* file.cpp in Sources */,
|
||||
1EC7378B14B977AB00238410 /* params.cpp in Sources */,
|
||||
1EC7379314B977AB00238410 /* vocab.cpp in Sources */,
|
||||
1EC7379514B977AB00238410 /* DynSuffixArray.cpp in Sources */,
|
||||
@ -1328,6 +1480,22 @@
|
||||
1EDA809014D19FBF003D2191 /* UTrie.cpp in Sources */,
|
||||
1EDA809214D19FBF003D2191 /* UTrieNode.cpp in Sources */,
|
||||
1E879EA715A346F90051F346 /* SearchNormalBatch.cpp in Sources */,
|
||||
1E1D824015AC29BB00FE42E9 /* FileHandler.cpp in Sources */,
|
||||
1E0BA41815B70E5F00AC70E1 /* PhraseDictionaryFuzzyMatch.cpp in Sources */,
|
||||
1E619EA115B8713700C2D7A7 /* ChartRuleLookupManagerMemoryPerSentence.cpp in Sources */,
|
||||
1EE418EE15C7FDCB0028F9AB /* SentenceAlignment.cpp in Sources */,
|
||||
1EE418F015C7FDCB0028F9AB /* SuffixArray.cpp in Sources */,
|
||||
1EE418F215C7FDCB0028F9AB /* FuzzyMatchWrapper.cpp in Sources */,
|
||||
1EE418F415C7FDCB0028F9AB /* Vocabulary.cpp in Sources */,
|
||||
1E6D9FD615D027560064D436 /* BlockHashIndex.cpp in Sources */,
|
||||
1E6D9FD915D027560064D436 /* CmphStringVectorAdapter.cpp in Sources */,
|
||||
1E6D9FDD15D027560064D436 /* LexicalReorderingTableCompact.cpp in Sources */,
|
||||
1E6D9FDF15D027560064D436 /* LexicalReorderingTableCreator.cpp in Sources */,
|
||||
1E6D9FE415D027560064D436 /* MurmurHash3.cpp in Sources */,
|
||||
1E6D9FE715D027560064D436 /* PhraseDecoder.cpp in Sources */,
|
||||
1E6D9FE915D027560064D436 /* PhraseDictionaryCompact.cpp in Sources */,
|
||||
1E6D9FEB15D027560064D436 /* PhraseTableCreator.cpp in Sources */,
|
||||
1EC32DB815D2D90700A313B1 /* ThrowingFwrite.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
@ -1338,6 +1506,7 @@
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
|
||||
COPY_PHASE_STRIP = NO;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_MODEL_TUNING = G5;
|
||||
@ -1352,6 +1521,9 @@
|
||||
"_FILE_OFFSET_BITS=64",
|
||||
_LARGE_FILES,
|
||||
WITH_THREADS,
|
||||
IS_XCODE,
|
||||
HAVE_CMPH,
|
||||
"KENLM_MAX_ORDER=7",
|
||||
);
|
||||
HEADER_SEARCH_PATHS = (
|
||||
../..,
|
||||
@ -1376,6 +1548,7 @@
|
||||
"\"$(SRCROOT)/../../moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
|
||||
);
|
||||
PRODUCT_NAME = moses;
|
||||
USER_HEADER_SEARCH_PATHS = "../.. ../../moses/src ../../irstlm/include ../../srilm/include ../../kenlm ../../randlm/include /opt/local/include ../../synlm/hhmm/wsjparse/include ../../synlm/hhmm/rvtl/include/ ../.. ../../cmph/include";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
@ -1383,6 +1556,7 @@
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
GCC_MODEL_TUNING = G5;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
@ -1395,6 +1569,9 @@
|
||||
"_FILE_OFFSET_BITS=64",
|
||||
_LARGE_FILES,
|
||||
WITH_THREADS,
|
||||
IS_XCODE,
|
||||
HAVE_CMPH,
|
||||
"KENLM_MAX_ORDER=7",
|
||||
);
|
||||
HEADER_SEARCH_PATHS = (
|
||||
../..,
|
||||
@ -1419,6 +1596,7 @@
|
||||
"\"$(SRCROOT)/../../moses/src/Scope3Parser/bin/darwin-4.2.1/release/debug-symbols-on/link-static/threading-multi\"",
|
||||
);
|
||||
PRODUCT_NAME = moses;
|
||||
USER_HEADER_SEARCH_PATHS = "../.. ../../moses/src ../../irstlm/include ../../srilm/include ../../kenlm ../../randlm/include /opt/local/include ../../synlm/hhmm/wsjparse/include ../../synlm/hhmm/rvtl/include/ ../.. ../../cmph/include";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
|
102
contrib/other-builds/mosesserver.vcxproj
Normal file
@ -0,0 +1,102 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{85811FDF-8AD1-4490-A545-B2F51931A18C}</ProjectGuid>
|
||||
<RootNamespace>mosescmd</RootNamespace>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup>
|
||||
<_ProjectFileVersion>10.0.30319.1</_ProjectFileVersion>
|
||||
<OutDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
|
||||
<IntDir Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(Configuration)\</IntDir>
|
||||
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">true</LinkIncremental>
|
||||
<OutDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(SolutionDir)$(Configuration)\</OutDir>
|
||||
<IntDir Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(Configuration)\</IntDir>
|
||||
<LinkIncremental Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">false</LinkIncremental>
|
||||
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
|
||||
<IncludePath Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">C:\Program Files\boost\boost_1_47;$(IncludePath)</IncludePath>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<AdditionalIncludeDirectories>C:\xmlrpc-c\include;C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<MinimalRebuild>true</MinimalRebuild>
|
||||
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
|
||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>libxmlrpc_server_abyss.lib;libxmlrpc_server.lib;libxmlrpc_abyss.lib;libxmlrpc.lib;libxmlrpc_util.lib;libxmlrpc_xmlparse.lib;libxmlrpc_xmltok.lib;libxmlrpc++.lib;C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention>
|
||||
</DataExecutionPrevention>
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
<AdditionalLibraryDirectories>C:\xmlrpc-c\bin\Debug-Static-Win32;C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<AdditionalIncludeDirectories>C:\xmlrpc-c\include;C:\boost\boost_1_47;$(SolutionDir)/../../moses/src;$(SolutionDir)/../..;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<PreprocessorDefinitions>WITH_THREADS;NO_PIPES;WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<AdditionalDependencies>libxmlrpc_server_abyss.lib;libxmlrpc_server.lib;libxmlrpc_abyss.lib;libxmlrpc.lib;libxmlrpc_util.lib;libxmlrpc_xmlparse.lib;libxmlrpc_xmltok.lib;libxmlrpc++.lib;C:\GnuWin32\lib\zlib.lib;$(SolutionDir)$(Configuration)\moses.lib;$(SolutionDir)$(Configuration)\kenlm.lib;$(SolutionDir)$(Configuration)\OnDiskPt.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<RandomizedBaseAddress>false</RandomizedBaseAddress>
|
||||
<DataExecutionPrevention>
|
||||
</DataExecutionPrevention>
|
||||
<TargetMachine>MachineX86</TargetMachine>
|
||||
<AdditionalLibraryDirectories>C:\xmlrpc-c\bin\Release-Static-Win32;C:\boost\boost_1_47\lib</AdditionalLibraryDirectories>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="..\server\mosesserver.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
@ -0,0 +1,297 @@
|
||||
// !$*UTF8*$!
|
||||
{
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 46;
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
1E6D9FF115D027F00064D436 /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EB3EBD515D0269B006B9CF1 /* libmoses.a */; };
|
||||
1EB3EBB315D024C7006B9CF1 /* processLexicalTableMin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EB3EBB215D024C7006B9CF1 /* processLexicalTableMin.cpp */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXContainerItemProxy section */
|
||||
1E6D9FF215D0292D0064D436 /* PBXContainerItemProxy */ = {
|
||||
isa = PBXContainerItemProxy;
|
||||
containerPortal = 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */;
|
||||
proxyType = 1;
|
||||
remoteGlobalIDString = D2AAC045055464E500DB518D;
|
||||
remoteInfo = moses;
|
||||
};
|
||||
1EB3EBD415D0269B006B9CF1 /* PBXContainerItemProxy */ = {
|
||||
isa = PBXContainerItemProxy;
|
||||
containerPortal = 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */;
|
||||
proxyType = 2;
|
||||
remoteGlobalIDString = D2AAC046055464E500DB518D;
|
||||
remoteInfo = moses;
|
||||
};
|
||||
/* End PBXContainerItemProxy section */
|
||||
|
||||
/* Begin PBXCopyFilesBuildPhase section */
|
||||
1E3A0AEA15D0242A003EF9B4 /* CopyFiles */ = {
|
||||
isa = PBXCopyFilesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
dstPath = /usr/share/man/man1/;
|
||||
dstSubfolderSpec = 0;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 1;
|
||||
};
|
||||
/* End PBXCopyFilesBuildPhase section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
1E3A0AEC15D0242A003EF9B4 /* processLexicalTableMin */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = processLexicalTableMin; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1EB3EBB215D024C7006B9CF1 /* processLexicalTableMin.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = processLexicalTableMin.cpp; path = ../../misc/processLexicalTableMin.cpp; sourceTree = "<group>"; };
|
||||
1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = moses.xcodeproj; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
1E3A0AE915D0242A003EF9B4 /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
1E6D9FF115D027F00064D436 /* libmoses.a in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXFrameworksBuildPhase section */
|
||||
|
||||
/* Begin PBXGroup section */
|
||||
1E3A0AE115D02427003EF9B4 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1EB3EBB215D024C7006B9CF1 /* processLexicalTableMin.cpp */,
|
||||
1E3A0AED15D0242A003EF9B4 /* Products */,
|
||||
1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1E3A0AED15D0242A003EF9B4 /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1E3A0AEC15D0242A003EF9B4 /* processLexicalTableMin */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1EB3EBD115D0269B006B9CF1 /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1EB3EBD515D0269B006B9CF1 /* libmoses.a */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
1E3A0AEB15D0242A003EF9B4 /* processLexicalTableMin */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 1E3A0AF615D0242B003EF9B4 /* Build configuration list for PBXNativeTarget "processLexicalTableMin" */;
|
||||
buildPhases = (
|
||||
1E3A0AE815D0242A003EF9B4 /* Sources */,
|
||||
1E3A0AE915D0242A003EF9B4 /* Frameworks */,
|
||||
1E3A0AEA15D0242A003EF9B4 /* CopyFiles */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
1E6D9FF315D0292D0064D436 /* PBXTargetDependency */,
|
||||
);
|
||||
name = processLexicalTableMin;
|
||||
productName = processLexicalTableMin;
|
||||
productReference = 1E3A0AEC15D0242A003EF9B4 /* processLexicalTableMin */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
||||
/* Begin PBXProject section */
|
||||
1E3A0AE315D02427003EF9B4 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
buildConfigurationList = 1E3A0AE615D02427003EF9B4 /* Build configuration list for PBXProject "processLexicalTableMin" */;
|
||||
compatibilityVersion = "Xcode 3.2";
|
||||
developmentRegion = English;
|
||||
hasScannedForEncodings = 0;
|
||||
knownRegions = (
|
||||
en,
|
||||
);
|
||||
mainGroup = 1E3A0AE115D02427003EF9B4;
|
||||
productRefGroup = 1E3A0AED15D0242A003EF9B4 /* Products */;
|
||||
projectDirPath = "";
|
||||
projectReferences = (
|
||||
{
|
||||
ProductGroup = 1EB3EBD115D0269B006B9CF1 /* Products */;
|
||||
ProjectRef = 1EB3EBD015D0269B006B9CF1 /* moses.xcodeproj */;
|
||||
},
|
||||
);
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
1E3A0AEB15D0242A003EF9B4 /* processLexicalTableMin */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
||||
/* Begin PBXReferenceProxy section */
|
||||
1EB3EBD515D0269B006B9CF1 /* libmoses.a */ = {
|
||||
isa = PBXReferenceProxy;
|
||||
fileType = archive.ar;
|
||||
path = libmoses.a;
|
||||
remoteRef = 1EB3EBD415D0269B006B9CF1 /* PBXContainerItemProxy */;
|
||||
sourceTree = BUILT_PRODUCTS_DIR;
|
||||
};
|
||||
/* End PBXReferenceProxy section */
|
||||
|
||||
/* Begin PBXSourcesBuildPhase section */
|
||||
1E3A0AE815D0242A003EF9B4 /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
1EB3EBB315D024C7006B9CF1 /* processLexicalTableMin.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXSourcesBuildPhase section */
|
||||
|
||||
/* Begin PBXTargetDependency section */
|
||||
1E6D9FF315D0292D0064D436 /* PBXTargetDependency */ = {
|
||||
isa = PBXTargetDependency;
|
||||
name = moses;
|
||||
targetProxy = 1E6D9FF215D0292D0064D436 /* PBXContainerItemProxy */;
|
||||
};
|
||||
/* End PBXTargetDependency section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
1E3A0AF415D0242B003EF9B4 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
|
||||
COPY_PHASE_STRIP = NO;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu99;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
"DEBUG=1",
|
||||
"$(inherited)",
|
||||
);
|
||||
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
|
||||
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
HEADER_SEARCH_PATHS = (
|
||||
../../,
|
||||
../../irstlm/include,
|
||||
/opt/local/include,
|
||||
);
|
||||
MACOSX_DEPLOYMENT_TARGET = 10.7;
|
||||
ONLY_ACTIVE_ARCH = YES;
|
||||
SDKROOT = macosx;
|
||||
USER_HEADER_SEARCH_PATHS = "../../ ../../irstlm/include /opt/local/include ../../moses/src";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1E3A0AF515D0242B003EF9B4 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
|
||||
COPY_PHASE_STRIP = YES;
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
GCC_C_LANGUAGE_STANDARD = gnu99;
|
||||
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
|
||||
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
HEADER_SEARCH_PATHS = (
|
||||
../../,
|
||||
../../irstlm/include,
|
||||
/opt/local/include,
|
||||
);
|
||||
MACOSX_DEPLOYMENT_TARGET = 10.7;
|
||||
SDKROOT = macosx;
|
||||
USER_HEADER_SEARCH_PATHS = "../../ ../../irstlm/include /opt/local/include ../../moses/src";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
1E3A0AF715D0242B003EF9B4 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
|
||||
"GCC_PREPROCESSOR_DEFINITIONS[arch=*]" = WITH_THREADS;
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
../../randlm/lib,
|
||||
/opt/local/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lz",
|
||||
"-lirstlm",
|
||||
"-lmisc",
|
||||
"-ldstruct",
|
||||
"-loolm",
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lrandlm",
|
||||
"-lboost_thread-mt",
|
||||
);
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1E3A0AF815D0242B003EF9B4 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
../../randlm/lib,
|
||||
/opt/local/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lz",
|
||||
"-lirstlm",
|
||||
"-lmisc",
|
||||
"-ldstruct",
|
||||
"-loolm",
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lrandlm",
|
||||
"-lboost_thread-mt",
|
||||
);
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
1E3A0AE615D02427003EF9B4 /* Build configuration list for PBXProject "processLexicalTableMin" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1E3A0AF415D0242B003EF9B4 /* Debug */,
|
||||
1E3A0AF515D0242B003EF9B4 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
1E3A0AF615D0242B003EF9B4 /* Build configuration list for PBXNativeTarget "processLexicalTableMin" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1E3A0AF715D0242B003EF9B4 /* Debug */,
|
||||
1E3A0AF815D0242B003EF9B4 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
};
|
||||
rootObject = 1E3A0AE315D02427003EF9B4 /* Project object */;
|
||||
}
|
@ -0,0 +1,304 @@
|
||||
// !$*UTF8*$!
|
||||
{
|
||||
archiveVersion = 1;
|
||||
classes = {
|
||||
};
|
||||
objectVersion = 46;
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
1EF3D68A15D02AEF00969478 /* processPhraseTableMin.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1EF3D68915D02AEF00969478 /* processPhraseTableMin.cpp */; };
|
||||
1EF3D6A415D02B6400969478 /* libmoses.a in Frameworks */ = {isa = PBXBuildFile; fileRef = 1EF3D69915D02B4400969478 /* libmoses.a */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
/* Begin PBXContainerItemProxy section */
|
||||
1EF3D69815D02B4400969478 /* PBXContainerItemProxy */ = {
|
||||
isa = PBXContainerItemProxy;
|
||||
containerPortal = 1EF3D69415D02B4400969478 /* moses.xcodeproj */;
|
||||
proxyType = 2;
|
||||
remoteGlobalIDString = D2AAC046055464E500DB518D;
|
||||
remoteInfo = moses;
|
||||
};
|
||||
1EF3D6A515D02B6B00969478 /* PBXContainerItemProxy */ = {
|
||||
isa = PBXContainerItemProxy;
|
||||
containerPortal = 1EF3D69415D02B4400969478 /* moses.xcodeproj */;
|
||||
proxyType = 1;
|
||||
remoteGlobalIDString = D2AAC045055464E500DB518D;
|
||||
remoteInfo = moses;
|
||||
};
|
||||
/* End PBXContainerItemProxy section */
|
||||
|
||||
/* Begin PBXCopyFilesBuildPhase section */
|
||||
1E6D9FFD15D02A8D0064D436 /* CopyFiles */ = {
|
||||
isa = PBXCopyFilesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
dstPath = /usr/share/man/man1/;
|
||||
dstSubfolderSpec = 0;
|
||||
files = (
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 1;
|
||||
};
|
||||
/* End PBXCopyFilesBuildPhase section */
|
||||
|
||||
/* Begin PBXFileReference section */
|
||||
1E6D9FFF15D02A8D0064D436 /* processPhraseTableMin */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = processPhraseTableMin; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
1EF3D68915D02AEF00969478 /* processPhraseTableMin.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = processPhraseTableMin.cpp; path = ../../misc/processPhraseTableMin.cpp; sourceTree = "<group>"; };
|
||||
1EF3D69415D02B4400969478 /* moses.xcodeproj */ = {isa = PBXFileReference; lastKnownFileType = "wrapper.pb-project"; path = moses.xcodeproj; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
/* Begin PBXFrameworksBuildPhase section */
|
||||
1E6D9FFC15D02A8D0064D436 /* Frameworks */ = {
|
||||
isa = PBXFrameworksBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
1EF3D6A415D02B6400969478 /* libmoses.a in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXFrameworksBuildPhase section */
|
||||
|
||||
/* Begin PBXGroup section */
|
||||
1E6D9FF415D02A8C0064D436 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1EF3D68915D02AEF00969478 /* processPhraseTableMin.cpp */,
|
||||
1E6DA00015D02A8D0064D436 /* Products */,
|
||||
1EF3D69415D02B4400969478 /* moses.xcodeproj */,
|
||||
);
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1E6DA00015D02A8D0064D436 /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1E6D9FFF15D02A8D0064D436 /* processPhraseTableMin */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
1EF3D69515D02B4400969478 /* Products */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
1EF3D69915D02B4400969478 /* libmoses.a */,
|
||||
);
|
||||
name = Products;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
/* End PBXGroup section */
|
||||
|
||||
/* Begin PBXNativeTarget section */
|
||||
1E6D9FFE15D02A8D0064D436 /* processPhraseTableMin */ = {
|
||||
isa = PBXNativeTarget;
|
||||
buildConfigurationList = 1E6DA00915D02A8D0064D436 /* Build configuration list for PBXNativeTarget "processPhraseTableMin" */;
|
||||
buildPhases = (
|
||||
1E6D9FFB15D02A8D0064D436 /* Sources */,
|
||||
1E6D9FFC15D02A8D0064D436 /* Frameworks */,
|
||||
1E6D9FFD15D02A8D0064D436 /* CopyFiles */,
|
||||
);
|
||||
buildRules = (
|
||||
);
|
||||
dependencies = (
|
||||
1EF3D6A615D02B6B00969478 /* PBXTargetDependency */,
|
||||
);
|
||||
name = processPhraseTableMin;
|
||||
productName = processPhraseTableMin;
|
||||
productReference = 1E6D9FFF15D02A8D0064D436 /* processPhraseTableMin */;
|
||||
productType = "com.apple.product-type.tool";
|
||||
};
|
||||
/* End PBXNativeTarget section */
|
||||
|
||||
/* Begin PBXProject section */
|
||||
1E6D9FF615D02A8C0064D436 /* Project object */ = {
|
||||
isa = PBXProject;
|
||||
buildConfigurationList = 1E6D9FF915D02A8C0064D436 /* Build configuration list for PBXProject "processPhraseTableMin" */;
|
||||
compatibilityVersion = "Xcode 3.2";
|
||||
developmentRegion = English;
|
||||
hasScannedForEncodings = 0;
|
||||
knownRegions = (
|
||||
en,
|
||||
);
|
||||
mainGroup = 1E6D9FF415D02A8C0064D436;
|
||||
productRefGroup = 1E6DA00015D02A8D0064D436 /* Products */;
|
||||
projectDirPath = "";
|
||||
projectReferences = (
|
||||
{
|
||||
ProductGroup = 1EF3D69515D02B4400969478 /* Products */;
|
||||
ProjectRef = 1EF3D69415D02B4400969478 /* moses.xcodeproj */;
|
||||
},
|
||||
);
|
||||
projectRoot = "";
|
||||
targets = (
|
||||
1E6D9FFE15D02A8D0064D436 /* processPhraseTableMin */,
|
||||
);
|
||||
};
|
||||
/* End PBXProject section */
|
||||
|
||||
/* Begin PBXReferenceProxy section */
|
||||
1EF3D69915D02B4400969478 /* libmoses.a */ = {
|
||||
isa = PBXReferenceProxy;
|
||||
fileType = archive.ar;
|
||||
path = libmoses.a;
|
||||
remoteRef = 1EF3D69815D02B4400969478 /* PBXContainerItemProxy */;
|
||||
sourceTree = BUILT_PRODUCTS_DIR;
|
||||
};
|
||||
/* End PBXReferenceProxy section */
|
||||
|
||||
/* Begin PBXSourcesBuildPhase section */
|
||||
1E6D9FFB15D02A8D0064D436 /* Sources */ = {
|
||||
isa = PBXSourcesBuildPhase;
|
||||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
1EF3D68A15D02AEF00969478 /* processPhraseTableMin.cpp in Sources */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
};
|
||||
/* End PBXSourcesBuildPhase section */
|
||||
|
||||
/* Begin PBXTargetDependency section */
|
||||
1EF3D6A615D02B6B00969478 /* PBXTargetDependency */ = {
|
||||
isa = PBXTargetDependency;
|
||||
name = moses;
|
||||
targetProxy = 1EF3D6A515D02B6B00969478 /* PBXContainerItemProxy */;
|
||||
};
|
||||
/* End PBXTargetDependency section */
|
||||
|
||||
/* Begin XCBuildConfiguration section */
|
||||
1E6DA00715D02A8D0064D436 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
|
||||
COPY_PHASE_STRIP = NO;
|
||||
GCC_C_LANGUAGE_STANDARD = gnu99;
|
||||
GCC_DYNAMIC_NO_PIC = NO;
|
||||
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
|
||||
GCC_OPTIMIZATION_LEVEL = 0;
|
||||
GCC_PREPROCESSOR_DEFINITIONS = (
|
||||
"DEBUG=1",
|
||||
"$(inherited)",
|
||||
);
|
||||
GCC_SYMBOLS_PRIVATE_EXTERN = NO;
|
||||
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
LIBRARY_SEARCH_PATHS = "";
|
||||
MACOSX_DEPLOYMENT_TARGET = 10.7;
|
||||
ONLY_ACTIVE_ARCH = YES;
|
||||
SDKROOT = macosx;
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1E6DA00815D02A8D0064D436 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
ALWAYS_SEARCH_USER_PATHS = NO;
|
||||
ARCHS = "$(ARCHS_STANDARD_64_BIT)";
|
||||
COPY_PHASE_STRIP = YES;
|
||||
DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym";
|
||||
GCC_C_LANGUAGE_STANDARD = gnu99;
|
||||
GCC_ENABLE_OBJC_EXCEPTIONS = YES;
|
||||
GCC_VERSION = com.apple.compilers.llvm.clang.1_0;
|
||||
GCC_WARN_64_TO_32_BIT_CONVERSION = YES;
|
||||
GCC_WARN_ABOUT_MISSING_PROTOTYPES = YES;
|
||||
GCC_WARN_ABOUT_RETURN_TYPE = YES;
|
||||
GCC_WARN_UNUSED_VARIABLE = YES;
|
||||
LIBRARY_SEARCH_PATHS = "";
|
||||
MACOSX_DEPLOYMENT_TARGET = 10.7;
|
||||
SDKROOT = macosx;
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
1E6DA00A15D02A8D0064D436 /* Debug */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
|
||||
HEADER_SEARCH_PATHS = (
|
||||
../../,
|
||||
../../irstlm/include,
|
||||
/opt/local/include,
|
||||
../../moses/src,
|
||||
../../cmph/include,
|
||||
);
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
../../randlm/lib,
|
||||
/opt/local/lib,
|
||||
../../cmph/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lz",
|
||||
"-lirstlm",
|
||||
"-lmisc",
|
||||
"-ldstruct",
|
||||
"-loolm",
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lrandlm",
|
||||
"-lboost_thread-mt",
|
||||
"-lcmph",
|
||||
);
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Debug;
|
||||
};
|
||||
1E6DA00B15D02A8D0064D436 /* Release */ = {
|
||||
isa = XCBuildConfiguration;
|
||||
buildSettings = {
|
||||
GCC_PREPROCESSOR_DEFINITIONS = WITH_THREADS;
|
||||
HEADER_SEARCH_PATHS = (
|
||||
../../,
|
||||
../../irstlm/include,
|
||||
/opt/local/include,
|
||||
../../moses/src,
|
||||
../../cmph/include,
|
||||
);
|
||||
LIBRARY_SEARCH_PATHS = (
|
||||
../../irstlm/lib,
|
||||
../../srilm/lib/macosx,
|
||||
../../randlm/lib,
|
||||
/opt/local/lib,
|
||||
../../cmph/lib,
|
||||
);
|
||||
OTHER_LDFLAGS = (
|
||||
"-lz",
|
||||
"-lirstlm",
|
||||
"-lmisc",
|
||||
"-ldstruct",
|
||||
"-loolm",
|
||||
"-lflm",
|
||||
"-llattice",
|
||||
"-lrandlm",
|
||||
"-lboost_thread-mt",
|
||||
"-lcmph",
|
||||
);
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
};
|
||||
name = Release;
|
||||
};
|
||||
/* End XCBuildConfiguration section */
|
||||
|
||||
/* Begin XCConfigurationList section */
|
||||
1E6D9FF915D02A8C0064D436 /* Build configuration list for PBXProject "processPhraseTableMin" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1E6DA00715D02A8D0064D436 /* Debug */,
|
||||
1E6DA00815D02A8D0064D436 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
1E6DA00915D02A8D0064D436 /* Build configuration list for PBXNativeTarget "processPhraseTableMin" */ = {
|
||||
isa = XCConfigurationList;
|
||||
buildConfigurations = (
|
||||
1E6DA00A15D02A8D0064D436 /* Debug */,
|
||||
1E6DA00B15D02A8D0064D436 /* Release */,
|
||||
);
|
||||
defaultConfigurationIsVisible = 0;
|
||||
defaultConfigurationName = Release;
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
};
|
||||
rootObject = 1E6D9FF615D02A8C0064D436 /* Project object */;
|
||||
}
|
2
contrib/server/Translation-web/src/conf/MANIFEST.MF
Executable file
@ -0,0 +1,2 @@
|
||||
Manifest-Version: 1.0
|
||||
|
129
contrib/server/Translation-web/src/java/com/hpl/mt/Translate.java
Executable file
@ -0,0 +1,129 @@
|
||||
package com.hpl.mt;
|
||||
|
||||
/*
|
||||
* To change this template, choose Tools | Templates
|
||||
* and open the template in the editor.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.net.URL;
|
||||
import java.util.HashMap;
|
||||
import java.util.logging.Level;
|
||||
import java.util.logging.Logger;
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServlet;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import org.apache.xmlrpc.XmlRpcException;
|
||||
import org.apache.xmlrpc.client.XmlRpcClient;
|
||||
import org.apache.xmlrpc.client.XmlRpcClientConfigImpl;
|
||||
|
||||
/**
|
||||
*
|
||||
* @author ulanov
|
||||
*/
|
||||
public class Translate extends HttpServlet {
|
||||
|
||||
/**
|
||||
* Processes requests for both HTTP
|
||||
* <code>GET</code> and
|
||||
* <code>POST</code> methods.
|
||||
*
|
||||
* @param request servlet request
|
||||
* @param response servlet response
|
||||
* @throws ServletException if a servlet-specific error occurs
|
||||
* @throws IOException if an I/O error occurs
|
||||
*/
|
||||
protected void processRequest(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
response.setContentType("text/html;charset=UTF-8");
|
||||
System.out.println("before" + request.getCharacterEncoding());
|
||||
request.setCharacterEncoding("UTF-8");
|
||||
System.out.println("after" + request.getCharacterEncoding());
|
||||
PrintWriter out = response.getWriter();
|
||||
try {
|
||||
/*
|
||||
* TODO output your page here. You may use following sample code.
|
||||
*/
|
||||
// Create an instance of XmlRpcClient
|
||||
String textToTranslate = request.getParameter("text");
|
||||
XmlRpcClientConfigImpl config = new XmlRpcClientConfigImpl();
|
||||
config.setServerURL(new URL("http://localhost:9008/RPC2"));
|
||||
XmlRpcClient client = new XmlRpcClient();
|
||||
client.setConfig(config);
|
||||
// The XML-RPC data type used by mosesserver is <struct>. In Java, this data type can be represented using HashMap.
|
||||
HashMap<String,String> mosesParams = new HashMap<String,String>();
|
||||
mosesParams.put("text", textToTranslate);
|
||||
mosesParams.put("align", "true");
|
||||
mosesParams.put("report-all-factors", "true");
|
||||
// The XmlRpcClient.execute method doesn't accept Hashmap (pParams). It's either Object[] or List.
|
||||
Object[] params = new Object[] { null };
|
||||
params[0] = mosesParams;
|
||||
// Invoke the remote method "translate". The result is an Object, convert it to a HashMap.
|
||||
HashMap result;
|
||||
try {
|
||||
result = (HashMap)client.execute("translate", params);
|
||||
} catch (XmlRpcException ex) {
|
||||
Logger.getLogger(Translate.class.getName()).log(Level.SEVERE, null, ex);
|
||||
throw new IOException("XML-RPC failed");
|
||||
}
|
||||
// Print the returned results
|
||||
String textTranslation = (String)result.get("text");
|
||||
System.out.println("Input : "+textToTranslate);
|
||||
System.out.println("Translation : "+textTranslation);
|
||||
out.write(textTranslation);
|
||||
if (result.get("align") != null){
|
||||
Object[] aligns = (Object[])result.get("align");
|
||||
System.out.println("Phrase alignments : [Source Start:Source End][Target Start]");
|
||||
for ( Object element : aligns) {
|
||||
HashMap align = (HashMap)element;
|
||||
System.out.println("["+align.get("src-start")+":"+align.get("src-end")+"]["+align.get("tgt-start")+"]");
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
|
||||
// <editor-fold defaultstate="collapsed" desc="HttpServlet methods. Click on the + sign on the left to edit the code.">
|
||||
/**
|
||||
* Handles the HTTP
|
||||
* <code>GET</code> method.
|
||||
*
|
||||
* @param request servlet request
|
||||
* @param response servlet response
|
||||
* @throws ServletException if a servlet-specific error occurs
|
||||
* @throws IOException if an I/O error occurs
|
||||
*/
|
||||
@Override
|
||||
protected void doGet(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
processRequest(request, response);
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles the HTTP
|
||||
* <code>POST</code> method.
|
||||
*
|
||||
* @param request servlet request
|
||||
* @param response servlet response
|
||||
* @throws ServletException if a servlet-specific error occurs
|
||||
* @throws IOException if an I/O error occurs
|
||||
*/
|
||||
@Override
|
||||
protected void doPost(HttpServletRequest request, HttpServletResponse response)
|
||||
throws ServletException, IOException {
|
||||
processRequest(request, response);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a short description of the servlet.
|
||||
*
|
||||
* @return a String containing servlet description
|
||||
*/
|
||||
@Override
|
||||
public String getServletInfo() {
|
||||
return "Short description";
|
||||
}// </editor-fold>
|
||||
}
|
2
contrib/server/Translation-web/web/META-INF/context.xml
Executable file
@ -0,0 +1,2 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<Context antiJARLocking="true" path="/Translation"/>
|
16
contrib/server/Translation-web/web/WEB-INF/web.xml
Executable file
@ -0,0 +1,16 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<web-app version="3.0" xmlns="http://java.sun.com/xml/ns/javaee" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_3_0.xsd">
|
||||
<servlet>
|
||||
<servlet-name>Translate</servlet-name>
|
||||
<servlet-class>com.hpl.mt.Translate</servlet-class>
|
||||
</servlet>
|
||||
<servlet-mapping>
|
||||
<servlet-name>Translate</servlet-name>
|
||||
<url-pattern>/Translate</url-pattern>
|
||||
</servlet-mapping>
|
||||
<session-config>
|
||||
<session-timeout>
|
||||
30
|
||||
</session-timeout>
|
||||
</session-config>
|
||||
</web-app>
|
22
contrib/server/Translation-web/web/css/common.css
Executable file
@ -0,0 +1,22 @@
|
||||
/*
|
||||
Document : common
|
||||
Created on : Jul 31, 2012, 11:53:29 AM
|
||||
Author : ulanov
|
||||
Description:
|
||||
Purpose of the stylesheet follows.
|
||||
*/
|
||||
|
||||
root {
|
||||
display: block;
|
||||
}
|
||||
|
||||
body {font-size:small; font-family: Verdana,Arial,sans-serif;height:auto; width: auto;}
|
||||
span {font-size:medium;}
|
||||
|
||||
/* Header strip. The former "float: top" was dropped: it is not a valid float value and browsers ignored it. */
#north_tab {height: 10%; width: 100%;}
|
||||
/* Main work area. The former "float: bottom" was dropped: it is not a valid float value and browsers ignored it. */
#south_tab {height: 80%; width: 100%;}
|
||||
|
||||
#input_text {height: 50%; width: 30%; margin-right: 10px; float: left;}
|
||||
#output_text {height: 50%; width: 30%; margin-right: 10px; float: left;}
|
||||
|
||||
#translate {float: left; margin-right: 10px;}
|
47
contrib/server/Translation-web/web/index.html
Executable file
@ -0,0 +1,47 @@
|
||||
<html lang="fr">
|
||||
<head>
|
||||
<style>
|
||||
</style>
|
||||
<link href="http://ajax.googleapis.com/ajax/libs/jqueryui/1.8/themes/base/jquery-ui.css" rel="stylesheet"
|
||||
type="text/css"/>
|
||||
<script src="lib/jquery-1.6.4.js" type="text/javascript"></script>
|
||||
<script src="lib/jquery-ui-1.8.16.custom.js" type="text/javascript"></script>
|
||||
|
||||
<link rel="stylesheet" href="css/common.css" type="text/css"/>
|
||||
<script>
|
||||
// Once the DOM is ready: style the submit button with jQuery UI and, on each
// click, POST the input text to the Translate servlet and show the reply in
// the read-only output box.
$(document).ready(function () {
    var submitButtons = $("input:submit");

    submitButtons.button();
    submitButtons.click(function () {
        var request = $.ajax({
            url: "Translate",
            type: "POST",
            context: document.body,
            data: {text: $("#input_text").val()}
        });

        request.done(function (translation) {
            $("#output_text").val(translation);
        });
    });
});
|
||||
</script>
|
||||
<title>Translate FR-EN</title>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
|
||||
</head>
|
||||
<body>
|
||||
<div id="north_tab">
|
||||
<h2>Translate FR-EN</h2>
|
||||
</div>
|
||||
<div id="south_tab">
|
||||
<textarea id="input_text">
|
||||
</textarea>
|
||||
|
||||
<input id="translate" type="submit" value="Translate">
|
||||
|
||||
<textarea id="output_text" readonly="readonly">
|
||||
</textarea>
|
||||
</div>
|
||||
|
||||
</body>
|
||||
</html>
|
9046
contrib/server/Translation-web/web/lib/jquery-1.6.4.js
vendored
Executable file
11769
contrib/server/Translation-web/web/lib/jquery-ui-1.8.16.custom.js
vendored
Executable file
@ -192,7 +192,8 @@ public:
|
||||
staticData.GetInputFactorOrder();
|
||||
stringstream in(source + "\n");
|
||||
sentence.Read(in,inputFactorOrder);
|
||||
Manager manager(sentence,staticData.GetSearchAlgorithm(), &system);
|
||||
size_t lineNumber = 0; // TODO: Include sentence request number here?
|
||||
Manager manager(lineNumber, sentence, staticData.GetSearchAlgorithm(), &system);
|
||||
manager.ProcessSentence();
|
||||
const Hypothesis* hypo = manager.GetBestHypothesis();
|
||||
|
||||
@ -367,7 +368,7 @@ int main(int argc, char** argv)
|
||||
params->Explain();
|
||||
exit(1);
|
||||
}
|
||||
if (!StaticData::LoadDataStatic(params)) {
|
||||
if (!StaticData::LoadDataStatic(params, argv[0])) {
|
||||
exit(1);
|
||||
}
|
||||
|
||||
|
@ -99,10 +99,15 @@ PTEntry::PTEntry(const std::string& str, int index) :
|
||||
|
||||
pos = nextPos + SEPARATOR.size();
|
||||
nextPos = str.find(SEPARATOR, pos);
|
||||
this->scores = str.substr(pos,nextPos-pos);
|
||||
if (nextPos < str.size()) {
|
||||
this->scores = str.substr(pos,nextPos-pos);
|
||||
|
||||
pos = nextPos + SEPARATOR.size();
|
||||
this->extra = str.substr(pos);
|
||||
pos = nextPos + SEPARATOR.size();
|
||||
this->extra = str.substr(pos);
|
||||
}
|
||||
else {
|
||||
this->scores = str.substr(pos,str.size()-pos);
|
||||
}
|
||||
|
||||
int c = 0;
|
||||
std::string::iterator i=scores.begin();
|
||||
|
@ -153,7 +153,7 @@ class Moses():
|
||||
self.phrase_target[target][i] = 1
|
||||
|
||||
|
||||
def load_reordering_probabilities(self,line,priority,i,store='pairs'):
|
||||
def load_reordering_probabilities(self,line,priority,i,**unused):
|
||||
"""take single reordering table line and store probablities in internal data structure"""
|
||||
|
||||
src = line[0]
|
||||
@ -162,9 +162,12 @@ class Moses():
|
||||
model_probabilities = map(float,line[2].split())
|
||||
reordering_probabilities = self.reordering_pairs[src][target]
|
||||
|
||||
for j,p in enumerate(model_probabilities):
|
||||
reordering_probabilities[j][i] = p
|
||||
|
||||
try:
|
||||
for j,p in enumerate(model_probabilities):
|
||||
reordering_probabilities[j][i] = p
|
||||
except IndexError:
|
||||
sys.stderr.write('\nIndexError: Did you correctly specify the number of reordering features? (--number_of_features N in command line)\n')
|
||||
exit()
|
||||
|
||||
def traverse_incrementally(self,table,models,load_lines,store_flag,mode='interpolate',inverted=False,lowmem=False,flags=None):
|
||||
"""hack-ish way to find common phrase pairs in multiple models in one traversal without storing it all in memory
|
||||
@ -217,11 +220,11 @@ class Moses():
|
||||
|
||||
a, b, prob = line.split(b' ')
|
||||
|
||||
if side == 'e2f' and not e2f_filter or a in e2f_filter and b in e2f_filter[a]:
|
||||
if side == 'e2f' and (not e2f_filter or a in e2f_filter and b in e2f_filter[a]):
|
||||
|
||||
self.word_pairs_e2f[a][b][i] = float(prob)
|
||||
|
||||
elif side == 'f2e' and not f2e_filter or a in f2e_filter and b in f2e_filter[a]:
|
||||
elif side == 'f2e' and (not f2e_filter or a in f2e_filter and b in f2e_filter[a]):
|
||||
|
||||
self.word_pairs_f2e[a][b][i] = float(prob)
|
||||
|
||||
@ -419,7 +422,7 @@ class Moses():
|
||||
if 0 in features:
|
||||
return ''
|
||||
|
||||
features = b' '.join([b'%6g' %(f) for f in features])
|
||||
features = b' '.join([b'%.6g' %(f) for f in features])
|
||||
|
||||
line = b"%s ||| %s ||| %s\n" %(src,target,features)
|
||||
return line
|
||||
|
@ -4,6 +4,7 @@ import os ;
|
||||
import path ;
|
||||
import project ;
|
||||
import build-system ;
|
||||
import version ;
|
||||
|
||||
#Shell with trailing line removed http://lists.boost.org/boost-build/2007/08/17051.php
|
||||
rule trim-nl ( str extras * ) {
|
||||
@ -51,13 +52,25 @@ rule test_library ( name ) {
|
||||
constant CLEANING : $(cleaning) ;
|
||||
}
|
||||
|
||||
requirements = ;
|
||||
|
||||
FORCE-STATIC = [ option.get "static" : : "yes" ] ;
|
||||
if $(FORCE-STATIC) {
|
||||
requirements += <runtime-link>static ;
|
||||
}
|
||||
|
||||
#Determine if a library can be compiled statically.
|
||||
rule auto-shared ( name : additional * ) {
|
||||
additional ?= "" ;
|
||||
if [ test_flags $(additional)" -static -l"$(name) ] {
|
||||
return ;
|
||||
} else {
|
||||
return "<link>shared" ;
|
||||
if $(FORCE-STATIC) {
|
||||
echo "Could not statically link against lib $(name). Your build will probably fail." ;
|
||||
return ;
|
||||
} else {
|
||||
return "<link>shared" ;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -88,18 +101,16 @@ else {
|
||||
}
|
||||
}
|
||||
|
||||
#Are we linking static binaries against shared boost?
|
||||
boost-auto-shared = [ auto-shared "boost_program_options" : $(L-boost-search) ] ;
|
||||
#Convenience rule for boost libraries. Defines library boost_$(name).
|
||||
rule boost-lib ( name macro : deps * ) {
|
||||
#Link multi-threaded programs against the -mt version if available. Old
|
||||
#versions of boost do not have -mt tagged versions of all libraries. Sadly,
|
||||
#boost.jam does not handle this correctly.
|
||||
if [ test_flags $(L-boost-search)" -lboost_"$(name)"-mt" ] {
|
||||
lib inner_boost_$(name) : $(deps) : <threading>single $(boost-search) <name>boost_$(name) ;
|
||||
lib inner_boost_$(name) : $(deps) : <threading>multi $(boost-search) <name>boost_$(name)-mt ;
|
||||
if [ test_flags $(L-boost-search)" -lboost_"$(name)"-mt$(boost-lib-version)" ] {
|
||||
lib inner_boost_$(name) : : <threading>single $(boost-search) <name>boost_$(name)$(boost-lib-version) : : <library>$(deps) ;
|
||||
lib inner_boost_$(name) : : <threading>multi $(boost-search) <name>boost_$(name)-mt$(boost-lib-version) : : <library>$(deps) ;
|
||||
} else {
|
||||
lib inner_boost_$(name) : $(deps) : $(boost-search) <name>boost_$(name) ;
|
||||
lib inner_boost_$(name) : : $(boost-search) <name>boost_$(name)$(boost-lib-version) : : <library>$(deps) ;
|
||||
}
|
||||
|
||||
alias boost_$(name) : inner_boost_$(name) : $(boost-auto-shared) : : <link>shared:<define>BOOST_$(macro) $(boost-include) ;
|
||||
@ -107,7 +118,7 @@ rule boost-lib ( name macro : deps * ) {
|
||||
|
||||
#Argument is e.g. 103600
|
||||
rule boost ( min-version ) {
|
||||
local cmd = "bash -c \"g++ "$(I-boost-include)" -dM -x c++ -E /dev/null -include boost/version.hpp 2>/dev/null |grep '#define BOOST_VERSION '\"" ;
|
||||
local cmd = "bash -c \"g++ "$(I-boost-include)" -dM -x c++ -E /dev/null -include boost/version.hpp 2>/dev/null |grep '#define BOOST_'\"" ;
|
||||
local boost-shell = [ SHELL "$(cmd)" : exit-status ] ;
|
||||
if $(boost-shell[2]) != 0 && $(CLEANING) = no {
|
||||
echo Failed to run "$(cmd)" ;
|
||||
@ -117,12 +128,24 @@ rule boost ( min-version ) {
|
||||
if $(boost-version) < $(min-version) && $(CLEANING) = no {
|
||||
exit You have Boost $(boost-version). This package requires Boost at least $(min-version) (and preferably newer). : 1 ;
|
||||
}
|
||||
# If matching version tags exist, use them.
|
||||
boost-lib-version = [ MATCH "#define BOOST_LIB_VERSION \"([^\"]*)\"" : $(boost-shell[1]) ] ;
|
||||
if [ test_flags $(L-boost-search)" -lboost_program_options-"$(boost-lib-version) ] {
|
||||
boost-lib-version = "-"$(boost-lib-version) ;
|
||||
} else {
|
||||
boost-lib-version = "" ;
|
||||
}
|
||||
|
||||
#Are we linking static binaries against shared boost?
|
||||
boost-auto-shared = [ auto-shared "boost_program_options"$(boost-lib-version) : $(L-boost-search) ] ;
|
||||
|
||||
#See tools/build/v2/contrib/boost.jam in a boost distribution for a table of macros to define.
|
||||
boost-lib system SYSTEM_DYN_LINK ;
|
||||
boost-lib thread THREAD_DYN_DLL : boost_system ;
|
||||
boost-lib program_options PROGRAM_OPTIONS_DYN_LINK ;
|
||||
boost-lib unit_test_framework TEST_DYN_LINK ;
|
||||
boost-lib iostreams IOSTREAMS_DYN_LINK ;
|
||||
boost-lib filesystem FILE_SYSTEM_DYN_LINK ;
|
||||
}
|
||||
|
||||
#Link normally to a library, but sometimes static isn't installed so fall back to dynamic.
|
||||
@ -148,11 +171,10 @@ rule external-lib ( name : search-path * ) {
|
||||
local ignored = @($(build-log):E=$(script)) ;
|
||||
}
|
||||
|
||||
requirements = ;
|
||||
{
|
||||
#Boost jam's static clang for Linux is buggy.
|
||||
requirements += <cxxflags>$(cxxflags) <cflags>$(cflags) <linkflags>$(ldflags) <os>LINUX,<toolset>clang:<link>shared ;
|
||||
#Boost jam's static clang for Linux is buggy.
|
||||
requirements += <cxxflags>$(cxxflags) <cflags>$(cflags) <linkflags>$(ldflags) <os>LINUX,<toolset>clang:<link>shared ;
|
||||
|
||||
if ! [ option.get "without-libsegfault" : : "yes" ] && ! $(FORCE-STATIC) {
|
||||
#libSegFault prints a stack trace on segfault. Link against it if available.
|
||||
if [ test_flags "-lSegFault" ] {
|
||||
external-lib SegFault ;
|
||||
@ -244,3 +266,12 @@ rule always-if-changed ( file current : targets * ) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if [ option.get "sanity-test" : : "yes" ] {
|
||||
local current_version = [ modules.peek : JAM_VERSION ] ;
|
||||
if ( $(current_version[0]) < 2000 && [ version.check-jam-version 3 1 16 ] ) || [ version.check-jam-version 2011 0 0 ] {
|
||||
EXIT "Sane" : 0 ;
|
||||
} else {
|
||||
EXIT "Bad" : 1 ;
|
||||
}
|
||||
}
|
||||
|
14
lm/Jamfile
@ -1,4 +1,14 @@
|
||||
lib kenlm : bhiksha.cc binary_format.cc config.cc lm_exception.cc model.cc quantize.cc read_arpa.cc search_hashed.cc search_trie.cc trie.cc trie_sort.cc value_build.cc virtual_interface.cc vocab.cc ../util//kenutil : <include>.. : : <include>.. <library>../util//kenutil ;
|
||||
# If you need higher order, change this option
|
||||
# Having this limit means that State can be
|
||||
# (KENLM_MAX_ORDER - 1) * sizeof(float) bytes instead of
|
||||
# sizeof(float*) + (KENLM_MAX_ORDER - 1) * sizeof(float) + malloc overhead
|
||||
max-order = [ option.get "max-kenlm-order" : 6 : 6 ] ;
|
||||
if ( $(max-order) != 6 ) {
|
||||
echo "Setting KenLM maximum n-gram order to $(max-order)" ;
|
||||
}
|
||||
max-order = <define>KENLM_MAX_ORDER=$(max-order) ;
|
||||
|
||||
lib kenlm : bhiksha.cc binary_format.cc config.cc lm_exception.cc model.cc quantize.cc read_arpa.cc search_hashed.cc search_trie.cc trie.cc trie_sort.cc value_build.cc virtual_interface.cc vocab.cc ../util//kenutil : <include>.. $(max-order) : : <include>.. <library>../util//kenutil $(max-order) ;
|
||||
|
||||
import testing ;
|
||||
|
||||
@ -7,4 +17,4 @@ run model_test.cc ../util//kenutil kenlm ..//boost_unit_test_framework : : test.
|
||||
|
||||
exe query : ngram_query.cc kenlm ../util//kenutil ;
|
||||
exe build_binary : build_binary.cc kenlm ../util//kenutil ;
|
||||
|
||||
exe kenlm_max_order : max_order.cc : $(max-order) ;
|
||||
|
@ -38,7 +38,6 @@
|
||||
#ifndef LM_LEFT__
|
||||
#define LM_LEFT__
|
||||
|
||||
#include "lm/max_order.hh"
|
||||
#include "lm/state.hh"
|
||||
#include "lm/return.hh"
|
||||
|
||||
@ -111,7 +110,7 @@ template <class M> class RuleScore {
|
||||
return;
|
||||
}
|
||||
|
||||
float backoffs[kMaxOrder - 1], backoffs2[kMaxOrder - 1];
|
||||
float backoffs[KENLM_MAX_ORDER - 1], backoffs2[KENLM_MAX_ORDER - 1];
|
||||
float *back = backoffs, *back2 = backoffs2;
|
||||
unsigned char next_use = out_.right.length;
|
||||
|
||||
|
@ -16,13 +16,16 @@ namespace {
|
||||
#define Term(word) score.Terminal(m.GetVocabulary().Index(word));
|
||||
#define VCheck(word, value) BOOST_CHECK_EQUAL(m.GetVocabulary().Index(word), value);
|
||||
|
||||
// Apparently some Boost versions use templates and are pretty strict about types matching.
|
||||
#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol));
|
||||
|
||||
template <class M> void Short(const M &m) {
|
||||
ChartState base;
|
||||
{
|
||||
RuleScore<M> score(m, base);
|
||||
Term("more");
|
||||
Term("loin");
|
||||
BOOST_CHECK_CLOSE(-1.206319 - 0.3561665, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-1.206319 - 0.3561665, score.Finish(), 0.001);
|
||||
}
|
||||
BOOST_CHECK(base.left.full);
|
||||
BOOST_CHECK_EQUAL(2, base.left.length);
|
||||
@ -35,7 +38,7 @@ template <class M> void Short(const M &m) {
|
||||
Term("little");
|
||||
score.NonTerminal(base, -1.206319 - 0.3561665);
|
||||
// p(little more loin | null context)
|
||||
BOOST_CHECK_CLOSE(-1.56538, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-1.56538, score.Finish(), 0.001);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(3, more_left.left.length);
|
||||
BOOST_CHECK_EQUAL(1, more_left.right.length);
|
||||
@ -47,7 +50,7 @@ template <class M> void Short(const M &m) {
|
||||
RuleScore<M> score(m, shorter);
|
||||
Term("to");
|
||||
score.NonTerminal(base, -1.206319 - 0.3561665);
|
||||
BOOST_CHECK_CLOSE(-0.30103 - 1.687872 - 1.206319 - 0.3561665, score.Finish(), 0.01);
|
||||
SLOPPY_CHECK_CLOSE(-0.30103 - 1.687872 - 1.206319 - 0.3561665, score.Finish(), 0.01);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(1, shorter.left.length);
|
||||
BOOST_CHECK_EQUAL(1, shorter.right.length);
|
||||
@ -61,7 +64,7 @@ template <class M> void Charge(const M &m) {
|
||||
RuleScore<M> score(m, base);
|
||||
Term("on");
|
||||
Term("more");
|
||||
BOOST_CHECK_CLOSE(-1.509559 -0.4771212 -1.206319, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-1.509559 -0.4771212 -1.206319, score.Finish(), 0.001);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(1, base.left.length);
|
||||
BOOST_CHECK_EQUAL(1, base.right.length);
|
||||
@ -73,7 +76,7 @@ template <class M> void Charge(const M &m) {
|
||||
RuleScore<M> score(m, extend);
|
||||
Term("looking");
|
||||
score.NonTerminal(base, -1.509559 -0.4771212 -1.206319);
|
||||
BOOST_CHECK_CLOSE(-3.91039, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-3.91039, score.Finish(), 0.001);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(2, extend.left.length);
|
||||
BOOST_CHECK_EQUAL(1, extend.right.length);
|
||||
@ -85,7 +88,7 @@ template <class M> void Charge(const M &m) {
|
||||
RuleScore<M> score(m, tobos);
|
||||
score.BeginSentence();
|
||||
score.NonTerminal(extend, -3.91039);
|
||||
BOOST_CHECK_CLOSE(-3.471169, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-3.471169, score.Finish(), 0.001);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(0, tobos.left.length);
|
||||
BOOST_CHECK_EQUAL(1, tobos.right.length);
|
||||
@ -169,8 +172,8 @@ template <class M> void LookupVocab(const M &m, const StringPiece &str, std::vec
|
||||
#define TEXT_TEST(str) \
|
||||
LookupVocab(m, str, words); \
|
||||
expect = LeftToRight(m, words, rest); \
|
||||
BOOST_CHECK_CLOSE(expect, RightToLeft(m, words, rest), 0.001); \
|
||||
BOOST_CHECK_CLOSE(expect, TreeMiddle(m, words, rest), 0.001); \
|
||||
SLOPPY_CHECK_CLOSE(expect, RightToLeft(m, words, rest), 0.001); \
|
||||
SLOPPY_CHECK_CLOSE(expect, TreeMiddle(m, words, rest), 0.001); \
|
||||
|
||||
// Build sentences, or parts thereof, from right to left.
|
||||
template <class M> void GrowBig(const M &m, bool rest = false) {
|
||||
@ -202,20 +205,20 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) {
|
||||
{
|
||||
RuleScore<M> score(m, also);
|
||||
score.Terminal(m.GetVocabulary().Index("also"));
|
||||
BOOST_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
|
||||
}
|
||||
ChartState would;
|
||||
{
|
||||
RuleScore<M> score(m, would);
|
||||
score.Terminal(m.GetVocabulary().Index("would"));
|
||||
BOOST_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
|
||||
}
|
||||
ChartState combine_also_would;
|
||||
{
|
||||
RuleScore<M> score(m, combine_also_would);
|
||||
score.NonTerminal(also, -1.687872);
|
||||
score.NonTerminal(would, -1.687872);
|
||||
BOOST_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(2, combine_also_would.right.length);
|
||||
|
||||
@ -224,7 +227,7 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) {
|
||||
RuleScore<M> score(m, also_would);
|
||||
score.Terminal(m.GetVocabulary().Index("also"));
|
||||
score.Terminal(m.GetVocabulary().Index("would"));
|
||||
BOOST_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-1.687872 - 2.0, score.Finish(), 0.001);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(2, also_would.right.length);
|
||||
|
||||
@ -232,7 +235,7 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) {
|
||||
{
|
||||
RuleScore<M> score(m, consider);
|
||||
score.Terminal(m.GetVocabulary().Index("consider"));
|
||||
BOOST_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-1.687872, score.Finish(), 0.001);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(1, consider.left.length);
|
||||
BOOST_CHECK_EQUAL(1, consider.right.length);
|
||||
@ -245,19 +248,19 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) {
|
||||
score.Terminal(m.GetVocabulary().Index("higher"));
|
||||
higher_score = score.Finish();
|
||||
}
|
||||
BOOST_CHECK_CLOSE(-1.509559, higher_score, 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-1.509559, higher_score, 0.001);
|
||||
BOOST_CHECK_EQUAL(1, higher.left.length);
|
||||
BOOST_CHECK_EQUAL(1, higher.right.length);
|
||||
BOOST_CHECK(!higher.left.full);
|
||||
VCheck("higher", higher.right.words[0]);
|
||||
BOOST_CHECK_CLOSE(-0.30103, higher.right.backoff[0], 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.30103, higher.right.backoff[0], 0.001);
|
||||
|
||||
ChartState consider_higher;
|
||||
{
|
||||
RuleScore<M> score(m, consider_higher);
|
||||
score.NonTerminal(consider, -1.687872);
|
||||
score.NonTerminal(higher, higher_score);
|
||||
BOOST_CHECK_CLOSE(-1.509559 - 1.687872 - 0.30103, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-1.509559 - 1.687872 - 0.30103, score.Finish(), 0.001);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(2, consider_higher.left.length);
|
||||
BOOST_CHECK(!consider_higher.left.full);
|
||||
@ -267,7 +270,7 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) {
|
||||
RuleScore<M> score(m, full);
|
||||
score.NonTerminal(combine_also_would, -1.687872 - 2.0);
|
||||
score.NonTerminal(consider_higher, -1.509559 - 1.687872 - 0.30103);
|
||||
BOOST_CHECK_CLOSE(-10.6879, score.Finish(), 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-10.6879, score.Finish(), 0.001);
|
||||
}
|
||||
BOOST_CHECK_EQUAL(4, full.right.length);
|
||||
}
|
||||
@ -277,7 +280,7 @@ template <class M> void AlsoWouldConsiderHigher(const M &m) {
|
||||
float got = val; \
|
||||
std::vector<WordIndex> indices; \
|
||||
LookupVocab(m, str, indices); \
|
||||
BOOST_CHECK_CLOSE(LeftToRight(m, indices), got, 0.001); \
|
||||
SLOPPY_CHECK_CLOSE(LeftToRight(m, indices), got, 0.001); \
|
||||
}
|
||||
|
||||
template <class M> void FullGrow(const M &m) {
|
||||
|
5
lm/max_order.cc
Normal file
@ -0,0 +1,5 @@
|
||||
#include <iostream>
|
||||
|
||||
// Reports, on standard error, the maximum n-gram order this binary was built
// with. KENLM_MAX_ORDER is expected to be defined on the compiler command line.
int main(int argc, char *argv[]) {
  // The command line is not consulted.
  static_cast<void>(argc);
  static_cast<void>(argv);

  std::cerr << "KenLM was compiled with a maximum supported n-gram order set to "
            << KENLM_MAX_ORDER
            << "."
            << std::endl;
  return 0;
}
|
@ -1,14 +0,0 @@
|
||||
#ifndef LM_MAX_ORDER__
|
||||
#define LM_MAX_ORDER__
|
||||
namespace lm {
|
||||
namespace ngram {
|
||||
// If you need higher order, change this and recompile.
|
||||
// Having this limit means that State can be
|
||||
// (kMaxOrder - 1) * sizeof(float) bytes instead of
|
||||
// sizeof(float*) + (kMaxOrder - 1) * sizeof(float) + malloc overhead
|
||||
const unsigned char kMaxOrder = 6;
|
||||
|
||||
} // namespace ngram
|
||||
} // namespace lm
|
||||
|
||||
#endif // LM_MAX_ORDER__
|
@ -48,6 +48,7 @@ template <class Search, class VocabularyT> GenericModel<Search, VocabularyT>::Ge
|
||||
}
|
||||
|
||||
template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT>::InitializeFromBinary(void *start, const Parameters ¶ms, const Config &config, int fd) {
|
||||
UTIL_THROW_IF(params.counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << params.counts.size() << ". Re-compile (use -a), passing a number at least this large to bjam's --max-kenlm-order flag.");
|
||||
SetupMemory(start, params.counts, config);
|
||||
vocab_.LoadedBinary(params.fixed.has_vocabulary, fd, config.enumerate_vocab);
|
||||
search_.LoadedBinary();
|
||||
@ -61,7 +62,7 @@ template <class Search, class VocabularyT> void GenericModel<Search, VocabularyT
|
||||
// File counts do not include pruned trigrams that extend to quadgrams etc. These will be fixed by search_.
|
||||
ReadARPACounts(f, counts);
|
||||
|
||||
if (counts.size() > kMaxOrder) UTIL_THROW(FormatLoadException, "This model has order " << counts.size() << ". Edit lm/max_order.hh, set kMaxOrder to at least this value, and recompile.");
|
||||
UTIL_THROW_IF(counts.size() > KENLM_MAX_ORDER, FormatLoadException, "This model has order " << counts.size() << ". Re-compile (use -a), passing a number at least this large to bjam's --max-kenlm-order flag.");
|
||||
if (counts.size() < 2) UTIL_THROW(FormatLoadException, "This ngram implementation assumes at least a bigram model.");
|
||||
if (config.probing_multiplier <= 1.0) UTIL_THROW(ConfigException, "probing multiplier must be > 1.0");
|
||||
|
||||
@ -123,7 +124,7 @@ template <class Search, class VocabularyT> FullScoreReturn GenericModel<Search,
|
||||
return ret;
|
||||
}
|
||||
// i is the order of the backoff we're looking for.
|
||||
unsigned char order_minus_2 = 0;
|
||||
unsigned char order_minus_2 = start - 2;
|
||||
for (const WordIndex *i = context_rbegin + start - 1; i < context_rend; ++i, ++order_minus_2) {
|
||||
typename Search::MiddlePointer p(search_.LookupMiddle(order_minus_2, *i, node, independent_left, extend_left));
|
||||
if (!p.Found()) break;
|
||||
|
@ -5,7 +5,6 @@
|
||||
#include "lm/binary_format.hh"
|
||||
#include "lm/config.hh"
|
||||
#include "lm/facade.hh"
|
||||
#include "lm/max_order.hh"
|
||||
#include "lm/quantize.hh"
|
||||
#include "lm/search_hashed.hh"
|
||||
#include "lm/search_trie.hh"
|
||||
|
@ -6,6 +6,9 @@
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <boost/test/floating_point_comparison.hpp>
|
||||
|
||||
// Apparently some Boost versions use templates and are pretty strict about types matching.
|
||||
#define SLOPPY_CHECK_CLOSE(ref, value, tol) BOOST_CHECK_CLOSE(static_cast<double>(ref), static_cast<double>(value), static_cast<double>(tol));
|
||||
|
||||
namespace lm {
|
||||
namespace ngram {
|
||||
|
||||
@ -46,7 +49,7 @@ template <class Model> State GetState(const Model &model, const char *word, cons
|
||||
state, \
|
||||
model.GetVocabulary().Index(word), \
|
||||
out);\
|
||||
BOOST_CHECK_CLOSE(score, ret.prob, 0.001); \
|
||||
SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
|
||||
BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length); \
|
||||
BOOST_CHECK_GE(std::min<unsigned char>(ngram, 5 - 1), out.length); \
|
||||
BOOST_CHECK_EQUAL(indep_left, ret.independent_left); \
|
||||
@ -176,14 +179,14 @@ template <class M> void ExtendLeftTest(const M &model) {
|
||||
State right;
|
||||
FullScoreReturn little(model.FullScore(model.NullContextState(), model.GetVocabulary().Index("little"), right));
|
||||
const float kLittleProb = -1.285941;
|
||||
BOOST_CHECK_CLOSE(kLittleProb, little.prob, 0.001);
|
||||
SLOPPY_CHECK_CLOSE(kLittleProb, little.prob, 0.001);
|
||||
unsigned char next_use;
|
||||
float backoff_out[4];
|
||||
|
||||
FullScoreReturn extend_none(model.ExtendLeft(NULL, NULL, NULL, little.extend_left, 1, NULL, next_use));
|
||||
BOOST_CHECK_EQUAL(0, next_use);
|
||||
BOOST_CHECK_EQUAL(little.extend_left, extend_none.extend_left);
|
||||
BOOST_CHECK_CLOSE(little.prob - little.rest, extend_none.prob, 0.001);
|
||||
SLOPPY_CHECK_CLOSE(little.prob - little.rest, extend_none.prob, 0.001);
|
||||
BOOST_CHECK_EQUAL(1, extend_none.ngram_length);
|
||||
|
||||
const WordIndex a = model.GetVocabulary().Index("a");
|
||||
@ -191,16 +194,16 @@ template <class M> void ExtendLeftTest(const M &model) {
|
||||
// a little
|
||||
FullScoreReturn extend_a(model.ExtendLeft(&a, &a + 1, &backoff_in, little.extend_left, 1, backoff_out, next_use));
|
||||
BOOST_CHECK_EQUAL(1, next_use);
|
||||
BOOST_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
|
||||
BOOST_CHECK_CLOSE(-0.09132547 - little.rest, extend_a.prob, 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.09132547 - little.rest, extend_a.prob, 0.001);
|
||||
BOOST_CHECK_EQUAL(2, extend_a.ngram_length);
|
||||
BOOST_CHECK(!extend_a.independent_left);
|
||||
|
||||
const WordIndex on = model.GetVocabulary().Index("on");
|
||||
FullScoreReturn extend_on(model.ExtendLeft(&on, &on + 1, &backoff_in, extend_a.extend_left, 2, backoff_out, next_use));
|
||||
BOOST_CHECK_EQUAL(1, next_use);
|
||||
BOOST_CHECK_CLOSE(-0.4771212, backoff_out[0], 0.001);
|
||||
BOOST_CHECK_CLOSE(-0.0283603 - (extend_a.rest + little.rest), extend_on.prob, 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.4771212, backoff_out[0], 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.0283603 - (extend_a.rest + little.rest), extend_on.prob, 0.001);
|
||||
BOOST_CHECK_EQUAL(3, extend_on.ngram_length);
|
||||
BOOST_CHECK(!extend_on.independent_left);
|
||||
|
||||
@ -208,9 +211,9 @@ template <class M> void ExtendLeftTest(const M &model) {
|
||||
float backoff_in_arr[4];
|
||||
FullScoreReturn extend_both(model.ExtendLeft(both, both + 2, backoff_in_arr, little.extend_left, 1, backoff_out, next_use));
|
||||
BOOST_CHECK_EQUAL(2, next_use);
|
||||
BOOST_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
|
||||
BOOST_CHECK_CLOSE(-0.4771212, backoff_out[1], 0.001);
|
||||
BOOST_CHECK_CLOSE(-0.0283603 - little.rest, extend_both.prob, 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.69897, backoff_out[0], 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.4771212, backoff_out[1], 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.0283603 - little.rest, extend_both.prob, 0.001);
|
||||
BOOST_CHECK_EQUAL(3, extend_both.ngram_length);
|
||||
BOOST_CHECK(!extend_both.independent_left);
|
||||
BOOST_CHECK_EQUAL(extend_on.extend_left, extend_both.extend_left);
|
||||
@ -218,12 +221,12 @@ template <class M> void ExtendLeftTest(const M &model) {
|
||||
|
||||
#define StatelessTest(word, provide, ngram, score) \
|
||||
ret = model.FullScoreForgotState(indices + num_words - word, indices + num_words - word + provide, indices[num_words - word - 1], state); \
|
||||
BOOST_CHECK_CLOSE(score, ret.prob, 0.001); \
|
||||
SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
|
||||
BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length); \
|
||||
model.GetState(indices + num_words - word, indices + num_words - word + provide, before); \
|
||||
ret = model.FullScore(before, indices[num_words - word - 1], out); \
|
||||
BOOST_CHECK(state == out); \
|
||||
BOOST_CHECK_CLOSE(score, ret.prob, 0.001); \
|
||||
SLOPPY_CHECK_CLOSE(score, ret.prob, 0.001); \
|
||||
BOOST_CHECK_EQUAL(static_cast<unsigned int>(ngram), ret.ngram_length);
|
||||
|
||||
template <class M> void Stateless(const M &model) {
|
||||
@ -238,7 +241,7 @@ template <class M> void Stateless(const M &model) {
|
||||
State state, out, before;
|
||||
|
||||
ret = model.FullScoreForgotState(indices + num_words - 1, indices + num_words, indices[num_words - 2], state);
|
||||
BOOST_CHECK_CLOSE(-0.484652, ret.prob, 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.484652, ret.prob, 0.001);
|
||||
StatelessTest(1, 1, 2, -0.484652);
|
||||
|
||||
// looking
|
||||
@ -276,7 +279,7 @@ template <class M> void NoUnkCheck(const M &model) {
|
||||
State state;
|
||||
|
||||
FullScoreReturn ret = model.FullScoreForgotState(&unk_index, &unk_index + 1, unk_index, state);
|
||||
BOOST_CHECK_CLOSE(-100.0, ret.prob, 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-100.0, ret.prob, 0.001);
|
||||
}
|
||||
|
||||
template <class M> void Everything(const M &m) {
|
||||
@ -426,8 +429,8 @@ BOOST_AUTO_TEST_CASE(rest_max) {
|
||||
RestProbingModel model(TestLocation(), config);
|
||||
State state, out;
|
||||
FullScoreReturn ret(model.FullScore(model.NullContextState(), model.GetVocabulary().Index("."), state));
|
||||
BOOST_CHECK_CLOSE(-0.2705918, ret.rest, 0.001);
|
||||
BOOST_CHECK_CLOSE(-0.01916512, model.FullScore(state, model.GetVocabulary().EndSentence(), out).rest, 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.2705918, ret.rest, 0.001);
|
||||
SLOPPY_CHECK_CLOSE(-0.01916512, model.FullScore(state, model.GetVocabulary().EndSentence(), out).rest, 0.001);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
@ -3,7 +3,6 @@
|
||||
|
||||
#include "lm/blank.hh"
|
||||
#include "lm/config.hh"
|
||||
#include "lm/max_order.hh"
|
||||
#include "lm/model_type.hh"
|
||||
#include "util/bit_packing.hh"
|
||||
|
||||
@ -217,7 +216,7 @@ class SeparatelyQuantize {
|
||||
const Bins &LongestTable() const { return longest_; }
|
||||
|
||||
private:
|
||||
Bins tables_[kMaxOrder - 1][2];
|
||||
Bins tables_[KENLM_MAX_ORDER - 1][2];
|
||||
|
||||
Bins longest_;
|
||||
|
||||
|