mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-28 14:32:38 +03:00
make filter-pt compile under windows
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@1454 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
af9488c7ba
commit
72d53aee46
220
sigtest-filter/XGetopt.cpp
Normal file
220
sigtest-filter/XGetopt.cpp
Normal file
@ -0,0 +1,220 @@
|
||||
// XGetopt.cpp Version 1.2
|
||||
//
|
||||
// Author: Hans Dietrich
|
||||
// hdietrich2@hotmail.com
|
||||
//
|
||||
// Description:
|
||||
// XGetopt.cpp implements getopt(), a function to parse command lines.
|
||||
//
|
||||
// History
|
||||
// Version 1.2 - 2003 May 17
|
||||
// - Added Unicode support
|
||||
//
|
||||
// Version 1.1 - 2002 March 10
|
||||
// - Added example to XGetopt.cpp module header
|
||||
//
|
||||
// This software is released into the public domain.
|
||||
// You are free to use it in any way you like.
|
||||
//
|
||||
// This software is provided "as is" with no expressed
|
||||
// or implied warranty. I accept no liability for any
|
||||
// damage or loss of business that this software may cause.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// if you are using precompiled headers then include this line:
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// if you are not using precompiled headers then include these lines:
|
||||
//#include <windows.h>
|
||||
//#include <stdio.h>
|
||||
//#include <tchar.h>
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "XGetopt.h"
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// X G e t o p t . c p p
|
||||
//
|
||||
//
|
||||
// NAME
|
||||
// getopt -- parse command line options
|
||||
//
|
||||
// SYNOPSIS
|
||||
// int getopt(int argc, char *argv[], char *optstring)
|
||||
//
|
||||
// extern char *optarg;
|
||||
// extern int optind;
|
||||
//
|
||||
// DESCRIPTION
|
||||
// The getopt() function parses the command line arguments. Its
|
||||
// arguments argc and argv are the argument count and array as
|
||||
// passed into the application on program invocation. In the case
|
||||
// of Visual C++ programs, argc and argv are available via the
|
||||
// variables __argc and __argv (double underscores), respectively.
|
||||
// getopt returns the next option letter in argv that matches a
|
||||
// letter in optstring. (Note: Unicode programs should use
|
||||
// __targv instead of __argv. Also, all character and string
|
||||
// literals should be enclosed in ( ) ).
|
||||
//
|
||||
// optstring is a string of recognized option letters; if a letter
|
||||
// is followed by a colon, the option is expected to have an argument
|
||||
// that may or may not be separated from it by white space. optarg
|
||||
// is set to point to the start of the option argument on return from
|
||||
// getopt.
|
||||
//
|
||||
// Option letters may be combined, e.g., "-ab" is equivalent to
|
||||
// "-a -b". Option letters are case sensitive.
|
||||
//
|
||||
// getopt places in the external variable optind the argv index
|
||||
// of the next argument to be processed. optind is initialized
|
||||
// to 0 before the first call to getopt.
|
||||
//
|
||||
// When all options have been processed (i.e., up to the first
|
||||
// non-option argument), getopt returns EOF, optarg will point
|
||||
// to the argument, and optind will be set to the argv index of
|
||||
// the argument. If there are no non-option arguments, optarg
|
||||
// will be set to NULL.
|
||||
//
|
||||
// The special option "--" may be used to delimit the end of the
|
||||
// options; EOF will be returned, and "--" (and everything after it)
|
||||
// will be skipped.
|
||||
//
|
||||
// RETURN VALUE
|
||||
// For option letters contained in the string optstring, getopt
|
||||
// will return the option letter. getopt returns a question mark (?)
|
||||
// when it encounters an option letter not included in optstring.
|
||||
// EOF is returned when processing is finished.
|
||||
//
|
||||
// BUGS
|
||||
// 1) Long options are not supported.
|
||||
// 2) The GNU double-colon extension is not supported.
|
||||
// 3) The environment variable POSIXLY_CORRECT is not supported.
|
||||
// 4) The + syntax is not supported.
|
||||
// 5) The automatic permutation of arguments is not supported.
|
||||
// 6) This implementation of getopt() returns EOF if an error is
|
||||
// encountered, instead of -1 as the latest standard requires.
|
||||
//
|
||||
// EXAMPLE
|
||||
// BOOL CMyApp::ProcessCommandLine(int argc, char *argv[])
|
||||
// {
|
||||
// int c;
|
||||
//
|
||||
// while ((c = getopt(argc, argv, ("aBn:"))) != EOF)
|
||||
// {
|
||||
// switch (c)
|
||||
// {
|
||||
// case ('a'):
|
||||
// TRACE(("option a\n"));
|
||||
// //
|
||||
// // set some flag here
|
||||
// //
|
||||
// break;
|
||||
//
|
||||
// case ('B'):
|
||||
// TRACE( ("option B\n"));
|
||||
// //
|
||||
// // set some other flag here
|
||||
// //
|
||||
// break;
|
||||
//
|
||||
// case ('n'):
|
||||
// TRACE(("option n: value=%d\n"), atoi(optarg));
|
||||
// //
|
||||
// // do something with value here
|
||||
// //
|
||||
// break;
|
||||
//
|
||||
// case ('?'):
|
||||
// TRACE(("ERROR: illegal option %s\n"), argv[optind-1]);
|
||||
// return FALSE;
|
||||
// break;
|
||||
//
|
||||
// default:
|
||||
// TRACE(("WARNING: no handler for option %c\n"), c);
|
||||
// return FALSE;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// //
|
||||
// // check for non-option args here
|
||||
// //
|
||||
// return TRUE;
|
||||
// }
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
char *optarg;		// argument of the option just returned; at EOF, the first non-option argument (or NULL)
int optind = 0;		// argv index of the next element to examine; 0 means "start a fresh scan"
int opterr = 1;		// XGetopt.h declares this symbol, so it must be defined somewhere or any
			// program that references it fails to link. This getopt() never prints
			// diagnostics, so the value is not consulted.

// Minimal getopt(): returns the next option letter found in argv.
//
//   argc, argv  - the command line as received by main()
//   optstring   - accepted option letters; a letter followed by ':' takes an
//                 argument, either glued to the letter ("-n5") or in the
//                 following argv element ("-n 5")
//
// Returns the option letter, '?' for a letter that is not in optstring (or
// for an option whose required argument is missing), and EOF once the first
// non-option argument, a lone "-", "--", or the end of argv is reached.
// On EOF optarg points at the first remaining argument, or is NULL if there
// is none.
int getopt(int argc, char *argv[], char *optstring)
{
	// Cursor inside the argv element currently being scanned; this is what
	// lets clustered options such as "-ab" be returned one call at a time.
	static char *next = NULL;

	if (optind == 0)
		next = NULL;		// the caller asked for a fresh scan

	optarg = NULL;

	if (next == NULL || *next == '\0')
	{
		// The current element is used up: move on to the next one.
		if (optind == 0)
			optind++;	// skip argv[0], the program name

		// End of argv, a plain word, or a lone "-" terminates option parsing.
		if (optind >= argc || argv[optind][0] != '-' || argv[optind][1] == '\0')
		{
			if (optind < argc)
				optarg = argv[optind];
			return EOF;
		}

		// "--" terminates option parsing and is itself skipped.
		if (strcmp(argv[optind], "--") == 0)
		{
			optind++;
			if (optind < argc)
				optarg = argv[optind];
			return EOF;
		}

		next = argv[optind] + 1;	// skip past the leading '-'
		optind++;
	}

	char c = *next++;
	char *cp = strchr(optstring, c);

	// ':' is the argument marker inside optstring, never a valid option letter.
	if (cp == NULL || c == ':')
		return '?';

	if (cp[1] == ':')
	{
		if (*next != '\0')
		{
			// Argument glued to the option letter: "-n5".
			optarg = next;
			next = NULL;
		}
		else if (optind < argc)
		{
			// Argument is the following argv element: "-n 5".
			optarg = argv[optind];
			optind++;
		}
		else
		{
			// Required argument is missing.
			return '?';
		}
	}

	return c;
}
|
23
sigtest-filter/XGetopt.h
Normal file
23
sigtest-filter/XGetopt.h
Normal file
@ -0,0 +1,23 @@
|
||||
// XGetopt.h Version 1.2
|
||||
//
|
||||
// Author: Hans Dietrich
|
||||
// hdietrich2@hotmail.com
|
||||
//
|
||||
// This software is released into the public domain.
|
||||
// You are free to use it in any way you like.
|
||||
//
|
||||
// This software is provided "as is" with no expressed
|
||||
// or implied warranty. I accept no liability for any
|
||||
// damage or loss of business that this software may cause.
|
||||
//
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#ifndef XGETOPT_H
#define XGETOPT_H

// State shared between getopt() and its caller.
extern char *optarg;	// argument belonging to the option last returned
extern int optind;	// argv index of the next element to process
extern int opterr;	// conventional getopt error flag; getopt() in XGetopt.cpp does not read it

// Returns the next option letter from argv, '?' for an unknown option and
// EOF when the options are exhausted (see XGetopt.cpp for details).
int getopt(int argc, char *argv[], char *optstring);

#endif //XGETOPT_H
|
@ -1,357 +1,364 @@
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "_SuffixArraySearchApplicationBase.h"
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
|
||||
typedef std::set<TextLenType> SentIdSet;
|
||||
typedef std::map<std::string, SentIdSet> PhraseSetMap;
|
||||
|
||||
// constants
|
||||
const size_t MINIMUM_SIZE_TO_KEEP = 10000; // reduce this to improve memory usage,
|
||||
// increase for speed
|
||||
const std::string SEPARATOR = " ||| ";
|
||||
|
||||
const double ALPHA_PLUS_EPS = -1000.0; // dummy value
|
||||
const double ALPHA_MINUS_EPS = -2000.0; // dummy value
|
||||
|
||||
// configuration params
|
||||
int pfe_filter_limit = 0; // 0 = don't filter anything based on P(f|e)
|
||||
bool print_cooc_counts = false; // add cooc counts to phrase table?
|
||||
bool print_neglog_significance = false; // add -log(p) to phrase table?
|
||||
double sig_filter_limit = 0; // keep phrase pairs with -log(sig) > sig_filter_limit
|
||||
// higher = filter-more
|
||||
|
||||
// globals
|
||||
PhraseSetMap esets;
|
||||
double p_111 = 0.0; // alpha
|
||||
size_t nremoved_sigfilter = 0;
|
||||
size_t nremoved_pfefilter = 0;
|
||||
|
||||
C_SuffixArraySearchApplicationBase e_sa;
|
||||
C_SuffixArraySearchApplicationBase f_sa;
|
||||
int num_lines;
|
||||
|
||||
// Writes the command-line help to stderr and terminates the process with
// exit status 1. Never returns.
void usage()
{
  static const char* const help_text =
    "\nFilter phrase table using significance testing as described\n"
    "in H. Johnson, et al. (2007) Improving Translation Quality\n"
    "by Discarding Most of the Phrasetable. EMNLP 2007.\n"
    "\nUsage:\n"
    "\n filter-pt -e english.suf-arr -f french.suf-arr\n"
    " [-c] [-p] [-l threshold] [-n num] < PHRASE-TABLE > FILTERED-PHRASE-TABLE\n\n"
    " [-l threshold] >0.0, a+e, or a-e: keep values that have a -log significance > this\n"
    " [-n num ] 0, 1...: 0=no filtering, >0 sort by P(e|f) and keep the top num elements\n"
    " [-c ] add the cooccurence counts to the phrase table\n"
    " [-p ] add -log(significance) to the phrasetable\n\n";

  std::cerr << help_text;
  exit(1);
}
|
||||
|
||||
// One phrase-table line, split into its fields, plus the co-occurrence
// statistics that are filled in later by set_cooc_stats().
struct PTEntry {
  // Parses the line `str`; `index` selects which score becomes `pfe`.
  PTEntry(const std::string& str, int index);

  std::string f_phrase;   // first field of the line
  std::string e_phrase;   // second field of the line
  std::string extra;      // text between the second field and the last field
  std::string scores;     // last field of the line
  float pfe;              // the score selected by `index`, parsed as a number
  int cf;                 // count stored by set_cooc_stats (f-phrase)
  int ce;                 // count stored by set_cooc_stats (e-phrase)
  int cfe;                // count stored by set_cooc_stats (joint)
  float nlog_pte;         // negative log significance stored by set_cooc_stats

  // Stores the co-occurrence counts and the negative log significance.
  void set_cooc_stats(int _cef, int _cf, int _ce, float nlp) {
    this->cfe = _cef;
    this->cf = _cf;
    this->ce = _ce;
    this->nlog_pte = nlp;
  }
};
|
||||
|
||||
// Parses one phrase-table line of the form
//
//   f_phrase ||| e_phrase [||| extra ...] ||| scores
//
// and stores the `index`-th (0-based) space-separated token of `scores`,
// converted to a number, in `pfe`.
//
//   str    - the raw line, without the trailing newline
//   index  - position of the P(f|e) score inside the scores field
//
// A line with fewer than two separators leaves the fields that cannot be
// determined empty and `pfe` at 0; a missing score token also yields 0.
PTEntry::PTEntry(const std::string& str, int index) :
  pfe(0.0f), cf(0), ce(0), cfe(0), nlog_pte(0.0)
{
  const std::string::size_type npos = std::string::npos;
  const std::string::size_type sep_len = SEPARATOR.size();

  // Field 1: source phrase.
  const std::string::size_type sep1 = str.find(SEPARATOR);
  this->f_phrase = str.substr(0, sep1);
  if (sep1 == npos) return;                 // malformed line: no separator at all
  std::string::size_type pos = sep1 + sep_len;

  // Field 2: target phrase.
  const std::string::size_type sep2 = str.find(SEPARATOR, pos);
  this->e_phrase = str.substr(pos, sep2 == npos ? npos : sep2 - pos);
  if (sep2 == npos) return;                 // malformed line: no scores field
  pos = sep2 + sep_len;

  // The scores are whatever follows the LAST separator; anything between the
  // target phrase and that separator is carried along verbatim as `extra`.
  // On a three-field line the last separator is the one that ended the
  // target phrase, so there is no extra text. (Computing last - pos there
  // would wrap around and copy the scores into `extra`.)
  const std::string::size_type last = str.rfind(SEPARATOR);
  if (last >= pos) {
    this->extra = str.substr(pos, last - pos);
  }
  this->scores = str.substr(last + sep_len);

  // Skip `index` space-delimited tokens to reach the requested score.
  std::string::size_type start = 0;
  for (int skipped = 0; skipped < index; ++skipped) {
    const std::string::size_type space = scores.find(' ', start);
    if (space == npos) {                    // fewer scores than requested
      start = npos;
      break;
    }
    start = space + 1;
  }

  if (start != npos) {
    // Parse straight from a std::string: no fixed-size scratch buffer to overflow.
    const std::string::size_type stop = scores.find(' ', start);
    const std::string token = scores.substr(start, stop == npos ? npos : stop - start);
    this->pfe = strtof(token.c_str(), NULL);
  }
}
|
||||
|
||||
struct PfeComparer {
|
||||
bool operator()(const PTEntry* a, const PTEntry* b) const { return a->pfe > b->pfe; }
|
||||
};
|
||||
|
||||
struct NlogSigThresholder {
|
||||
NlogSigThresholder(float threshold) : t(threshold) {}
|
||||
float t;
|
||||
bool operator()(const PTEntry* a) const { if (a->nlog_pte < t) { delete a; return true; } else return false; }
|
||||
};
|
||||
|
||||
std::ostream& operator << (std::ostream& os, const PTEntry& pp)
|
||||
{
|
||||
os << pp.f_phrase << " ||| " << pp.e_phrase;
|
||||
if (pp.extra.size()>0) os << " ||| " << pp.extra;
|
||||
os << " ||| " << pp.scores;
|
||||
if (print_cooc_counts) os << " ||| " << pp.cfe << " " << pp.cf << " " << pp.ce;
|
||||
if (print_neglog_significance) os << " ||| " << pp.nlog_pte;
|
||||
return os;
|
||||
}
|
||||
|
||||
// for an overview, see
|
||||
// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1.
|
||||
double log_gamma(int x)
|
||||
{
|
||||
// size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum));
|
||||
if (x <= 2) { return 0.0; }
|
||||
static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5};
|
||||
double tmp=(double)x+5.5;
|
||||
tmp -= (((double)x)+0.5)*log(tmp);
|
||||
double y=(double)x;
|
||||
double sum = 1.000000000190015;
|
||||
for (size_t j=0;j<6;++j) { sum += coefs[j]/++y; }
|
||||
return -tmp+log(2.5066282746310005*sum/(double)x);
|
||||
}
|
||||
|
||||
// Debug helper: dumps a 2x2 table (a b / c d) to stderr together with the
// probability p and the factor b*c/((a+1)*(d+1)).
void print(int a, int b, int c, int d, float p) {
  const double xf = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);

  std::cerr << a << "\t" << b << "\t P=" << p << "\n";
  std::cerr << c << "\t" << d << "\t xf=" << xf << "\n\n";
}
|
||||
|
||||
// 2x2 (one-sided) Fisher's exact test
|
||||
// see B. Moore. (2004) On Log Likelihood and the Significance of Rare Events
|
||||
double fisher_exact(int cfe, int ce, int cf)
|
||||
{
|
||||
assert(cfe <= ce);
|
||||
assert(cfe <= cf);
|
||||
|
||||
int a = cfe;
|
||||
int b = (cf - cfe);
|
||||
int c = (ce - cfe);
|
||||
int d = (num_lines - ce - cf + cfe);
|
||||
int n = a + b + c + d;
|
||||
|
||||
double cp = exp(log_gamma(1+a+c) + log_gamma(1+b+d) + log_gamma(1+a+b) + log_gamma(1+c+d) - log_gamma(1+n) - log_gamma(1+a) - log_gamma(1+b) - log_gamma(1+c) - log_gamma(1+d));
|
||||
double total_p = 0.0;
|
||||
int tc = std::min(b,c);
|
||||
for (int i=0; i<=tc; i++) {
|
||||
total_p += cp;
|
||||
// double lg = log_gamma(1+a+c) + log_gamma(1+b+d) + log_gamma(1+a+b) + log_gamma(1+c+d) - log_gamma(1+n) - log_gamma(1+a) - log_gamma(1+b) - log_gamma(1+c) - log_gamma(1+d); double cp = exp(lg);
|
||||
// print(a,b,c,d,cp);
|
||||
double coef = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);
|
||||
cp *= coef;
|
||||
++a; --c; ++d; --b;
|
||||
}
|
||||
return total_p;
|
||||
}
|
||||
|
||||
// input: unordered list of translation options for a single source phrase
|
||||
void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
|
||||
{
|
||||
if (pfe_filter_limit>0 && options.size() > pfe_filter_limit) {
|
||||
nremoved_pfefilter += (options.size() - pfe_filter_limit);
|
||||
std::nth_element(options.begin(), options.begin()+pfe_filter_limit, options.end(), PfeComparer());
|
||||
for (std::vector<PTEntry*>::iterator i=options.begin()+pfe_filter_limit; i != options.end(); ++i)
|
||||
delete *i;
|
||||
options.erase(options.begin()+pfe_filter_limit,options.end());
|
||||
}
|
||||
|
||||
SentIdSet fset;
|
||||
vector<S_SimplePhraseLocationElement> locations;
|
||||
//std::cerr << "Looking up f-phrase: " << options.front()->f_phrase << "\n";
|
||||
|
||||
locations = f_sa.locateExactPhraseInCorpus(options.front()->f_phrase.c_str());
|
||||
if(locations.size()==0){
|
||||
cerr<<"No occurrences found!!\n";
|
||||
}
|
||||
for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin();
|
||||
i != locations.end();
|
||||
++i)
|
||||
{
|
||||
fset.insert(i->sentIdInCorpus);
|
||||
}
|
||||
size_t cf = fset.size();
|
||||
for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
|
||||
const std::string& e_phrase = (*i)->e_phrase;
|
||||
size_t cef=0;
|
||||
SentIdSet& eset = esets[(*i)->e_phrase];
|
||||
if (eset.empty()) {
|
||||
//std::cerr << "Looking up e-phrase: " << e_phrase << "\n";
|
||||
vector<S_SimplePhraseLocationElement> locations = e_sa.locateExactPhraseInCorpus(e_phrase.c_str());
|
||||
for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin(); i!= locations.end(); ++i) {
|
||||
TextLenType curSentId = i->sentIdInCorpus;
|
||||
eset.insert(curSentId);
|
||||
}
|
||||
}
|
||||
size_t ce=eset.size();
|
||||
if (ce < cf) {
|
||||
for (SentIdSet::iterator i=eset.begin(); i != eset.end(); ++i) {
|
||||
if (fset.find(*i) != fset.end()) cef++;
|
||||
}
|
||||
} else {
|
||||
for (SentIdSet::iterator i=fset.begin(); i != fset.end(); ++i) {
|
||||
if (eset.find(*i) != eset.end()) cef++;
|
||||
}
|
||||
}
|
||||
double nlp = -log(fisher_exact(cef, cf, ce));
|
||||
(*i)->set_cooc_stats(cef, cf, ce, nlp);
|
||||
if (ce < MINIMUM_SIZE_TO_KEEP) {
|
||||
esets.erase(e_phrase);
|
||||
}
|
||||
}
|
||||
std::vector<PTEntry*>::iterator new_end =
|
||||
std::remove_if(options.begin(), options.end(), NlogSigThresholder(sig_filter_limit));
|
||||
nremoved_sigfilter += (options.end() - new_end);
|
||||
options.erase(new_end,options.end());
|
||||
}
|
||||
|
||||
int main(int argc, char * argv[]){
|
||||
int c;
|
||||
const char* efile=0;
|
||||
const char* ffile=0;
|
||||
int pfe_index = 2;
|
||||
while ((c = getopt(argc, argv, "cpf:e:i:n:l:")) != -1) {
|
||||
switch (c) {
|
||||
case 'e':
|
||||
efile = optarg;
|
||||
break;
|
||||
case 'f':
|
||||
ffile = optarg;
|
||||
break;
|
||||
case 'i': // index of pfe in phrase table
|
||||
pfe_index = atoi(optarg);
|
||||
break;
|
||||
case 'n': // keep only the top n entries in phrase table sorted by p(f|e) (0=all)
|
||||
pfe_filter_limit = atoi(optarg);
|
||||
std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl;
|
||||
break;
|
||||
case 'c':
|
||||
print_cooc_counts = true;
|
||||
break;
|
||||
case 'p':
|
||||
print_neglog_significance = true;
|
||||
break;
|
||||
case 'l':
|
||||
std::cerr << "-l = " << optarg << "\n";
|
||||
if (strcmp(optarg,"a+e") == 0) {
|
||||
sig_filter_limit = ALPHA_PLUS_EPS;
|
||||
} else if (strcmp(optarg,"a-e") == 0) {
|
||||
sig_filter_limit = ALPHA_MINUS_EPS;
|
||||
} else {
|
||||
char *x;
|
||||
sig_filter_limit = strtod(optarg, &x);
|
||||
if (sig_filter_limit < 0.0) {
|
||||
std::cerr << "Filter limit (-l) must be either 'a+e', 'a-e' or a real number >= 0.0\n";
|
||||
usage();
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
}
|
||||
}
|
||||
//-----------------------------------------------------------------------------
|
||||
if (optind != argc || !efile || !ffile) {
|
||||
usage();
|
||||
}
|
||||
|
||||
//load the indexed corpus with vocabulary(noVoc=false) and with offset(noOffset=false)
|
||||
e_sa.loadData_forSearch(efile, false, false);
|
||||
f_sa.loadData_forSearch(ffile, false, false);
|
||||
size_t elines = e_sa.returnTotalSentNumber();
|
||||
size_t flines = f_sa.returnTotalSentNumber();
|
||||
if (elines != flines) {
|
||||
std::cerr << "Number of lines in e-corpus != number of lines in f-corpus!\n";
|
||||
usage();
|
||||
} else {
|
||||
std::cerr << "Training corpus: " << elines << " lines\n";
|
||||
num_lines = elines;
|
||||
}
|
||||
p_111 = -log(fisher_exact(1,1,1));
|
||||
std::cerr << "\\alpha = " << p_111 << "\n";
|
||||
if (sig_filter_limit == ALPHA_MINUS_EPS) { sig_filter_limit = p_111 - 0.001; }
|
||||
else if (sig_filter_limit == ALPHA_PLUS_EPS) { sig_filter_limit = p_111 + 0.001; }
|
||||
std::cerr << "Sig filter threshold is = " << sig_filter_limit << "\n";
|
||||
|
||||
char tmpString[10000];
|
||||
std::string prev = "";
|
||||
std::vector<PTEntry*> options;
|
||||
size_t pt_lines = 0;
|
||||
while(!cin.eof()){
|
||||
cin.getline(tmpString,10000,'\n');
|
||||
if(++pt_lines%10000==0)
|
||||
{
|
||||
std::cerr << ".";
|
||||
if(pt_lines%500000==0) std::cerr << "[n:"<<pt_lines<<"]\n";
|
||||
}
|
||||
|
||||
if(strlen(tmpString)>0){
|
||||
PTEntry* pp = new PTEntry(tmpString, pfe_index);
|
||||
if (prev != pp->f_phrase) {
|
||||
prev = pp->f_phrase;
|
||||
|
||||
if (!options.empty()) { // always true after first line
|
||||
compute_cooc_stats_and_filter(options);
|
||||
}
|
||||
for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
|
||||
std::cout << **i << std::endl;
|
||||
delete *i;
|
||||
}
|
||||
options.clear();
|
||||
options.push_back(pp);
|
||||
|
||||
} else {
|
||||
options.push_back(pp);
|
||||
}
|
||||
// for(int i=0;i<locations.size(); i++){
|
||||
// cout<<"SentId="<<locations[i].sentIdInCorpus<<" Pos="<<(int)locations[i].posInSentInCorpus<<endl;
|
||||
// }
|
||||
}
|
||||
}
|
||||
compute_cooc_stats_and_filter(options);
|
||||
for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
|
||||
std::cout << **i << std::endl;
|
||||
delete *i;
|
||||
}
|
||||
float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
|
||||
float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
|
||||
std::cerr << "\n\n------------------------------------------------------\n"
|
||||
<< " unfiltered phrases pairs: " << pt_lines << "\n"
|
||||
<< "\n"
|
||||
<< " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n"
|
||||
<< " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n"
|
||||
<< " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n"
|
||||
<< "\n"
|
||||
<< " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n"
|
||||
<< "------------------------------------------------------\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
//#include <unistd.h>
|
||||
#include <algorithm>
|
||||
|
||||
#include "_SuffixArraySearchApplicationBase.h"
|
||||
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <set>
|
||||
|
||||
#include "XGetopt.h"
|
||||
|
||||
typedef std::set<TextLenType> SentIdSet;
|
||||
typedef std::map<std::string, SentIdSet> PhraseSetMap;
|
||||
|
||||
#undef min
|
||||
|
||||
// constants
|
||||
const size_t MINIMUM_SIZE_TO_KEEP = 10000; // reduce this to improve memory usage,
|
||||
// increase for speed
|
||||
const std::string SEPARATOR = " ||| ";
|
||||
|
||||
const double ALPHA_PLUS_EPS = -1000.0; // dummy value
|
||||
const double ALPHA_MINUS_EPS = -2000.0; // dummy value
|
||||
|
||||
// configuration params
|
||||
int pfe_filter_limit = 0; // 0 = don't filter anything based on P(f|e)
|
||||
bool print_cooc_counts = false; // add cooc counts to phrase table?
|
||||
bool print_neglog_significance = false; // add -log(p) to phrase table?
|
||||
double sig_filter_limit = 0; // keep phrase pairs with -log(sig) > sig_filter_limit
|
||||
// higher = filter-more
|
||||
|
||||
// globals
|
||||
PhraseSetMap esets;
|
||||
double p_111 = 0.0; // alpha
|
||||
size_t nremoved_sigfilter = 0;
|
||||
size_t nremoved_pfefilter = 0;
|
||||
|
||||
C_SuffixArraySearchApplicationBase e_sa;
|
||||
C_SuffixArraySearchApplicationBase f_sa;
|
||||
int num_lines;
|
||||
|
||||
// Writes the command-line help to stderr and terminates the process with
// exit status 1. Never returns.
void usage()
{
  static const char* const help_text =
    "\nFilter phrase table using significance testing as described\n"
    "in H. Johnson, et al. (2007) Improving Translation Quality\n"
    "by Discarding Most of the Phrasetable. EMNLP 2007.\n"
    "\nUsage:\n"
    "\n filter-pt -e english.suf-arr -f french.suf-arr\n"
    " [-c] [-p] [-l threshold] [-n num] < PHRASE-TABLE > FILTERED-PHRASE-TABLE\n\n"
    " [-l threshold] >0.0, a+e, or a-e: keep values that have a -log significance > this\n"
    " [-n num ] 0, 1...: 0=no filtering, >0 sort by P(e|f) and keep the top num elements\n"
    " [-c ] add the cooccurence counts to the phrase table\n"
    " [-p ] add -log(significance) to the phrasetable\n\n";

  std::cerr << help_text;
  exit(1);
}
|
||||
|
||||
// One phrase-table line, split into its fields, plus the co-occurrence
// statistics that are filled in later by set_cooc_stats().
struct PTEntry {
  // Parses the line `str`; `index` selects which score becomes `pfe`.
  PTEntry(const std::string& str, int index);

  std::string f_phrase;   // first field of the line
  std::string e_phrase;   // second field of the line
  std::string extra;      // text between the second field and the last field
  std::string scores;     // last field of the line
  float pfe;              // the score selected by `index`, parsed as a number
  int cf;                 // count stored by set_cooc_stats (f-phrase)
  int ce;                 // count stored by set_cooc_stats (e-phrase)
  int cfe;                // count stored by set_cooc_stats (joint)
  float nlog_pte;         // negative log significance stored by set_cooc_stats

  // Stores the co-occurrence counts and the negative log significance.
  void set_cooc_stats(int _cef, int _cf, int _ce, float nlp) {
    this->cfe = _cef;
    this->cf = _cf;
    this->ce = _ce;
    this->nlog_pte = nlp;
  }
};
|
||||
|
||||
// Parses one phrase-table line of the form
//
//   f_phrase ||| e_phrase [||| extra ...] ||| scores
//
// and stores the `index`-th (0-based) space-separated token of `scores`,
// converted to a number, in `pfe`.
//
//   str    - the raw line, without the trailing newline
//   index  - position of the P(f|e) score inside the scores field
//
// A line with fewer than two separators leaves the fields that cannot be
// determined empty and `pfe` at 0; a missing score token also yields 0.
PTEntry::PTEntry(const std::string& str, int index) :
  pfe(0.0f), cf(0), ce(0), cfe(0), nlog_pte(0.0)
{
  const std::string::size_type npos = std::string::npos;
  const std::string::size_type sep_len = SEPARATOR.size();

  // Field 1: source phrase.
  const std::string::size_type sep1 = str.find(SEPARATOR);
  this->f_phrase = str.substr(0, sep1);
  if (sep1 == npos) return;                 // malformed line: no separator at all
  std::string::size_type pos = sep1 + sep_len;

  // Field 2: target phrase.
  const std::string::size_type sep2 = str.find(SEPARATOR, pos);
  this->e_phrase = str.substr(pos, sep2 == npos ? npos : sep2 - pos);
  if (sep2 == npos) return;                 // malformed line: no scores field
  pos = sep2 + sep_len;

  // The scores are whatever follows the LAST separator; anything between the
  // target phrase and that separator is carried along verbatim as `extra`.
  // On a three-field line the last separator is the one that ended the
  // target phrase, so there is no extra text. (Computing last - pos there
  // would wrap around and copy the scores into `extra`.)
  const std::string::size_type last = str.rfind(SEPARATOR);
  if (last >= pos) {
    this->extra = str.substr(pos, last - pos);
  }
  this->scores = str.substr(last + sep_len);

  // Skip `index` space-delimited tokens to reach the requested score.
  std::string::size_type start = 0;
  for (int skipped = 0; skipped < index; ++skipped) {
    const std::string::size_type space = scores.find(' ', start);
    if (space == npos) {                    // fewer scores than requested
      start = npos;
      break;
    }
    start = space + 1;
  }

  if (start != npos) {
    // Parse straight from a std::string: no fixed-size scratch buffer to overflow.
    const std::string::size_type stop = scores.find(' ', start);
    const std::string token = scores.substr(start, stop == npos ? npos : stop - start);
    this->pfe = atof(token.c_str());
  }
}
|
||||
|
||||
struct PfeComparer {
|
||||
bool operator()(const PTEntry* a, const PTEntry* b) const { return a->pfe > b->pfe; }
|
||||
};
|
||||
|
||||
struct NlogSigThresholder {
|
||||
NlogSigThresholder(float threshold) : t(threshold) {}
|
||||
float t;
|
||||
bool operator()(const PTEntry* a) const { if (a->nlog_pte < t) { delete a; return true; } else return false; }
|
||||
};
|
||||
|
||||
std::ostream& operator << (std::ostream& os, const PTEntry& pp)
|
||||
{
|
||||
os << pp.f_phrase << " ||| " << pp.e_phrase;
|
||||
if (pp.extra.size()>0) os << " ||| " << pp.extra;
|
||||
os << " ||| " << pp.scores;
|
||||
if (print_cooc_counts) os << " ||| " << pp.cfe << " " << pp.cf << " " << pp.ce;
|
||||
if (print_neglog_significance) os << " ||| " << pp.nlog_pte;
|
||||
return os;
|
||||
}
|
||||
|
||||
// for an overview, see
|
||||
// W. Press, S. Teukolsky and W. Vetterling. (1992) Numerical Recipes in C. Chapter 6.1.
|
||||
double log_gamma(int x)
|
||||
{
|
||||
// size_t xx=(size_t)x; xx--; size_t sum=1; while (xx) { sum *= xx--; } return log((double)(sum));
|
||||
if (x <= 2) { return 0.0; }
|
||||
static double coefs[6] = {76.18009172947146, -86.50532032941677, 24.01409824083091, -1.231739572450155, 0.1208650973866179e-2, -0.5395239384953e-5};
|
||||
double tmp=(double)x+5.5;
|
||||
tmp -= (((double)x)+0.5)*log(tmp);
|
||||
double y=(double)x;
|
||||
double sum = 1.000000000190015;
|
||||
for (size_t j=0;j<6;++j) { sum += coefs[j]/++y; }
|
||||
return -tmp+log(2.5066282746310005*sum/(double)x);
|
||||
}
|
||||
|
||||
// Debug helper: dumps a 2x2 table (a b / c d) to stderr together with the
// probability p and the factor b*c/((a+1)*(d+1)).
void print(int a, int b, int c, int d, float p) {
  const double xf = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);

  std::cerr << a << "\t" << b << "\t P=" << p << "\n";
  std::cerr << c << "\t" << d << "\t xf=" << xf << "\n\n";
}
|
||||
|
||||
// 2x2 (one-sided) Fisher's exact test
|
||||
// see B. Moore. (2004) On Log Likelihood and the Significance of Rare Events
|
||||
double fisher_exact(int cfe, int ce, int cf)
|
||||
{
|
||||
assert(cfe <= ce);
|
||||
assert(cfe <= cf);
|
||||
|
||||
int a = cfe;
|
||||
int b = (cf - cfe);
|
||||
int c = (ce - cfe);
|
||||
int d = (num_lines - ce - cf + cfe);
|
||||
int n = a + b + c + d;
|
||||
|
||||
double cp = exp(log_gamma(1+a+c) + log_gamma(1+b+d) + log_gamma(1+a+b) + log_gamma(1+c+d) - log_gamma(1+n) - log_gamma(1+a) - log_gamma(1+b) - log_gamma(1+c) - log_gamma(1+d));
|
||||
double total_p = 0.0;
|
||||
int tc = std::min(b,c);
|
||||
for (int i=0; i<=tc; i++) {
|
||||
total_p += cp;
|
||||
// double lg = log_gamma(1+a+c) + log_gamma(1+b+d) + log_gamma(1+a+b) + log_gamma(1+c+d) - log_gamma(1+n) - log_gamma(1+a) - log_gamma(1+b) - log_gamma(1+c) - log_gamma(1+d); double cp = exp(lg);
|
||||
// print(a,b,c,d,cp);
|
||||
double coef = (double)(b)*(double)(c)/(double)(a+1)/(double)(d+1);
|
||||
cp *= coef;
|
||||
++a; --c; ++d; --b;
|
||||
}
|
||||
return total_p;
|
||||
}
|
||||
|
||||
// input: unordered list of translation options for a single source phrase
|
||||
void compute_cooc_stats_and_filter(std::vector<PTEntry*>& options)
|
||||
{
|
||||
if (pfe_filter_limit>0 && options.size() > pfe_filter_limit) {
|
||||
nremoved_pfefilter += (options.size() - pfe_filter_limit);
|
||||
std::nth_element(options.begin(), options.begin()+pfe_filter_limit, options.end(), PfeComparer());
|
||||
for (std::vector<PTEntry*>::iterator i=options.begin()+pfe_filter_limit; i != options.end(); ++i)
|
||||
delete *i;
|
||||
options.erase(options.begin()+pfe_filter_limit,options.end());
|
||||
}
|
||||
|
||||
SentIdSet fset;
|
||||
vector<S_SimplePhraseLocationElement> locations;
|
||||
//std::cerr << "Looking up f-phrase: " << options.front()->f_phrase << "\n";
|
||||
|
||||
locations = f_sa.locateExactPhraseInCorpus(options.front()->f_phrase.c_str());
|
||||
if(locations.size()==0){
|
||||
cerr<<"No occurrences found!!\n";
|
||||
}
|
||||
for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin();
|
||||
i != locations.end();
|
||||
++i)
|
||||
{
|
||||
fset.insert(i->sentIdInCorpus);
|
||||
}
|
||||
size_t cf = fset.size();
|
||||
for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
|
||||
const std::string& e_phrase = (*i)->e_phrase;
|
||||
size_t cef=0;
|
||||
SentIdSet& eset = esets[(*i)->e_phrase];
|
||||
if (eset.empty()) {
|
||||
//std::cerr << "Looking up e-phrase: " << e_phrase << "\n";
|
||||
vector<S_SimplePhraseLocationElement> locations = e_sa.locateExactPhraseInCorpus(e_phrase.c_str());
|
||||
for (vector<S_SimplePhraseLocationElement>::iterator i=locations.begin(); i!= locations.end(); ++i) {
|
||||
TextLenType curSentId = i->sentIdInCorpus;
|
||||
eset.insert(curSentId);
|
||||
}
|
||||
}
|
||||
size_t ce=eset.size();
|
||||
if (ce < cf) {
|
||||
for (SentIdSet::iterator i=eset.begin(); i != eset.end(); ++i) {
|
||||
if (fset.find(*i) != fset.end()) cef++;
|
||||
}
|
||||
} else {
|
||||
for (SentIdSet::iterator i=fset.begin(); i != fset.end(); ++i) {
|
||||
if (eset.find(*i) != eset.end()) cef++;
|
||||
}
|
||||
}
|
||||
double nlp = -log(fisher_exact(cef, cf, ce));
|
||||
(*i)->set_cooc_stats(cef, cf, ce, nlp);
|
||||
if (ce < MINIMUM_SIZE_TO_KEEP) {
|
||||
esets.erase(e_phrase);
|
||||
}
|
||||
}
|
||||
std::vector<PTEntry*>::iterator new_end =
|
||||
std::remove_if(options.begin(), options.end(), NlogSigThresholder(sig_filter_limit));
|
||||
nremoved_sigfilter += (options.end() - new_end);
|
||||
options.erase(new_end,options.end());
|
||||
}
|
||||
|
||||
int main(int argc, char * argv[]){
|
||||
int c;
|
||||
const char* efile=0;
|
||||
const char* ffile=0;
|
||||
int pfe_index = 2;
|
||||
while ((c = getopt(argc, argv, "cpf:e:i:n:l:")) != -1) {
|
||||
switch (c) {
|
||||
case 'e':
|
||||
efile = optarg;
|
||||
break;
|
||||
case 'f':
|
||||
ffile = optarg;
|
||||
break;
|
||||
case 'i': // index of pfe in phrase table
|
||||
pfe_index = atoi(optarg);
|
||||
break;
|
||||
case 'n': // keep only the top n entries in phrase table sorted by p(f|e) (0=all)
|
||||
pfe_filter_limit = atoi(optarg);
|
||||
std::cerr << "P(f|e) filter limit: " << pfe_filter_limit << std::endl;
|
||||
break;
|
||||
case 'c':
|
||||
print_cooc_counts = true;
|
||||
break;
|
||||
case 'p':
|
||||
print_neglog_significance = true;
|
||||
break;
|
||||
case 'l':
|
||||
std::cerr << "-l = " << optarg << "\n";
|
||||
if (strcmp(optarg,"a+e") == 0) {
|
||||
sig_filter_limit = ALPHA_PLUS_EPS;
|
||||
} else if (strcmp(optarg,"a-e") == 0) {
|
||||
sig_filter_limit = ALPHA_MINUS_EPS;
|
||||
} else {
|
||||
char *x;
|
||||
sig_filter_limit = strtod(optarg, &x);
|
||||
if (sig_filter_limit < 0.0) {
|
||||
std::cerr << "Filter limit (-l) must be either 'a+e', 'a-e' or a real number >= 0.0\n";
|
||||
usage();
|
||||
}
|
||||
}
|
||||
break;
|
||||
default:
|
||||
usage();
|
||||
}
|
||||
}
|
||||
//-----------------------------------------------------------------------------
|
||||
if (optind != argc || !efile || !ffile) {
|
||||
usage();
|
||||
}
|
||||
|
||||
//load the indexed corpus with vocabulary(noVoc=false) and with offset(noOffset=false)
|
||||
e_sa.loadData_forSearch(efile, false, false);
|
||||
f_sa.loadData_forSearch(ffile, false, false);
|
||||
size_t elines = e_sa.returnTotalSentNumber();
|
||||
size_t flines = f_sa.returnTotalSentNumber();
|
||||
if (elines != flines) {
|
||||
std::cerr << "Number of lines in e-corpus != number of lines in f-corpus!\n";
|
||||
usage();
|
||||
} else {
|
||||
std::cerr << "Training corpus: " << elines << " lines\n";
|
||||
num_lines = elines;
|
||||
}
|
||||
p_111 = -log(fisher_exact(1,1,1));
|
||||
std::cerr << "\\alpha = " << p_111 << "\n";
|
||||
if (sig_filter_limit == ALPHA_MINUS_EPS) { sig_filter_limit = p_111 - 0.001; }
|
||||
else if (sig_filter_limit == ALPHA_PLUS_EPS) { sig_filter_limit = p_111 + 0.001; }
|
||||
std::cerr << "Sig filter threshold is = " << sig_filter_limit << "\n";
|
||||
|
||||
char tmpString[10000];
|
||||
std::string prev = "";
|
||||
std::vector<PTEntry*> options;
|
||||
size_t pt_lines = 0;
|
||||
while(!cin.eof()){
|
||||
cin.getline(tmpString,10000,'\n');
|
||||
if(++pt_lines%10000==0)
|
||||
{
|
||||
std::cerr << ".";
|
||||
if(pt_lines%500000==0) std::cerr << "[n:"<<pt_lines<<"]\n";
|
||||
}
|
||||
|
||||
if(strlen(tmpString)>0){
|
||||
PTEntry* pp = new PTEntry(tmpString, pfe_index);
|
||||
if (prev != pp->f_phrase) {
|
||||
prev = pp->f_phrase;
|
||||
|
||||
if (!options.empty()) { // always true after first line
|
||||
compute_cooc_stats_and_filter(options);
|
||||
}
|
||||
for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
|
||||
std::cout << **i << std::endl;
|
||||
delete *i;
|
||||
}
|
||||
options.clear();
|
||||
options.push_back(pp);
|
||||
|
||||
} else {
|
||||
options.push_back(pp);
|
||||
}
|
||||
// for(int i=0;i<locations.size(); i++){
|
||||
// cout<<"SentId="<<locations[i].sentIdInCorpus<<" Pos="<<(int)locations[i].posInSentInCorpus<<endl;
|
||||
// }
|
||||
}
|
||||
}
|
||||
compute_cooc_stats_and_filter(options);
|
||||
for (std::vector<PTEntry*>::iterator i=options.begin(); i != options.end(); ++i) {
|
||||
std::cout << **i << std::endl;
|
||||
delete *i;
|
||||
}
|
||||
float pfefper = (100.0*(float)nremoved_pfefilter)/(float)pt_lines;
|
||||
float sigfper = (100.0*(float)nremoved_sigfilter)/(float)pt_lines;
|
||||
std::cerr << "\n\n------------------------------------------------------\n"
|
||||
<< " unfiltered phrases pairs: " << pt_lines << "\n"
|
||||
<< "\n"
|
||||
<< " P(f|e) filter [first]: " << nremoved_pfefilter << " (" << pfefper << "%)\n"
|
||||
<< " significance filter: " << nremoved_sigfilter << " (" << sigfper << "%)\n"
|
||||
<< " TOTAL FILTERED: " << (nremoved_pfefilter + nremoved_sigfilter) << " (" << (sigfper + pfefper) << "%)\n"
|
||||
<< "\n"
|
||||
<< " FILTERED phrase pairs: " << (pt_lines - nremoved_pfefilter - nremoved_sigfilter) << " (" << (100.0-sigfper - pfefper) << "%)\n"
|
||||
<< "------------------------------------------------------\n";
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
20
sigtest-filter/sigtest-filter.sln
Normal file
20
sigtest-filter/sigtest-filter.sln
Normal file
@ -0,0 +1,20 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 9.00
|
||||
# Visual Studio 2005
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "sigtest-filter", "sigtest-filter.vcproj", "{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Release|Win32 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}.Release|Win32.Build.0 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
237
sigtest-filter/sigtest-filter.vcproj
Normal file
237
sigtest-filter/sigtest-filter.vcproj
Normal file
@ -0,0 +1,237 @@
|
||||
<?xml version="1.0" encoding="Windows-1252"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="8.00"
|
||||
Name="sigtest-filter"
|
||||
ProjectGUID="{FA2910DF-FD9D-4E6D-A393-9F9F9E309E78}"
|
||||
RootNamespace="sigtestfilter"
|
||||
Keyword="Win32Proj"
|
||||
>
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"
|
||||
/>
|
||||
</Platforms>
|
||||
<ToolFiles>
|
||||
</ToolFiles>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
UseOfMFC="2"
|
||||
CharacterSet="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories="..\..\..\SALM\Src\SuffixArrayApplications\SuffixArraySearch;..\..\..\SALM\Src\SuffixArrayApplications;..\..\..\SALM\Src\Shared"
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="4"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
LinkIncremental="2"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="$(SolutionDir)$(ConfigurationName)"
|
||||
IntermediateDirectory="$(ConfigurationName)"
|
||||
ConfigurationType="1"
|
||||
UseOfMFC="2"
|
||||
CharacterSet="1"
|
||||
WholeProgramOptimization="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
AdditionalIncludeDirectories="..\..\..\SALM\Src\SuffixArrayApplications\SuffixArraySearch;..\..\..\SALM\Src\SuffixArrayApplications;..\..\..\SALM\Src\Shared"
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
|
||||
RuntimeLibrary="2"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
Detect64BitPortabilityProblems="true"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLinkerTool"
|
||||
LinkIncremental="1"
|
||||
GenerateDebugInformation="true"
|
||||
SubSystem="1"
|
||||
OptimizeReferences="2"
|
||||
EnableCOMDATFolding="2"
|
||||
TargetMachine="1"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManifestTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCAppVerifierTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebDeploymentTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<Filter
|
||||
Name="Source Files"
|
||||
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
|
||||
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\SALM\Src\Shared\_IDVocabulary.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\SALM\Src\Shared\_String.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\SALM\Src\SuffixArrayApplications\_SuffixArrayApplicationBase.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\SALM\Src\SuffixArrayApplications\SuffixArraySearch\_SuffixArraySearchApplicationBase.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\filter-pt.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\XGetopt.cpp"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Header Files"
|
||||
Filter="h;hpp;hxx;hm;inl;inc;xsd"
|
||||
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
|
||||
>
|
||||
<File
|
||||
RelativePath="..\..\..\SALM\Src\Shared\_IDVocabulary.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\SALM\Src\SuffixArrayApplications\_SuffixArrayApplicationBase.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\SALM\Src\SuffixArrayApplications\SuffixArraySearch\_SuffixArraySearchApplicationBase.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\XGetopt.h"
|
||||
>
|
||||
</File>
|
||||
</Filter>
|
||||
<Filter
|
||||
Name="Resource Files"
|
||||
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
|
||||
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
|
||||
>
|
||||
</Filter>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
Loading…
Reference in New Issue
Block a user