mosesdecoder/biconcor/phrase-lookup.cpp

135 lines
3.5 KiB
C++
Raw Normal View History

#include "SuffixArray.h"
#include "../util/tokenize.hh"
#include <getopt.h>
using namespace std;
size_t lookup( string );
vector<string> tokenize( const char input[] );
SuffixArray suffixArray;
2015-01-14 14:07:42 +03:00
int main(int argc, char* argv[])
{
// handle parameters
string query;
string fileNameSuffix;
string fileNameSource;
bool loadFlag = false;
bool saveFlag = false;
bool createFlag = false;
bool queryFlag = false;
bool querySentenceFlag = false;
2015-07-21 02:00:42 +03:00
int stdioFlag = false; // receive requests from STDIN, respond to STDOUT
string info = "usage: biconcor\n\t[--load model-file]\n\t[--save model-file]\n\t[--create corpus]\n\t[--query string]\n\t[--stdio]\n";
while(1) {
static struct option long_options[] = {
{"load", required_argument, 0, 'l'},
{"save", required_argument, 0, 's'},
{"create", required_argument, 0, 'c'},
{"query", required_argument, 0, 'q'},
{"query-sentence", required_argument, 0, 'Q'},
{"document", required_argument, 0, 'd'},
{"stdio", no_argument, 0, 'i'},
{"stdio-sentence", no_argument, 0, 'I'},
{0, 0, 0, 0}
};
int option_index = 0;
int c = getopt_long (argc, argv, "l:s:c:q:Q:iId", long_options, &option_index);
if (c == -1) break;
switch (c) {
case 'l':
fileNameSuffix = string(optarg);
loadFlag = true;
break;
case 's':
fileNameSuffix = string(optarg);
saveFlag = true;
break;
case 'c':
fileNameSource = string(optarg);
createFlag = true;
break;
case 'q':
query = string(optarg);
queryFlag = true;
break;
case 'Q':
query = string(optarg);
querySentenceFlag = true;
break;
case 'i':
stdioFlag = true;
break;
case 'I':
stdioFlag = true;
querySentenceFlag = true;
break;
case 'd':
suffixArray.UseDocument();
break;
default:
cerr << info;
exit(1);
}
}
// check if parameter settings are legal
if (saveFlag && !createFlag) {
cerr << "error: cannot save without creating\n" << info;
exit(1);
}
if (saveFlag && loadFlag) {
cerr << "error: cannot load and save at the same time\n" << info;
exit(1);
}
if (!loadFlag && !createFlag) {
cerr << "error: neither load or create - i have no info!\n" << info;
exit(1);
}
// get suffix array
if (createFlag) {
cerr << "will create\n";
cerr << "corpus is in " << fileNameSource << endl;
suffixArray.Create( fileNameSource );
if (saveFlag) {
suffixArray.Save( fileNameSuffix );
cerr << "will save in " << fileNameSuffix << endl;
}
}
if (loadFlag) {
cerr << "will load from " << fileNameSuffix << endl;
suffixArray.Load( fileNameSuffix );
}
// do something with it
if (stdioFlag) {
while(true) {
string query;
if (getline(cin, query, '\n').eof()) {
return 0;
}
if (querySentenceFlag) {
vector< string > queryString = util::tokenize( query.c_str() );
suffixArray.PrintSentenceMatches( queryString );
2015-07-21 02:00:42 +03:00
} else {
cout << lookup( query ) << endl;
}
}
2015-07-21 02:00:42 +03:00
} else if (queryFlag) {
cout << lookup( query ) << endl;
2015-07-21 02:00:42 +03:00
} else if (querySentenceFlag) {
vector< string > queryString = util::tokenize( query.c_str() );
suffixArray.PrintSentenceMatches( queryString );
}
return 0;
}
2015-01-14 14:07:42 +03:00
size_t lookup( string query )
{
cerr << "query is " << query << endl;
vector< string > queryString = util::tokenize( query.c_str() );
return suffixArray.Count( queryString );
}