Add utility for querying the on-disk phrase table (pt)

This commit is contained in:
Hieu Hoang 2012-05-15 16:46:16 +01:00
parent 9467cc82e6
commit f408401ff7
3 changed files with 11 additions and 20 deletions

View File

@ -102,7 +102,8 @@ void Phrase::DebugPrint(ostream &out, const Vocab &vocab) const
{
for (size_t pos = 0; pos < GetSize(); ++pos) {
const Word &word = GetWord(pos);
out << word << " ";
word.DebugPrint(out, vocab);
out << " ";
}
}

View File

@ -147,9 +147,6 @@ bool Word::operator==(const Word &compare) const
void Word::DebugPrint(ostream &out, const Vocab &vocab) const
{
if (m_isNonTerminal)
out << "[";
std::vector<UINT64>::const_iterator iter;
for (size_t ind = 0; ind < m_factors.size() - 1; ++ind) {
UINT64 vocabId = *iter;
@ -161,9 +158,6 @@ void Word::DebugPrint(ostream &out, const Vocab &vocab) const
UINT64 vocabId = m_factors.back();
const string &str = vocab.GetString(vocabId);
out << str;
if (m_isNonTerminal)
out << "]";
}
std::ostream& operator<<(std::ostream &out, const Word &word)

View File

@ -17,8 +17,6 @@ void usage();
typedef unsigned int uint;
#define TABLE_LIMIT 20
void Tokenize(OnDiskPt::Phrase &phrase
, const std::string &token, bool addSourceNonTerm, bool addTargetNonTerm
, OnDiskPt::OnDiskWrapper &onDiskWrapper)
@ -70,21 +68,20 @@ void Tokenize(OnDiskPt::Phrase &phrase
int main(int argc, char **argv)
{
int nscores = 5;
int tableLimit = 20;
std::string ttable = "";
bool useAlignments = false;
for(int i = 1; i < argc; i++) {
if(!strcmp(argv[i], "-n")) {
if(!strcmp(argv[i], "-tlimit")) {
if(i + 1 == argc)
usage();
nscores = atoi(argv[++i]);
tableLimit = atoi(argv[++i]);
} else if(!strcmp(argv[i], "-t")) {
if(i + 1 == argc)
usage();
ttable = argv[++i];
} else if(!strcmp(argv[i], "-a"))
useAlignments = true;
}
else
usage();
}
@ -141,14 +138,14 @@ int main(int argc, char **argv)
if (node)
{ // source phrase points to a bunch of rules
const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(TABLE_LIMIT, onDiskWrapper);
const TargetPhraseCollection *coll = node->GetTargetPhraseCollection(tableLimit, onDiskWrapper);
string str = coll->GetDebugStr();
cout << "Found" << coll->GetSize() << endl;
cout << "Found " << coll->GetSize() << endl;
for (size_t ind = 0; ind < coll->GetSize(); ++ind)
{
const TargetPhrase &targetPhrase = coll->GetTargetPhrase(ind);
cerr << " ** ";
cerr << " ";
targetPhrase.DebugPrint(cerr, onDiskWrapper.GetVocab());
cerr << endl;
@ -170,9 +167,8 @@ int main(int argc, char **argv)
void usage()
{
std::cerr << "Usage: queryPhraseTable [-n <nscores>] [-a] -t <ttable>\n"
"-n <nscores> number of scores in phrase table (default: 5)\n"
"-a binary phrase table contains alignments\n"
std::cerr << "Usage: queryOnDiskPt [-n <nscores>] [-a] -t <ttable>\n"
"-tlimit <table limit> max number of rules per source phrase (default: 20)\n"
"-t <ttable> phrase table\n";
exit(1);
}