Added tool to query compact phrase table

This commit is contained in:
Marcin Junczys-Dowmunt 2012-09-13 14:09:32 +00:00
parent 21938e4d94
commit 200d1bdc34
5 changed files with 125 additions and 12 deletions

View File

@ -10,8 +10,9 @@ local with-cmph = [ option.get "with-cmph" ] ;
if $(with-cmph) {
exe processPhraseTableMin : processPhraseTableMin.cpp ../moses/src//moses ;
exe processLexicalTableMin : processLexicalTableMin.cpp ../moses/src//moses ;
exe queryPhraseTableMin : queryPhraseTableMin.cpp ../moses/src//moses ;
alias programsMin : processPhraseTableMin processLexicalTableMin ;
alias programsMin : processPhraseTableMin processLexicalTableMin queryPhraseTableMin ;
}
else {
alias programsMin ;

View File

@ -0,0 +1,97 @@
// Query binary phrase tables.
// Marcin Junczys-Dowmunt, 13 September 2012
#include <cmath>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>
#include "CompactPT/PhraseDictionaryCompact.h"
#include "Util.h"
void usage();
typedef unsigned int uint;
using namespace Moses;
// Command-line tool that looks up source phrases in a compact phrase table.
//
// Options (see usage()):
//   -t <ttable>   phrase table to load (required)
//   -n <nscores>  number of scores in the phrase table (default: 5)
//   -a            also print the alignment information of each target phrase
//   -c            print only the number of target phrases per source phrase
//
// One source phrase is read per line from standard input. In the default mode
// every target phrase found is printed on its own line together with its
// scores, each passed through exp() (presumably the table stores log values —
// confirm against the table builder). With -c only the source phrase and the
// number of entries is printed (0 when the phrase is unknown).
//
// Returns 0 once standard input is exhausted and 1 if the table cannot be
// loaded. Invalid arguments end the process through usage().
int main(int argc, char **argv)
{
  int nscores = 5;
  std::string ttable = "";
  bool useAlignments = false;
  bool reportCounts = false;

  for(int i = 1; i < argc; i++) {
    if(!strcmp(argv[i], "-n")) {
      if(i + 1 == argc)
        usage();
      // atoi() would silently turn garbage into 0, and a negative value would
      // be converted to an enormous size for the weight vector below, so the
      // argument is parsed with strtol() and validated explicitly.
      const char *arg = argv[++i];
      char *end = 0;
      const long parsed = strtol(arg, &end, 10);
      nscores = static_cast<int>(parsed);
      if(end == arg || *end != '\0' || parsed < 0 || nscores != parsed)
        usage();
    } else if(!strcmp(argv[i], "-t")) {
      if(i + 1 == argc)
        usage();
      ttable = argv[++i];
    } else if(!strcmp(argv[i], "-a")) {
      useAlignments = true;
    } else if(!strcmp(argv[i], "-c")) {
      reportCounts = true;
    } else {
      usage();
    }
  }

  if(ttable == "")
    usage();

  std::vector<FactorType> input(1, 0);
  std::vector<FactorType> output(1, 0);
  std::vector<float> weight(nscores, 0);

  LMList lmList;

  PhraseDictionaryFeature pdf(Compact, nscores, nscores, input, output, ttable, weight, 0, "", "");
  PhraseDictionaryCompact pdc(nscores, Compact, &pdf, false, useAlignments);

  // Checked explicitly rather than with assert(): an assert vanishes when
  // NDEBUG is defined, which would let the tool query a table that never loaded.
  if(!pdc.Load(input, output, ttable, weight, 0, lmList, 0)) {
    std::cerr << "Error: could not load phrase table '" << ttable << "'\n";
    return 1;
  }

  std::string line;
  while(getline(std::cin, line)) {
    Phrase sourcePhrase(0);
    sourcePhrase.CreateFromString(input, line, "||dummy_string||");

    TargetPhraseVectorPtr decodedPhraseColl
      = pdc.GetTargetPhraseCollectionRaw(sourcePhrase);

    if(decodedPhraseColl != NULL) {
      if(reportCounts) {
        std::cout << sourcePhrase << decodedPhraseColl->size() << '\n';
      } else {
        for(TargetPhraseVector::iterator it = decodedPhraseColl->begin();
            it != decodedPhraseColl->end(); ++it) {
          TargetPhrase &tp = *it;
          std::cout << sourcePhrase << "||| ";
          std::cout << static_cast<const Phrase&>(tp) << "|||";

          if(useAlignments)
            std::cout << " " << tp.GetAlignmentInfo() << "|||";

          for(size_t i = 0; i < tp.GetScoreBreakdown().size(); i++)
            std::cout << " " << std::exp(tp.GetScoreBreakdown()[i]);
          std::cout << '\n';
        }
      }
    } else if(reportCounts) {
      std::cout << sourcePhrase << 0 << '\n';
    }

    // A single flush per query keeps the tool usable interactively and in
    // pipes without flushing after every printed target phrase.
    std::cout.flush();
  }

  return 0;
}
// Prints the command-line synopsis and option summary to standard error and
// terminates the process with exit status 1. Does not return.
void usage()
{
  // The synopsis names the executable as it is built (queryPhraseTableMin) and
  // lists every option described below, including -c.
  std::cerr << "Usage: queryPhraseTableMin [-n <nscores>] [-a] [-c] -t <ttable>\n"
    "-n <nscores> number of scores in phrase table (default: 5)\n"
    "-c only report counts of entries\n"
    "-a binary phrase table contains alignments\n"
    "-t <ttable> phrase table\n";
  exit(1);
}

View File

@ -327,7 +327,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
return TargetPhraseVectorPtr();
wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
if(StaticData::Instance().UseAlignmentInfo())
if(m_phraseDictionary.m_useAlignmentInfo)
{
size_t trgPos = targetPhrase->GetSize();
alignment.insert(AlignPoint(srcPos, trgPos));
@ -342,7 +342,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
return TargetPhraseVectorPtr();
wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
if(StaticData::Instance().UseAlignmentInfo())
if(m_phraseDictionary.m_useAlignmentInfo)
{
size_t trgPos = srcPos;
alignment.insert(AlignPoint(srcPos, trgPos));
@ -398,7 +398,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
{
// insert the subphrase into the main target phrase
TargetPhrase& subTp = subTpv->at(rank);
if(StaticData::Instance().UseAlignmentInfo())
if(m_phraseDictionary.m_useAlignmentInfo)
{
// reconstruct the alignment data based on the alignment of the subphrase
for(AlignmentInfo::const_iterator it = subTp.GetAlignmentInfo().begin();
@ -448,14 +448,14 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
}
else
{
if(StaticData::Instance().UseAlignmentInfo())
if(m_phraseDictionary.m_useAlignmentInfo)
alignment.insert(AlignPointSizeT(alignPoint));
}
}
if(state == Add)
{
if(StaticData::Instance().UseAlignmentInfo())
if(m_phraseDictionary.m_useAlignmentInfo)
targetPhrase->SetAlignmentInfo(alignment);
if(m_coding == PREnc)

View File

@ -85,7 +85,6 @@ bool PhraseDictionaryCompact::Load(const std::vector<FactorType> &input
// Keep source phrase index on disk
indexSize = m_hash.LoadIndex(pFile);
size_t coderSize = m_phraseDecoder->Load(pFile);
size_t phraseSize;
@ -136,7 +135,18 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c
}
else
return NULL;
}
// Returns the target phrase vector that the phrase decoder creates for
// sourcePhrase. An empty pointer is returned right away when sourcePhrase has
// more words than the decoder's maximum source phrase length (the longest
// source phrase observed during compilation), since no such phrase can be in
// the table.
TargetPhraseVectorPtr
PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase) const {
  const bool withinMaxLength =
    sourcePhrase.GetSize() <= m_phraseDecoder->GetMaxSourcePhraseLength();

  if(withinMaxLength) {
    // Delegate the actual lookup to the phrase decoder.
    // NOTE(review): the meaning of the second argument (true) is defined by
    // PhraseDecoder::CreateTargetPhraseCollection — confirm there.
    return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
  }

  return TargetPhraseVectorPtr();
}
PhraseDictionaryCompact::~PhraseDictionaryCompact() {

View File

@ -50,6 +50,7 @@ protected:
PhraseTableImplementation m_implementation;
bool m_inMemory;
bool m_useAlignmentInfo;
typedef std::vector<TargetPhraseCollection*> PhraseCache;
#ifdef WITH_THREADS
@ -75,11 +76,14 @@ protected:
public:
PhraseDictionaryCompact(size_t numScoreComponent,
PhraseTableImplementation implementation,
PhraseDictionaryFeature* feature)
PhraseTableImplementation implementation,
PhraseDictionaryFeature* feature,
bool inMemory = StaticData::Instance().UseMinphrInMemory(),
bool useAlignmentInfo = StaticData::Instance().UseAlignmentInfo())
: PhraseDictionary(numScoreComponent, feature),
m_implementation(implementation),
m_inMemory(StaticData::Instance().UseMinphrInMemory()),
m_inMemory(inMemory),
m_useAlignmentInfo(useAlignmentInfo),
m_hash(10, 16),
m_phraseDecoder(0)
{}
@ -95,7 +99,8 @@ public:
, float weightWP);
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &source) const;
TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const;
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
void InitializeForInput(const Moses::InputType&);