mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-27 05:55:02 +03:00
Added tool to query compact phrase table
This commit is contained in:
parent
21938e4d94
commit
200d1bdc34
@ -10,8 +10,9 @@ local with-cmph = [ option.get "with-cmph" ] ;
|
||||
if $(with-cmph) {
|
||||
exe processPhraseTableMin : processPhraseTableMin.cpp ../moses/src//moses ;
|
||||
exe processLexicalTableMin : processLexicalTableMin.cpp ../moses/src//moses ;
|
||||
exe queryPhraseTableMin : queryPhraseTableMin.cpp ../moses/src//moses ;
|
||||
|
||||
alias programsMin : processPhraseTableMin processLexicalTableMin ;
|
||||
alias programsMin : processPhraseTableMin processLexicalTableMin queryPhraseTableMin ;
|
||||
}
|
||||
else {
|
||||
alias programsMin ;
|
||||
|
97
misc/queryPhraseTableMin.cpp
Normal file
97
misc/queryPhraseTableMin.cpp
Normal file
@ -0,0 +1,97 @@
|
||||
// Query binary phrase tables.
|
||||
// Marcin Junczys-Dowmunt, 13 September 2012
|
||||
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "CompactPT/PhraseDictionaryCompact.h"
|
||||
#include "Util.h"
|
||||
|
||||
// Forward declaration: usage() prints the command-line help to std::cerr and
// terminates the process with exit(1); it is defined at the end of this file.
void usage();
|
||||
|
||||
// Shorthand for unsigned int; not referenced in the visible part of this file.
typedef unsigned int uint;
|
||||
|
||||
// Lets the Moses identifiers used by main() be written without qualification.
using namespace Moses;
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
int nscores = 5;
|
||||
std::string ttable = "";
|
||||
bool useAlignments = false;
|
||||
bool reportCounts = false;
|
||||
|
||||
for(int i = 1; i < argc; i++) {
|
||||
if(!strcmp(argv[i], "-n")) {
|
||||
if(i + 1 == argc)
|
||||
usage();
|
||||
nscores = atoi(argv[++i]);
|
||||
} else if(!strcmp(argv[i], "-t")) {
|
||||
if(i + 1 == argc)
|
||||
usage();
|
||||
ttable = argv[++i];
|
||||
} else if(!strcmp(argv[i], "-a")) {
|
||||
useAlignments = true;
|
||||
} else if (!strcmp(argv[i], "-c")) {
|
||||
reportCounts = true;
|
||||
}
|
||||
else
|
||||
usage();
|
||||
}
|
||||
|
||||
if(ttable == "")
|
||||
usage();
|
||||
|
||||
std::vector<FactorType> input(1, 0);
|
||||
std::vector<FactorType> output(1, 0);
|
||||
std::vector<float> weight(nscores, 0);
|
||||
|
||||
LMList lmList;
|
||||
|
||||
PhraseDictionaryFeature pdf(Compact, nscores, nscores, input, output, ttable, weight, 0, "", "");
|
||||
PhraseDictionaryCompact pdc(nscores, Compact, &pdf, false, useAlignments);
|
||||
bool ret = pdc.Load(input, output, ttable, weight, 0, lmList, 0);
|
||||
assert(ret);
|
||||
|
||||
std::string line;
|
||||
while(getline(std::cin, line)) {
|
||||
Phrase sourcePhrase(0);
|
||||
sourcePhrase.CreateFromString(input, line, "||dummy_string||");
|
||||
|
||||
TargetPhraseVectorPtr decodedPhraseColl
|
||||
= pdc.GetTargetPhraseCollectionRaw(sourcePhrase);
|
||||
|
||||
if(decodedPhraseColl != NULL) {
|
||||
if(reportCounts)
|
||||
std::cout << sourcePhrase << decodedPhraseColl->size() << std::endl;
|
||||
else
|
||||
for(TargetPhraseVector::iterator it = decodedPhraseColl->begin(); it != decodedPhraseColl->end(); it++) {
|
||||
TargetPhrase &tp = *it;
|
||||
std::cout << sourcePhrase << "||| ";
|
||||
std::cout << static_cast<const Phrase&>(tp) << "|||";
|
||||
|
||||
if(useAlignments)
|
||||
std::cout << " " << tp.GetAlignmentInfo() << "|||";
|
||||
|
||||
for(size_t i = 0; i < tp.GetScoreBreakdown().size(); i++)
|
||||
std::cout << " " << exp(tp.GetScoreBreakdown()[i]);
|
||||
std::cout << std::endl;
|
||||
}
|
||||
}
|
||||
else if(reportCounts)
|
||||
std::cout << sourcePhrase << 0 << std::endl;
|
||||
|
||||
std::cout.flush();
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Prints the command-line synopsis and option descriptions to std::cerr and
 * terminates the process with exit status 1. This function does not return.
 */
void usage()
{
  // The synopsis names the executable built from this file and lists every
  // option that main() accepts, including -c.
  std::cerr << "Usage: queryPhraseTableMin [-n <nscores>] [-c] [-a] -t <ttable>\n"
            "-n <nscores> number of scores in phrase table (default: 5)\n"
            "-c only report counts of entries\n"
            "-a binary phrase table contains alignments\n"
            "-t <ttable> phrase table\n";
  exit(1);
}
|
@ -327,7 +327,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
return TargetPhraseVectorPtr();
|
||||
|
||||
wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
|
||||
if(StaticData::Instance().UseAlignmentInfo())
|
||||
if(m_phraseDictionary.m_useAlignmentInfo)
|
||||
{
|
||||
size_t trgPos = targetPhrase->GetSize();
|
||||
alignment.insert(AlignPoint(srcPos, trgPos));
|
||||
@ -342,7 +342,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
return TargetPhraseVectorPtr();
|
||||
|
||||
wordString = GetTargetSymbol(GetTranslation(sourceWords[srcPos], rank));
|
||||
if(StaticData::Instance().UseAlignmentInfo())
|
||||
if(m_phraseDictionary.m_useAlignmentInfo)
|
||||
{
|
||||
size_t trgPos = srcPos;
|
||||
alignment.insert(AlignPoint(srcPos, trgPos));
|
||||
@ -398,7 +398,7 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
{
|
||||
// insert the subphrase into the main target phrase
|
||||
TargetPhrase& subTp = subTpv->at(rank);
|
||||
if(StaticData::Instance().UseAlignmentInfo())
|
||||
if(m_phraseDictionary.m_useAlignmentInfo)
|
||||
{
|
||||
// reconstruct the alignment data based on the alignment of the subphrase
|
||||
for(AlignmentInfo::const_iterator it = subTp.GetAlignmentInfo().begin();
|
||||
@ -448,14 +448,14 @@ TargetPhraseVectorPtr PhraseDecoder::DecodeCollection(
|
||||
}
|
||||
else
|
||||
{
|
||||
if(StaticData::Instance().UseAlignmentInfo())
|
||||
if(m_phraseDictionary.m_useAlignmentInfo)
|
||||
alignment.insert(AlignPointSizeT(alignPoint));
|
||||
}
|
||||
}
|
||||
|
||||
if(state == Add)
|
||||
{
|
||||
if(StaticData::Instance().UseAlignmentInfo())
|
||||
if(m_phraseDictionary.m_useAlignmentInfo)
|
||||
targetPhrase->SetAlignmentInfo(alignment);
|
||||
|
||||
if(m_coding == PREnc)
|
||||
|
@ -85,7 +85,6 @@ bool PhraseDictionaryCompact::Load(const std::vector<FactorType> &input
|
||||
// Keep source phrase index on disk
|
||||
indexSize = m_hash.LoadIndex(pFile);
|
||||
|
||||
|
||||
size_t coderSize = m_phraseDecoder->Load(pFile);
|
||||
|
||||
size_t phraseSize;
|
||||
@ -136,7 +135,18 @@ PhraseDictionaryCompact::GetTargetPhraseCollection(const Phrase &sourcePhrase) c
|
||||
}
|
||||
else
|
||||
return NULL;
|
||||
|
||||
}
|
||||
|
||||
// Looks up sourcePhrase through the phrase decoder and hands back the decoded
// target phrase vector. A default-constructed TargetPhraseVectorPtr is returned
// when the phrase is longer than the maximum source phrase length reported by
// the decoder, since no such source phrase can be stored in the table.
TargetPhraseVectorPtr
PhraseDictionaryCompact::GetTargetPhraseCollectionRaw(const Phrase &sourcePhrase) const
{
  const bool exceedsMaxLength =
    sourcePhrase.GetSize() > m_phraseDecoder->GetMaxSourcePhraseLength();

  if(exceedsMaxLength) {
    return TargetPhraseVectorPtr();
  }

  // Retrieve the target phrase collection from the phrase table.
  return m_phraseDecoder->CreateTargetPhraseCollection(sourcePhrase, true);
}
|
||||
|
||||
PhraseDictionaryCompact::~PhraseDictionaryCompact() {
|
||||
|
@ -50,6 +50,7 @@ protected:
|
||||
|
||||
PhraseTableImplementation m_implementation;
|
||||
bool m_inMemory;
|
||||
bool m_useAlignmentInfo;
|
||||
|
||||
typedef std::vector<TargetPhraseCollection*> PhraseCache;
|
||||
#ifdef WITH_THREADS
|
||||
@ -75,11 +76,14 @@ protected:
|
||||
|
||||
public:
|
||||
PhraseDictionaryCompact(size_t numScoreComponent,
|
||||
PhraseTableImplementation implementation,
|
||||
PhraseDictionaryFeature* feature)
|
||||
PhraseTableImplementation implementation,
|
||||
PhraseDictionaryFeature* feature,
|
||||
bool inMemory = StaticData::Instance().UseMinphrInMemory(),
|
||||
bool useAlignmentInfo = StaticData::Instance().UseAlignmentInfo())
|
||||
: PhraseDictionary(numScoreComponent, feature),
|
||||
m_implementation(implementation),
|
||||
m_inMemory(StaticData::Instance().UseMinphrInMemory()),
|
||||
m_inMemory(inMemory),
|
||||
m_useAlignmentInfo(useAlignmentInfo),
|
||||
m_hash(10, 16),
|
||||
m_phraseDecoder(0)
|
||||
{}
|
||||
@ -95,7 +99,8 @@ public:
|
||||
, float weightWP);
|
||||
|
||||
const TargetPhraseCollection* GetTargetPhraseCollection(const Phrase &source) const;
|
||||
|
||||
TargetPhraseVectorPtr GetTargetPhraseCollectionRaw(const Phrase &source) const;
|
||||
|
||||
void AddEquivPhrase(const Phrase &source, const TargetPhrase &targetPhrase);
|
||||
|
||||
void InitializeForInput(const Moses::InputType&);
|
||||
|
Loading…
Reference in New Issue
Block a user