Hack kenlm's populate load by catting files into memory. Also check that all required files exist.

This commit is contained in:
Hieu Hoang 2016-12-05 15:54:43 +00:00
parent b188c3b649
commit 114702fcd8
3 changed files with 53 additions and 10 deletions

View File

@ -81,7 +81,7 @@ ProbingPT::~ProbingPT()
void ProbingPT::Load(System &system)
{
m_engine = new QueryEngine(m_path.c_str());
m_engine = new QueryEngine(m_path.c_str(), load_method);
m_unkId = 456456546456;
@ -163,6 +163,9 @@ void ProbingPT::SetParameter(const std::string& key, const std::string& value)
UTIL_THROW2("load method not supported" << value);
}
}
else {
PhraseTable::SetParameter(key, value);
}
}
void ProbingPT::CreateAlignmentMap(System &system, const std::string path)

View File

@ -7,7 +7,7 @@ using namespace std;
namespace Moses2
{
QueryEngine::QueryEngine(const char * filepath)
QueryEngine::QueryEngine(const char * filepath, util::LoadMethod load_method)
{
//Create filepaths
@ -17,15 +17,12 @@ QueryEngine::QueryEngine(const char * filepath)
std::string path_to_source_vocabid = basepath + "/source_vocabids";
std::string alignPath = basepath + "/Alignments.dat";
if (!FileExists(path_to_config) || !FileExists(path_to_hashtable) ||
!FileExists(path_to_source_vocabid) || !FileExists(alignPath) ||
!FileExists(basepath + "TargetColl.dat") || !FileExists(basepath + "TargetVocab.dat") ||
!FileExists(basepath + "cache")) {
UTIL_THROW2("Binary table doesn't exist is didn't finish binarizing: " << path_to_config);
file_exits(basepath);
if (load_method == util::POPULATE_OR_READ) {
cat_files(basepath);
}
///Source phrase vocabids
read_map(source_vocabids, path_to_source_vocabid.c_str());
@ -144,5 +141,46 @@ void QueryEngine::read_alignments(const std::string &alignPath)
}
}
/**
 * Check that each file this engine needs is present under \p basePath.
 *
 * The files are probed one at a time in the order listed below; the first
 * one that FileExists() reports as absent is raised through UTIL_THROW2,
 * with the full path included in the message.
 *
 * \param basePath directory that is expected to hold the table files
 */
void QueryEngine::file_exits(const std::string &basePath)
{
  // Path suffixes (relative to basePath) of every required file.
  // The order matters only for which missing file gets reported first.
  static const char *const requiredSuffixes[] = {
    "/Alignments.dat",
    "/TargetColl.dat",
    "/TargetVocab.dat",
    "/cache",
    "/config",
    "/probing_hash.dat",
    "/source_vocabids"
  };
  const size_t suffixCount = sizeof(requiredSuffixes) / sizeof(requiredSuffixes[0]);

  for (size_t idx = 0; idx < suffixCount; ++idx) {
    const char *const suffix = requiredSuffixes[idx];
    if (!FileExists(basePath + suffix)) {
      UTIL_THROW2("Require file does not exist in: " << basePath << suffix);
    }
  }
}
/**
 * Wrap \p arg in single quotes so that a POSIX shell treats it as one
 * literal word. A single quote inside \p arg is written as '\'' (close the
 * quoted run, emit an escaped quote, reopen the quoted run).
 */
static std::string quote_for_shell(const std::string &arg)
{
  std::string quoted("'");
  for (std::string::size_type i = 0; i < arg.size(); ++i) {
    if (arg[i] == '\'') {
      quoted += "'\\''";
    } else {
      quoted += arg[i];
    }
  }
  quoted += '\'';
  return quoted;
}

/**
 * Run `cat <file> > /dev/null` on TargetColl.dat and probing_hash.dat under
 * \p basePath, so both files are read from start to end once.
 * NOTE(review): presumably this is meant to warm the OS file cache before
 * the tables are used -- confirm against the loader.
 *
 * The path is shell-quoted and preceded by `--`, so directories containing
 * whitespace, shell metacharacters or a leading '-' are passed to cat as a
 * plain file name instead of being interpreted by the shell or by cat.
 *
 * \param basePath directory that holds the table files
 */
void QueryEngine::cat_files(const std::string &basePath)
{
  static const char *const filesToRead[] = {
    "/TargetColl.dat",
    "/probing_hash.dat"
  };
  const size_t fileCount = sizeof(filesToRead) / sizeof(filesToRead[0]);

  for (size_t idx = 0; idx < fileCount; ++idx) {
    const std::string command =
      "cat -- " + quote_for_shell(basePath + filesToRead[idx]) + " > /dev/null";
    // Best effort: a failing cat only means the file was not pre-read, so
    // the exit status is deliberately not acted upon.
    const int status = system(command.c_str());
    (void) status;
  }
}
}

View File

@ -27,13 +27,15 @@ class QueryEngine
bool is_reordering;
void read_alignments(const std::string &alignPath);
void file_exits(const std::string &basePath);
void cat_files(const std::string &basePath);
public:
int num_scores;
int num_lex_scores;
bool logProb;
QueryEngine(const char *);
QueryEngine(const char *, util::LoadMethod load_method);
~QueryEngine();
std::pair<bool, uint64_t> query(uint64_t key);