#include "quering.hh"

// Maps the first `filesize` bytes of `filename` into memory, read-only, and
// returns a pointer to the start of the mapping.
//
// If the file cannot be opened or mapped, the system error is printed with
// perror() and the process exits with EXIT_FAILURE; the function never
// returns a failed mapping. The caller releases the mapping with
// munmap(pointer, filesize).
unsigned char * read_binary_file(const char * filename, size_t filesize){
    int fd = open(filename, O_RDONLY);
    if (fd == -1) {
        perror("Error opening file for reading");
        exit(EXIT_FAILURE);
    }

    void * map = mmap(0, filesize, PROT_READ, MAP_SHARED, fd, 0);
    if (map == MAP_FAILED) {
        // Report first: close() may overwrite the errno set by mmap().
        perror("Error mmapping the file");
        close(fd);
        exit(EXIT_FAILURE);
    }

    // The mapping stays valid after the descriptor is closed. Leaving it
    // open leaked one file descriptor per call.
    close(fd);
    return static_cast<unsigned char *>(map);
}

// Loads a phrase table from the directory `filepath`.
//
// Files read from that directory:
//   source_vocabids  - source vocabulary IDs, loaded through read_map()
//   config           - its first line is parsed as the hash table size
//   binfile.dat      - mapped into memory through read_binary_file()
//   probing_hash.dat - loaded through readTable()
//
// A missing config file or a failing stat() on binfile.dat terminates the
// process with EXIT_FAILURE, matching read_binary_file().
QueryEngine::QueryEngine(const char * filepath) : decoder(filepath){
    // Create filepaths
    const std::string basepath(filepath);
    const std::string path_to_hashtable = basepath + "/probing_hash.dat";
    const std::string path_to_data_bin = basepath + "/binfile.dat";
    const std::string path_to_source_vocabid = basepath + "/source_vocabids";
    const std::string path_to_config = basepath + "/config";

    // Source phrase vocabids
    read_map(&source_vocabids, path_to_source_vocabid.c_str());

    // Target phrase vocabIDs
    vocabids = decoder.get_target_lookup_map();

    // Read config file: the first line holds the table size.
    std::ifstream config(path_to_config.c_str());
    if (!config.is_open()) {
        std::cerr << "Error opening config file: " << path_to_config << std::endl;
        exit(EXIT_FAILURE);
    }
    std::string line;
    std::getline(config, line);
    config.close();
    int tablesize = atoi(line.c_str());

    // Mmap binary table. Without the check a failed stat() would leave
    // filestatus unset and an arbitrary size would be mapped.
    struct stat filestatus;
    if (stat(path_to_data_bin.c_str(), &filestatus) == -1) {
        perror("Error reading the size of the binary file");
        exit(EXIT_FAILURE);
    }
    binary_filesize = filestatus.st_size;
    binary_mmaped = read_binary_file(path_to_data_bin.c_str(), binary_filesize);

    // Read hashtable. The size is stored in the table_filesize that the
    // destructor passes to munmap(); declaring a local of the same name here
    // shadowed it and left the destructor's length unset.
    table_filesize = Table::Size(tablesize, 1.2);
    mem = readTable(path_to_hashtable.c_str(), table_filesize);
    Table table_init(mem, table_filesize);
    table = table_init;

    std::cerr << "Initialized successfully! " << std::endl;
}

// Unmaps the binary phrase file and the hash table region obtained in the
// constructor.
QueryEngine::~QueryEngine(){
    // Clear mmap content from memory.
    munmap(binary_mmaped, binary_filesize);
    munmap(mem, table_filesize);
}

// Looks up a source phrase given as vocabulary IDs.
//
// Returns (found, entries): `found` is the result of the hash table lookup;
// `entries` is what decoder.full_decode_line() produces from the matching
// bytes of the binary file, and is empty when the phrase is not found.
//
// NOTE(review): the template arguments in this file (uint64_t IDs,
// unsigned char bytes, target_text entries) were reconstructed - confirm
// them against the declarations in quering.hh.
std::pair<bool, std::vector<target_text> > QueryEngine::query(std::vector<uint64_t> source_phrase){
    std::vector<target_text> translation_entries;
    const Entry * entry = 0;

    // The key is the plain sum of the IDs; util::MurmurHashNative over the
    // ID array was judged TOO SLOW. The sum ignores word order, so
    // permutations of the same IDs share a key.
    // NOTE(review): presumably this must match the key used when the table
    // was built - do not change it on this side alone.
    uint64_t key = 0;
    for (size_t i = 0; i < source_phrase.size(); ++i){
        key += source_phrase[i];
    }

    const bool found = table.Find(key, entry);
    if (found){
        // The entry stores where the encoded translations start in the
        // binary file and how many bytes they occupy.
        const uint64_t initial_index = entry->GetValue();
        const unsigned int bytes_toread = entry->bytes_toread;

        // Copy the relevant slice in one step instead of one push_back per byte.
        std::vector<unsigned char> encoded_text(binary_mmaped + initial_index,
                                                binary_mmaped + initial_index + bytes_toread);

        // Get only the translation entries necessary
        translation_entries = decoder.full_decode_line(encoded_text);
    }

    return std::pair<bool, std::vector<target_text> >(found, translation_entries);
}

// Looks up a source phrase given as text.
//
// The phrase is converted to vocabulary IDs with getVocabIDs() and then
// looked up exactly like the vocabulary-ID overload. On a hit this overload
// also writes the entry size to std::cerr.
//
// Returns (found, entries); `entries` is empty when the phrase is not found.
std::pair<bool, std::vector<target_text> > QueryEngine::query(StringPiece source_phrase){
    std::vector<target_text> translation_entries;
    const Entry * entry = 0;

    // Convert source phrase to VID
    std::vector<uint64_t> source_phrase_vid = getVocabIDs(source_phrase);

    // Same key as the vocabulary-ID overload: the sum of the IDs
    // (util::MurmurHashNative was judged TOO SLOW).
    uint64_t key = 0;
    for (size_t i = 0; i < source_phrase_vid.size(); ++i){
        key += source_phrase_vid[i];
    }

    const bool found = table.Find(key, entry);
    if (found){
        // The entry stores where the encoded translations start in the
        // binary file and how many bytes they occupy.
        const uint64_t initial_index = entry->GetValue();
        const unsigned int bytes_toread = entry->bytes_toread;
        std::cerr << "Entry size is bytes is: " << bytes_toread << std::endl;

        // Copy the relevant slice in one step instead of one push_back per byte.
        std::vector<unsigned char> encoded_text(binary_mmaped + initial_index,
                                                binary_mmaped + initial_index + bytes_toread);

        // Get only the translation entries necessary
        translation_entries = decoder.full_decode_line(encoded_text);
    }

    return std::pair<bool, std::vector<target_text> >(found, translation_entries);
}

void QueryEngine::printTargetInfo(std::vector target_phrases){ int entries = target_phrases.size(); for (int i = 0; i