trim Entry struct

Hieu Hoang 2016-01-21 17:55:47 +00:00
parent 7e4cfb6416
commit 23dbfb0f27
6 changed files with 6 additions and 730 deletions

View File

@ -71,7 +71,6 @@ alias deps : ../../..//z ../../..//boost_iostreams ../../..//boost_filesystem .
legacy/Util2.cpp
legacy/ProbingPT/hash.cpp
legacy/ProbingPT/huffmanish.cpp
legacy/ProbingPT/line_splitter.cpp
legacy/ProbingPT/probing_hash_utils.cpp
legacy/ProbingPT/quering.cpp

View File

@ -1,566 +0,0 @@
#include "huffmanish.hh"
namespace Moses2
{
Huffman::Huffman (const char * filepath)
{
//Read the file
util::FilePiece filein(filepath);
//Init uniq_lines to zero;
uniq_lines = 0;
line_text prev_line; //Check for unique lines.
int num_lines = 0 ;
while (true) {
line_text new_line;
num_lines++;
try {
//Process line read
new_line = splitLine(filein.ReadLine());
count_elements(new_line); //Counts the number of elements, adds new and increments counters.
} catch (const util::EndOfFileException &e) {
std::cerr << "Unique entries counted: ";
break;
}
if (new_line.source_phrase == prev_line.source_phrase) {
continue;
} else {
uniq_lines++;
prev_line = new_line;
}
}
std::cerr << uniq_lines << std::endl;
}
void Huffman::count_elements(const line_text &linein)
{
//For target phrase:
util::TokenIter<util::SingleCharacter> it(linein.target_phrase, util::SingleCharacter(' '));
while (it) {
//Check if we have that entry
std::map<std::string, unsigned int>::iterator mapiter;
mapiter = target_phrase_words.find(it->as_string());
if (mapiter != target_phrase_words.end()) {
//If the element is found, increment the count.
mapiter->second++;
} else {
//Else create a new entry;
target_phrase_words.insert(std::pair<std::string, unsigned int>(it->as_string(), 1));
}
it++;
}
//For word alignment 1
std::map<std::vector<unsigned char>, unsigned int>::iterator mapiter3;
std::vector<unsigned char> numbers = splitWordAll1(linein.word_align);
mapiter3 = word_all1.find(numbers);
if (mapiter3 != word_all1.end()) {
//If the element is found, increment the count.
mapiter3->second++;
} else {
//Else create a new entry;
word_all1.insert(std::pair<std::vector<unsigned char>, unsigned int>(numbers, 1));
}
}
//Assigns huffman values for each unique element
void Huffman::assign_values()
{
//First create vectors for all maps so that we could sort them later.
//Create a vector for target phrases
for(std::map<std::string, unsigned int>::iterator it = target_phrase_words.begin(); it != target_phrase_words.end(); it++ ) {
target_phrase_words_counts.push_back(*it);
}
//Sort it
std::sort(target_phrase_words_counts.begin(), target_phrase_words_counts.end(), sort_pair());
//Create a vector for word alignments 1
for(std::map<std::vector<unsigned char>, unsigned int>::iterator it = word_all1.begin(); it != word_all1.end(); it++ ) {
word_all1_counts.push_back(*it);
}
//Sort it
std::sort(word_all1_counts.begin(), word_all1_counts.end(), sort_pair_vec());
//Afterwards we assign a value for each phrase, starting from 1, as zero is reserved for the delimiter
unsigned int i = 1; //huffman code
for(std::vector<std::pair<std::string, unsigned int> >::iterator it = target_phrase_words_counts.begin();
it != target_phrase_words_counts.end(); it++) {
target_phrase_huffman.insert(std::pair<std::string, unsigned int>(it->first, i));
i++; //Go to the next huffman code
}
i = 1; //Reset i for the next map
for(std::vector<std::pair<std::vector<unsigned char>, unsigned int> >::iterator it = word_all1_counts.begin();
it != word_all1_counts.end(); it++) {
word_all1_huffman.insert(std::pair<std::vector<unsigned char>, unsigned int>(it->first, i));
i++; //Go to the next huffman code
}
//After the lookups are produced, free the memory of objects that are no longer needed.
target_phrase_words.clear();
word_all1.clear();
target_phrase_words_counts.clear();
word_all1_counts.clear();
std::cerr << "Finished generating huffman codes." << std::endl;
}
void Huffman::serialize_maps(const char * dirname)
{
//Note that the directory must already exist.
std::string basedir(dirname);
std::string target_phrase_path(basedir + "/target_phrases");
std::string probabilities_path(basedir + "/probs");
std::string word_all1_path(basedir + "/Wall1");
//Target phrase
std::ofstream os (target_phrase_path.c_str(), std::ios::binary);
boost::archive::text_oarchive oarch(os);
oarch << lookup_target_phrase;
os.close();
//Word all1
std::ofstream os2 (word_all1_path.c_str(), std::ios::binary);
boost::archive::text_oarchive oarch2(os2);
oarch2 << lookup_word_all1;
os2.close();
}
std::vector<unsigned char> Huffman::full_encode_line(line_text &line, bool log_prob)
{
return vbyte_encode_line((encode_line(line, log_prob)));
}
//! make sure score doesn't fall below LOWEST_SCORE
inline float FloorScore(float logScore)
{
const float LOWEST_SCORE = -100.0f;
return (std::max)(logScore , LOWEST_SCORE);
}
std::vector<unsigned int> Huffman::encode_line(line_text &line, bool log_prob)
{
std::vector<unsigned int> retvector;
//Get target_phrase first.
util::TokenIter<util::SingleCharacter> it(line.target_phrase, util::SingleCharacter(' '));
while (it) {
retvector.push_back(target_phrase_huffman.find(it->as_string())->second);
it++;
}
//Add a zero;
retvector.push_back(0);
//Get probabilities, reinterpreting the float bytes as an unsigned int.
util::TokenIter<util::SingleCharacter> probit(line.prob, util::SingleCharacter(' '));
while (probit) {
//Some values are too large to handle as float, so parse as a double first
double tempnum = atof(probit->data());
float num = (float)tempnum;
if (log_prob) {
num = FloorScore(log(num));
if (num == 0.0f) num = 0.0000000001;
}
//cerr << "num=" << num << endl;
retvector.push_back(reinterpret_float(&num));
probit++;
}
// append LexRO prob to pt scores
AppendLexRO(line, retvector, log_prob);
//Add a zero;
retvector.push_back(0);
//Get word alignments
retvector.push_back(word_all1_huffman.find(splitWordAll1(line.word_align))->second);
retvector.push_back(0);
//The remaining components might not be present, but add them anyway (reinterpreted as a byte array).
//In the future these should really be made optional to save space.
//Counts
const char* counts = line.counts.data();
size_t counts_size = line.counts.size();
for (size_t i = 0; i < counts_size; i++) {
retvector.push_back(counts[i]);
}
retvector.push_back(0);
//Sparse score
const char* sparse_score = line.sparse_score.data();
size_t sparse_score_size = line.sparse_score.size();
for (size_t i = 0; i < sparse_score_size; i++) {
retvector.push_back(sparse_score[i]);
}
retvector.push_back(0);
//Property
const char* property = line.property_to_be_binarized.data();
size_t property_size = line.property_to_be_binarized.size();
for (size_t i = 0; i < property_size; i++) {
retvector.push_back(property[i]);
}
retvector.push_back(0);
return retvector;
}
void Huffman::AppendLexRO(line_text &line, std::vector<unsigned int> &retvector, bool log_prob) const
{
const StringPiece &origProperty = line.property;
StringPiece::size_type startPos = origProperty.find("{{LexRO ");
if (startPos != StringPiece::npos) {
StringPiece::size_type endPos = origProperty.find("}}", startPos + 8);
StringPiece lexProb = origProperty.substr(startPos + 8, endPos - startPos - 8);
//cerr << "lexProb=" << lexProb << endl;
// append lex probs to pt probs
util::TokenIter<util::SingleCharacter> it(lexProb, util::SingleCharacter(' '));
while (it) {
StringPiece probStr = *it;
//cerr << "\t" << probStr << endl;
double tempnum = atof(probStr.data());
float num = (float)tempnum;
if (log_prob) {
num = FloorScore(log(num));
if (num == 0.0f) num = 0.0000000001;
}
retvector.push_back(reinterpret_float(&num));
// exclude LexRO property from property column
line.property_to_be_binarized = origProperty.substr(0, startPos).as_string()
+ origProperty.substr(endPos + 2, origProperty.size() - endPos - 2).as_string();
//cerr << "line.property_to_be_binarized=" << line.property_to_be_binarized << "AAAA" << endl;
it++;
}
}
}
void Huffman::produce_lookups()
{
//basically invert every map that we have
for(std::map<std::string, unsigned int>::iterator it = target_phrase_huffman.begin(); it != target_phrase_huffman.end(); it++ ) {
lookup_target_phrase.insert(std::pair<unsigned int, std::string>(it->second, it->first));
}
for(std::map<std::vector<unsigned char>, unsigned int>::iterator it = word_all1_huffman.begin(); it != word_all1_huffman.end(); it++ ) {
lookup_word_all1.insert(std::pair<unsigned int, std::vector<unsigned char> >(it->second, it->first));
}
}
HuffmanDecoder::HuffmanDecoder (const char * dirname)
{
//Read the maps from disk
//Note that the directory must already exist.
std::string basedir(dirname);
std::string target_phrase_path(basedir + "/target_phrases");
std::string word_all1_path(basedir + "/Wall1");
//Target phrases
std::ifstream is (target_phrase_path.c_str(), std::ios::binary);
boost::archive::text_iarchive iarch(is);
iarch >> lookup_target_phrase;
is.close();
//Word alignment 1
std::ifstream is2 (word_all1_path.c_str(), std::ios::binary);
boost::archive::text_iarchive iarch2(is2);
iarch2 >> lookup_word_all1;
is2.close();
}
HuffmanDecoder::HuffmanDecoder (const std::map<unsigned int, std::string> &lookup_target,
const std::map<unsigned int, std::vector<unsigned char> > &lookup_word1)
{
lookup_target_phrase = lookup_target;
lookup_word_all1 = lookup_word1;
}
std::vector<target_text*> HuffmanDecoder::full_decode_line (unsigned char lines[],
size_t linesCount,
int num_scores,
int num_lex_scores,
RecycleData &recycler)
{
std::vector<target_text*> retvector; //All target phrases
std::vector<unsigned int> *decoded_lines = vbyte_decode_line(lines, linesCount, recycler); //All decoded lines
std::vector<unsigned int>::iterator it = decoded_lines->begin(); //Iterator for them
std::vector<unsigned int> current_target_phrase; //Current target phrase decoded
short zero_count = 0; //Count how many zeroes we have seen so far. Every 6 zeroes mark the end of one target-phrase entry.
while(it != decoded_lines->end()) {
if (zero_count == 1) {
//We are extracting scores. We know how many scores there are, so we can push them
//to the vector directly. This is done in case any of the scores is 0, which would otherwise mess
//up the state machine.
for (int i = 0; i < num_scores + num_lex_scores; i++) {
current_target_phrase.push_back(*it);
it++;
}
}
if (zero_count == 6) {
//We have finished with this entry, decode it, and add it to the retvector.
retvector.push_back(decode_line(current_target_phrase, num_scores, num_lex_scores, recycler));
current_target_phrase.clear(); //Clear the current target phrase and the zero_count
zero_count = 0; //So that we can reuse them for the next target phrase
}
//Add to the current target phrase, number by number.
current_target_phrase.push_back(*it);
if (*it == 0) {
zero_count++;
}
it++; //Go to the next word/symbol
}
//Don't forget the last remaining line!
if (zero_count == 6) {
//We have finished with this entry, decode it, and add it to the retvector.
retvector.push_back(decode_line(current_target_phrase, num_scores, num_lex_scores, recycler));
current_target_phrase.clear(); //Clear the current target phrase and the zero_count
zero_count = 0; //So that we can reuse them for the next target phrase
}
recycler.huffman_line.push_back(decoded_lines);
return retvector;
}
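As an aid to reading encode_line and full_decode_line, the sketch below (not part of the original file) builds one entry in the zero-delimited layout they share: six 0-terminated fields per target phrase.
#include <vector>
// Toy example of the record layout: six fields, each closed by a 0,
// which is why zero_count == 6 marks the end of one target-phrase entry.
inline std::vector<unsigned int> toy_encoded_entry()
{
  std::vector<unsigned int> v;
  v.push_back(42); v.push_back(7); v.push_back(0); //target-word ids, then delimiter
  v.push_back(0x3f000000u); v.push_back(0);        //score(s) as reinterpreted float bits (0x3f000000 is 0.5f), incl. LexRO, then delimiter
  v.push_back(3); v.push_back(0);                  //word-alignment id, then delimiter
  v.push_back(0);                                  //counts bytes (empty here), delimiter
  v.push_back(0);                                  //sparse-score bytes (empty here), delimiter
  v.push_back(0);                                  //property bytes (empty here), delimiter
  return v;                                        //six zeroes total: one complete entry
}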
target_text *HuffmanDecoder::decode_line (const std::vector<unsigned int> &input,
int num_scores,
int num_lex_scores,
Moses2::RecycleData &recycler)
{
//demo decoder
target_text *ret;
if (recycler.tt.empty()) {
ret = new target_text;
}
else {
ret = recycler.tt.back();
recycler.tt.pop_back();
ret->Reset();
}
ret->prob.reserve(num_scores);
//Split everything
unsigned int wAll = 1;
//Split the line into the proper arrays
short num_zeroes = 0;
int counter = 0;
while (num_zeroes < 6) {
unsigned int num = input[counter];
if (num == 0) {
num_zeroes++;
} else if (num_zeroes == 0) {
ret->target_phrase.push_back(num);
} else if (num_zeroes == 1) {
//Push exactly num_scores + num_lex_scores scores
for (int i = 0; i < num_scores + num_lex_scores; i++) {
float prob = reinterpret_uint(&num);
ret->prob.push_back(prob);
counter++;
num = input[counter];
}
continue;
} else if (num_zeroes == 2) {
wAll = num;
} else if (num_zeroes == 3) {
ret->counts.push_back(static_cast<char>(input[counter]));
} else if (num_zeroes == 4) {
ret->sparse_score.push_back(static_cast<char>(input[counter]));
} else if (num_zeroes == 5) {
ret->property.push_back(static_cast<char>(input[counter]));
}
counter++;
}
ret->word_all1 = lookup_word_all1.find(wAll)->second;
return ret;
}
inline const std::string &HuffmanDecoder::getTargetWordFromID(unsigned int id)
{
return lookup_target_phrase.find(id)->second;
}
std::string HuffmanDecoder::getTargetWordsFromIDs(const std::vector<unsigned int> &ids)
{
std::string returnstring;
for (std::vector<unsigned int>::const_iterator it = ids.begin(); it != ids.end(); it++) {
returnstring.append(getTargetWordFromID(*it) + " ");
}
return returnstring;
}
inline const std::string &getTargetWordFromID(unsigned int id, const std::map<unsigned int, std::string> &lookup_target_phrase)
{
return lookup_target_phrase.find(id)->second;
}
std::string getTargetWordsFromIDs(const std::vector<unsigned int> &ids, const std::map<unsigned int, std::string> &lookup_target_phrase)
{
std::string returnstring;
for (std::vector<unsigned int>::const_iterator it = ids.begin(); it != ids.end(); it++) {
returnstring.append(getTargetWordFromID(*it, lookup_target_phrase) + " ");
}
return returnstring;
}
/*These functions make it easier to store the floats in the binary phrase table.
We reinterpret the float as an unsigned int so that it is handled like our other values and we can
apply variable byte encoding on top of it.*/
inline unsigned int reinterpret_float(float * num)
{
unsigned int * converted_num;
converted_num = reinterpret_cast<unsigned int *>(num);
return *converted_num;
}
inline float reinterpret_uint(unsigned int * num)
{
float * converted_num;
converted_num = reinterpret_cast<float *>(num);
return *converted_num;
}
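The two helpers above reinterpret through a casted pointer, which formally runs into C++ strict-aliasing rules. A memcpy-based equivalent (not part of the original file, and assuming sizeof(float) == sizeof(unsigned int)) would look like this:
#include <cstring>
// Alternative sketch: copy the raw bytes instead of aliasing the pointer.
inline unsigned int float_bits(float num)
{
  unsigned int out;
  std::memcpy(&out, &num, sizeof(out));
  return out;
}
inline float bits_to_float(unsigned int num)
{
  float out;
  std::memcpy(&out, &num, sizeof(out));
  return out;
}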
/*Mostly taken from stackoverflow, http://stackoverflow.com/questions/5858646/optimizing-variable-length-encoding
and modified in order to return a vector of chars. Implements ULEB128, also known as variable byte encoding.
This is a highly optimized version with an unrolled loop. */
inline std::vector<unsigned char> vbyte_encode(unsigned int num)
{
//Determine how many bytes we are going to take.
short size;
std::vector<unsigned char> byte_vector;
if (num < 0x00000080U) {
size = 1;
byte_vector.reserve(size);
goto b1;
}
if (num < 0x00004000U) {
size = 2;
byte_vector.reserve(size);
goto b2;
}
if (num < 0x00200000U) {
size = 3;
byte_vector.reserve(size);
goto b3;
}
if (num < 0x10000000U) {
size = 4;
byte_vector.reserve(size);
goto b4;
}
size = 5;
byte_vector.reserve(size);
//Now proceed with the encoding.
byte_vector.push_back((num & 0x7f) | 0x80);
num >>= 7;
b4:
byte_vector.push_back((num & 0x7f) | 0x80);
num >>= 7;
b3:
byte_vector.push_back((num & 0x7f) | 0x80);
num >>= 7;
b2:
byte_vector.push_back((num & 0x7f) | 0x80);
num >>= 7;
b1:
byte_vector.push_back(num);
return byte_vector;
}
std::vector<unsigned int> *vbyte_decode_line(unsigned char line[], size_t linesSize, RecycleData &recycler)
{
std::vector<unsigned int> *huffman_line;
if (recycler.huffman_line.empty()) {
huffman_line = new std::vector<unsigned int>();
}
else {
huffman_line = recycler.huffman_line.back();
recycler.huffman_line.pop_back();
huffman_line->clear();
}
unsigned char current_num[linesSize];
size_t current_num_ind = 0;
for (size_t i = 0; i < linesSize; ++i) {
unsigned char c = line[i];
current_num[current_num_ind++] = c;
if ((c >> 7) != 1) {
//We don't have continuation in the next bit
huffman_line->push_back(bytes_to_int(current_num, current_num_ind));
current_num_ind = 0;
}
}
return huffman_line;
}
inline unsigned int bytes_to_int(unsigned char number[], size_t numberSize)
{
unsigned int retvalue = 0;
unsigned char shift = 0; //By how many bits to shift
for (size_t i = 0; i < numberSize; ++i) {
unsigned char c = number[i];
retvalue |= (c & 0x7f) << shift;
shift += 7;
}
return retvalue;
}
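A quick round-trip check of the two variable-byte helpers above, vbyte_encode and bytes_to_int (illustrative only, not part of the original file):
#include <cassert>
#include <vector>
inline void vbyte_roundtrip_check(unsigned int value)
{
  std::vector<unsigned char> bytes = vbyte_encode(value); //e.g. 300 encodes to 0xAC 0x02
  unsigned int decoded = bytes_to_int(&bytes[0], bytes.size());
  assert(decoded == value);
}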
std::vector<unsigned char> vbyte_encode_line(const std::vector<unsigned int> &line)
{
std::vector<unsigned char> retvec;
//For each unsigned int in the line, vbyte encode it and add it to a vector of unsigned chars.
for (std::vector<unsigned int>::const_iterator it = line.begin(); it != line.end(); it++) {
std::vector<unsigned char> vbyte_encoded = vbyte_encode(*it);
retvec.insert(retvec.end(), vbyte_encoded.begin(), vbyte_encoded.end());
}
return retvec;
}
}

View File

@ -1,150 +0,0 @@
#pragma once
//Huffman encodes a line and also produces the vocabulary ids
#include "hash.hh"
#include "line_splitter.hh"
#include <cstdio>
#include <fstream>
#include <iostream>
#include <sstream>
#include <deque>
#include <boost/serialization/serialization.hpp>
#include <boost/serialization/vector.hpp>
#include <boost/serialization/map.hpp>
#include <boost/archive/text_iarchive.hpp>
#include <boost/archive/text_oarchive.hpp>
#include <boost/foreach.hpp>
namespace Moses2
{
////////////////////////////////////////////////////////////////
class RecycleData
{
public:
std::deque<target_text*> tt;
std::deque<std::vector<unsigned int>*> huffman_line;
~RecycleData()
{
BOOST_FOREACH (const target_text *obj, tt) {
delete obj;
}
BOOST_FOREACH (const std::vector<unsigned int> *obj, huffman_line) {
delete obj;
}
}
};
////////////////////////////////////////////////////////////////
//Sort pairs by their second element (the count), biggest first
struct sort_pair {
bool operator()(const std::pair<std::string, unsigned int> &left, const std::pair<std::string, unsigned int> &right) {
return left.second > right.second; //This puts biggest numbers first.
}
};
struct sort_pair_vec {
bool operator()(const std::pair<std::vector<unsigned char>, unsigned int> &left, const std::pair<std::vector<unsigned char>, unsigned int> &right) {
return left.second > right.second; //This puts biggest numbers first.
}
};
class Huffman
{
unsigned long uniq_lines; //Unique lines in the file.
//Containers used when counting the occurrence of a given phrase
std::map<std::string, unsigned int> target_phrase_words;
std::map<std::vector<unsigned char>, unsigned int> word_all1;
//Same containers as vectors, for sorting
std::vector<std::pair<std::string, unsigned int> > target_phrase_words_counts;
std::vector<std::pair<std::vector<unsigned char>, unsigned int> > word_all1_counts;
//Huffman maps
std::map<std::string, unsigned int> target_phrase_huffman;
std::map<std::vector<unsigned char>, unsigned int> word_all1_huffman;
//inverted maps
std::map<unsigned int, std::string> lookup_target_phrase;
std::map<unsigned int, std::vector<unsigned char> > lookup_word_all1;
public:
Huffman (const char *);
void count_elements (const line_text &line);
void assign_values();
void serialize_maps(const char * dirname);
void produce_lookups();
std::vector<unsigned int> encode_line(line_text &line, bool log_prob);
//encode line + variable byte encoding on top
std::vector<unsigned char> full_encode_line(line_text &line, bool log_prob);
//Getters
const std::map<unsigned int, std::string> get_target_lookup_map() const {
return lookup_target_phrase;
}
const std::map<unsigned int, std::vector<unsigned char> > get_word_all1_lookup_map() const {
return lookup_word_all1;
}
unsigned long getUniqLines() {
return uniq_lines;
}
void AppendLexRO(line_text &line, std::vector<unsigned int> &retvector, bool log_prob) const;
};
class HuffmanDecoder
{
std::map<unsigned int, std::string> lookup_target_phrase;
std::map<unsigned int, std::vector<unsigned char> > lookup_word_all1;
public:
HuffmanDecoder (const char *);
HuffmanDecoder (const std::map<unsigned int, std::string> &, const std::map<unsigned int, std::vector<unsigned char> > &);
//Getters
const std::map<unsigned int, std::string> &get_target_lookup_map() const {
return lookup_target_phrase;
}
const std::map<unsigned int, std::vector<unsigned char> > &get_word_all1_lookup_map() const {
return lookup_word_all1;
}
inline const std::string &getTargetWordFromID(unsigned int id);
std::string getTargetWordsFromIDs(const std::vector<unsigned int> &ids);
target_text *decode_line (const std::vector<unsigned int> &input,
int num_scores,
int num_lex_scores,
RecycleData &recycler);
//Variable-byte decodes all target phrases contained here and then passes them to decode_line
std::vector<target_text*> full_decode_line (unsigned char lines[],
size_t linesCount,
int num_scores,
int num_lex_scores,
RecycleData &recycler);
};
std::string getTargetWordsFromIDs(const std::vector<unsigned int> &ids, const std::map<unsigned int, std::string> &lookup_target_phrase);
inline const std::string &getTargetWordFromID(unsigned int id, const std::map<unsigned int, std::string> &lookup_target_phrase);
inline unsigned int reinterpret_float(float * num);
inline float reinterpret_uint(unsigned int * num);
std::vector<unsigned char> vbyte_encode_line(const std::vector<unsigned int> &line);
inline std::vector<unsigned char> vbyte_encode(unsigned int num);
std::vector<unsigned int> *vbyte_decode_line(unsigned char line[], size_t linesSize, RecycleData &recycler);
inline unsigned int bytes_to_int(unsigned char number[], size_t numberSize);
}

View File

@ -10,13 +10,12 @@
namespace Moses2
{
#define API_VERSION 8
#define API_VERSION 9
//Hash table entry
struct Entry {
uint64_t key;
typedef uint64_t Key;
unsigned int bytes_toread;
uint64_t GetKey() const {
return key;
@ -26,12 +25,7 @@ struct Entry {
key = to;
}
uint64_t GetValue() const {
return value;
}
uint64_t value;
uint64_t targetInd;
};
//Define table
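For reference, after this commit the hash-table entry reduces to roughly the following. This is a sketch reconstructed from the diff context above and the callers below; member details such as the setter signature may differ slightly from the real header.
#include <stdint.h>
struct Entry {
  typedef uint64_t Key;
  uint64_t key;    //hash of the source phrase
  uint64_t value;  //now carries the target index (previously stored in a separate targetInd field)

  uint64_t GetKey() const {
    return key;
  }
  void SetKey(Key to) {
    key = to;
  }
};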

View File

@ -94,7 +94,9 @@ std::pair<bool, uint64_t> QueryEngine::query(uint64_t key)
const Entry * entry;
ret.first = table.Find(key, entry);
ret.second = entry->targetInd;
if (ret.first) {
ret.second = entry->value;
}
return ret;
}
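With that change, a caller of QueryEngine::query reads the second member only on a hit. A hypothetical usage sketch (queryEngine and sourceKey are placeholder names, not from the source):
std::pair<bool, uint64_t> res = queryEngine.query(sourceKey);
if (res.first) {
  uint64_t targetInd = res.second; //index of this source phrase's block of target phrases
  //... look up the target phrases starting at targetInd ...
}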

View File

@ -42,7 +42,6 @@ void createProbingPT(
float totalSourceCount = 0;
//Keep track of the size of each group of target phrases
uint64_t entrystartidx = 0;
size_t line_num = 0;
//Read everything and process
@ -83,8 +82,7 @@ void createProbingPT(
//Create an entry for the previous source phrase:
Entry pesho;
pesho.value = entrystartidx;
pesho.targetInd = targetInd;
pesho.value = targetInd;
//The key is the sum of hashes of individual words, bitshifted by their position in the phrase.
//Probably not entirely correct, but fast and seems to work fine in practice.
pesho.key = 0;
@ -128,8 +126,7 @@ void createProbingPT(
uint64_t targetInd = storeTarget.Save();
Entry pesho;
pesho.value = entrystartidx;
pesho.targetInd = targetInd;
pesho.value = targetInd;
//The key is the sum of hashes of individual words. Probably not entirely correct, but fast
pesho.key = 0;
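A minimal sketch of the key scheme that comment describes: hash each word and shift the hash by the word's position before summing. std::hash is used here only as a stand-in for whatever hash function the real code uses, so the values will not match the actual table.
#include <functional>
#include <string>
#include <vector>
#include <stdint.h>
inline uint64_t phrase_key(const std::vector<std::string> &words)
{
  uint64_t key = 0;
  std::hash<std::string> hasher;
  for (size_t i = 0; i < words.size(); ++i) {
    key += static_cast<uint64_t>(hasher(words[i])) << i; //position-dependent shift
  }
  return key;
}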