integrate compact lexicalised reordering model

2025-01-05 02:22:21 +03:00 · 2015-12-19 02:32:08 +00:00 · 2015-12-19 02:32:08 +00:00 · da022f8e64
commit da022f8e64
parent d06a8019c5
17 changed files with 3641 additions and 1 deletions
--- a/contrib/other-builds/moses2/Jamfile
+++ b/contrib/other-builds/moses2/Jamfile
@ -74,7 +74,12 @@ external-lib boost_serialization ;
    legacy/ProbingPT/probing_hash_utils.cpp
    legacy/ProbingPT/quering.cpp
    legacy/ProbingPT/vocabid.cpp
-    
+    legacy/CompactPT/BlockHashIndex.cpp
+    legacy/CompactPT/CmphStringVectorAdapter.cpp
+    legacy/CompactPT/LexicalReorderingTableCompact.cpp
+    legacy/CompactPT/MurmurHash3.cpp
+    legacy/CompactPT/ThrowingFwrite.cpp
+
 ../../../moses//moses 
 ../../../OnDiskPt//OnDiskPt 
 ../../..//boost_filesystem  
--- a/contrib/other-builds/moses2/legacy/CompactPT/BlockHashIndex.cpp
+++ b/contrib/other-builds/moses2/legacy/CompactPT/BlockHashIndex.cpp
@ -0,0 +1,424 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "ThrowingFwrite.h"
+#include "BlockHashIndex.h"
+#include "CmphStringVectorAdapter.h"
+#include "util/exception.hh"
+#include "util/string_stream.hh"
+
+#ifdef HAVE_CMPH
+#include "cmph.h"
+#endif
+
+namespace Moses2
+{
+#ifdef WITH_THREADS
+// Threaded build: creates an empty index (no file attached, size 0, nothing
+// saved or dropped yet) and hands `threadsNum` to the thread pool member.
+// orderBits / fingerPrintBits are the bit widths later passed to the packed
+// arrays for the "order" and "fingerprint" fields.
+// NOTE(review): this variant initialises m_landmarks(true) while the
+// non-threaded constructor leaves it default-constructed - confirm intended.
+BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
+                               size_t threadsNum)
+  : m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
+    m_fileHandle(0), m_fileHandleStart(0), m_landmarks(true), m_size(0),
+    m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0),
+    m_threadPool(threadsNum)
+{
+#ifndef HAVE_CMPH
+  // Without CMPH the index cannot work at all: report and terminate.
+  std::cerr << "minphr: CMPH support not compiled in." << std::endl;
+  exit(1);
+#endif
+}
+#else
+// Non-threaded build: creates an empty index (no file attached, size 0,
+// nothing saved or dropped yet).  orderBits / fingerPrintBits are the bit
+// widths later passed to the packed arrays.
+BlockHashIndex::BlockHashIndex(size_t orderBits, size_t fingerPrintBits)
+  : m_orderBits(orderBits), m_fingerPrintBits(fingerPrintBits),
+    m_fileHandle(0), m_fileHandleStart(0), m_size(0),
+    m_lastSaved(-1), m_lastDropped(-1), m_numLoadedRanges(0)
+{
+#ifndef HAVE_CMPH
+  // Without CMPH the index cannot work at all: report and terminate.
+  std::cerr << "minphr: CMPH support not compiled in." << std::endl;
+  exit(1);
+#endif
+}
+#endif
+
+// Releases every hash function and packed array that is still held.
+BlockHashIndex::~BlockHashIndex()
+{
+#ifdef HAVE_CMPH
+  // Hash functions are stored as void*; only non-null slots own a cmph_t.
+  for(size_t slot = 0; slot < m_hashes.size(); ++slot) {
+    void* mph = m_hashes[slot];
+    if(mph != 0)
+      cmph_destroy((cmph_t*)mph);
+  }
+
+  // Null slots are skipped, exactly as for the hash functions above.
+  for(size_t slot = 0; slot < m_arrays.size(); ++slot) {
+    PairedPackedArray<>* packed = m_arrays[slot];
+    if(packed != 0)
+      delete packed;
+  }
+#endif
+}
+
+// Looks `key` up in the whole index.
+// Returns the global position of the key, or GetSize() when the key sorts
+// before the first landmark or its fingerprint does not match.
+size_t BlockHashIndex::GetHash(const char* key)
+{
+  const std::string keyStr(key);
+
+  // Number of landmarks that are <= key; the last of them starts the only
+  // range that can contain the key.
+  const size_t numNotGreater =
+    std::distance(m_landmarks.begin(),
+                  std::upper_bound(m_landmarks.begin(),
+                                   m_landmarks.end(), keyStr));
+
+  // Key sorts before every landmark.  Tested directly instead of comparing
+  // against `0ul-1`, which is only 32 bits wide where unsigned long is
+  // narrower than size_t.
+  if(numNotGreater == 0)
+    return GetSize();
+
+  const size_t i = numNotGreater - 1;
+  const size_t pos = GetHash(i, key);
+  if(pos == GetSize())
+    return GetSize();
+
+  // Each range holds 2^m_orderBits slots; shift in size_t width.
+  return (static_cast<size_t>(1) << m_orderBits) * i + pos;
+}
+
+// Returns the fingerprint of `key`: its MurmurHash3 (x86, 32 bit, seed 100000)
+// reduced to the lowest m_fingerPrintBits bits.
+size_t BlockHashIndex::GetFprint(const char* key) const
+{
+  // Zero-initialised: the hash routine is the 32-bit variant, so on targets
+  // with a wider size_t the remaining bytes were previously left
+  // indeterminate.
+  size_t hash = 0;
+  MurmurHash3_x86_32(key, std::strlen(key), 100000, &hash);
+  // Build the mask in size_t width rather than unsigned long.
+  hash &= (static_cast<size_t>(1) << m_fingerPrintBits) - 1;
+  return hash;
+}
+
+// Looks `key` up inside range `i`.
+// Returns the stored order value when the stored fingerprint equals the key's
+// fingerprint, GetSize() otherwise.  Also refreshes the range's clock stamp.
+size_t BlockHashIndex::GetHash(size_t i, const char* key)
+{
+//#ifdef WITH_THREADS
+//  boost::mutex::scoped_lock lock(m_mutex);
+//#endif
+  //if(m_hashes[i] == 0)
+  //LoadRange(i);
+#ifdef HAVE_CMPH
+  size_t slot = cmph_search((cmph_t*)m_hashes[i], key, (cmph_uint32) strlen(key));
+#else
+  assert(0);
+  size_t slot = 0;
+#endif
+
+  const std::pair<size_t, size_t> entry =
+    m_arrays[i]->Get(slot, m_orderBits, m_fingerPrintBits);
+  m_clocks[i] = clock();
+
+  // entry.first is the order, entry.second the fingerprint stored for the slot.
+  const bool fingerprintMatches = (GetFprint(key) == entry.second);
+  return fingerprintMatches ? entry.first : GetSize();
+}
+
+// std::string convenience overload; forwards to the C-string lookup.
+size_t BlockHashIndex::GetHash(std::string key)
+{
+  const char* rawKey = key.c_str();
+  return GetHash(rawKey);
+}
+
+// Subscript syntax for GetHash(std::string).
+size_t BlockHashIndex::operator[](std::string key)
+{
+  const size_t position = GetHash(key);
+  return position;
+}
+
+// Subscript syntax for the C-string lookup.
+size_t BlockHashIndex::operator[](char* key)
+{
+  const size_t position = GetHash(key);
+  return position;
+}
+
+// Writes the complete index to `filename` and returns the number of bytes
+// reported by Save(std::FILE*).
+// Throws util::Exception when the file cannot be opened; write failures
+// propagate from the stream overload.
+size_t BlockHashIndex::Save(std::string filename)
+{
+  // "wb": the index is raw binary data, so newline translation on platforms
+  // that distinguish text mode must be disabled.
+  std::FILE* mphf = std::fopen(filename.c_str(), "wb");
+  if(mphf == 0)
+    UTIL_THROW2("ERROR: could not open " << filename << " for writing");
+
+  size_t size = 0;
+  try {
+    size = Save(mphf);
+  } catch(...) {
+    // do not leak the handle when writing fails
+    std::fclose(mphf);
+    throw;
+  }
+  std::fclose(mphf);
+  return size;
+}
+
+// Starts an incremental save on `mphf`: writes both bit widths, remembers
+// where the index body starts and reserves one size_t that FinalizeSave()
+// later overwrites with the offset of the landmark/seek tables.
+void BlockHashIndex::BeginSave(std::FILE * mphf)
+{
+  m_fileHandle = mphf;
+
+  ThrowingFwrite(&m_orderBits, sizeof(size_t), 1, m_fileHandle);
+  ThrowingFwrite(&m_fingerPrintBits, sizeof(size_t), 1, m_fileHandle);
+
+  // All offsets stored in the file are relative to this position.
+  m_fileHandleStart = std::ftell(m_fileHandle);
+
+  size_t placeholder = 0;
+  ThrowingFwrite(&placeholder, sizeof(size_t), 1, m_fileHandle);
+}
+
+// Appends range `i` (hash function followed by its packed array) to the open
+// file and records the range's offset relative to the index start.
+void BlockHashIndex::SaveRange(size_t i)
+{
+#ifdef HAVE_CMPH
+  if(i >= m_seekIndex.size())
+    m_seekIndex.resize(i+1);
+
+  m_seekIndex[i] = std::ftell(m_fileHandle) - m_fileHandleStart;
+
+  cmph_t* mph = (cmph_t*)m_hashes[i];
+  cmph_dump(mph, m_fileHandle);
+  m_arrays[i]->Save(m_fileHandle);
+#endif
+}
+
+// Saves every finished range that directly follows the last saved one.
+// m_queue stores negated range ids, so top() is the smallest pending id;
+// saving stops at the first gap so ranges reach the file in order.
+void BlockHashIndex::SaveLastRange()
+{
+#ifdef WITH_THREADS
+  boost::mutex::scoped_lock lock(m_mutex);
+#endif
+
+  while(!m_queue.empty()) {
+    if(m_lastSaved + 1 != -m_queue.top())
+      break;
+
+    size_t nextRange = -m_queue.top();
+    m_queue.pop();
+    SaveRange(nextRange);
+    m_lastSaved = nextRange;
+  }
+}
+
+// Frees the hash function and packed array of range `i`, if present.
+// The loaded-range counter is only decremented when something was actually
+// released and the counter is non-zero; the unconditional decrement wrapped
+// the unsigned counter for ranges that never incremented it.
+void BlockHashIndex::DropRange(size_t i)
+{
+#ifdef HAVE_CMPH
+  bool released = false;
+
+  if(m_hashes[i] != 0) {
+    cmph_destroy((cmph_t*)m_hashes[i]);
+    m_hashes[i] = 0;
+    released = true;
+  }
+  if(m_arrays[i] != 0) {
+    delete m_arrays[i];
+    m_arrays[i] = 0;
+    m_clocks[i] = 0;
+    released = true;
+  }
+
+  if(released && m_numLoadedRanges > 0)
+    m_numLoadedRanges--;
+#endif
+}
+
+// Drops, in order, every range that has been saved but not yet dropped.
+void BlockHashIndex::DropLastRange()
+{
+#ifdef WITH_THREADS
+  boost::mutex::scoped_lock lock(m_mutex);
+#endif
+
+  while(m_lastDropped != m_lastSaved) {
+    ++m_lastDropped;
+    DropRange(m_lastDropped);
+  }
+}
+
+#ifdef WITH_THREADS
+// Calls Stop(true) on the thread pool.
+// NOTE(review): presumably blocks until all submitted hash tasks have
+// finished - confirm against ThreadPool::Stop.
+void BlockHashIndex::WaitAll()
+{
+  m_threadPool.Stop(true);
+}
+#endif
+
+// Finishes an incremental save started with BeginSave(): flushes the
+// remaining ranges, patches the reserved header slot with the offset of the
+// trailing tables, then writes landmarks, seek index and element count.
+// Returns the total number of bytes the index occupies in the file.
+size_t BlockHashIndex::FinalizeSave()
+{
+#ifdef WITH_THREADS
+  m_threadPool.Stop(true);
+#endif
+
+  SaveLastRange();
+
+  // Offset (relative to the index start) at which the trailing tables begin.
+  size_t relIndexPos = std::ftell(m_fileHandle) - m_fileHandleStart;
+
+  // Overwrite the placeholder written by BeginSave().
+  std::fseek(m_fileHandle, m_fileHandleStart, SEEK_SET);
+  ThrowingFwrite(&relIndexPos, sizeof(size_t), 1, m_fileHandle);
+
+  std::fseek(m_fileHandle, m_fileHandleStart + relIndexPos, SEEK_SET);
+  m_landmarks.save(m_fileHandle);
+
+  size_t seekIndexSize = m_seekIndex.size();
+  ThrowingFwrite(&seekIndexSize, sizeof(size_t), 1, m_fileHandle);
+  // &m_seekIndex[0] is undefined for an empty vector, so skip the payload
+  // when no range was saved.
+  if(seekIndexSize > 0)
+    ThrowingFwrite(&m_seekIndex[0], sizeof(size_t), seekIndexSize, m_fileHandle);
+
+  ThrowingFwrite(&m_size, sizeof(size_t), 1, m_fileHandle);
+
+  size_t fileHandleStop = std::ftell(m_fileHandle);
+  // m_fileHandleStart lies after the two bit-width fields; add them back.
+  return fileHandleStop - m_fileHandleStart + sizeof(m_orderBits)
+         + sizeof(m_fingerPrintBits);
+}
+
+// Writes the whole index to an already opened stream in one go and returns
+// the byte count reported by FinalizeSave().
+size_t BlockHashIndex::Save(std::FILE * mphf)
+{
+  // Discard any pending ids: every range is written explicitly below.
+  m_queue = std::priority_queue<int>();
+
+  BeginSave(mphf);
+  size_t range = 0;
+  while(range < m_hashes.size()) {
+    SaveRange(range);
+    ++range;
+  }
+
+  return FinalizeSave();
+}
+
+// Reads the index metadata (bit widths, landmarks, seek index, element count)
+// from `mphf` without loading any range, and sizes the per-range vectors.
+// Returns the distance between the stream position on entry and on exit.
+// Throws util::Exception when the file ends before all fields were read.
+size_t BlockHashIndex::LoadIndex(std::FILE* mphf)
+{
+  m_fileHandle = mphf;
+
+  size_t beginning = std::ftell(mphf);
+
+  size_t read = 0;
+  read += std::fread(&m_orderBits, sizeof(size_t), 1, mphf);
+  read += std::fread(&m_fingerPrintBits, sizeof(size_t), 1, mphf);
+  m_fileHandleStart = std::ftell(m_fileHandle);
+
+  size_t relIndexPos = 0;
+  read += std::fread(&relIndexPos, sizeof(size_t), 1, mphf);
+  if(read != 3)
+    UTIL_THROW2("ERROR: truncated block hash index (header)");
+
+  // Jump over the range data to the trailing tables.
+  std::fseek(m_fileHandle, m_fileHandleStart + relIndexPos, SEEK_SET);
+
+  m_landmarks.load(mphf);
+
+  size_t seekIndexSize = 0;
+  if(std::fread(&seekIndexSize, sizeof(size_t), 1, m_fileHandle) != 1)
+    UTIL_THROW2("ERROR: truncated block hash index (seek index size)");
+
+  m_seekIndex.resize(seekIndexSize);
+  // &m_seekIndex[0] is undefined for an empty vector.
+  if(seekIndexSize > 0) {
+    if(std::fread(&m_seekIndex[0], sizeof(size_t), seekIndexSize, m_fileHandle)
+        != seekIndexSize)
+      UTIL_THROW2("ERROR: truncated block hash index (seek index)");
+  }
+  m_hashes.resize(seekIndexSize, 0);
+  m_clocks.resize(seekIndexSize, 0);
+  m_arrays.resize(seekIndexSize, 0);
+
+  if(std::fread(&m_size, sizeof(size_t), 1, m_fileHandle) != 1)
+    UTIL_THROW2("ERROR: truncated block hash index (size)");
+
+  size_t end = std::ftell(mphf);
+
+  return end - beginning;
+}
+
+// Loads range `i` from the attached file: seeks to its recorded offset, reads
+// the hash function and then the packed array stored right behind it.
+void BlockHashIndex::LoadRange(size_t i)
+{
+#ifdef HAVE_CMPH
+  const size_t rangeStart = m_fileHandleStart + m_seekIndex[i];
+  std::fseek(m_fileHandle, rangeStart, SEEK_SET);
+
+  cmph_t* mph = cmph_load(m_fileHandle);
+
+  // The array is stored in its slot before loading, as in the original.
+  PairedPackedArray<>* packed =
+    new PairedPackedArray<>(0, m_orderBits, m_fingerPrintBits);
+  m_arrays[i] = packed;
+  packed->Load(m_fileHandle);
+
+  m_hashes[i] = (void*)mph;
+  m_clocks[i] = clock();
+
+  ++m_numLoadedRanges;
+#endif
+}
+
+// Loads the complete index from `filename` and returns the byte count
+// reported by Load(std::FILE*).
+// Throws util::Exception when the file cannot be opened.
+size_t BlockHashIndex::Load(std::string filename)
+{
+  // "rb": the index is raw binary data; text mode would corrupt it on
+  // platforms that translate newlines.
+  std::FILE* mphf = std::fopen(filename.c_str(), "rb");
+  if(mphf == 0)
+    UTIL_THROW2("ERROR: could not open " << filename << " for reading");
+
+  size_t size = 0;
+  try {
+    size = Load(mphf);
+  } catch(...) {
+    // do not leak the handle when loading fails
+    std::fclose(mphf);
+    throw;
+  }
+  std::fclose(mphf);
+  return size;
+}
+
+// Loads the metadata and then every range from `mphf`.  The stream is left
+// where LoadIndex() finished, i.e. behind the index.
+// Returns the byte count reported by LoadIndex().
+size_t BlockHashIndex::Load(std::FILE * mphf)
+{
+  const size_t byteSize = LoadIndex(mphf);
+  const size_t resumeAt = std::ftell(mphf);
+
+  size_t range = 0;
+  while(range < m_seekIndex.size()) {
+    LoadRange(range);
+    ++range;
+  }
+
+  // LoadRange() moved the file position around; restore it.
+  std::fseek(m_fileHandle, resumeAt, SEEK_SET);
+  return byteSize;
+}
+
+// Returns m_size, the running total of keys added via AddRange() (or the
+// value read by LoadIndex()).  The lookup functions also return this value
+// to signal "key not found".
+size_t BlockHashIndex::GetSize() const
+{
+  return m_size;
+}
+
+// Currently a no-op: the whole body is commented out, so `ratio` and
+// `tolerance` are ignored.  The disabled code would drop the least recently
+// used ranges once more than ratio * (1 + tolerance) of them are loaded.
+void BlockHashIndex::KeepNLastRanges(float ratio, float tolerance)
+{
+  /*
+  #ifdef WITH_THREADS
+  boost::mutex::scoped_lock lock(m_mutex);
+  #endif
+  size_t n = m_hashes.size() * ratio;
+  size_t max = n * (1 + tolerance);
+  if(m_numLoadedRanges > max) {
+    typedef std::vector<std::pair<clock_t, size_t> > LastLoaded;
+    LastLoaded lastLoaded;
+    for(size_t i = 0; i < m_hashes.size(); i++)
+      if(m_hashes[i] != 0)
+        lastLoaded.push_back(std::make_pair(m_clocks[i], i));
+
+    std::sort(lastLoaded.begin(), lastLoaded.end());
+    for(LastLoaded::reverse_iterator it = lastLoaded.rbegin() + size_t(n * (1 - tolerance));
+        it != lastLoaded.rend(); it++)
+      DropRange(it->second);
+  }*/
+}
+
+// Builds the minimal perfect hash (CHD) and the packed (order, fingerprint)
+// array for range `current` from the keys delivered by the CMPH adapter
+// `source_void`, then publishes both under the mutex and queues the range
+// for saving.
+// Throws util::Exception when the hash cannot be built or when the keys are
+// not in ascending order; everything allocated here is released first.
+// NOTE(review): the adapter passed in is not freed here - confirm who owns it.
+void BlockHashIndex::CalcHash(size_t current, void* source_void)
+{
+#ifdef HAVE_CMPH
+  cmph_io_adapter_t* source = (cmph_io_adapter_t*) source_void;
+  cmph_config_t *config = cmph_config_new(source);
+  cmph_config_set_algo(config, CMPH_CHD);
+
+  cmph_t* hash = cmph_new(config);
+  if(hash == 0) {
+    // Searching a null hash would crash; fail loudly instead.
+    cmph_config_destroy(config);
+    UTIL_THROW2("ERROR: could not build minimal perfect hash for range " << current);
+  }
+
+  PairedPackedArray<> *pv =
+    new PairedPackedArray<>(source->nkeys, m_orderBits, m_fingerPrintBits);
+
+  size_t i = 0;
+
+  source->rewind(source->data);
+
+  std::string lastKey = "";
+  while(i < source->nkeys) {
+    unsigned keylen;
+    char* key;
+    source->read(source->data, &key, &keylen);
+    std::string temp(key, keylen);
+    source->dispose(source->data, key, keylen);
+
+    if(lastKey > temp) {
+      // A two-key range whose second key is the dummy padding key is the one
+      // permitted exception to the ordering requirement.
+      if(source->nkeys != 2 || temp != "###DUMMY_KEY###") {
+        util::StringStream strme;
+        strme << "ERROR: Input file does not appear to be sorted with  LC_ALL=C sort\n";
+        strme << "1: " << lastKey << "\n";
+        strme << "2: " << temp << "\n";
+        // Release what was allocated above before bailing out.
+        delete pv;
+        cmph_destroy(hash);
+        cmph_config_destroy(config);
+        UTIL_THROW2(strme.str());
+      }
+    }
+    lastKey = temp;
+
+    size_t fprint = GetFprint(temp.c_str());
+    size_t idx = cmph_search(hash, temp.c_str(),
+                             (cmph_uint32) temp.size());
+
+    // Slot idx remembers the key's position in the range and its fingerprint.
+    pv->Set(idx, i, fprint, m_orderBits, m_fingerPrintBits);
+    i++;
+  }
+
+  cmph_config_destroy(config);
+
+#ifdef WITH_THREADS
+  boost::mutex::scoped_lock lock(m_mutex);
+#endif
+
+  if(m_hashes.size() <= current) {
+    m_hashes.resize(current + 1, 0);
+    m_arrays.resize(current + 1, 0);
+    m_clocks.resize(current + 1, 0);
+  }
+
+  m_hashes[current] = (void*)hash;
+  m_arrays[current] = pv;
+  m_clocks[current] = clock();
+  // Ids are stored negated so the max-heap yields the smallest pending id;
+  // convert to int before negating instead of negating an unsigned value.
+  m_queue.push(-static_cast<int>(current));
+#endif
+}
+
+#ifdef HAVE_CMPH
+// Wraps a vector of strings in a CMPH key adapter and returns it type-erased
+// so the header does not need the CMPH types.
+void* BlockHashIndex::vectorAdapter(std::vector<std::string>& v)
+{
+  return (void*)CmphVectorAdapter(v);
+}
+
+// Same as above for a StringVector using std::allocator.
+void* BlockHashIndex::vectorAdapter(StringVector<unsigned, size_t, std::allocator>& sv)
+{
+  return (void*)CmphStringVectorAdapter(sv);
+}
+
+// Same as above for a StringVector using MmapAllocator.
+void* BlockHashIndex::vectorAdapter(StringVector<unsigned, size_t, MmapAllocator>& sv)
+{
+  return (void*)CmphStringVectorAdapter(sv);
+}
+#endif
+
+}
--- a/contrib/other-builds/moses2/legacy/CompactPT/BlockHashIndex.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/BlockHashIndex.h
@ -0,0 +1,192 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_BlockHashIndex_h
+#define moses_BlockHashIndex_h
+
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+#include <queue>
+#include <cstring>
+#include <cstdio>
+
+#include "MurmurHash3.h"
+#include "StringVector.h"
+#include "PackedArray.h"
+#include "util/exception.hh"
+#include "util/string_stream.hh"
+
+#ifdef WITH_THREADS
+#include "../ThreadPool.h"
+#else
+#include <ctime>
+#endif
+
+#include <boost/shared_ptr.hpp>
+
+namespace Moses2
+{
+
+// Index that maps string keys to positions.  Keys are grouped into ranges;
+// each range has a landmark (its first key), a CMPH hash function and a
+// packed array of (order, fingerprint) pairs.  Lookups return GetSize() for
+// keys that are not found.
+class BlockHashIndex
+{
+private:
+  // Ids of computed-but-unsaved ranges, stored negated (see SaveLastRange).
+  std::priority_queue<int> m_queue;
+
+  // Bit widths handed to the packed arrays.
+  size_t m_orderBits;
+  size_t m_fingerPrintBits;
+
+  // File used for saving/loading and the offset all seek positions refer to.
+  std::FILE* m_fileHandle;
+  size_t m_fileHandleStart;
+
+  // First key of every range, in ascending order.
+  StringVector<unsigned char, unsigned long> m_landmarks;
+
+  // Per-range data; a null entry means the range is not in memory.
+  std::vector<void*> m_hashes;
+  std::vector<clock_t> m_clocks;
+  std::vector<PairedPackedArray<>*> m_arrays;
+
+  // Per-range file offsets relative to m_fileHandleStart.
+  std::vector<size_t> m_seekIndex;
+
+  size_t m_size;
+  int m_lastSaved;
+  int m_lastDropped;
+  size_t m_numLoadedRanges;
+
+#ifdef WITH_THREADS
+  ThreadPool m_threadPool;
+  boost::mutex m_mutex;
+
+  // Pool task that hashes one range.  It owns a private copy of the keys.
+  template <typename Keys>
+  class HashTask : public Task
+  {
+  public:
+    HashTask(int id, BlockHashIndex& hash, Keys& keys)
+      : m_id(id), m_hash(hash), m_keys(new Keys(keys)) {}
+
+    virtual void Run() {
+      m_hash.CalcHash(m_id, *m_keys);
+    }
+
+    virtual ~HashTask() {
+      delete m_keys;
+    }
+
+  private:
+    int m_id;
+    BlockHashIndex& m_hash;
+    Keys* m_keys;
+  };
+#endif
+
+  size_t GetFprint(const char* key) const;
+  size_t GetHash(size_t i, const char* key);
+
+public:
+#ifdef WITH_THREADS
+  BlockHashIndex(size_t orderBits, size_t fingerPrintBits,
+                 size_t threadsNum = 2);
+#else
+  BlockHashIndex(size_t orderBits, size_t fingerPrintBits);
+#endif
+
+  ~BlockHashIndex();
+
+  // Lookup; returns GetSize() when the key is not found.
+  size_t GetHash(const char* key);
+  size_t GetHash(std::string key);
+
+  size_t operator[](std::string key);
+  size_t operator[](char* key);
+
+  // Incremental saving: BeginSave, then SaveRange/SaveLastRange per range,
+  // then FinalizeSave.
+  void BeginSave(std::FILE* mphf);
+  void SaveRange(size_t i);
+  void SaveLastRange();
+  size_t FinalizeSave();
+
+#ifdef WITH_THREADS
+  void WaitAll();
+#endif
+
+  void DropRange(size_t i);
+  void DropLastRange();
+
+  size_t LoadIndex(std::FILE* mphf);
+  void LoadRange(size_t i);
+
+  size_t Save(std::string filename);
+  size_t Save(std::FILE * mphf);
+
+  size_t Load(std::string filename);
+  size_t Load(std::FILE * mphf);
+
+  size_t GetSize() const;
+
+  void KeepNLastRanges(float ratio = 0.1, float tolerance = 0.1);
+
+  // Adds one range of keys.  The first key becomes the range's landmark and
+  // must sort strictly after the previous landmark.  A single-key range gets
+  // a dummy key appended to `keys` (the caller's container is modified).
+  // Throws util::Exception for an empty range or out-of-order input.
+  template <typename Keys>
+  void AddRange(Keys &keys) {
+    // keys[0] is read below; an empty range would be an out-of-bounds access.
+    if(keys.size() == 0)
+      UTIL_THROW2("ERROR: cannot add an empty range of keys");
+
+    size_t current = m_landmarks.size();
+
+    if(m_landmarks.size() && m_landmarks.back().str() >= keys[0]) {
+      util::StringStream strme;
+      strme << "ERROR: Input file does not appear to be sorted with  LC_ALL=C sort\n";
+      strme << "1: " << m_landmarks.back().str() << "\n";
+      strme << "2: " << keys[0] << "\n";
+      UTIL_THROW2(strme.str());
+    }
+
+    m_landmarks.push_back(keys[0]);
+    m_size += keys.size();
+
+    if(keys.size() == 1) {
+      // add dummy key to avoid null hash
+      keys.push_back("###DUMMY_KEY###");
+    }
+
+#ifdef WITH_THREADS
+
+    // The task copies the keys, so the caller may reuse `keys` afterwards.
+    boost::shared_ptr<HashTask<Keys> >
+    ht(new HashTask<Keys>(current, *this, keys));
+    m_threadPool.Submit(ht);
+#else
+    CalcHash(current, keys);
+#endif
+  }
+
+  // Wraps `keys` in a CMPH adapter and builds the hash for range `current`.
+  template <typename Keys>
+  void CalcHash(size_t current, Keys &keys) {
+#ifdef HAVE_CMPH
+    void* source = vectorAdapter(keys);
+    CalcHash(current, source);
+#endif
+  }
+
+  void CalcHash(size_t current, void* source);
+
+#ifdef HAVE_CMPH
+  void* vectorAdapter(std::vector<std::string>& v);
+  void* vectorAdapter(StringVector<unsigned, size_t, std::allocator>& sv);
+  void* vectorAdapter(StringVector<unsigned, size_t, MmapAllocator>& sv);
+#endif
+};
+
+}
+#endif
--- a/contrib/other-builds/moses2/legacy/CompactPT/CanonicalHuffman.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/CanonicalHuffman.h
@ -0,0 +1,325 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_CanonicalHuffman_h
+#define moses_CanonicalHuffman_h
+
+#include <string>
+#include <algorithm>
+#include <boost/dynamic_bitset.hpp>
+#include <boost/unordered_map.hpp>
+
+#include "ThrowingFwrite.h"
+
+namespace Moses2
+{
+
+// Canonical Huffman coder over symbols of type Data.
+// The decoder state is three tables: symbols ordered by code length
+// (m_symbols), the first code value of each length (m_firstCodes) and the
+// index of the first symbol of each length (m_lengthIndex).  An encode map
+// is built only on request.
+template <typename Data>
+class CanonicalHuffman
+{
+private:
+  std::vector<Data> m_symbols;
+  std::vector<size_t> m_firstCodes;
+  std::vector<size_t> m_lengthIndex;
+
+  typedef boost::unordered_map<Data, boost::dynamic_bitset<> > EncodeMap;
+  EncodeMap m_encodeMap;
+
+  // Heap comparator: a and b are indices into m_vec; the element with the
+  // smaller referenced value ends up on top of the heap.
+  struct MinHeapSorter {
+    std::vector<size_t>& m_vec;
+
+    MinHeapSorter(std::vector<size_t>& vec) : m_vec(vec) { }
+
+    bool operator()(size_t a, size_t b) {
+      return m_vec[a] > m_vec[b];
+    }
+  };
+
+  // Computes a code length per symbol from (symbol, count) pairs in
+  // [begin, end) and fills m_symbols in input order.  Works in place on one
+  // array A of 2n entries: A[0..n) is a heap of indices, A[n..2n) the counts.
+  // NOTE(review): n == 0 is not handled (A[1] is written below) - confirm
+  // callers never pass an empty range.
+  template <class Iterator>
+  void CalcLengths(Iterator begin, Iterator end, std::vector<size_t>& lengths) {
+    size_t n = std::distance(begin, end);
+    std::vector<size_t> A(2 * n, 0);
+
+    m_symbols.resize(n);
+    size_t i = 0;
+    for(Iterator it = begin; it != end; it++) {
+      m_symbols[i] = it->first;
+
+      A[i] = n + i;
+      A[n + i] = it->second;
+      i++;
+    }
+
+    // A single symbol still gets a one-bit code.
+    if(n == 1) {
+      lengths.push_back(1);
+      return;
+    }
+
+    MinHeapSorter hs(A);
+    std::make_heap(A.begin(), A.begin() + n, hs);
+
+    // Repeatedly merge the two lightest nodes; the slot freed at A[h] stores
+    // the merged weight and both children record h as their parent.
+    size_t h = n;
+    size_t m1, m2;
+    while(h > 1) {
+      m1 = A[0];
+      std::pop_heap(A.begin(), A.begin() + h, hs);
+
+      h--;
+
+      m2 = A[0];
+      std::pop_heap(A.begin(), A.begin() + h, hs);
+
+      A[h] = A[m1] + A[m2];
+      A[h-1] = h;
+      A[m1] = A[m2] = h;
+
+      std::push_heap(A.begin(), A.begin() + h, hs);
+    }
+
+    // Turn parent links into depths: the root (index 1) has depth 0 and every
+    // other node is one deeper than its parent.
+    A[1] = 0;
+    for(size_t i = 2; i < 2*n; i++)
+      A[i] = A[A[i]] + 1;
+
+    // The depths of the leaves are the code lengths.
+    lengths.resize(n);
+    for(size_t i = 0; i < n; i++)
+      lengths[i] = A[i + n];
+  }
+
+  // Derives m_lengthIndex and m_firstCodes from the code lengths and reorders
+  // m_symbols so that symbols of equal length are contiguous.
+  // NOTE(review): assumes `lengths` is non-empty (m_lengthIndex[0] is written
+  // unconditionally).
+  void CalcCodes(std::vector<size_t>& lengths) {
+    // numLength[l] = number of symbols with code length l
+    std::vector<size_t> numLength;
+    for(std::vector<size_t>::iterator it = lengths.begin();
+        it != lengths.end(); it++) {
+      size_t length = *it;
+      if(numLength.size() <= length)
+        numLength.resize(length + 1, 0);
+      numLength[length]++;
+    }
+
+    // Prefix sums: position of the first symbol of each length.
+    m_lengthIndex.resize(numLength.size());
+    m_lengthIndex[0] = 0;
+    for(size_t l = 1; l < numLength.size(); l++)
+      m_lengthIndex[l] = m_lengthIndex[l - 1] + numLength[l - 1];
+
+    size_t maxLength = numLength.size() - 1;
+
+    // First code of each length, computed from the longest length downwards.
+    m_firstCodes.resize(maxLength + 1, 0);
+    for(size_t l = maxLength - 1; l > 0; l--)
+      m_firstCodes[l] = (m_firstCodes[l + 1] + numLength[l + 1]) / 2;
+
+    std::vector<Data> t_symbols;
+    t_symbols.resize(lengths.size());
+
+    // Place each symbol at the slot matching the next free code of its length.
+    std::vector<size_t> nextCode = m_firstCodes;
+    for(size_t i = 0; i < lengths.size(); i++) {
+      Data data = m_symbols[i];
+      size_t length = lengths[i];
+
+      size_t pos = m_lengthIndex[length]
+                   + (nextCode[length] - m_firstCodes[length]);
+      t_symbols[pos] = data;
+
+      nextCode[length] = nextCode[length] + 1;
+    }
+
+    m_symbols.swap(t_symbols);
+  }
+
+  // Fills m_encodeMap: symbols of length l get consecutive codes starting at
+  // m_firstCodes[l], each stored as an l-bit bitset.
+  void CreateCodeMap() {
+    for(size_t l = 1; l < m_lengthIndex.size(); l++) {
+      size_t intCode = m_firstCodes[l];
+      size_t num = ((l+1 < m_lengthIndex.size()) ? m_lengthIndex[l+1]
+                    : m_symbols.size()) - m_lengthIndex[l];
+
+      for(size_t i = 0; i < num; i++) {
+        Data data = m_symbols[m_lengthIndex[l] + i];
+        boost::dynamic_bitset<> bitCode(l, intCode);
+        m_encodeMap[data] = bitCode;
+        intCode++;
+      }
+    }
+  }
+
+  // Returns the code of `data`; throws when the symbol has no code.
+  const boost::dynamic_bitset<>& Encode(Data data) const {
+    typename EncodeMap::const_iterator it = m_encodeMap.find(data);
+    UTIL_THROW_IF2(it == m_encodeMap.end(), "Cannot find symbol in encoding map");
+    return it->second;
+  }
+
+  // Emits the code bits from the highest bitset index down to index 0.
+  template <class BitWrapper>
+  void PutCode(BitWrapper& bitWrapper, const boost::dynamic_bitset<>& code) {
+    for(int j = code.size()-1; j >= 0; j--)
+      bitWrapper.Put(code[j]);
+  }
+
+public:
+
+  // Builds the code from (symbol, count) pairs in [begin, end).  The encode
+  // map is only created when forEncoding is true.
+  template <class Iterator>
+  CanonicalHuffman(Iterator begin, Iterator end, bool forEncoding = true) {
+    std::vector<size_t> lengths;
+    CalcLengths(begin, end, lengths);
+    CalcCodes(lengths);
+
+    if(forEncoding)
+      CreateCodeMap();
+  }
+
+  // Restores the decoder tables from a file written by Save().
+  CanonicalHuffman(std::FILE* pFile, bool forEncoding = false) {
+    Load(pFile);
+
+    if(forEncoding)
+      CreateCodeMap();
+  }
+
+  // Appends the code of `data` to bitWrapper.
+  template <class BitWrapper>
+  void Put(BitWrapper& bitWrapper, Data data) {
+    PutCode(bitWrapper, Encode(data));
+  }
+
+  // Decodes one symbol: bits are read until the accumulated value is no
+  // smaller than the first code of the current length.  Returns Data() when
+  // bitWrapper has no bits left.
+  template <class BitWrapper>
+  Data Read(BitWrapper& bitWrapper) {
+    if(bitWrapper.TellFromEnd()) {
+      size_t intCode = bitWrapper.Read();
+      size_t len = 1;
+      while(intCode < m_firstCodes[len]) {
+        intCode = 2 * intCode + bitWrapper.Read();
+        len++;
+      }
+      return m_symbols[m_lengthIndex[len] + (intCode - m_firstCodes[len])];
+    }
+    return Data();
+  }
+
+  // Reads the three tables, each as a size_t count followed by the raw
+  // elements.  Returns the number of bytes the stream position advanced.
+  // NOTE(review): fread results are summed into `read` but never checked, and
+  // &vec[0] is taken even when the stored size is 0.
+  size_t Load(std::FILE* pFile) {
+    size_t start = std::ftell(pFile);
+    size_t read = 0;
+
+    size_t size;
+    read += std::fread(&size, sizeof(size_t), 1, pFile);
+    m_symbols.resize(size);
+    read += std::fread(&m_symbols[0], sizeof(Data), size, pFile);
+
+    read += std::fread(&size, sizeof(size_t), 1, pFile);
+    m_firstCodes.resize(size);
+    read += std::fread(&m_firstCodes[0], sizeof(size_t), size, pFile);
+
+    read += std::fread(&size, sizeof(size_t), 1, pFile);
+    m_lengthIndex.resize(size);
+    read += std::fread(&m_lengthIndex[0], sizeof(size_t), size, pFile);
+
+    return std::ftell(pFile) - start;
+  }
+
+  // Writes the three tables in the layout Load() expects.  Returns the
+  // number of bytes the stream position advanced.
+  size_t Save(std::FILE* pFile) {
+    size_t start = std::ftell(pFile);
+
+    size_t size = m_symbols.size();
+    ThrowingFwrite(&size, sizeof(size_t), 1, pFile);
+    ThrowingFwrite(&m_symbols[0], sizeof(Data), size, pFile);
+
+    size = m_firstCodes.size();
+    ThrowingFwrite(&size, sizeof(size_t), 1, pFile);
+    ThrowingFwrite(&m_firstCodes[0], sizeof(size_t), size, pFile);
+
+    size = m_lengthIndex.size();
+    ThrowingFwrite(&size, sizeof(size_t), 1, pFile);
+    ThrowingFwrite(&m_lengthIndex[0], sizeof(size_t), size, pFile);
+
+    return std::ftell(pFile) - start;
+  }
+};
+
+// Bit-level reader/writer on top of a container of integral values.
+// Bits are stored least-significant first inside each element; Put() appends
+// to the container, Read() consumes from the current position.  The wrapper
+// holds a reference to the container and does not own it.
+template <class Container = std::string>
+class BitWrapper
+{
+private:
+  Container& m_data;
+
+  typename Container::iterator m_iterator;
+  typename Container::value_type m_currentValue;
+
+  size_t m_valueBits;
+  typename Container::value_type m_mask;
+  size_t m_bitPos;
+
+public:
+
+  BitWrapper(Container &data)
+    : m_data(data), m_iterator(m_data.begin()), m_currentValue(0),
+      m_valueBits(sizeof(typename Container::value_type) * 8),
+      m_mask(1), m_bitPos(0) { }
+
+  // Returns the bit at the current position and advances by one.  Past the
+  // end of the container the last loaded element keeps being used.
+  bool Read() {
+    if(m_bitPos % m_valueBits == 0) {
+      // element boundary: fetch the next element if there is one
+      if(m_iterator != m_data.end())
+        m_currentValue = *m_iterator++;
+    } else
+      m_currentValue = m_currentValue >> 1;
+
+    m_bitPos++;
+    return (m_currentValue & m_mask);
+  }
+
+  // Appends one bit, growing the container by one element whenever a new
+  // element is started.
+  void Put(bool bit) {
+    if(m_bitPos % m_valueBits == 0)
+      m_data.push_back(0);
+
+    if(bit)
+      m_data[m_data.size()-1] |= m_mask << (m_bitPos % m_valueBits);
+
+    m_bitPos++;
+  }
+
+  // Current position in bits from the start.
+  size_t Tell() {
+    return m_bitPos;
+  }
+
+  // Number of bits between the current position and the end (0 if beyond).
+  size_t TellFromEnd() {
+    if(m_data.size() * m_valueBits < m_bitPos)
+      return 0;
+    return m_data.size() * m_valueBits - m_bitPos;
+  }
+
+  // Positions the reader so that the next Read() returns bit `bitPos`.
+  void Seek(size_t bitPos) {
+    // Position 0 has no "previous bit": bitPos-1 would wrap around and the
+    // iterator would be moved far outside the container.
+    if(bitPos == 0) {
+      Reset();
+      return;
+    }
+
+    m_bitPos = bitPos;
+    // Load the element holding the previous bit, shifted so that the next
+    // Read() either shifts once more or fetches the following element.
+    m_iterator = m_data.begin()
+                 + static_cast<typename Container::difference_type>(
+                   (m_bitPos-1)/m_valueBits);
+    m_currentValue = (*m_iterator) >> ((m_bitPos-1) % m_valueBits);
+    m_iterator++;
+  }
+
+  // Seeks to `bitPosFromEnd` bits before the end of the container.
+  void SeekFromEnd(size_t bitPosFromEnd) {
+    size_t bitPos = m_data.size() * m_valueBits - bitPosFromEnd;
+    Seek(bitPos);
+  }
+
+  // Rewinds to the first bit.
+  void Reset() {
+    m_iterator = m_data.begin();
+    m_currentValue = 0;
+    m_bitPos = 0;
+  }
+
+  Container& GetContainer() {
+    return m_data;
+  }
+};
+
+}
+
+#endif
--- a/contrib/other-builds/moses2/legacy/CompactPT/CmphStringVectorAdapter.cpp
+++ b/contrib/other-builds/moses2/legacy/CompactPT/CmphStringVectorAdapter.cpp
@ -0,0 +1,94 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifdef HAVE_CMPH
+
+#include "CmphStringVectorAdapter.h"
+
+namespace Moses2
+{
+
+// CMPH dispose callback: frees a key buffer allocated with new[] by the
+// matching read callback.  `data` and `keylen` are unused.
+void CmphStringVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen)
+{
+  delete[] key;
+}
+
+// CMPH rewind callback: restarts iteration at the first key.
+void CmphStringVectorAdapterRewind(void *data)
+{
+  ((cmph_vector_t *)data)->position = 0;
+}
+
+//************************************************************************//
+
+// Allocates a CMPH key source whose cursor state refers to `v` (not copied).
+// The read/dispose/rewind callbacks are left for the caller to set.
+cmph_io_adapter_t *CmphVectorAdapterNew(std::vector<std::string>& v)
+{
+  cmph_io_adapter_t * adapter = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
+  cmph_vector_t * cursor = (cmph_vector_t *)malloc(sizeof(cmph_vector_t));
+  assert(adapter);
+  assert(cursor);
+
+  // cursor: which container, and how far into it
+  cursor->position = 0;
+  cursor->vector = (void *)&v;
+
+  adapter->nkeys = v.size();
+  adapter->data = (void *)cursor;
+
+  return adapter;
+}
+
+// CMPH read callback: hands out a new[]-allocated, NUL-terminated copy of the
+// key at the cursor, stores its length in *keylen and advances the cursor.
+// Returns the key length.  The buffer is released by the dispose callback.
+int CmphVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen)
+{
+  cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
+  std::vector<std::string>* v = (std::vector<std::string>*)cmph_vector->vector;
+
+  // Bind by reference: no temporary copy of the key is needed.
+  const std::string& current = (*v)[cmph_vector->position];
+  const size_t size = current.size();
+
+  *keylen = size;
+  *key = new char[size + 1];
+  // Copy exactly `size` bytes plus the terminator.  strcpy would stop at an
+  // embedded NUL and leave the rest of the reported key uninitialised.
+  std::memcpy(*key, current.c_str(), size + 1);
+
+  cmph_vector->position = cmph_vector->position + 1;
+  return (int)(*keylen);
+}
+
+// CMPH dispose callback: frees a key buffer allocated with new[] by
+// CmphVectorAdapterRead.  `data` and `keylen` are unused.
+void CmphVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen)
+{
+  delete[] key;
+}
+
+// CMPH rewind callback: restarts iteration at the first key.
+void CmphVectorAdapterRewind(void *data)
+{
+  ((cmph_vector_t *)data)->position = 0;
+}
+
+// Creates a fully wired CMPH key source over `v`: allocates the adapter and
+// installs the read, dispose and rewind callbacks for std::vector<std::string>.
+cmph_io_adapter_t* CmphVectorAdapter(std::vector<std::string>& v)
+{
+  cmph_io_adapter_t * adapter = CmphVectorAdapterNew(v);
+
+  adapter->rewind = CmphVectorAdapterRewind;
+  adapter->dispose = CmphVectorAdapterDispose;
+  adapter->read = CmphVectorAdapterRead;
+
+  return adapter;
+}
+
+}
+
+#endif
--- a/contrib/other-builds/moses2/legacy/CompactPT/CmphStringVectorAdapter.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/CmphStringVectorAdapter.h
@ -0,0 +1,105 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_CmphStringVectorAdapterNew_h
+#define moses_CmphStringVectorAdapterNew_h
+
+#include <cassert>
+#include <cstring>
+
+#ifdef HAVE_CMPH
+#include "cmph.h"
+
+#include "StringVector.h"
+
+namespace Moses2
+{
+
+// Cursor state shared by the CMPH callbacks below.
+typedef struct {
+  void *vector;           // type-erased pointer to the key container
+  cmph_uint32 position;   // index of the next key handed out by read
+}
+cmph_vector_t;
+
+
+// Allocates a CMPH key source whose cursor state refers to `sv` (not copied).
+// The read/dispose/rewind callbacks are left for the caller to set.
+template <typename ValueT, typename PosT, template <typename> class Allocator>
+cmph_io_adapter_t *CmphStringVectorAdapterNew(StringVector<ValueT, PosT, Allocator>& sv)
+{
+  cmph_io_adapter_t * adapter = (cmph_io_adapter_t *)malloc(sizeof(cmph_io_adapter_t));
+  cmph_vector_t * cursor = (cmph_vector_t *)malloc(sizeof(cmph_vector_t));
+  assert(adapter);
+  assert(cursor);
+
+  // cursor: which container, and how far into it
+  cursor->position = 0;
+  cursor->vector = (void *)&sv;
+
+  adapter->nkeys = sv.size();
+  adapter->data = (void *)cursor;
+
+  return adapter;
+}
+
+// CMPH read callback for StringVector: hands out a new[]-allocated,
+// NUL-terminated copy of the key at the cursor, stores its length in *keylen
+// and advances the cursor.  Returns the key length.
+// NOTE(review): strcpy stops at the first NUL byte, so a key with an embedded
+// NUL would be copied only partially although keylen reports the full size.
+template <typename ValueT, typename PosT, template <typename> class Allocator>
+int CmphStringVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen)
+{
+  cmph_vector_t *cmph_vector = (cmph_vector_t *)data;
+  StringVector<ValueT, PosT, Allocator>* sv = (StringVector<ValueT, PosT, Allocator>*)cmph_vector->vector;
+  size_t size;
+  *keylen = (*sv)[cmph_vector->position].size();
+  size = *keylen;
+  // +1 for the terminating NUL written by strcpy
+  *key = new char[size + 1];
+  std::string temp = (*sv)[cmph_vector->position];
+  std::strcpy(*key, temp.c_str());
+  cmph_vector->position = cmph_vector->position + 1;
+  return (int)(*keylen);
+}
+
+// Dispose callback: releases a key buffer produced by the read callback.
+void CmphStringVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen);
+
+// Rewind callback: resets the cursor to the first key.
+void CmphStringVectorAdapterRewind(void *data);
+
+// Creates a fully wired CMPH key source over `sv`: allocates the adapter and
+// installs the read, dispose and rewind callbacks for this StringVector type.
+template <typename ValueT, typename PosT, template <typename> class Allocator>
+cmph_io_adapter_t* CmphStringVectorAdapter(StringVector<ValueT, PosT, Allocator>& sv)
+{
+  cmph_io_adapter_t * adapter = CmphStringVectorAdapterNew(sv);
+
+  adapter->rewind = CmphStringVectorAdapterRewind;
+  adapter->dispose = CmphStringVectorAdapterDispose;
+  adapter->read = CmphStringVectorAdapterRead<ValueT, PosT, Allocator>;
+
+  return adapter;
+}
+
+//************************************************************************//
+
+// Counterparts of the functions above for std::vector<std::string>.
+
+// Allocates the adapter and its cursor; callbacks are not set yet.
+cmph_io_adapter_t *CmphVectorAdapterNew(std::vector<std::string>& v);
+
+// Read callback: copies the key at the cursor and advances.
+int CmphVectorAdapterRead(void *data, char **key, cmph_uint32 *keylen);
+
+// Dispose callback: releases a key buffer produced by the read callback.
+void CmphVectorAdapterDispose(void *data, char *key, cmph_uint32 keylen);
+
+// Rewind callback: resets the cursor to the first key.
+void CmphVectorAdapterRewind(void *data);
+
+// Allocates the adapter and installs all three callbacks.
+cmph_io_adapter_t* CmphVectorAdapter(std::vector<std::string>& v);
+
+}
+
+#endif
+
+#endif
--- a/contrib/other-builds/moses2/legacy/CompactPT/LexicalReorderingTableCompact.cpp
+++ b/contrib/other-builds/moses2/legacy/CompactPT/LexicalReorderingTableCompact.cpp
@ -0,0 +1,195 @@
+// -*- c++ -*-
+// vim:tabstop=2
+// $Id$
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "LexicalReorderingTableCompact.h"
+#include "../../Phrase.h"
+#include "../Util2.h"
+
+namespace Moses2
+{
+
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+
+// Value copied into m_inMemory by both constructors; false selects the
+// memory-mapped score store (m_scoresMapped) in Load() and GetScore().
+bool LexicalReorderingTableCompact::s_inMemoryByDefault = false;
+
+// Builds a table with the default settings (6 score components, one Huffman
+// tree per component, a single placeholder tree slot) and immediately loads
+// the compact model stored at filePath; Load() overwrites those defaults with
+// the values found in the file.
+LexicalReorderingTableCompact::LexicalReorderingTableCompact(
+  const std::string& filePath,
+  const std::vector<FactorType>& f_factors,
+  const std::vector<FactorType>& e_factors,
+  const std::vector<FactorType>& c_factors)
+  : LexicalReorderingTable(f_factors, e_factors, c_factors),
+    m_inMemory(s_inMemoryByDefault),
+    m_numScoreComponent(6),
+    m_multipleScoreTrees(true),
+    m_hash(10, 16),
+    m_scoreTrees(1)
+{
+  Load(filePath);
+}
+
+// Builds an empty table with the default settings (6 score components, one
+// Huffman tree per component, a single placeholder tree slot). Nothing is
+// read from disk; call Load() to attach a model.
+LexicalReorderingTableCompact::LexicalReorderingTableCompact(
+  const std::vector<FactorType>& f_factors,
+  const std::vector<FactorType>& e_factors,
+  const std::vector<FactorType>& c_factors)
+  : LexicalReorderingTable(f_factors, e_factors, c_factors),
+    m_inMemory(s_inMemoryByDefault),
+    m_numScoreComponent(6),
+    m_multipleScoreTrees(true),
+    m_hash(10, 16),
+    m_scoreTrees(1)
+{
+}
+
+// Releases every Huffman score tree owned by this table.
+LexicalReorderingTableCompact::~LexicalReorderingTableCompact()
+{
+  typedef std::vector<ScoreTree*>::iterator TreeIterator;
+  for(TreeIterator tree = m_scoreTrees.begin(); tree != m_scoreTrees.end(); ++tree) {
+    delete *tree;
+  }
+}
+
+// Looks up the reordering scores stored for the phrase triple (f, e, c).
+// Returns m_numScoreComponent decoded scores when the key is present in the
+// hash index, and an empty vector otherwise.
+std::vector<float>
+LexicalReorderingTableCompact::
+GetScore(const Phrase& f, const Phrase& e, const Phrase& c)
+{
+  std::string key;
+
+  if(c.GetSize() == 0) {
+    key = MakeKey(f, e, c);
+  } else {
+    // Lookup with a non-empty context is not implemented yet: the loop body
+    // is still a TODO, so `key` stays empty on this path.
+    for(size_t i = 0; i <= c.GetSize(); ++i) {
+      // TODO
+      //Phrase sub_c(c.GetSubString(Range(i,c.GetSize()-1)));
+      //key = MakeKey(f,e,sub_c);
+    }
+  }
+
+  // The hash reports a miss by returning its own size.
+  const size_t index = m_hash[key];
+  if(index == m_hash.GetSize())
+    return Scores();
+
+  // Fetch the compressed score record from whichever store was loaded.
+  std::string scoresString;
+  if(m_inMemory) {
+    scoresString = m_scoresMemory[index].str();
+  } else {
+    scoresString = m_scoresMapped[index].str();
+  }
+
+  // Decode one score per component, using either a per-component tree or the
+  // single shared tree.
+  Scores scores;
+  BitWrapper<> bitStream(scoresString);
+  for(size_t i = 0; i < m_numScoreComponent; i++) {
+    const size_t treeIndex = m_multipleScoreTrees ? i : 0;
+    scores.push_back(m_scoreTrees[treeIndex]->Read(bitStream));
+  }
+
+  return scores;
+}
+
+// Builds the lookup key for a phrase triple: each phrase is rendered with the
+// factors configured for its side, trimmed, and handed to the string overload.
+std::string
+LexicalReorderingTableCompact::
+MakeKey(const Phrase& f,
+        const Phrase& e,
+        const Phrase& c) const
+{
+  const std::string fText = Trim(f.GetString(m_FactorsF));
+  const std::string eText = Trim(e.GetString(m_FactorsE));
+  const std::string cText = Trim(c.GetString(m_FactorsC));
+
+  return MakeKey(fText, eText, cText);
+}
+
+// Joins the already-rendered phrase strings into a lookup key of the form
+// "f ||| e ||| c ||| ". The e and c parts are only emitted when the table has
+// factors configured for that side; the key always ends with a separator.
+std::string
+LexicalReorderingTableCompact::
+MakeKey(const std::string& f,
+        const std::string& e,
+        const std::string& c) const
+{
+  const char* const separator = " ||| ";
+
+  std::string key(f);
+
+  if(!m_FactorsE.empty()) {
+    if(!key.empty())
+      key.append(separator);
+    key.append(e);
+  }
+
+  if(!m_FactorsC.empty()) {
+    if(!key.empty())
+      key.append(separator);
+    key.append(c);
+  }
+
+  key.append(separator);
+  return key;
+}
+
+// Factory: returns a newly allocated compact table when a ".minlexr" file can
+// be found for filePath, and 0 otherwise (always 0 when built without
+// HAVE_CMPH). filePath may be given with or without the ".minlexr" suffix.
+// The caller owns the returned object.
+LexicalReorderingTable*
+LexicalReorderingTableCompact::
+CheckAndLoad
+(const std::string& filePath,
+ const std::vector<FactorType>& f_factors,
+ const std::vector<FactorType>& e_factors,
+ const std::vector<FactorType>& c_factors)
+{
+#ifdef HAVE_CMPH
+  const std::string minlexr = ".minlexr";
+  // file name is specified without suffix
+  if(FileExists(filePath + minlexr)) {
+    //there exists a compact binary version use that
+    std::cerr << "Using compact lexical reordering table" << std::endl;
+    return new LexicalReorderingTableCompact(filePath + minlexr, f_factors, e_factors, c_factors);
+  }
+  // file name is specified with suffix
+  // Check the length first: for a path shorter than the suffix the offset
+  // computation would wrap around and the string lookup would throw
+  // std::out_of_range instead of simply reporting "no compact table".
+  const bool hasSuffix =
+    filePath.size() >= minlexr.size()
+    && filePath.compare(filePath.size() - minlexr.size(), minlexr.size(), minlexr) == 0;
+  if(hasSuffix && FileExists(filePath)) {
+    //there exists a compact binary version use that
+    std::cerr << "Using compact lexical reordering table" << std::endl;
+    return new LexicalReorderingTableCompact(filePath, f_factors, e_factors, c_factors);
+  }
+#endif
+  return 0;
+}
+
+// Reads a compact reordering model from filePath: the hash index, the score
+// header (number of score components and whether each component has its own
+// Huffman tree), the Huffman tree(s), and finally the compressed score records
+// (read into memory or memory-mapped, depending on m_inMemory).
+// Throws via UTIL_THROW_IF2 when the file cannot be opened or the score
+// header cannot be read completely.
+void
+LexicalReorderingTableCompact::
+Load(std::string filePath)
+{
+  // The model is a binary image, so open it in binary mode; text mode would
+  // translate bytes on platforms that distinguish the two.
+  std::FILE* pFile = std::fopen(filePath.c_str(), "rb");
+  UTIL_THROW_IF2(pFile == NULL, "File " << filePath << " could not be opened");
+
+  //if(m_inMemory)
+  m_hash.Load(pFile);
+  //else
+  //m_hash.LoadIndex(pFile);
+
+  size_t read = 0;
+  read += std::fread(&m_numScoreComponent, sizeof(m_numScoreComponent), 1, pFile);
+  read += std::fread(&m_multipleScoreTrees,
+                     sizeof(m_multipleScoreTrees), 1, pFile);
+  // Both header fields drive the allocations below; refuse to continue with
+  // stale defaults if the file ended early.
+  UTIL_THROW_IF2(read != 2, "File " << filePath
+                 << " is truncated: could not read the score header");
+
+  if(m_multipleScoreTrees) {
+    m_scoreTrees.resize(m_numScoreComponent);
+    for(size_t i = 0; i < m_numScoreComponent; i++)
+      m_scoreTrees[i] = new CanonicalHuffman<float>(pFile);
+  } else {
+    m_scoreTrees.resize(1);
+    m_scoreTrees[0] = new CanonicalHuffman<float>(pFile);
+  }
+
+  // NOTE(review): pFile is never closed here. The mapped score store may
+  // depend on the handle staying open -- confirm before adding an fclose.
+  if(m_inMemory)
+    m_scoresMemory.load(pFile, false);
+  else
+    m_scoresMapped.load(pFile, true);
+}
+
+
+}
--- a/contrib/other-builds/moses2/legacy/CompactPT/LexicalReorderingTableCompact.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/LexicalReorderingTableCompact.h
@ -0,0 +1,146 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_LexicalReorderingTableCompact_h
+#define moses_LexicalReorderingTableCompact_h
+
+#include "BlockHashIndex.h"
+#include "CanonicalHuffman.h"
+#include "StringVector.h"
+#include "../../TypeDef.h"
+//#include "../../Phrase.h"
+
+namespace Moses2
+{
+// Forward declaration only: this header uses Phrase by reference.
+class Phrase;
+
+// One float per score component, as returned by GetScore().
+typedef std::vector<float> Scores;
+
+//! additional types
+/**
+ * Abstract interface of a lexicalised reordering table. It stores the factor
+ * lists used for the f, e and c sides of a lookup and leaves the actual score
+ * retrieval to concrete subclasses.
+ */
+class LexicalReorderingTable
+{
+public:
+  LexicalReorderingTable(const FactorList& f_factors,
+                         const FactorList& e_factors,
+                         const FactorList& c_factors)
+    : m_FactorsF(f_factors), m_FactorsE(e_factors), m_FactorsC(c_factors)
+  {}
+
+  virtual ~LexicalReorderingTable() {}
+
+  //! Scores for the phrase triple (f, e, c); implemented by subclasses.
+  virtual Scores GetScore(const Phrase& f, const Phrase& e, const Phrase& c) = 0;
+
+  //! Hook with an empty default body (override for on-demand loading).
+  virtual void InitializeForInput() {}
+
+  //! Hook with an empty default body.
+  virtual void InitializeForInputPhrase(const Phrase&) {}
+
+  //! Accessors for the factor lists given at construction.
+  const FactorList& GetFFactorMask() const { return m_FactorsF; }
+  const FactorList& GetEFactorMask() const { return m_FactorsE; }
+  const FactorList& GetCFactorMask() const { return m_FactorsC; }
+
+  //! Debug dump; the default only prints a placeholder message.
+  virtual void DbgDump(std::ostream* out) const {
+    *out << "Overwrite in subclass...\n";
+  }
+  // why is this not a pure virtual function? - UG
+
+protected:
+  FactorList m_FactorsF;
+  FactorList m_FactorsE;
+  FactorList m_FactorsC;
+};
+
+
+//////////////////////////////////////////////////////////////////////////////////////////////
+// Lexicalised reordering table backed by the compact on-disk format: a hash
+// index over lookup keys plus Huffman-compressed score records.
+class LexicalReorderingTableCompact:
+  public LexicalReorderingTable
+{
+private:
+  // Default copied into m_inMemory by the constructors.
+  static bool s_inMemoryByDefault;
+  // true: score records live in m_scoresMemory; false: in m_scoresMapped.
+  bool m_inMemory;
+
+  // Number of scores stored per entry (read from the model file by Load()).
+  size_t m_numScoreComponent;
+  // true: one Huffman tree per score component; false: one shared tree.
+  bool m_multipleScoreTrees;
+
+  // Maps a lookup key to the index of its score record.
+  BlockHashIndex m_hash;
+
+  typedef CanonicalHuffman<float> ScoreTree;
+  // Owned decoders; deleted in the destructor.
+  std::vector<ScoreTree*> m_scoreTrees;
+
+  // Compressed score records; only the one selected by m_inMemory is loaded.
+  StringVector<unsigned char, unsigned long, MmapAllocator>  m_scoresMapped;
+  StringVector<unsigned char, unsigned long, std::allocator> m_scoresMemory;
+
+  // Build the textual lookup key for a phrase triple.
+  std::string MakeKey(const Phrase& f, const Phrase& e, const Phrase& c) const;
+  std::string MakeKey(const std::string& f, const std::string& e, const std::string& c) const;
+
+public:
+  // Constructs the table and loads the model at filePath.
+  LexicalReorderingTableCompact(const std::string& filePath,
+                                const std::vector<FactorType>& f_factors,
+                                const std::vector<FactorType>& e_factors,
+                                const std::vector<FactorType>& c_factors);
+
+  // Constructs an empty table; nothing is loaded.
+  LexicalReorderingTableCompact(const std::vector<FactorType>& f_factors,
+                                const std::vector<FactorType>& e_factors,
+                                const std::vector<FactorType>& c_factors);
+
+  virtual
+  ~LexicalReorderingTableCompact();
+
+  // Returns the decoded scores for (f, e, c), or an empty vector on a miss.
+  virtual
+  std::vector<float>
+  GetScore(const Phrase& f, const Phrase& e, const Phrase& c);
+
+  // Returns a new compact table if a ".minlexr" file exists for filePath,
+  // 0 otherwise.
+  static
+  LexicalReorderingTable*
+  CheckAndLoad(const std::string& filePath,
+               const std::vector<FactorType>& f_factors,
+               const std::vector<FactorType>& e_factors,
+               const std::vector<FactorType>& c_factors);
+
+  // Reads the model stored at filePath into this object.
+  void
+  Load(std::string filePath);
+
+
+};
+
+}
+
+#endif
--- a/contrib/other-builds/moses2/legacy/CompactPT/ListCoders.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/ListCoders.h
@ -0,0 +1,387 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_ListCoders_h
+#define moses_ListCoders_h
+
+#include <cmath>
+#include <cassert>
+
+namespace Moses2
+{
+
+/**
+ * Variable-length integer coder. A value is split into chunks of
+ * (bits of T) - 1 payload bits, least significant chunk first; the top bit of
+ * every chunk except the last is set as a continuation flag.
+ *
+ * @tparam T unsigned storage type of one encoded chunk
+ */
+template <typename T = unsigned int>
+class VarIntType
+{
+private:
+  // Appends the encoding of `input` at `output` and advances `output`.
+  // The iterator is taken by reference so that callers writing through a
+  // plain pointer/iterator (not just an insert iterator) see the advance.
+  template <typename IntType, typename OutIt>
+  static void EncodeSymbol(IntType input, OutIt &output) {
+    if(input == 0) {
+      *output = 0;
+      output++;
+      return;
+    }
+
+    // Shift a T-typed one: a plain int literal shifted into bit 31 would
+    // overflow a signed int when T is 32 bits wide.
+    const T msb = static_cast<T>(static_cast<T>(1) << (sizeof(T)*8-1));
+    IntType mask  = ~msb;
+    IntType shift = (sizeof(T)*8-1);
+
+    while(input) {
+      T res = input & mask;
+      input >>= shift;
+      if(input)
+        res |= msb;  // more chunks follow
+      *output = res;
+      output++;
+    }
+  }
+
+  // Decodes one value starting at `it` into `output` and leaves `it` just
+  // past the value. The input must contain a terminating chunk (one without
+  // the continuation bit) before `end`; this is only checked by an assert.
+  template <typename InIt, typename IntType>
+  static void DecodeSymbol(InIt &it, InIt end, IntType &output) {
+    const T msb = static_cast<T>(static_cast<T>(1) << (sizeof(T)*8-1));
+    IntType shift = (sizeof(T)*8-1);
+
+    output = 0;
+    size_t i = 0;
+    while(it != end && *it & msb) {
+      IntType temp = *it & ~msb;
+      temp <<= shift*i;
+      output |= temp;
+      it++;
+      i++;
+    }
+    assert(it != end);
+
+    IntType temp = *it;
+    temp <<= shift*i;
+    output |= temp;
+    it++;
+  }
+
+public:
+
+  // Encodes every value in [it, end) and writes the chunks to outIt.
+  template <typename InIt, typename OutIt>
+  static void Encode(InIt it, InIt end, OutIt outIt) {
+    while(it != end) {
+      EncodeSymbol(*it, outIt);
+      it++;
+    }
+  }
+
+  // Decodes values until `it` reaches `end`, writing each one to outIt.
+  // `it` is advanced in place.
+  template <typename InIt, typename OutIt>
+  static void Decode(InIt &it, InIt end, OutIt outIt) {
+    while(it != end) {
+      size_t output;
+      DecodeSymbol(it, end, output);
+      *outIt = output;
+      outIt++;
+    }
+  }
+
+  // Decodes at most `num` values (fewer if `end` is reached first) and
+  // returns their sum. `it` is advanced past the values consumed.
+  template <typename InIt>
+  static size_t DecodeAndSum(InIt &it, InIt end, size_t num) {
+    size_t sum = 0;
+    size_t curr = 0;
+
+    while(it != end && curr < num) {
+      size_t output;
+      DecodeSymbol(it, end, output);
+      sum += output;
+      curr++;
+    }
+
+    return sum;
+  }
+
+};
+
+// Convenience names for the chunk widths in use: 8-bit chunks (7 payload
+// bits each), 16-bit chunks and 32-bit chunks.
+typedef VarIntType<unsigned char> VarByte;
+
+typedef VarByte VarInt8;
+typedef VarIntType<unsigned short> VarInt16;
+typedef VarIntType<unsigned int>   VarInt32;
+
+/**
+ * Simple9 word-aligned integer coder. Each 32-bit word carries a selector in
+ * its top 4 bits and, in the low 28 bits, 1, 2, 3, 4, 5, 7, 9, 14 or 28
+ * equally wide values (selectors 1-9). Values must not exceed 2^28-1.
+ */
+class Simple9
+{
+private:
+  typedef unsigned int uint;
+
+  // Looks up the decoding layout of a selector: the width of one value, the
+  // shift of the first (most significant) value and the value mask.
+  // Selectors outside 1-9 yield an all-zero layout, so such a word decodes to
+  // a single 0.
+  static inline void SelectorLayout(uint type, uint &bitlen, uint &shift, uint &mask) {
+    static const uint kBitLen[10] = { 0, 28, 14,  9,  7,  5,  4,  3,  2,  1 };
+    static const uint kShift[10]  = { 0,  0, 14, 18, 21, 20, 24, 24, 26, 27 };
+    static const uint kMask[10]   = { 0, 268435455, 16383, 511, 127, 31, 15, 7, 3, 1 };
+
+    const uint slot = (type <= 9) ? type : 0;
+    bitlen = kBitLen[slot];
+    shift = kShift[slot];
+    mask = kMask[slot];
+  }
+
+  // Packs the values in [it, end) into one word. The number of values selects
+  // the layout and must be one of 1, 2, 3, 4, 5, 7, 9, 14, 28.
+  template <typename InIt>
+  inline static void EncodeSymbol(uint &output, InIt it, InIt end) {
+    uint length = end - it;
+
+    uint type = 0;
+    uint bitlength = 0;
+
+    switch(length) {
+    case 1:
+      type = 1;
+      bitlength = 28;
+      break;
+    case 2:
+      type = 2;
+      bitlength = 14;
+      break;
+    case 3:
+      type = 3;
+      bitlength = 9;
+      break;
+    case 4:
+      type = 4;
+      bitlength = 7;
+      break;
+    case 5:
+      type = 5;
+      bitlength = 5;
+      break;
+    case 7:
+      type = 6;
+      bitlength = 4;
+      break;
+    case 9:
+      type = 7;
+      bitlength = 3;
+      break;
+    case 14:
+      type = 8;
+      bitlength = 2;
+      break;
+    case 28:
+      type = 9;
+      bitlength = 1;
+      break;
+    }
+
+    output = 0;
+    output |= (type << 28);
+
+    uint i = 0;
+    while(it != end) {
+      UTIL_THROW_IF2(*it > 268435455, "You are trying to encode " << *it
+                     << " with Simple9. Cannot encode numbers larger than 268435455 (2^28-1)");
+
+      // First value goes into the most significant slot.
+      uint l = bitlength * (length-i-1);
+      output |= *it << l;
+      it++;
+      i++;
+    }
+  }
+
+  // Unpacks every value of one word to outIt. The iterator is taken by
+  // reference so that successive words do not overwrite each other when the
+  // caller writes through a plain pointer/iterator.
+  template <typename OutIt>
+  static inline void DecodeSymbol(uint input, OutIt &outIt) {
+    uint bitlen = 0;
+    uint shift = 0;
+    uint mask = 0;
+    SelectorLayout(input >> 28, bitlen, shift, mask);
+
+    while(shift > 0) {
+      *outIt = (input >> shift) & mask;
+      shift -= bitlen;
+      outIt++;
+    }
+    *outIt = input & mask;
+    outIt++;
+  }
+
+  // Sums the values of one word, stopping as soon as `curr` (the running
+  // count of values consumed so far) reaches `num`.
+  static inline size_t DecodeAndSumSymbol(uint input, size_t num, size_t &curr) {
+    uint bitlen = 0;
+    uint shift = 0;
+    uint mask = 0;
+    SelectorLayout(input >> 28, bitlen, shift, mask);
+
+    size_t sum = 0;
+    while(shift > 0) {
+      sum += (input >> shift) & mask;
+      shift -= bitlen;
+      if(++curr == num)
+        return sum;
+    }
+    sum += input & mask;
+    curr++;
+    return sum;
+  }
+
+public:
+  // Encodes [it, end) greedily: for each word it takes the longest prefix of
+  // the remaining values that fits one of the nine layouts.
+  template <typename InIt, typename OutIt>
+  static void Encode(InIt it, InIt end, OutIt outIt) {
+    uint parts[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 };
+
+    uint buffer[28];
+    for(InIt i = it; i < end; i++) {
+      uint lastbit = 1;
+      uint lastpos = 0;
+      uint lastyes = 0;
+      uint j = 0;
+
+      double log2 = log(2);
+      while(j < 9 && lastpos < 28 && (i+lastpos) < end) {
+        if(lastpos >= parts[j])
+          j++;
+
+        buffer[lastpos] = *(i + lastpos);
+
+        uint reqbit = ceil(log(buffer[lastpos]+1)/log2);
+        assert(reqbit <= 28);
+        // A value of 0 yields reqbit == 0, and 28/reqbit below would be an
+        // integer division by zero. Every slot is at least one bit wide, so
+        // treat it as needing one bit.
+        if(reqbit == 0)
+          reqbit = 1;
+
+        uint bit = 28/floor(28/reqbit);
+        if(lastbit < bit)
+          lastbit = bit;
+
+        if(parts[j] > 28/lastbit)
+          break;
+        else if(lastpos == parts[j]-1)
+          lastyes = lastpos;
+
+        lastpos++;
+      }
+      i += lastyes;
+
+      uint length = lastyes + 1;
+      uint output;
+      EncodeSymbol(output, buffer, buffer + length);
+
+      *outIt = output;
+      outIt++;
+    }
+  }
+
+  // Decodes every word in [it, end), writing all values to outIt.
+  // `it` is advanced in place.
+  template <typename InIt, typename OutIt>
+  static void Decode(InIt &it, InIt end, OutIt outIt) {
+    while(it != end) {
+      DecodeSymbol(*it, outIt);
+      it++;
+    }
+  }
+
+  // Returns the sum of the first `num` values stored from `it` onwards and
+  // advances `it` past every word it touched. Asserts that `num` values
+  // were available.
+  template <typename InIt>
+  static size_t DecodeAndSum(InIt &it, InIt end, size_t num) {
+    size_t sum = 0;
+    size_t curr = 0;
+    while(it != end && curr < num) {
+      sum += DecodeAndSumSymbol(*it, num, curr);
+      it++;
+    }
+    assert(curr == num);
+    return sum;
+  }
+};
+
+}
+
+#endif
--- a/contrib/other-builds/moses2/legacy/CompactPT/MmapAllocator.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/MmapAllocator.h
@ -0,0 +1,202 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_MmapAllocator_h
+#define moses_MmapAllocator_h
+
+#include <limits>
+#include <iostream>
+#include <cstdio>
+#include <unistd.h>
+
+#if defined(_WIN32) || defined(_WIN64)
+#include <windows.h>
+#include <io.h>
+#else
+#include <sys/mman.h>
+#endif
+
+#include "util/mmap.hh"
+
+namespace Moses2
+{
+// Allocator that hands out memory obtained by mapping a file.
+// Two modes, selected by the constructor:
+//  - not fixed (m_fixed == false): allocate() resizes the backing file to the
+//    requested size and maps it from offset 0;
+//  - fixed (m_fixed == true): allocate() maps an existing region of the file
+//    starting at m_data_offset; construct()/destroy() are no-ops.
+// Copies share the file, the mapping pointer and a heap-allocated counter.
+template <class T>
+class MmapAllocator
+{
+protected:
+  std::FILE* m_file_ptr;   // backing file
+  size_t m_file_desc;      // descriptor obtained via fileno(m_file_ptr)
+
+  size_t m_page_size;      // util::SizePage(), used to align fixed mappings
+  size_t m_map_size;       // byte size of the last allocate() request
+
+  char* m_data_ptr;        // base address returned by the last mapping call
+  size_t m_data_offset;    // file offset of the data (fixed mode)
+  bool m_fixed;            // see class comment
+  size_t* m_count;         // counter shared between copies (see copy ctor/dtor)
+
+public:
+  typedef T        value_type;
+  typedef T*       pointer;
+  typedef const T* const_pointer;
+  typedef T&       reference;
+  typedef const T& const_reference;
+  typedef std::size_t    size_type;
+  typedef std::ptrdiff_t difference_type;
+
+  // Non-fixed allocator backed by a fresh temporary file.
+  MmapAllocator() throw()
+    : m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)),
+      m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(0),
+      m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
+  }
+
+  // Non-fixed allocator backed by an already opened file.
+  MmapAllocator(std::FILE* f_ptr) throw()
+    : m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
+      m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(0),
+      m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
+  }
+
+  // Fixed allocator: maps the region of f_ptr that starts at data_offset.
+  MmapAllocator(std::FILE* f_ptr, size_t data_offset) throw()
+    : m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
+      m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(0),
+      m_data_offset(data_offset), m_fixed(true), m_count(new size_t(0)) {
+  }
+
+  // Non-fixed allocator backed by fileName, opened with mode "wb+".
+  // NOTE(review): the result of fopen is passed to fileno() unchecked.
+  MmapAllocator(std::string fileName) throw()
+    : m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc(fileno(m_file_ptr)),
+      m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(0),
+      m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
+  }
+
+  // Copies every field, including the pointer to the shared counter, and
+  // increments that counter.
+  MmapAllocator(const MmapAllocator& c) throw()
+    : m_file_ptr(c.m_file_ptr), m_file_desc(c.m_file_desc),
+      m_page_size(c.m_page_size), m_map_size(c.m_map_size),
+      m_data_ptr(c.m_data_ptr), m_data_offset(c.m_data_offset),
+      m_fixed(c.m_fixed), m_count(c.m_count) {
+    (*m_count)++;
+  }
+
+  // When a mapping exists and the shared counter is 0, unmaps m_map_size bytes
+  // at m_data_ptr and, in non-fixed mode, closes the file if ftell() still
+  // succeeds on it. The counter is decremented afterwards in every case.
+  // NOTE(review): the counter object itself is never deleted in this class.
+  ~MmapAllocator() throw() {
+    if(m_data_ptr && *m_count == 0) {
+      util::UnmapOrThrow(m_data_ptr, m_map_size);
+      if(!m_fixed && std::ftell(m_file_ptr) != -1)
+        std::fclose(m_file_ptr);
+    }
+    (*m_count)--;
+  }
+
+  template <class U>
+  struct rebind {
+    typedef MmapAllocator<U> other;
+  };
+
+  pointer address (reference value) const {
+    return &value;
+  }
+
+  const_pointer address (const_reference value) const {
+    return &value;
+  }
+
+  size_type max_size () const throw() {
+    return std::numeric_limits<size_t>::max() / sizeof(value_type);
+  }
+
+  // Maps memory for `num` objects of type T and records the request size in
+  // m_map_size. The hint argument is ignored.
+  pointer allocate (size_type num, const void* = 0) {
+    m_map_size = num * sizeof(T);
+
+#if defined(_WIN32) || defined(_WIN64)
+    // On Windows, MAP_SHARED is not defined and MapOrThrow ignores the flags.
+    const int map_shared = 0;
+#else
+    const int map_shared = MAP_SHARED;
+#endif
+    if(!m_fixed) {
+      // Resize the backing file to the requested size, then map it from the
+      // start. The ftruncate result is accumulated into `read` but not checked.
+      size_t read = 0;
+      read += ftruncate(m_file_desc, m_map_size);
+      m_data_ptr = (char *)util::MapOrThrow(
+                     m_map_size, true, map_shared, false, m_file_desc, 0);
+      return (pointer)m_data_ptr;
+    } else {
+      // The mapping is requested from the start of the page containing
+      // m_data_offset; the returned pointer skips the bytes between that page
+      // start and the data.
+      const size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
+      const size_t relative_offset = m_data_offset - map_offset;
+      const size_t adjusted_map_size = m_map_size + relative_offset;
+
+      m_data_ptr = (char *)util::MapOrThrow(
+                     adjusted_map_size, false, map_shared, false, m_file_desc, map_offset);
+
+      return (pointer)(m_data_ptr + relative_offset);
+    }
+  }
+
+  // Unmaps a region previously returned by allocate(). In fixed mode the
+  // page-alignment adjustment applied by allocate() is undone first, using
+  // m_map_size rather than `num` for the length.
+  void deallocate (pointer p, size_type num) {
+    if(!m_fixed) {
+      util::UnmapOrThrow(p, num * sizeof(T));
+    } else {
+      const size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
+      const size_t relative_offset = m_data_offset - map_offset;
+      const size_t adjusted_map_size = m_map_size + relative_offset;
+
+      util::UnmapOrThrow((pointer)((char*)p - relative_offset), adjusted_map_size);
+    }
+  }
+
+  // Copy-constructs a T at p; does nothing in fixed mode, leaving the mapped
+  // bytes untouched.
+  void construct (pointer p, const T& value) {
+    if(!m_fixed)
+      new(p) value_type(value);
+  }
+  // Destroys the T at p; does nothing in fixed mode.
+  void destroy (pointer p) {
+    if(!m_fixed)
+      p->~T();
+  }
+
+  template <class T1, class T2>
+  friend bool operator== (const MmapAllocator<T1>&, const MmapAllocator<T2>&) throw();
+
+  template <class T1, class T2>
+  friend bool operator!= (const MmapAllocator<T1>&, const MmapAllocator<T2>&) throw();
+};
+
+// Two allocators compare equal when they refer to the same file, the same
+// mapping (pointer, size, offset, page size) and the same mode. The shared
+// counter is not part of the comparison.
+template <class T1, class T2>
+bool operator== (const MmapAllocator<T1>& a1,
+                 const MmapAllocator<T2>& a2) throw()
+{
+  return a1.m_file_ptr == a2.m_file_ptr
+         && a1.m_file_desc == a2.m_file_desc
+         && a1.m_page_size == a2.m_page_size
+         && a1.m_map_size == a2.m_map_size
+         && a1.m_data_ptr == a2.m_data_ptr
+         && a1.m_data_offset == a2.m_data_offset
+         && a1.m_fixed == a2.m_fixed;
+}
+
+// Negation of operator== for MmapAllocator.
+template <class T1, class T2>
+bool operator!=(const MmapAllocator<T1>& a1,
+                const MmapAllocator<T2>& a2) throw()
+{
+  const bool same = (a1 == a2);
+  return !same;
+}
+
+}
+
+#endif
--- a/contrib/other-builds/moses2/legacy/CompactPT/MonotonicVector.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/MonotonicVector.h
@ -0,0 +1,230 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_MonotonicVector_h
+#define moses_MonotonicVector_h
+
+// MonotonicVector - Represents a monotonic increasing function that maps
+// positive integers of any size onto a given number type. Each value has to be
+// equal or larger than the previous one. Depending on the stepSize it can save
+// up to 90% of memory compared to a std::vector<long>. Time complexity is roughly
+// constant, in the worst case, however, stepSize times slower than a normal
+// std::vector.
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include <cstdio>
+#include <cassert>
+
+#include "ThrowingFwrite.h"
+#include "ListCoders.h"
+#include "MmapAllocator.h"
+
+namespace Moses2
+{
+
+/**
+ * Compressed vector of monotonically non-decreasing values.
+ *
+ * Values are stored in blocks of stepSize entries. For each block, m_anchors
+ * holds the index into m_diffs at which the block starts; the block itself is
+ * the first value (VarInt32-coded) followed by the differences between
+ * successive values (Simple9-coded). Differences of the block currently being
+ * filled are kept uncompressed in m_tempDiffs until the block is complete or
+ * commit() is called.
+ *
+ * @tparam PosT      type of the stored values
+ * @tparam NumT      type of the anchor entries
+ * @tparam stepSize  number of values per block
+ * @tparam Allocator allocator template used for the anchor and diff vectors
+ */
+template<typename PosT = size_t, typename NumT = size_t, PosT stepSize = 32,
+         template <typename> class Allocator = std::allocator>
+class MonotonicVector
+{
+private:
+  typedef std::vector<NumT, Allocator<NumT> > Anchors;
+  typedef std::vector<unsigned int, Allocator<unsigned int> > Diffs;
+
+  Anchors m_anchors;
+  Diffs m_diffs;
+  std::vector<unsigned int> m_tempDiffs;
+
+  size_t m_size;   // number of values already represented in m_diffs
+  PosT m_last;     // most recently pushed value
+  bool m_final;    // set by commit(); no further push_back allowed
+
+public:
+  typedef PosT value_type;
+
+  MonotonicVector() : m_size(0), m_last(0), m_final(false) {}
+
+  // Number of values stored, including those not yet compressed.
+  size_t size() const {
+    return m_size + m_tempDiffs.size();
+  }
+
+  // Returns the i-th value: the block's first value plus the sum of the first
+  // (i % stepSize) differences of that block.
+  PosT at(size_t i) const {
+    PosT s = stepSize;
+    PosT j = m_anchors[i / s];
+    PosT r = i % s;
+
+    typename Diffs::const_iterator it = m_diffs.begin() + j;
+
+    PosT k = 0;
+    k += VarInt32::DecodeAndSum(it, m_diffs.end(), 1);
+    if(i < m_size)
+      k += Simple9::DecodeAndSum(it, m_diffs.end(), r);
+    else if(i < m_size + m_tempDiffs.size())
+      for(size_t l = 0; l < r; l++)
+        k += m_tempDiffs[l];
+
+    return k;
+  }
+
+  PosT operator[](PosT i) const {
+    return at(i);
+  }
+
+  PosT back() const {
+    return at(size()-1);
+  }
+
+  // Appends a value. Must not be called after commit().
+  void push_back(PosT i) {
+    assert(m_final != true);
+
+    // Very first value: opens the first block.
+    if(m_anchors.size() == 0 && m_tempDiffs.size() == 0) {
+      m_anchors.push_back(0);
+      VarInt32::Encode(&i, &i+1, std::back_inserter(m_diffs));
+      m_last = i;
+      m_size++;
+
+      return;
+    }
+
+    if(m_tempDiffs.size() == stepSize-1) {
+      // Current block is full: compress its differences and start a new
+      // block whose first value is i.
+      Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(),
+                      std::back_inserter(m_diffs));
+      m_anchors.push_back(m_diffs.size());
+      VarInt32::Encode(&i, &i+1, std::back_inserter(m_diffs));
+
+      m_size += m_tempDiffs.size() + 1;
+      m_tempDiffs.clear();
+    } else {
+      PosT last = m_last;
+      PosT diff = i - last;
+      m_tempDiffs.push_back(diff);
+    }
+    m_last = i;
+  }
+
+  // Compresses the pending differences and marks the vector as final.
+  void commit() {
+    assert(m_final != true);
+    Simple9::Encode(m_tempDiffs.begin(), m_tempDiffs.end(),
+                    std::back_inserter(m_diffs));
+    m_size += m_tempDiffs.size();
+    m_tempDiffs.clear();
+    m_final = true;
+  }
+
+  // Bytes occupied by the compressed representation (diffs plus anchors).
+  size_t usage() {
+    return m_diffs.size() * sizeof(unsigned int)
+           + m_anchors.size() * sizeof(NumT);
+  }
+
+  // Reads a vector previously written by save() from `in`. With map == true
+  // the diff and anchor arrays are mapped from the file instead of being
+  // read (only possible with MmapAllocator). Returns the number of bytes
+  // consumed.
+  size_t load(std::FILE* in, bool map = false) {
+    size_t byteSize = 0;
+
+    byteSize += fread(&m_final, sizeof(bool), 1, in) * sizeof(bool);
+    byteSize += fread(&m_size, sizeof(size_t), 1, in) * sizeof(size_t);
+    byteSize += fread(&m_last, sizeof(PosT), 1, in) * sizeof(PosT);
+
+    byteSize += loadVector(m_diffs, in, map);
+    byteSize += loadVector(m_anchors, in, map);
+
+    return byteSize;
+  }
+
+  // Reads a length-prefixed array into a std::allocator-backed vector.
+  template <typename ValueT>
+  size_t loadVector(std::vector<ValueT, std::allocator<ValueT> >& v,
+                    std::FILE* in, bool map = false) {
+    // Can only be read into memory. Mapping not possible with std:allocator.
+    assert(map == false);
+
+    size_t byteSize = 0;
+
+    size_t valSize;
+    byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
+
+    v.resize(valSize, 0);
+    byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
+
+    return byteSize;
+  }
+
+  // Reads or maps a length-prefixed array into an MmapAllocator-backed vector.
+  template <typename ValueT>
+  size_t loadVector(std::vector<ValueT, MmapAllocator<ValueT> >& v,
+                    std::FILE* in, bool map = false) {
+    size_t byteSize = 0;
+
+    size_t valSize;
+    byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
+
+    if(map == false) {
+      // Read data into temporary file (default constructor of MmapAllocator)
+      // and map memory onto temporary file. Can be resized.
+
+      v.resize(valSize, 0);
+      byteSize += std::fread(&v[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
+    } else {
+      // Map it directly on specified region of file "in" starting at valPos
+      // with length valSize * sizeof(ValueT). Mapped region cannot be resized.
+
+      size_t valPos = std::ftell(in);
+
+      Allocator<ValueT> alloc(in, valPos);
+      std::vector<ValueT, Allocator<ValueT> > vTemp(alloc);
+      vTemp.resize(valSize);
+      v.swap(vTemp);
+
+      // Skip over the mapped region so the next read starts after it.
+      std::fseek(in, valSize * sizeof(ValueT), SEEK_CUR);
+      byteSize += valSize * sizeof(ValueT);
+    }
+
+    return byteSize;
+  }
+
+  // Writes the vector to `out` (committing pending values first) and returns
+  // the number of bytes written.
+  size_t save(std::FILE* out) {
+    if(!m_final)
+      commit();
+
+    // Must be a size_t: accumulating into a bool collapses the total to 0/1.
+    size_t byteSize = 0;
+    byteSize += ThrowingFwrite(&m_final, sizeof(bool), 1, out) * sizeof(bool);
+    byteSize += ThrowingFwrite(&m_size, sizeof(size_t), 1, out) * sizeof(size_t);
+    byteSize += ThrowingFwrite(&m_last, sizeof(PosT), 1, out) * sizeof(PosT);
+
+    size_t size = m_diffs.size();
+    byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t);
+    byteSize += ThrowingFwrite(&m_diffs[0], sizeof(unsigned int), size, out) * sizeof(unsigned int);
+
+    size = m_anchors.size();
+    byteSize += ThrowingFwrite(&size, sizeof(size_t), 1, out) * sizeof(size_t);
+    byteSize += ThrowingFwrite(&m_anchors[0], sizeof(NumT), size, out) * sizeof(NumT);
+
+    return byteSize;
+  }
+
+  // Exchanges the compressed diff and anchor arrays with mv (committing this
+  // vector first). NOTE(review): m_size, m_last and m_final are not swapped.
+  void swap(MonotonicVector<PosT, NumT, stepSize, Allocator> &mv) {
+    if(!m_final)
+      commit();
+
+    m_diffs.swap(mv.m_diffs);
+    m_anchors.swap(mv.m_anchors);
+  }
+};
+
+}
+#endif
--- a/contrib/other-builds/moses2/legacy/CompactPT/MurmurHash3.cpp
+++ b/contrib/other-builds/moses2/legacy/CompactPT/MurmurHash3.cpp
@ -0,0 +1,425 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+// Note - The x86 and x64 versions do _not_ produce the same results, as the
+// algorithms are optimized for their respective platforms. You can still
+// compile and run any of them on any platform, but your performance with the
+// non-native version will be less than optimal.
+
+#include "MurmurHash3.h"
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+#define FORCE_INLINE	__forceinline
+
+#include <cstdlib>
+
+#define ROTL32(x,y)	_rotl(x,y)
+#define ROTL64(x,y)	_rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x)
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#define	FORCE_INLINE inline __attribute__((always_inline))
+
+// Rotates the 32-bit value x left by r bit positions.
+// Both shift counts are reduced modulo 32 so that r == 0 does not shift by
+// the full word width, which is undefined behaviour. For r in 1..31 the
+// result is the same as the plain (x << r) | (x >> (32 - r)).
+inline uint32_t rotl32 ( uint32_t x, int8_t r )
+{
+  return (x << (r & 31)) | (x >> ((32 - r) & 31));
+}
+
+// Rotates the 64-bit value x left by r bit positions.
+// Both shift counts are reduced modulo 64 so that r == 0 does not shift by
+// the full word width, which is undefined behaviour. For r in 1..63 the
+// result is the same as the plain (x << r) | (x >> (64 - r)).
+inline uint64_t rotl64 ( uint64_t x, int8_t r )
+{
+  return (x << (r & 63)) | (x >> ((64 - r) & 63));
+}
+
+#define	ROTL32(x,y)	rotl32(x,y)
+#define ROTL64(x,y)	rotl64(x,y)
+
+#define BIG_CONSTANT(x) (x##LLU)
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+// Block read - if your platform needs to do endian-swapping or can only
+// handle aligned reads, do the conversion here
+
+// Returns the 32-bit block at index i relative to p, read as stored.
+FORCE_INLINE uint32_t getblock ( const uint32_t * p, int i )
+{
+  return *(p + i);
+}
+
+// Returns the 64-bit block at index i relative to p, read as stored.
+FORCE_INLINE uint64_t getblock ( const uint64_t * p, int i )
+{
+  return *(p + i);
+}
+
+//-----------------------------------------------------------------------------
+// Finalization mix - force all bits of a hash block to avalanche
+
+// 32-bit finalization mix: three xor-shift steps (16, 13, 16) with a
+// multiplication after each of the first two.
+FORCE_INLINE uint32_t fmix ( uint32_t h )
+{
+  h = (h ^ (h >> 16)) * 0x85ebca6b;
+  h = (h ^ (h >> 13)) * 0xc2b2ae35;
+  return h ^ (h >> 16);
+}
+
+//----------
+
+// 64-bit finalization mix: three xor-shift steps by 33 with a
+// multiplication after each of the first two.
+FORCE_INLINE uint64_t fmix ( uint64_t k )
+{
+  k = (k ^ (k >> 33)) * BIG_CONSTANT(0xff51afd7ed558ccd);
+  k = (k ^ (k >> 33)) * BIG_CONSTANT(0xc4ceb9fe1a85ec53);
+  return k ^ (k >> 33);
+}
+
+//-----------------------------------------------------------------------------
+
+// Computes a 32-bit hash of the `len` bytes starting at `key`.
+//
+//   key   first byte of the input
+//   len   number of input bytes
+//   seed  initial value of the hash state
+//   out   receives the result; one uint32_t is written through it
+//
+// The input is consumed in 4-byte blocks read through getblock(); the
+// remaining len & 3 bytes are folded in by the tail switch.
+void MurmurHash3_x86_32 ( const void * key, int len,
+                          uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 4;
+
+  uint32_t h1 = seed;
+
+  uint32_t c1 = 0xcc9e2d51;
+  uint32_t c2 = 0x1b873593;
+
+  //----------
+  // body
+
+  // `blocks` points just past the last full block; the loop indexes it with
+  // negative offsets from -nblocks up to -1.
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*4);
+
+  for(int i = -nblocks; i; i++) {
+    uint32_t k1 = getblock(blocks,i);
+
+    k1 *= c1;
+    k1 = ROTL32(k1,15);
+    k1 *= c2;
+
+    h1 ^= k1;
+    h1 = ROTL32(h1,13);
+    h1 = h1*5+0xe6546b64;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
+
+  uint32_t k1 = 0;
+
+  // The cases fall through on purpose: case 3 also runs cases 2 and 1, so
+  // every leftover byte is merged into k1 before it is mixed into h1.
+  switch(len & 3) {
+  case 3:
+    k1 ^= tail[2] << 16;
+  case 2:
+    k1 ^= tail[1] << 8;
+  case 1:
+    k1 ^= tail[0];
+    k1 *= c1;
+    k1 = ROTL32(k1,15);
+    k1 *= c2;
+    h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len;
+
+  h1 = fmix(h1);
+
+  *(uint32_t*)out = h1;
+}
+
+//-----------------------------------------------------------------------------
+
+// Computes a 128-bit hash of the `len` bytes starting at `key` using four
+// 32-bit state words.
+//
+//   key   first byte of the input
+//   len   number of input bytes
+//   seed  initial value of all four state words
+//   out   receives the result; four uint32_t (16 bytes) are written
+//
+// The input is consumed in 16-byte blocks read through getblock(); the
+// remaining len & 15 bytes are folded in by the tail switch.
+void MurmurHash3_x86_128 ( const void * key, const int len,
+                           uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint32_t h1 = seed;
+  uint32_t h2 = seed;
+  uint32_t h3 = seed;
+  uint32_t h4 = seed;
+
+  uint32_t c1 = 0x239b961b;
+  uint32_t c2 = 0xab0e9789;
+  uint32_t c3 = 0x38b34ae5;
+  uint32_t c4 = 0xa1e38b93;
+
+  //----------
+  // body
+
+  // `blocks` points just past the last full block; the loop indexes it with
+  // negative offsets, four 32-bit words per block.
+  const uint32_t * blocks = (const uint32_t *)(data + nblocks*16);
+
+  for(int i = -nblocks; i; i++) {
+    uint32_t k1 = getblock(blocks,i*4+0);
+    uint32_t k2 = getblock(blocks,i*4+1);
+    uint32_t k3 = getblock(blocks,i*4+2);
+    uint32_t k4 = getblock(blocks,i*4+3);
+
+    k1 *= c1;
+    k1  = ROTL32(k1,15);
+    k1 *= c2;
+    h1 ^= k1;
+
+    h1 = ROTL32(h1,19);
+    h1 += h2;
+    h1 = h1*5+0x561ccd1b;
+
+    k2 *= c2;
+    k2  = ROTL32(k2,16);
+    k2 *= c3;
+    h2 ^= k2;
+
+    h2 = ROTL32(h2,17);
+    h2 += h3;
+    h2 = h2*5+0x0bcaa747;
+
+    k3 *= c3;
+    k3  = ROTL32(k3,17);
+    k3 *= c4;
+    h3 ^= k3;
+
+    h3 = ROTL32(h3,15);
+    h3 += h4;
+    h3 = h3*5+0x96cd1c35;
+
+    k4 *= c4;
+    k4  = ROTL32(k4,18);
+    k4 *= c1;
+    h4 ^= k4;
+
+    h4 = ROTL32(h4,13);
+    h4 += h1;
+    h4 = h4*5+0x32ac3b17;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint32_t k1 = 0;
+  uint32_t k2 = 0;
+  uint32_t k3 = 0;
+  uint32_t k4 = 0;
+
+  // The cases fall through on purpose: entering at case N also runs every
+  // lower case, so leftover bytes 12-14 go to k4, 8-11 to k3, 4-7 to k2 and
+  // 0-3 to k1, each word being mixed into its state word once complete.
+  switch(len & 15) {
+  case 15:
+    k4 ^= tail[14] << 16;
+  case 14:
+    k4 ^= tail[13] << 8;
+  case 13:
+    k4 ^= tail[12] << 0;
+    k4 *= c4;
+    k4  = ROTL32(k4,18);
+    k4 *= c1;
+    h4 ^= k4;
+
+  case 12:
+    k3 ^= tail[11] << 24;
+  case 11:
+    k3 ^= tail[10] << 16;
+  case 10:
+    k3 ^= tail[ 9] << 8;
+  case  9:
+    k3 ^= tail[ 8] << 0;
+    k3 *= c3;
+    k3  = ROTL32(k3,17);
+    k3 *= c4;
+    h3 ^= k3;
+
+  case  8:
+    k2 ^= tail[ 7] << 24;
+  case  7:
+    k2 ^= tail[ 6] << 16;
+  case  6:
+    k2 ^= tail[ 5] << 8;
+  case  5:
+    k2 ^= tail[ 4] << 0;
+    k2 *= c2;
+    k2  = ROTL32(k2,16);
+    k2 *= c3;
+    h2 ^= k2;
+
+  case  4:
+    k1 ^= tail[ 3] << 24;
+  case  3:
+    k1 ^= tail[ 2] << 16;
+  case  2:
+    k1 ^= tail[ 1] << 8;
+  case  1:
+    k1 ^= tail[ 0] << 0;
+    k1 *= c1;
+    k1  = ROTL32(k1,15);
+    k1 *= c2;
+    h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len;
+  h2 ^= len;
+  h3 ^= len;
+  h4 ^= len;
+
+  // Cross-add the state words before and after fmix() so that each output
+  // word depends on all four.
+  h1 += h2;
+  h1 += h3;
+  h1 += h4;
+  h2 += h1;
+  h3 += h1;
+  h4 += h1;
+
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+  h3 = fmix(h3);
+  h4 = fmix(h4);
+
+  h1 += h2;
+  h1 += h3;
+  h1 += h4;
+  h2 += h1;
+  h3 += h1;
+  h4 += h1;
+
+  ((uint32_t*)out)[0] = h1;
+  ((uint32_t*)out)[1] = h2;
+  ((uint32_t*)out)[2] = h3;
+  ((uint32_t*)out)[3] = h4;
+}
+
+//-----------------------------------------------------------------------------
+
+// Computes a 128-bit hash of the `len` bytes starting at `key` using two
+// 64-bit state words.
+//
+//   key   first byte of the input
+//   len   number of input bytes
+//   seed  initial value of both state words
+//   out   receives the result; two uint64_t (16 bytes) are written
+//
+// The input is consumed in 16-byte blocks read through getblock(); the
+// remaining len & 15 bytes are folded in by the tail switch.
+void MurmurHash3_x64_128 ( const void * key, const int len,
+                           const uint32_t seed, void * out )
+{
+  const uint8_t * data = (const uint8_t*)key;
+  const int nblocks = len / 16;
+
+  uint64_t h1 = seed;
+  uint64_t h2 = seed;
+
+  uint64_t c1 = BIG_CONSTANT(0x87c37b91114253d5);
+  uint64_t c2 = BIG_CONSTANT(0x4cf5ad432745937f);
+
+  //----------
+  // body
+
+  // Unlike the x86 variants, `blocks` points at the start of the data and
+  // the loop counts upwards, two 64-bit words per block.
+  const uint64_t * blocks = (const uint64_t *)(data);
+
+  for(int i = 0; i < nblocks; i++) {
+    uint64_t k1 = getblock(blocks,i*2+0);
+    uint64_t k2 = getblock(blocks,i*2+1);
+
+    k1 *= c1;
+    k1  = ROTL64(k1,31);
+    k1 *= c2;
+    h1 ^= k1;
+
+    h1 = ROTL64(h1,27);
+    h1 += h2;
+    h1 = h1*5+0x52dce729;
+
+    k2 *= c2;
+    k2  = ROTL64(k2,33);
+    k2 *= c1;
+    h2 ^= k2;
+
+    h2 = ROTL64(h2,31);
+    h2 += h1;
+    h2 = h2*5+0x38495ab5;
+  }
+
+  //----------
+  // tail
+
+  const uint8_t * tail = (const uint8_t*)(data + nblocks*16);
+
+  uint64_t k1 = 0;
+  uint64_t k2 = 0;
+
+  // The cases fall through on purpose: entering at case N also runs every
+  // lower case, so leftover bytes 8-14 go to k2 and bytes 0-7 to k1, each
+  // word being mixed into its state word once complete.
+  switch(len & 15) {
+  case 15:
+    k2 ^= uint64_t(tail[14]) << 48;
+  case 14:
+    k2 ^= uint64_t(tail[13]) << 40;
+  case 13:
+    k2 ^= uint64_t(tail[12]) << 32;
+  case 12:
+    k2 ^= uint64_t(tail[11]) << 24;
+  case 11:
+    k2 ^= uint64_t(tail[10]) << 16;
+  case 10:
+    k2 ^= uint64_t(tail[ 9]) << 8;
+  case  9:
+    k2 ^= uint64_t(tail[ 8]) << 0;
+    k2 *= c2;
+    k2  = ROTL64(k2,33);
+    k2 *= c1;
+    h2 ^= k2;
+
+  case  8:
+    k1 ^= uint64_t(tail[ 7]) << 56;
+  case  7:
+    k1 ^= uint64_t(tail[ 6]) << 48;
+  case  6:
+    k1 ^= uint64_t(tail[ 5]) << 40;
+  case  5:
+    k1 ^= uint64_t(tail[ 4]) << 32;
+  case  4:
+    k1 ^= uint64_t(tail[ 3]) << 24;
+  case  3:
+    k1 ^= uint64_t(tail[ 2]) << 16;
+  case  2:
+    k1 ^= uint64_t(tail[ 1]) << 8;
+  case  1:
+    k1 ^= uint64_t(tail[ 0]) << 0;
+    k1 *= c1;
+    k1  = ROTL64(k1,31);
+    k1 *= c2;
+    h1 ^= k1;
+  };
+
+  //----------
+  // finalization
+
+  h1 ^= len;
+  h2 ^= len;
+
+  // Cross-add the state words before and after fmix() so that each output
+  // word depends on both.
+  h1 += h2;
+  h2 += h1;
+
+  h1 = fmix(h1);
+  h2 = fmix(h2);
+
+  h1 += h2;
+  h2 += h1;
+
+  ((uint64_t*)out)[0] = h1;
+  ((uint64_t*)out)[1] = h2;
+}
+
+//-----------------------------------------------------------------------------
+
--- a/contrib/other-builds/moses2/legacy/CompactPT/MurmurHash3.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/MurmurHash3.h
@ -0,0 +1,37 @@
+//-----------------------------------------------------------------------------
+// MurmurHash3 was written by Austin Appleby, and is placed in the public
+// domain. The author hereby disclaims copyright to this source code.
+
+#ifndef _MURMURHASH3_H_
+#define _MURMURHASH3_H_
+
+//-----------------------------------------------------------------------------
+// Platform-specific functions and macros
+
+// Microsoft Visual Studio
+
+#if defined(_MSC_VER)
+
+typedef unsigned char uint8_t;
+typedef unsigned long uint32_t;
+typedef unsigned __int64 uint64_t;
+
+// Other compilers
+
+#else	// defined(_MSC_VER)
+
+#include <stdint.h>
+
+#endif // !defined(_MSC_VER)
+
+//-----------------------------------------------------------------------------
+
+// All three functions hash the `len` bytes starting at `key`, starting from
+// `seed`, and store the result through `out`.
+
+// 32-bit hash; one uint32_t (4 bytes) is written to `out`.
+void MurmurHash3_x86_32  ( const void * key, int len, uint32_t seed, void * out );
+
+// 128-bit hash using 32-bit state words; four uint32_t (16 bytes) are
+// written to `out`.
+void MurmurHash3_x86_128 ( const void * key, int len, uint32_t seed, void * out );
+
+// 128-bit hash using 64-bit state words; two uint64_t (16 bytes) are
+// written to `out`.
+void MurmurHash3_x64_128 ( const void * key, int len, uint32_t seed, void * out );
+
+//-----------------------------------------------------------------------------
+
+#endif // _MURMURHASH3_H_
--- a/contrib/other-builds/moses2/legacy/CompactPT/PackedArray.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/PackedArray.h
@ -0,0 +1,187 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_PackedArray_h
+#define moses_PackedArray_h
+
+#include <vector>
+#include <cmath>
+#include <cstring>
+#include <cstdio>
+
+#include "ThrowingFwrite.h"
+
+namespace Moses2
+{
+
+/**
+ * Array of integers in which every element occupies the same number of
+ * bits, stored back to back in cells of type D.
+ *
+ * The bit width is not stored in the object: the same `bits` value has to be
+ * passed to the sizing constructor, Get() and Set().
+ *
+ * @tparam T  value type accepted and returned by the accessors
+ * @tparam D  storage cell type (unsigned char by default)
+ */
+template <typename T = size_t, typename D = unsigned char>
+class PackedArray
+{
+protected:
+  // Number of bits in one storage cell, i.e. sizeof(D) * 8.
+  static size_t m_dataBits;
+
+  size_t m_size;         // number of logical elements
+  size_t m_storageSize;  // number of cells in m_storage
+  D* m_storage;          // cell array owned by this object
+
+public:
+  // Creates an empty array.
+  PackedArray() {
+    m_size = 0;
+    m_storageSize = 0;
+    m_storage = new D[0];
+  }
+
+  // Creates an array of `size` elements of `bits` bits each, all zero.
+  PackedArray(size_t size, size_t bits) : m_size(size) {
+    // Integer ceiling division. Doing this in float loses precision once
+    // bits * size exceeds 2^24 and can yield too few cells.
+    m_storageSize = (bits * size + m_dataBits - 1) / m_dataBits;
+    // Value-initialise so that unset elements and padding bits are zero
+    // instead of indeterminate.
+    m_storage = new D[m_storageSize]();
+  }
+
+  // Deep copy of the cell array.
+  PackedArray(const PackedArray<T, D> &c) {
+    m_size = c.m_size;
+
+    m_storageSize = c.m_storageSize;
+    m_storage = new D[m_storageSize];
+
+    std::memcpy(m_storage, c.m_storage, m_storageSize * sizeof(D));
+  }
+
+  // Deep-copying assignment. Without it the compiler-generated operator
+  // would copy the raw pointer and both objects would delete the same array.
+  PackedArray<T, D>& operator=(const PackedArray<T, D> &c) {
+    if(this != &c) {
+      // Allocate and fill the new array before releasing the old one so the
+      // object stays valid if the allocation throws.
+      D* fresh = new D[c.m_storageSize];
+      std::memcpy(fresh, c.m_storage, c.m_storageSize * sizeof(D));
+
+      delete [] m_storage;
+      m_storage = fresh;
+      m_storageSize = c.m_storageSize;
+      m_size = c.m_size;
+    }
+    return *this;
+  }
+
+  virtual ~PackedArray() {
+    delete [] m_storage;
+    m_size = 0;
+    m_storageSize = 0;
+    m_storage = 0;
+  }
+
+  /**
+   * Returns element i, i.e. the bits [i * bits, (i + 1) * bits) of the
+   * storage, assembled from as many cells as the element spans.
+   */
+  T Get(size_t i, size_t bits) const {
+    T out = 0;
+
+    size_t bitstart = (i * bits);
+    size_t bitpos = bitstart;
+
+    // Mask of the low `bits` bits. Shifting by the full width of the type
+    // is undefined, so that case is handled separately.
+    const size_t width = sizeof(size_t) * 8;
+    size_t zero = (bits >= width) ? ~size_t(0) : ((size_t(1) << bits) - 1);
+
+    while(bitpos - bitstart < bits) {
+      size_t pos = bitpos / m_dataBits;
+      size_t off = bitpos % m_dataBits;
+
+      // First cell: drop the `off` low bits that belong to the previous
+      // element. Later cells start at off == 0 and are shifted up by the
+      // number of bits already collected.
+      out |= (T(m_storage[pos]) << (bitpos - bitstart)) >> off;
+
+      bitpos += (m_dataBits - off);
+    }
+
+    out &= zero;
+    return out;
+  }
+
+  /**
+   * Stores v as element i. v is OR-ed into the cells without being masked,
+   * so it has to fit into `bits` bits.
+   */
+  void Set(size_t i, T v, size_t bits) {
+    size_t bitstart = (i * bits);
+    size_t bitpos = bitstart;
+
+    while(bitpos - bitstart < bits) {
+      size_t pos = bitpos / m_dataBits;
+      size_t off = bitpos % m_dataBits;
+
+      size_t rest = bits - (bitpos - bitstart);
+
+      // Bits of this cell to preserve: those above the element (none when
+      // the element reaches the end of the cell) and the `off` bits below.
+      // Built in D so that no shift can reach the width of the operand.
+      size_t upper = rest + off;
+      D keepHigh = (upper >= m_dataBits) ? D(0) : D(~((D(1) << upper) - 1));
+      D keepLow = D((D(1) << off) - 1);
+      D zero = D(keepHigh | keepLow);
+
+      m_storage[pos] &= zero;
+      m_storage[pos] |= v << off;
+      v = v >> (m_dataBits - off);
+      bitpos += (m_dataBits - off);
+    }
+  }
+
+  virtual D*& GetStorage() {
+    return m_storage;
+  }
+
+  virtual size_t GetStorageSize() const {
+    return m_storageSize;
+  }
+
+  virtual size_t Size() const {
+    return m_size;
+  }
+
+  /**
+   * Replaces the contents with data read from `in` (element count, cell
+   * count, then the cells). The fread results are not checked.
+   *
+   * @return distance the stream position advanced, from ftell()
+   */
+  virtual size_t Load(std::FILE* in) {
+    size_t a1 = std::ftell(in);
+
+    size_t read = 0;
+    read += std::fread(&m_size, sizeof(m_size), 1, in);
+    read += std::fread(&m_storageSize, sizeof(m_storageSize), 1, in);
+    delete [] m_storage;
+    m_storage = new D[m_storageSize];
+    read += std::fread(m_storage, sizeof(D), m_storageSize, in);
+
+    size_t a2 = std::ftell(in);
+    return a2 - a1;
+  }
+
+  /**
+   * Writes element count, cell count and the cells to `out`.
+   *
+   * @return distance the stream position advanced, from ftell()
+   */
+  virtual size_t Save(std::FILE* out) {
+    size_t a1 = std::ftell(out);
+
+    ThrowingFwrite(&m_size, sizeof(m_size), 1, out);
+    ThrowingFwrite(&m_storageSize, sizeof(m_storageSize), 1, out);
+    ThrowingFwrite(m_storage, sizeof(D), m_storageSize, out);
+
+    size_t a2 = std::ftell(out);
+    return a2 - a1;
+  }
+
+};
+
+// Bits per storage cell for each PackedArray<T, D> instantiation.
+template <typename T, typename D>
+size_t PackedArray<T, D>::m_dataBits = sizeof(D)*8;
+
+/**************************************************************************/
+
+/**
+ * PackedArray whose elements are pairs (a, b) packed into one value:
+ * a occupies the low bits1 bits and b the bits2 bits above it.
+ * bits1 must be smaller than the bit width of T.
+ */
+template <typename T = size_t, typename D = unsigned char>
+class PairedPackedArray : public PackedArray<T,D>
+{
+public:
+  PairedPackedArray() : PackedArray<T,D>() {}
+
+  PairedPackedArray(size_t size, size_t bits1, size_t bits2)
+    : PackedArray<T, D>(size, bits1 + bits2) { }
+
+  // Stores the pair (a, b) as element i.
+  void Set(size_t i, T a, T b, size_t bits1, size_t bits2) {
+    T c = a | (b << bits1);
+    PackedArray<T,D>::Set(i, c, bits1 + bits2);
+  }
+
+  // Stores the pair p as element i. Uses the same layout as the overload
+  // above (first in the low bits) so that Get() returns p again. This
+  // overload previously called the base Set() without the bit width and
+  // packed the halves in the opposite order to Get().
+  void Set(size_t i, std::pair<T,T> p, size_t bits1, size_t bits2) {
+    Set(i, p.first, p.second, bits1, bits2);
+  }
+
+  // Returns element i as the pair (low bits1 bits, remaining bits).
+  std::pair<T, T> Get(size_t i, size_t bits1, size_t bits2) {
+    T v = PackedArray<T, D>::Get(i, bits1 + bits2);
+    // Build the mask in T; a plain `1 << bits1` is an int shift and
+    // overflows for bits1 >= 31.
+    T a = v & ((T(1) << bits1) - 1);
+    T b = v >> bits1;
+    return std::pair<T, T>(a, b);
+  }
+};
+
+}
+
+#endif
--- a/contrib/other-builds/moses2/legacy/CompactPT/StringVector.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/StringVector.h
@ -0,0 +1,625 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_StringVector_h
+#define moses_StringVector_h
+
+#include <vector>
+#include <algorithm>
+#include <string>
+#include <iterator>
+#include <stdexcept>
+#include <cstdio>
+#include <cassert>
+
+#include <boost/iterator/iterator_facade.hpp>
+
+#include "ThrowingFwrite.h"
+#include "MonotonicVector.h"
+#include "MmapAllocator.h"
+
+namespace Moses2
+{
+
+// ********** ValueIteratorRange **********
+
+/**
+ * Pair of iterators [begin, end) that can be compared with strings and
+ * converted to std::string.
+ *
+ * @tparam ValueIteratorT  iterator type over the stored values
+ */
+template <typename ValueIteratorT>
+class ValueIteratorRange
+{
+private:
+  ValueIteratorT m_begin;
+  ValueIteratorT m_end;
+
+public:
+  ValueIteratorRange(ValueIteratorT begin, ValueIteratorT end);
+
+  const ValueIteratorT& begin() const;
+  const ValueIteratorT& end() const;
+
+  // Copy of the range as a std::string.
+  const std::string str() const;
+
+  // const-qualified so that const ranges can be converted as well.
+  operator const std::string() const {
+    return str();
+  }
+
+  // Number of values in the range; const-qualified for the same reason.
+  size_t size() const {
+    return std::distance(m_begin, m_end);
+  }
+
+  // Equality and lexicographical ordering against anything that offers
+  // begin()/end(), or against a C string.
+  template <typename StringT>
+  bool operator==(const StringT& o) const;
+  bool operator==(const char* c) const;
+
+  template <typename StringT>
+  bool operator<(const StringT& o) const;
+  bool operator<(const char* c) const;
+};
+
+// ********** StringVector **********
+
+/**
+ * Sequence of strings stored back to back in one value array (m_charArray);
+ * m_positions holds the start offset of every string.
+ *
+ * @tparam ValueT     element type of the value array
+ * @tparam PosT       type used for string indices and offsets
+ * @tparam Allocator  allocator template of the value array
+ */
+template <typename ValueT = unsigned char, typename PosT = unsigned int,
+         template <typename> class Allocator = std::allocator>
+class StringVector
+{
+protected:
+  // Cleared by push_back() once a string is not greater than its
+  // predecessor; find() uses a binary search while it is set.
+  bool m_sorted;
+  // Value of the memoryMapped argument of the last load().
+  bool m_memoryMapped;
+
+  // Owned value array; null when constructed with allocate == false and
+  // nothing has been loaded yet.
+  std::vector<ValueT, Allocator<ValueT> >* m_charArray;
+  MonotonicVector<PosT, unsigned int, 32> m_positions;
+
+  virtual const ValueT* value_ptr(PosT i) const;
+
+public:
+  //typedef ValueIteratorRange<typename std::vector<ValueT, Allocator<ValueT> >::const_iterator> range;
+  typedef ValueIteratorRange<const ValueT *> range;
+
+  // ********** RangeIterator **********
+
+  // Random-access iterator over the strings; dereferences to a `range`.
+  class RangeIterator : public boost::iterator_facade<RangeIterator,
+    range, std::random_access_iterator_tag, range, PosT>
+  {
+
+  private:
+    PosT m_index;
+    StringVector<ValueT, PosT, Allocator>* m_container;
+
+  public:
+    RangeIterator();
+    RangeIterator(StringVector<ValueT, PosT, Allocator> &sv, PosT index=0);
+
+    PosT get_index();
+
+  private:
+    friend class boost::iterator_core_access;
+
+    range dereference() const;
+    bool equal(RangeIterator const& other) const;
+    void increment();
+    void decrement();
+    void advance(PosT n);
+
+    PosT distance_to(RangeIterator const& other) const;
+  };
+
+  // ********** StringIterator **********
+
+  // Random-access iterator over the strings; dereferences to a std::string
+  // copy.
+  class StringIterator : public boost::iterator_facade<StringIterator,
+    std::string, std::random_access_iterator_tag, const std::string, PosT>
+  {
+
+  private:
+    PosT m_index;
+    StringVector<ValueT, PosT, Allocator>* m_container;
+
+  public:
+    StringIterator();
+    StringIterator(StringVector<ValueT, PosT, Allocator> &sv, PosT index=0);
+
+    PosT get_index();
+
+  private:
+    friend class boost::iterator_core_access;
+
+    const std::string dereference() const;
+    bool equal(StringIterator const& other) const;
+    void increment();
+    void decrement();
+    void advance(PosT n);
+    PosT distance_to(StringIterator const& other) const;
+  };
+
+  typedef RangeIterator iterator;
+  typedef StringIterator string_iterator;
+
+  StringVector(bool allocate = false);
+  StringVector(Allocator<ValueT>& alloc);
+
+  virtual ~StringVector() {
+    delete m_charArray;
+  }
+
+  // Exchanges positions, value array contents and the sorted flag with c.
+  void swap(StringVector<ValueT, PosT, Allocator> &c) {
+    m_positions.commit();
+    m_positions.swap(c.m_positions);
+    m_charArray->swap(*c.m_charArray);
+
+    bool temp = m_sorted;
+    m_sorted = c.m_sorted;
+    c.m_sorted = temp;
+  }
+
+  bool is_sorted() const;
+  PosT size() const;
+  virtual PosT size2() const;
+
+  template<class Iterator> Iterator begin() const;
+  template<class Iterator> Iterator end() const;
+
+  iterator begin() const;
+  iterator end() const;
+
+  PosT length(PosT i) const;
+  //typename std::vector<ValueT, Allocator<ValueT> >::const_iterator begin(PosT i) const;
+  //typename std::vector<ValueT, Allocator<ValueT> >::const_iterator end(PosT i) const;
+  const ValueT* begin(PosT i) const;
+  const ValueT* end(PosT i) const;
+
+  // Removes all strings and marks the vector as sorted again.
+  void clear() {
+    m_charArray->clear();
+    m_sorted = true;
+    m_positions = MonotonicVector<PosT, unsigned int, 32>();
+  }
+
+  range at(PosT i) const;
+  range operator[](PosT i) const;
+  range back() const;
+
+  template <typename StringT>
+  void push_back(StringT s);
+  void push_back(const char* c);
+
+  template <typename StringT>
+  PosT find(StringT &s) const;
+  PosT find(const char* c) const;
+
+  /**
+   * Reads the sorted flag, the positions and the value array from `in`.
+   *
+   * @param in            stream positioned at a saved StringVector
+   * @param memoryMapped  forwarded to loadCharArray() for the value array
+   * @return              number of bytes accounted for
+   */
+  virtual size_t load(std::FILE* in, bool memoryMapped = false) {
+    size_t size = 0;
+    m_memoryMapped = memoryMapped;
+
+    size += std::fread(&m_sorted, sizeof(bool), 1, in) * sizeof(bool);
+    size += m_positions.load(in, false);
+
+    // loadCharArray() installs a newly allocated array, so release the one
+    // held so far; otherwise it would leak.
+    delete m_charArray;
+    m_charArray = 0;
+    size += loadCharArray(m_charArray, in, m_memoryMapped);
+    return size;
+  }
+
+  // Reads a length-prefixed value array into a newly allocated vector
+  // stored in c.
+  size_t loadCharArray(std::vector<ValueT, std::allocator<ValueT> >*& c,
+                       std::FILE* in, bool map = false) {
+    // Can only be read into memory. Mapping not possible with std::allocator.
+    assert(map == false);
+
+    size_t byteSize = 0;
+
+    size_t valSize;
+    byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
+
+    c = new std::vector<ValueT, std::allocator<ValueT> >(valSize, 0);
+    byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
+
+    return byteSize;
+  }
+
+  // Same as above for MmapAllocator-backed vectors; with map == true the
+  // vector is created on the region of `in` starting at the current
+  // position instead of being read.
+  size_t loadCharArray(std::vector<ValueT, MmapAllocator<ValueT> >*& c,
+                       std::FILE* in, bool map = false) {
+    size_t byteSize = 0;
+
+    size_t valSize;
+    byteSize += std::fread(&valSize, sizeof(size_t), 1, in) * sizeof(size_t);
+
+    if(map == false) {
+      // Read data into temporary file (default constructor of MmapAllocator)
+      // and map memory onto temporary file. Can be resized.
+      c = new std::vector<ValueT, MmapAllocator<ValueT> >(valSize, 0);
+      byteSize += std::fread(&(*c)[0], sizeof(ValueT), valSize, in) * sizeof(ValueT);
+    } else {
+      // Map it directly on specified region of file "in" starting at valPos
+      // with length valSize * sizeof(ValueT). Mapped region cannot be resized.
+
+      size_t valPos = std::ftell(in);
+      Allocator<ValueT> alloc(in, valPos);
+      c = new std::vector<ValueT, Allocator<ValueT> >(alloc);
+      c->resize(valSize, 0);
+
+      byteSize += valSize * sizeof(ValueT);
+    }
+
+    return byteSize;
+  }
+
+  /**
+   * Opens `filename` and loads from it.
+   *
+   * @throws std::runtime_error when the file cannot be opened
+   */
+  size_t load(std::string filename, bool memoryMapped = false) {
+    // Binary mode: the file holds raw bytes that must not be translated.
+    std::FILE* pFile = fopen(filename.c_str(), "rb");
+    if(!pFile)
+      throw std::runtime_error("StringVector: cannot open '" + filename + "' for reading");
+    size_t byteSize = load(pFile, memoryMapped);
+    fclose(pFile);
+    return byteSize;
+  }
+
+  /**
+   * Writes the sorted flag, the positions and the length-prefixed value
+   * array to `out`.
+   *
+   * @return number of bytes written
+   */
+  size_t save(std::FILE* out) {
+    size_t byteSize = 0;
+    byteSize += ThrowingFwrite(&m_sorted, sizeof(bool), 1, out) * sizeof(bool);
+
+    byteSize += m_positions.save(out);
+
+    size_t valSize = size2();
+    byteSize += ThrowingFwrite(&valSize, sizeof(size_t), 1, out) * sizeof(size_t);
+    // &(*m_charArray)[0] is only valid for a non-empty array.
+    if(valSize)
+      byteSize += ThrowingFwrite(&(*m_charArray)[0], sizeof(ValueT), valSize, out) * sizeof(ValueT);
+
+    return byteSize;
+  }
+
+  /**
+   * Creates or truncates `filename` and saves into it.
+   *
+   * @throws std::runtime_error when the file cannot be opened
+   */
+  size_t save(std::string filename) {
+    // Binary mode, matching load().
+    std::FILE* pFile = fopen(filename.c_str(), "wb");
+    if(!pFile)
+      throw std::runtime_error("StringVector: cannot open '" + filename + "' for writing");
+    size_t byteSize = save(pFile);
+    fclose(pFile);
+    return byteSize;
+  }
+
+};
+
+// ********** Implementation **********
+
+// ValueIteratorRange
+
+// Stores the two iterators delimiting the range.
+template <typename ValueIteratorT>
+ValueIteratorRange<ValueIteratorT>::ValueIteratorRange(ValueIteratorT begin, ValueIteratorT end)
+  : m_begin(begin)
+  , m_end(end)
+{
+}
+
+// Iterator to the first value of the range.
+template <typename ValueIteratorT>
+const ValueIteratorT& ValueIteratorRange<ValueIteratorT>::begin() const
+{
+  return this->m_begin;
+}
+
+// Iterator one past the last value of the range.
+template <typename ValueIteratorT>
+const ValueIteratorT& ValueIteratorRange<ValueIteratorT>::end() const
+{
+  return this->m_end;
+}
+
+// Returns the values of the range as a std::string.
+// Uses the iterator-range constructor instead of appending one character at
+// a time; the resulting string is the same.
+template <typename ValueIteratorT>
+const std::string ValueIteratorRange<ValueIteratorT>::str() const
+{
+  return std::string(m_begin, m_end);
+}
+
+// True when o has the same length as the range and equal values.
+template <typename ValueIteratorT>
+template <typename StringT>
+bool ValueIteratorRange<ValueIteratorT>::operator==(const StringT& o) const
+{
+  if(std::distance(m_begin, m_end) != std::distance(o.begin(), o.end()))
+    return false;
+
+  return std::equal(m_begin, m_end, o.begin());
+}
+
+// Equality against a C string, via a std::string copy of it.
+template <typename ValueIteratorT>
+bool ValueIteratorRange<ValueIteratorT>::operator==(const char* c) const
+{
+  const std::string other(c);
+  return *this == other;
+}
+
+// Lexicographical "less than" against anything offering begin()/end().
+template <typename ValueIteratorT>
+template <typename StringT>
+bool ValueIteratorRange<ValueIteratorT>::operator<(const StringT &s2) const
+{
+  typedef typename std::iterator_traits<ValueIteratorT>::value_type ElemT;
+
+  return std::lexicographical_compare(m_begin, m_end,
+                                      s2.begin(), s2.end(),
+                                      std::less<ElemT>());
+}
+
+// Lexicographical "less than" against a C string, via a std::string copy.
+template <typename ValueIteratorT>
+bool ValueIteratorRange<ValueIteratorT>::operator<(const char* c) const
+{
+  const std::string other(c);
+  return *this < other;
+}
+
+// Lexicographical "less than" with the string-like object on the left.
+template <typename StringT, typename ValueIteratorT>
+bool operator<(const StringT &s1, const ValueIteratorRange<ValueIteratorT> &s2)
+{
+  typedef typename std::iterator_traits<ValueIteratorT>::value_type ElemT;
+
+  return std::lexicographical_compare(s1.begin(), s1.end(),
+                                      s2.begin(), s2.end(),
+                                      std::less<ElemT>());
+}
+
+// Lexicographical "less than" with a C string on the left.
+template <typename ValueIteratorT>
+bool operator<(const char* c, const ValueIteratorRange<ValueIteratorT> &s2)
+{
+  typedef typename std::iterator_traits<ValueIteratorT>::value_type ElemT;
+
+  const char* last = c + std::char_traits<char>::length(c);
+  return std::lexicographical_compare(c, last, s2.begin(), s2.end(),
+                                      std::less<ElemT>());
+}
+
+// Streams every value of the range to os, one after another.
+template <typename OStream, typename ValueIteratorT>
+OStream& operator<<(OStream &os, ValueIteratorRange<ValueIteratorT> cr)
+{
+  for(ValueIteratorT it = cr.begin(); it != cr.end(); ++it)
+    os << *it;
+  return os;
+}
+
+// StringVector
+
+// Creates an empty vector; the value array is only allocated on request.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::StringVector(bool allocate)
+  : m_sorted(true), m_memoryMapped(false), m_charArray(0)
+{
+  if(allocate)
+    m_charArray = new std::vector<ValueT, Allocator<ValueT> >();
+}
+
+// Creates an empty vector whose value array uses the given allocator.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::StringVector(Allocator<ValueT> &alloc)
+  : m_sorted(true)
+  , m_memoryMapped(false)
+  , m_charArray(new std::vector<ValueT, Allocator<ValueT> >(alloc))
+{
+}
+
+// Appends s. The sorted flag is dropped when s is not greater than the
+// string that was last so far.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename StringT>
+void StringVector<ValueT, PosT, Allocator>::push_back(StringT s)
+{
+  const bool breaksOrder = is_sorted() && size() && !(back() < s);
+  if(breaksOrder) {
+    m_sorted = false;
+  }
+
+  // The new string starts at the current end of the value array.
+  m_positions.push_back(size2());
+  std::copy(s.begin(), s.end(), std::back_inserter(*m_charArray));
+}
+
+// Appends a C string by way of a std::string copy.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::push_back(const char* c)
+{
+  push_back(std::string(c));
+}
+
+// Iterator of the requested type positioned at the first string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename Iterator>
+Iterator StringVector<ValueT, PosT, Allocator>::begin() const
+{
+  typedef StringVector<ValueT, PosT, Allocator> Self;
+
+  // The iterators hold a non-const container pointer.
+  Self& self = const_cast<Self&>(*this);
+  return Iterator(self, 0);
+}
+
+// Iterator of the requested type positioned one past the last string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename Iterator>
+Iterator StringVector<ValueT, PosT, Allocator>::end() const
+{
+  typedef StringVector<ValueT, PosT, Allocator> Self;
+
+  // The iterators hold a non-const container pointer.
+  Self& self = const_cast<Self&>(*this);
+  return Iterator(self, size());
+}
+
+// Range iterator positioned at the first string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::iterator
+StringVector<ValueT, PosT, Allocator>::begin() const
+{
+  return this->template begin<iterator>();
+}
+
+// Range iterator positioned one past the last string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::iterator
+StringVector<ValueT, PosT, Allocator>::end() const
+{
+  return this->template end<iterator>();
+}
+
+// Current value of the sorted flag.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVector<ValueT, PosT, Allocator>::is_sorted() const
+{
+  return this->m_sorted;
+}
+
+// Number of stored strings, i.e. number of recorded start positions.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::size() const
+{
+  return this->m_positions.size();
+}
+
+// Total number of values held in the value array.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::size2() const
+{
+  return (*m_charArray).size();
+}
+
+// Range covering the values of string i. No bounds check is performed here.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::range
+StringVector<ValueT, PosT, Allocator>::at(PosT i) const
+{
+  const ValueT* first = begin(i);
+  const ValueT* last = end(i);
+  return range(first, last);
+}
+
+// Same as at(i).
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::range
+StringVector<ValueT, PosT, Allocator>::operator[](PosT i) const
+{
+  return this->at(i);
+}
+
+// Range of the last string; the vector must not be empty.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::range
+StringVector<ValueT, PosT, Allocator>::back() const
+{
+  const PosT last = size() - 1;
+  return at(last);
+}
+
+// Length of string i: distance to the start of the next string, or to the
+// end of the value array for the last string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::length(PosT i) const
+{
+  if(i+1 >= size())
+    return size2() - m_positions[i];
+
+  return m_positions[i+1] - m_positions[i];
+}
+
+// Address of the first value of string i inside the value array.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const ValueT* StringVector<ValueT, PosT, Allocator>::value_ptr(PosT i) const
+{
+  return &m_charArray->operator[](m_positions[i]);
+}
+
+// Pointer to the first value of string i (a raw pointer rather than a
+// vector const_iterator).
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const ValueT* StringVector<ValueT, PosT, Allocator>::begin(PosT i) const
+{
+  const ValueT* first = value_ptr(i);
+  return first;
+}
+
+// Pointer one past the last value of string i (a raw pointer rather than a
+// vector const_iterator).
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const ValueT* StringVector<ValueT, PosT, Allocator>::end(PosT i) const
+{
+  const ValueT* first = value_ptr(i);
+  return first + length(i);
+}
+
+// Returns the index at which s is located: a binary search while the
+// vector is flagged sorted, a linear search otherwise.
+// NOTE(review): in the sorted case the result is the lower_bound position,
+// which is not compared against s here - confirm callers verify the match.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+template <typename StringT>
+PosT StringVector<ValueT, PosT, Allocator>::find(StringT &s) const
+{
+  const iterator first = begin();
+  const iterator last = end();
+
+  if(m_sorted) {
+    return std::distance(first, std::lower_bound(first, last, s));
+  }
+  return std::distance(first, std::find(first, last, s));
+}
+
+// Looks up a C string by way of a std::string copy.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::find(const char* c) const
+{
+  std::string needle(c);
+  return find(needle);
+}
+
+// RangeIterator
+
+// Iterator that is not attached to any container.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::RangeIterator::RangeIterator()
+  : m_index(0)
+  , m_container(0)
+{
+}
+
+// Iterator over sv positioned at string `index`.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::RangeIterator::RangeIterator(
+  StringVector<ValueT, PosT, Allocator> &sv, PosT index)
+  : m_index(index)
+  , m_container(&sv)
+{
+}
+
+// Index of the string the iterator currently refers to.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::RangeIterator::get_index()
+{
+  return this->m_index;
+}
+
+// Range covering the values of the current string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+typename StringVector<ValueT, PosT, Allocator>::range
+StringVector<ValueT, PosT, Allocator>::RangeIterator::dereference() const
+{
+  return range(m_container->begin(m_index), m_container->end(m_index));
+}
+
+// Iterators are equal when they share container and index.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVector<ValueT, PosT, Allocator>::RangeIterator::equal(
+  const RangeIterator& other) const
+{
+  return m_container == other.m_container && m_index == other.m_index;
+}
+
+// Moves to the next string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::RangeIterator::increment()
+{
+  ++m_index;
+}
+
+// Moves to the previous string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::RangeIterator::decrement()
+{
+  --m_index;
+}
+
+// Moves n strings forward.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::RangeIterator::advance(PosT n)
+{
+  m_index = m_index + n;
+}
+
+// Index difference from this iterator to `other`, computed in PosT.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::RangeIterator::distance_to(
+  const RangeIterator& other) const
+{
+  const PosT delta = other.m_index - m_index;
+  return delta;
+}
+
+// StringIterator
+
+// Iterator that is not attached to any container.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::StringIterator::StringIterator()
+  : m_index(0)
+  , m_container(0)
+{
+}
+
+// Iterator over sv positioned at string `index`.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+StringVector<ValueT, PosT, Allocator>::StringIterator::StringIterator(
+  StringVector<ValueT, PosT, Allocator> &sv, PosT index)
+  : m_index(index)
+  , m_container(&sv)
+{
+}
+
+// Index of the string the iterator currently refers to.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::StringIterator::get_index()
+{
+  return this->m_index;
+}
+
+// std::string copy of the current string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+const std::string StringVector<ValueT, PosT, Allocator>::StringIterator::dereference() const
+{
+  const range current(m_container->begin(m_index), m_container->end(m_index));
+  return current.str();
+}
+
+// Iterators are equal when they share container and index.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+bool StringVector<ValueT, PosT, Allocator>::StringIterator::equal(
+  const StringIterator& other) const
+{
+  return m_container == other.m_container && m_index == other.m_index;
+}
+
+// Moves to the next string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::StringIterator::increment()
+{
+  ++m_index;
+}
+
+// Moves to the previous string.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::StringIterator::decrement()
+{
+  --m_index;
+}
+
+// Moves n strings forward.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+void StringVector<ValueT, PosT, Allocator>::StringIterator::advance(PosT n)
+{
+  m_index = m_index + n;
+}
+
+// Index difference from this iterator to `other`, computed in PosT.
+template<typename ValueT, typename PosT, template <typename> class Allocator>
+PosT StringVector<ValueT, PosT, Allocator>::StringIterator::distance_to(
+  const StringIterator& other) const
+{
+  const PosT delta = other.m_index - m_index;
+  return delta;
+}
+
+// ********** Some typedefs **********
+
+typedef StringVector<unsigned char, unsigned int> MediumStringVector;
+typedef StringVector<unsigned char, unsigned long> LongStringVector;
+
+}
+
+#endif
--- a/contrib/other-builds/moses2/legacy/CompactPT/ThrowingFwrite.cpp
+++ b/contrib/other-builds/moses2/legacy/CompactPT/ThrowingFwrite.cpp
@ -0,0 +1,30 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#include "ThrowingFwrite.h"
+
+// Wrapper around std::fwrite that does not let a short write pass silently.
+//
+// Forwards ptr/size/count/stream unchanged to std::fwrite. If the number of
+// elements written differs from `count`, UTIL_THROW_IF2 is invoked with a
+// "Short fwrite" message (presumably throwing; see util/exception.hh).
+// Otherwise the element count reported by std::fwrite is returned, which at
+// that point equals `count`.
+size_t ThrowingFwrite(const void *ptr, size_t size, size_t count, FILE* stream)
+{
+  // A zero element size is treated as a caller error; like any assert this
+  // check disappears when NDEBUG is defined.
+  assert(size);
+  size_t returnValue = std::fwrite(ptr, size, count, stream);
+  // NOTE(review): the message reports the element size, not the requested
+  // or the actually written element count.
+  UTIL_THROW_IF2(count != returnValue, "Short fwrite; requested size " << size);
+  return returnValue;
+}
--- a/contrib/other-builds/moses2/legacy/CompactPT/ThrowingFwrite.h
+++ b/contrib/other-builds/moses2/legacy/CompactPT/ThrowingFwrite.h
@ -0,0 +1,31 @@
+// $Id$
+// vim:tabstop=2
+/***********************************************************************
+Moses - factored phrase-based language decoder
+Copyright (C) 2006 University of Edinburgh
+
+This library is free software; you can redistribute it and/or
+modify it under the terms of the GNU Lesser General Public
+License as published by the Free Software Foundation; either
+version 2.1 of the License, or (at your option) any later version.
+
+This library is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+Lesser General Public License for more details.
+
+You should have received a copy of the GNU Lesser General Public
+License along with this library; if not, write to the Free Software
+Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
+***********************************************************************/
+
+#ifndef moses_ThrowingFwrite_h
+#define moses_ThrowingFwrite_h
+
+#include <cassert>
+#include <cstdio>
+#include "util/exception.hh"
+
+// Checked counterpart of std::fwrite with the same parameter list: writes
+// `count` elements of `size` bytes each from `ptr` to `stream` and returns
+// the number of elements written. A short write is reported through
+// UTIL_THROW_IF2 instead of being left to the caller; `size` is asserted
+// to be non-zero.
+size_t ThrowingFwrite(const void *ptr, size_t size, size_t count, FILE* stream);
+
+#endif