Merge branch 'master' into hieu

2024-09-19 15:17:10 +03:00 · 2014-01-16 17:46:21 +00:00 · 2014-01-16 17:46:21 +00:00 · 2c66ac995b
commit 2c66ac995b
parent 9555dc657d 4e75911331
31 changed files with 441 additions and 50 deletions
--- a/.gitignore
+++ b/.gitignore
@ -58,7 +58,6 @@ scripts/training/phrase-extract/relax-parse
 scripts/training/phrase-extract/score
 scripts/training/phrase-extract/statistics
 scripts/training/symal/symal
-scripts/training/train-model.perl
 dist
 bin
 previous.sh
@ -72,3 +71,8 @@ mert/sentence-bleu
 .DS_Store
 *.pbxuser
 *.mode1v3
+
+*.exe
+build/
+nbproject/
+
--- a/lm/builder/corpus_count.cc
+++ b/lm/builder/corpus_count.cc
@ -87,7 +87,7 @@ class VocabHandout {
    Table table_;

    std::size_t double_cutoff_;
-    
+
    util::FakeOFStream word_list_;
 };

@ -98,7 +98,7 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
    std::size_t operator()(const WordIndex *start) const {
      return util::MurmurHashNative(start, size_);
    }
-    
+
  private:
    const std::size_t size_;
 };
@ -106,11 +106,11 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
 class DedupeEquals : public std::binary_function<const WordIndex *, const WordIndex *, bool> {
  public:
    explicit DedupeEquals(std::size_t order) : size_(order * sizeof(WordIndex)) {}
-    
+
    bool operator()(const WordIndex *first, const WordIndex *second) const {
      return !memcmp(first, second, size_);
-    } 
-    
+    }
+
  private:
    const std::size_t size_;
 };
@ -131,7 +131,7 @@ typedef util::ProbingHashTable<DedupeEntry, DedupeHash, DedupeEquals> Dedupe;

 class Writer {
  public:
-    Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size) 
+    Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
      : block_(position), gram_(block_->Get(), order),
        dedupe_invalid_(order, std::numeric_limits<WordIndex>::max()),
        dedupe_(dedupe_mem, dedupe_mem_size, &dedupe_invalid_[0], DedupeHash(order), DedupeEquals(order)),
@ -140,7 +140,7 @@ class Writer {
      dedupe_.Clear();
      assert(Dedupe::Size(position.GetChain().BlockSize() / position.GetChain().EntrySize(), kProbingMultiplier) == dedupe_mem_size);
      if (order == 1) {
-        // Add special words.  AdjustCounts is responsible if order != 1.    
+        // Add special words.  AdjustCounts is responsible if order != 1.
        AddUnigramWord(kUNK);
        AddUnigramWord(kBOS);
      }
@ -170,16 +170,16 @@ class Writer {
        memmove(gram_.begin(), gram_.begin() + 1, sizeof(WordIndex) * (gram_.Order() - 1));
        return;
      }
-      // Complete the write.  
+      // Complete the write.
      gram_.Count() = 1;
-      // Prepare the next n-gram.  
+      // Prepare the next n-gram.
      if (reinterpret_cast<uint8_t*>(gram_.begin()) + gram_.TotalSize() != static_cast<uint8_t*>(block_->Get()) + block_size_) {
        NGram last(gram_);
        gram_.NextInMemory();
        std::copy(last.begin() + 1, last.end(), gram_.begin());
        return;
      }
-      // Block end.  Need to store the context in a temporary buffer.  
+      // Block end.  Need to store the context in a temporary buffer.
      std::copy(gram_.begin() + 1, gram_.end(), buffer_.get());
      dedupe_.Clear();
      block_->SetValidSize(block_size_);
@ -207,7 +207,7 @@ class Writer {
    // Hash table combiner implementation.
    Dedupe dedupe_;

-    // Small buffer to hold existing ngrams when shifting across a block boundary.  
+    // Small buffer to hold existing ngrams when shifting across a block boundary.
    boost::scoped_array<WordIndex> buffer_;

    const std::size_t block_size_;
@ -223,7 +223,7 @@ std::size_t CorpusCount::VocabUsage(std::size_t vocab_estimate) {
  return VocabHandout::MemUsage(vocab_estimate);
 }

-CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block) 
+CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block)
  : from_(from), vocab_write_(vocab_write), token_count_(token_count), type_count_(type_count),
    dedupe_mem_size_(Dedupe::Size(entries_per_block, kProbingMultiplier)),
    dedupe_mem_(util::MallocOrThrow(dedupe_mem_size_)) {
--- a/lm/builder/interpolate.cc
+++ b/lm/builder/interpolate.cc
@ -33,12 +33,12 @@ class Callback {
      pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1];
      probs_[order_minus_1 + 1] = pay.complete.prob;
      pay.complete.prob = log10(pay.complete.prob);
-      // TODO: this is a hack to skip n-grams that don't appear as context.  Pruning will require some different handling.  
-      if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS) {
+      // TODO: this is a hack to skip n-grams that don't appear as context.  Pruning will require some different handling.
+      if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS && backoffs_[order_minus_1].Get()) { // check for a valid pointer at the end
        pay.complete.backoff = log10(*static_cast<const float*>(backoffs_[order_minus_1].Get()));
        ++backoffs_[order_minus_1];
      } else {
-        // Not a context.  
+        // Not a context.
        pay.complete.backoff = 0.0;
      }
    }
@ -52,7 +52,7 @@ class Callback {
 };
 } // namespace

-Interpolate::Interpolate(uint64_t unigram_count, const ChainPositions &backoffs) 
+Interpolate::Interpolate(uint64_t unigram_count, const ChainPositions &backoffs)
  : uniform_prob_(1.0 / static_cast<float>(unigram_count - 1)), backoffs_(backoffs) {}

 // perform order-wise interpolation
--- a/lm/config.cc
+++ b/lm/config.cc
@ -11,7 +11,11 @@ Config::Config() :
  enumerate_vocab(NULL),
  unknown_missing(COMPLAIN),
  sentence_marker_missing(THROW_UP),
+#if defined(_WIN32) || defined(_WIN64)
+  positive_log_probability(SILENT),
+#else
  positive_log_probability(THROW_UP),
+#endif
  unknown_missing_logprob(-100.0),
  probing_multiplier(1.5),
  building_memory(1073741824ULL), // 1 GB
--- a/lm/filter/arpa_io.hh
+++ b/lm/filter/arpa_io.hh
@ -14,7 +14,10 @@
 #include <string>
 #include <vector>

+#if !defined __MINGW32__
 #include <err.h>
+#endif
+
 #include <string.h>
 #include <stdint.h>

--- a/lm/filter/count_io.hh
+++ b/lm/filter/count_io.hh
@ -5,7 +5,9 @@
 #include <iostream>
 #include <string>

+#if !defined __MINGW32__
 #include <err.h>
+#endif

 #include "util/file_piece.hh"

@ -17,7 +19,12 @@ class CountOutput : boost::noncopyable {

    void AddNGram(const StringPiece &line) {
      if (!(file_ << line << '\n')) {
+#if defined __MINGW32__
+        std::cerr<<"Writing counts file failed"<<std::endl;
+        exit(3);
+#else
        err(3, "Writing counts file failed");
+#endif
      }
    }

@ -35,7 +42,7 @@ class CountOutput : boost::noncopyable {

 class CountBatch {
  public:
-    explicit CountBatch(std::streamsize initial_read) 
+    explicit CountBatch(std::streamsize initial_read)
      : initial_read_(initial_read) {
      buffer_.reserve(initial_read);
    }
@ -68,7 +75,7 @@ class CountBatch {
  private:
    std::streamsize initial_read_;

-    // This could have been a std::string but that's less happy with raw writes.  
+    // This could have been a std::string but that's less happy with raw writes.
    std::vector<char> buffer_;
 };

--- a/lm/filter/filter_main.cc
+++ b/lm/filter/filter_main.cc
@ -57,7 +57,7 @@ typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION, MODE_UNSET} Fil
 typedef enum {FORMAT_ARPA, FORMAT_COUNT} Format;

 struct Config {
-  Config() : 
+  Config() :
 #ifndef NTHREAD
  batch_size(25000),
  threads(boost::thread::hardware_concurrency()),
@ -202,7 +202,7 @@ int main(int argc, char *argv[]) {
      return 1;
    }
  }
-  
+
  if (config.mode == lm::MODE_UNSET) {
    lm::DisplayHelp(argv[0]);
    return 1;
@ -221,7 +221,12 @@ int main(int argc, char *argv[]) {
  } else if (!strncmp(cmd_input, "model:", 6)) {
    cmd_input += 6;
  } else if (strchr(cmd_input, ':')) {
+#if defined __MINGW32__
+    std::cerr << "Specify vocab: or model: before the input file name, not " << cmd_input << std::endl;
+    exit(1);
+#else
    errx(1, "Specify vocab: or model: before the input file name, not \"%s\"", cmd_input);
+#endif // defined
  } else {
    std::cerr << "Assuming that " << cmd_input << " is a model file" << std::endl;
  }
@ -232,7 +237,12 @@ int main(int argc, char *argv[]) {
  } else {
    cmd_file.open(cmd_input, std::ios::in);
    if (!cmd_file) {
+#if defined __MINGW32__
+      std::cerr << "Could not open input file " << cmd_input << std::endl;
+      exit(2);
+#else
      err(2, "Could not open input file %s", cmd_input);
+#endif // defined
    }
    vocab = &cmd_file;
  }
--- a/lm/filter/vocab.cc
+++ b/lm/filter/vocab.cc
@ -4,7 +4,10 @@
 #include <iostream>

 #include <ctype.h>
+
+#if !defined __MINGW32__
 #include <err.h>
+#endif

 namespace lm {
 namespace vocab {
@ -31,7 +34,7 @@ bool IsLineEnd(std::istream &in) {
 }// namespace

 // Read space separated words in enter separated lines.  These lines can be
-// very long, so don't read an entire line at a time.  
+// very long, so don't read an entire line at a time.
 unsigned int ReadMultiple(std::istream &in, boost::unordered_map<std::string, std::vector<unsigned int> > &out) {
  in.exceptions(std::istream::badbit);
  unsigned int sentence = 0;
--- a/mert/BleuDocScorer.cpp
+++ b/mert/BleuDocScorer.cpp
@ -18,6 +18,12 @@

 using namespace std;

+// This file uses the `uint` shorthand, which MinGW's headers presumably do
+// not declare (hence this shim).  Platforms that do provide it (e.g. glibc's
+// <sys/types.h>) declare it as `unsigned int`, so use the same width here;
+// a 16-bit type would silently wrap counts and indices above 65535.
+#if defined __MINGW32__
+#ifndef uint
+#define uint unsigned int
+#endif // uint
+#endif // if
+
 namespace
 {

--- a/mert/PreProcessFilter.cpp
+++ b/mert/PreProcessFilter.cpp
@ -34,6 +34,9 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
  : m_toFilter(NULL),
    m_fromFilter(NULL)
 {
+#if defined __MINGW32__
+    //TODO(jie): replace this function with boost implementation
+#else
  // Child error signal install
  // sigaction is the replacement for the traditional signal() method
  struct sigaction action;
@ -119,6 +122,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
    perror("Error: fork failed");
    exit(EXIT_FAILURE);
  }
+#endif // defined
 }

 string PreProcessFilter::ProcessSentence(const string& sentence)
--- a/mert/Timer.cpp
+++ b/mert/Timer.cpp
@ -6,10 +6,14 @@
 #include <sys/time.h>
 #endif

+#if defined __MINGW32__
+#include <sys/time.h>
+#endif // defined
+
 namespace
 {

-#if !defined(_WIN32) && !defined(_WIN64)
+#if (!defined(_WIN32) && !defined(_WIN64)) || defined __MINGW32__
 uint64_t GetMicroSeconds(const struct timeval& tv)
 {
  return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
--- a/mert/evaluator.cpp
+++ b/mert/evaluator.cpp
@ -6,6 +6,10 @@
 #include <getopt.h>
 #include <math.h>

+#if defined __MINGW32__
+#include <time.h>
+#endif // defined
+
 #include "Scorer.h"
 #include "ScorerFactory.h"
 #include "Timer.h"
--- a/mingw/Makefile
+++ b/mingw/Makefile
@ -0,0 +1,264 @@
+	
+# Environment
+MKDIR=C:\MinGW\msys\1.0\bin\mkdir.exe
+CP=C:\MinGW\msys\1.0\bin\cp.exe
+GREP=C:\MinGW\msys\1.0\bin\grep.exe
+SED=C:\MinGW\msys\1.0\bin\sed.exe
+RM=C:\MinGW\msys\1.0\bin\rm.exe
+NM=nm
+CCADMIN=CCadmin
+RANLIB=ranlib
+CC=gcc
+CCC=g++
+CXX=g++
+FC=gfortran
+AS=as
+AR=ar
+
+# Macros
+CND_PLATFORM=MinGW-Windows
+CND_DLIB_EXT=dll
+CND_CONF=Debug
+CND_DISTDIR=dist
+CND_BUILDDIR=build
+
+# Object Directory
+OBJECTDIR=${CND_BUILDDIR}\${CND_CONF}\${CND_PLATFORM}
+
+# C Compiler Flags
+CFLAGS=
+
+# CC Compiler Flags
+CCFLAGS=
+CXXFLAGS=-g -Wall -DKENLM_MAX_ORDER=6 -DMAX_NUM_FACTORS=4 -D_WIN32 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I. -ID:\work\moses\dependencies\boost_1_54_0_install\include
+# -DWITH_THREADS
+
+# Fortran Compiler Flags
+FFLAGS=
+
+# Assembler Flags
+ASFLAGS=
+
+# Link Libraries and Options
+LDLIBSOPTIONS=-LD:\work\moses\dependencies\boost_1_54_0_install\lib
+
+.PHONY : all
+all: ${OBJECTDIR}\lmplz.exe ${OBJECTDIR}\moses-cmd.exe ${OBJECTDIR}\build_binary.exe \
+	${OBJECTDIR}\moses-chart-cmd.exe ${OBJECTDIR}\processPhraseTable.exe \
+	${OBJECTDIR}\processLexicalTable.exe ${OBJECTDIR}\CreateOnDiskPt.exe
+	
+${OBJECTDIR}\\%.d: %.cc
+	${MKDIR} -p ${subst \,/,$(@D)}
+	- ${RM} -f $@
+	${CXX} -MM $(CXXFLAGS) $< > $@.temp 
+	${SED} "1s/${*F}.o[ :]*/${subst \,\\\\,${OBJECTDIR}\$*}.o ${subst \,\\\\,${OBJECTDIR}\$*}.d : /g" < $@.temp > $@
+	- ${RM} -f $@.temp 
+
+${OBJECTDIR}\\%.d: %.cpp
+	${MKDIR} -p ${subst \,/,$(@D)}
+	- ${RM} -f $@
+	${CXX} -MM $(CXXFLAGS) $< > $@.temp 
+	${SED} "1s/${*F}.o[ :]*/${subst \,\\\\,${OBJECTDIR}\$*}.o ${subst \,\\\\,${OBJECTDIR}\$*}.d : /g" < $@.temp > $@
+	- ${RM} -f $@.temp 
+
+# libutil
+UTIL_CC_SLASH = ${wildcard util/*.cc util/stream/*.cc util/double-conversion/*.cc}
+UTIL_CC = ${subst /,\,${UTIL_CC_SLASH}}
+UTIL_O = ${addprefix ${OBJECTDIR}\,${UTIL_CC:%.cc=%.o}}
+sinclude ${UTIL_O:%.o=%.d}
+	
+${UTIL_O}: ${OBJECTDIR}\\%.o: %.cc
+	${MKDIR} -p ${subst \,/,$(@D)}
+	$(CXX) $(CXXFLAGS) -c $< -o $@
+
+${OBJECTDIR}\libutil.a: ${UTIL_O}
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${AR} -r -s ${OBJECTDIR}\libutil.a ${UTIL_O}
+
+# libkenlm
+KENLM_CC_ALL_SLASH = ${wildcard lm/*.cc lm/builder/*.cc lm/filter/*.cc}
+KENLM_CC_ALL = ${subst /,\,${KENLM_CC_ALL_SLASH}}
+KENLM_CC = ${filter-out %main.cc %test.cc,${KENLM_CC_ALL}}
+KENLM_O = ${addprefix ${OBJECTDIR}\,${KENLM_CC:%.cc=%.o}}
+sinclude ${KENLM_O:%.o=%.d}
+
+${KENLM_O}: ${OBJECTDIR}\\%.o: %.cc
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+	
+${OBJECTDIR}\libkenlm.a: ${KENLM_O}
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${AR} -r -s ${OBJECTDIR}\libkenlm.a ${KENLM_O}
+	
+#lmplz
+sinclude ${OBJECTDIR}\lm\builder\lmplz_main.d
+
+${OBJECTDIR}\lm\builder\lmplz_main.o: ${OBJECTDIR}\\%.o: %.cc
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+
+${OBJECTDIR}\lmplz.exe: ${OBJECTDIR}\lm\builder\lmplz_main.o ${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libutil.a 
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${OBJECTDIR}\lm\builder\lmplz_main.o \
+		-lkenlm -lutil \
+		-lboost_system-mt -lboost_filesystem-mt -lboost_program_options-mt -lboost_thread-mt \
+		-o ${OBJECTDIR}\lmplz.exe
+
+#build_binary
+sinclude ${OBJECTDIR}\lm\build_binary_main.d
+
+${OBJECTDIR}\lm\build_binary_main.o: ${OBJECTDIR}\\%.o: %.cc
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+
+${OBJECTDIR}\build_binary.exe: ${OBJECTDIR}\lm\build_binary_main.o ${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libutil.a
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${OBJECTDIR}\lm\build_binary_main.o \
+		-lkenlm -lutil \
+		-lboost_system-mt -lboost_filesystem-mt -lboost_program_options-mt -lboost_thread-mt \
+		-o ${OBJECTDIR}\build_binary.exe 
+
+#libondiskpt
+ONDISKPT_CC = ${addprefix OnDiskPt\, OnDiskWrapper.cpp SourcePhrase.cpp TargetPhrase.cpp Word.cpp Phrase.cpp PhraseNode.cpp TargetPhraseCollection.cpp Vocab.cpp OnDiskQuery.cpp}
+ONDISKPT_O = ${addprefix ${OBJECTDIR}\,${ONDISKPT_CC:%.cpp=%.o}}
+sinclude ${ONDISKPT_O:%.o=%.d}
+	
+${ONDISKPT_O}: ${OBJECTDIR}\\%.o: %.cpp
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+
+${OBJECTDIR}\libondiskpt.a: ${ONDISKPT_O}
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${AR} -r -s ${OBJECTDIR}\libondiskpt.a ${ONDISKPT_O}
+	
+#libmoses
+MOSES_CC_ALL_SLASH = ${wildcard moses/*.cpp moses/TranslationModel/*.cpp \
+			moses/TranslationModel/fuzzy-match/*.cpp \
+			moses/TranslationModel/DynSAInclude/*.cpp \
+			moses/TranslationModel/RuleTable/*.cpp \
+			moses/TranslationModel/Scope3Parser/*.cpp \
+			moses/TranslationModel/CYKPlusParser/*.cpp \
+			moses/FF/*.cpp \
+			moses/FF/OSM-Feature/*.cpp \
+			moses/FF/LexicalReordering/*.cpp\
+			moses/TranslationModel/CompactPT/*.cpp}
+#lm
+MOSES_CC_ALL_SLASH += ${addprefix moses\LM\, Backward.cpp BackwardLMState.cpp Base.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp SingleFactor.cpp SkeletonLM.cpp}
+MOSES_CC_ALL = ${subst /,\,${MOSES_CC_ALL_SLASH}}
+MOSES_CC = ${filter-out %Test.cpp moses\Mock%.cpp,${MOSES_CC_ALL}}
+MOSES_O = ${addprefix ${OBJECTDIR}\,${MOSES_CC:%.cpp=%.o}}
+sinclude ${MOSES_O:%.o=%.d}
+	
+${MOSES_O}: ${OBJECTDIR}\\%.o: %.cpp
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+
+SEARCH_CC = ${addprefix search\, edge_generator.cc nbest.cc rule.cc vertex.cc}
+SEARCH_O = ${addprefix ${OBJECTDIR}\,${SEARCH_CC:%.cc=%.o}}
+sinclude ${SEARCH_O:%.o=%.d}
+	
+${SEARCH_O}: ${OBJECTDIR}\\%.o: %.cc
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+
+${OBJECTDIR}\libmoses.a: ${MOSES_O} ${SEARCH_O}
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${AR} -r -s ${OBJECTDIR}\libmoses.a ${MOSES_O} ${SEARCH_O}
+	
+#libmosescmd
+MOSESCMD_CC = ${addprefix moses-cmd\, IOWrapper.cpp mbr.cpp LatticeMBR.cpp TranslationAnalysis.cpp}
+MOSESCMD_O = ${addprefix ${OBJECTDIR}\,${MOSESCMD_CC:%.cpp=%.o}}
+sinclude ${MOSESCMD_O:%.o=%.d}
+	
+${MOSESCMD_O}: ${OBJECTDIR}\\%.o: %.cpp
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+
+#moses-cmd
+sinclude ${OBJECTDIR}\moses-cmd\Main.d
+
+${OBJECTDIR}\moses-cmd\Main.o: ${OBJECTDIR}\\%.o: %.cpp
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+	
+${OBJECTDIR}\moses-cmd.exe: ${OBJECTDIR}\libmoses.a \
+				${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libondiskpt.a \
+				${OBJECTDIR}\libutil.a ${OBJECTDIR}\moses-cmd\Main.o ${MOSESCMD_O}
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${OBJECTDIR}\moses-cmd\Main.o ${MOSESCMD_O} \
+		-lmoses	-lkenlm -londiskpt -lutil \
+		-lboost_iostreams-mt -lboost_bzip2-mt -lboost_zlib-mt -lboost_system-mt -lboost_filesystem-mt -lboost_thread-mt \
+		-lz -lbz2 -ldl -lmman \
+		-o ${OBJECTDIR}\moses-cmd.exe 
+
+#moseschartcmd
+MOSESCHARTCMD_CC = ${addprefix moses-chart-cmd\, Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp}
+MOSESCHARTCMD_O = ${addprefix ${OBJECTDIR}\,${MOSESCHARTCMD_CC:%.cpp=%.o}}
+sinclude ${MOSESCHARTCMD_O:%.o=%.d}
+	
+${MOSESCHARTCMD_O}: ${OBJECTDIR}\\%.o: %.cpp
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+	
+${OBJECTDIR}\moses-chart-cmd.exe: ${OBJECTDIR}\libmoses.a \
+				${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libondiskpt.a \
+				${OBJECTDIR}\libutil.a ${MOSESCHARTCMD_O}
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${MOSESCHARTCMD_O} \
+		-lmoses	-lkenlm -londiskpt -lutil \
+		-lboost_iostreams-mt -lboost_bzip2-mt -lboost_zlib-mt -lboost_system-mt -lboost_filesystem-mt -lboost_thread-mt \
+		-lz -lbz2 -ldl -lmman \
+		-o ${OBJECTDIR}\moses-chart-cmd.exe 
+	
+#processPhraseTable
+PROCESSPHRASETABLE_CC = ${addprefix misc\, GenerateTuples.cpp  processPhraseTable.cpp}
+PROCESSPHRASETABLE_O = ${addprefix ${OBJECTDIR}\,${PROCESSPHRASETABLE_CC:%.cpp=%.o}}
+sinclude ${PROCESSPHRASETABLE_O:%.o=%.d}
+	
+${PROCESSPHRASETABLE_O}: ${OBJECTDIR}\\%.o: %.cpp
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+	
+${OBJECTDIR}\processPhraseTable.exe: ${OBJECTDIR}\libmoses.a ${OBJECTDIR}\libutil.a ${PROCESSPHRASETABLE_O}
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${PROCESSPHRASETABLE_O} \
+		-lmoses	-lutil \
+		-lboost_iostreams-mt -lboost_bzip2-mt -lboost_zlib-mt -lboost_system-mt -lboost_filesystem-mt -lboost_thread-mt \
+		-lz -lbz2 -ldl -lmman \
+		-o ${OBJECTDIR}\processPhraseTable.exe 
+	
+#processLexicalTable
+PROCESSLEXTABLE_CC = ${addprefix misc\, processLexicalTable.cpp}
+PROCESSLEXTABLE_O = ${addprefix ${OBJECTDIR}\,${PROCESSLEXTABLE_CC:%.cpp=%.o}}
+sinclude ${PROCESSLEXTABLE_O:%.o=%.d}
+	
+${PROCESSLEXTABLE_O}: ${OBJECTDIR}\\%.o: %.cpp
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+	
+${OBJECTDIR}\processLexicalTable.exe: ${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libmoses.a ${OBJECTDIR}\libutil.a ${OBJECTDIR}\libondiskpt.a ${PROCESSLEXTABLE_O}
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${PROCESSLEXTABLE_O} \
+		-lmoses	-lkenlm -londiskpt -lutil \
+		-lboost_iostreams-mt -lboost_bzip2-mt -lboost_zlib-mt -lboost_system-mt -lboost_filesystem-mt -lboost_thread-mt \
+		-lz -lbz2 -ldl -lmman \
+		-o ${OBJECTDIR}\processLexicalTable.exe 
+	
+#ondiskpt
+sinclude ${OBJECTDIR}\OnDiskPt\Main.d
+
+${OBJECTDIR}\OnDiskPt\Main.o: ${OBJECTDIR}\\%.o: %.cpp
+	${MKDIR} -p ${subst \,/,$(@D)}
+	${CXX} ${CXXFLAGS} -c $< -o $@
+
+${OBJECTDIR}\CreateOnDiskPt.exe: ${OBJECTDIR}\OnDiskPt\Main.o ${OBJECTDIR}\libondiskpt.a ${OBJECTDIR}\libmoses.a ${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libutil.a
+	${MKDIR} -p ${subst \,/,${OBJECTDIR}}
+	${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${OBJECTDIR}\OnDiskPt\Main.o \
+		-londiskpt -lmoses -lkenlm -lutil \
+		-lboost_system-mt -lboost_filesystem-mt -lboost_program_options-mt -lboost_thread-mt \
+		-lz -lbz2 -ldl -lmman \
+		-o ${OBJECTDIR}\CreateOnDiskPt.exe 
+
+# Remove everything this Makefile generates (objects, .d files, libraries and
+# executables all live under ${OBJECTDIR}).  The path is converted to forward
+# slashes because the msys rm.exe is used, as for ${MKDIR} above.
+.PHONY : clean
+clean:
+	- ${RM} -rf ${subst \,/,${OBJECTDIR}}
+
--- a/moses-cmd/Main.cpp
+++ b/moses-cmd/Main.cpp
@ -571,7 +571,7 @@ int main(int argc, char** argv)
 #ifdef HAVE_PROTOBUF
    GOOGLE_PROTOBUF_VERIFY_VERSION;
 #endif
-
+    
    // echo command line, if verbose
    IFVERBOSE(1) {
      TRACE_ERR("command: ");
--- a/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
+++ b/moses/FF/LexicalReordering/LexicalReorderingTable.cpp
@ -8,7 +8,7 @@
 #include "moses/TargetPhrase.h"
 #include "moses/TargetPhraseCollection.h"

-#ifdef HAVE_CMPH
+#if !defined WIN32 || defined __MINGW32__ || defined HAVE_CMPH
 #include "moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
 #endif

--- a/moses/FeatureVector.cpp
+++ b/moses/FeatureVector.cpp
@ -25,6 +25,10 @@
 #include <sstream>
 #include <stdexcept>

+#if defined __MINGW32__ && defined WITH_THREADS
+#include <boost/thread/locks.hpp>
+#endif // WITH_THREADS
+
 #include "FeatureVector.h"
 #include "util/string_piece_hash.hh"

--- a/moses/File.h
+++ b/moses/File.h
@ -20,9 +20,15 @@ namespace Moses
 {

 #ifdef WIN32
+#ifdef __MINGW32__
+#define OFF_T __int64
+#define FTELLO(f) ftello64(f)
+#define FSEEKO(file, offset, origin) fseeko64(file, offset, origin)
+#else
 #define OFF_T __int64
 #define FTELLO(file) _ftelli64(file)
 #define FSEEKO(file, offset, origin) _fseeki64(file, offset, origin)
+#endif

 #else
 #define OFF_T off_t
--- a/moses/Manager.cpp
+++ b/moses/Manager.cpp
@ -405,13 +405,13 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
      //cerr << endl;

      //draw the sample
-      float random = log((float)rand()/RAND_MAX);
+      float frandom = log((float)rand()/RAND_MAX);
      size_t position = 1;
      float sum = candidateScores[0];
-      for (; position < candidateScores.size() && sum < random; ++position) {
+      for (; position < candidateScores.size() && sum < frandom; ++position) {
        sum = log_sum(sum,candidateScores[position]);
      }
-      //cerr << "Random: " << random << " Chose " << position-1 << endl;
+      //cerr << "Random: " << frandom << " Chose " << position-1 << endl;
      const Hypothesis* chosen =  candidates[position-1];
      path.push_back(chosen);
    }
--- a/moses/TranslationModel/CompactPT/BlockHashIndex.h
+++ b/moses/TranslationModel/CompactPT/BlockHashIndex.h
@ -37,6 +37,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA

 #ifdef WITH_THREADS
 #include "moses/ThreadPool.h"
+#else
+#include <time.h>
 #endif

 namespace Moses
--- a/moses/TranslationModel/CompactPT/MmapAllocator.h
+++ b/moses/TranslationModel/CompactPT/MmapAllocator.h
@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <cstdio>
 #include <unistd.h>

+#ifndef __MMAN_PAGE_SIZE__
+#define __MMAN_PAGE_SIZE__ sysconf(_SC_PAGE_SIZE)
+#endif
+
 namespace Moses
 {
 template <class T>
@ -56,25 +60,25 @@ public:

  MmapAllocator() throw()
    : m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)),
-      m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+      m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
      m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
  }

  MmapAllocator(std::FILE* f_ptr) throw()
    : m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
-      m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+      m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
      m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
  }

  MmapAllocator(std::FILE* f_ptr, size_t data_offset) throw()
    : m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
-      m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+      m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
      m_data_offset(data_offset), m_fixed(true), m_count(new size_t(0)) {
  }

  MmapAllocator(std::string fileName) throw()
    : m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc(fileno(m_file_ptr)),
-      m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
+      m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
      m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
  }

--- a/moses/TranslationModel/DynSAInclude/types.h
+++ b/moses/TranslationModel/DynSAInclude/types.h
@ -8,7 +8,7 @@
 #include <typeinfo>
 #include <stdint.h>

-#ifdef WIN32
+#if defined WIN32 && !defined __MINGW32__
 #define iterate(c, i) for(decltype(c.begin()) i = c.begin(); i != c.end(); ++i)
 #define piterate(c, i) for(decltype(c->begin()) i = c->begin(); i != c->end(); ++i)
 #define riterate(c, i) for(decltype(c.rbegin()) i = c.rbegin(); i != c.rend(); ++i)
--- a/moses/TranslationModel/PhraseDictionary.h
+++ b/moses/TranslationModel/PhraseDictionary.h
@ -35,6 +35,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 #include <boost/thread/tss.hpp>
 #else
 #include <boost/scoped_ptr.hpp>
+#include <time.h>
 #endif

 #include "moses/Phrase.h"
--- a/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
+++ b/moses/TranslationModel/RuleTable/PhraseDictionaryFuzzyMatch.cpp
@ -49,6 +49,32 @@

 using namespace std;

+#if defined __MINGW32__ && !defined mkdtemp
+#include <windows.h>
+#include <errno.h>
+// Minimal mkdtemp() replacement for MinGW.
+//
+// Unlike POSIX mkdtemp(), the directory is always created inside the
+// directory reported by GetTempPath(): only the last '/'-separated component
+// of the template is kept, its trailing "XXXXXX" is replaced by six digits,
+// and the resulting full path is written back into tempbuf.
+//
+// tempbuf  in: template ending in "XXXXXX"; out: full path of the new
+//          directory.  The buffer must therefore be large enough for the
+//          temp path plus the base name (MAX_PATH chars is sufficient).
+//          NOTE(review): InitializeForInput() in this file passes an array
+//          sized for the template only -- it needs to be enlarged.
+// returns  tempbuf on success; NULL with errno set on failure (bad template,
+//          path too long, or the directory could not be created).
+char *mkdtemp(char *tempbuf) {
+  static const size_t kSuffixLen = 6;
+  static const int kMaxAttempts = 100;
+
+  // Reject NULL and templates that are too short to hold the suffix before
+  // indexing from the end of the string.
+  const size_t len = tempbuf ? strlen(tempbuf) : 0;
+  if (len < kSuffixLen || strcmp(tempbuf + len - kSuffixLen, "XXXXXX")) {
+    errno = EINVAL;
+    return NULL;
+  }
+
+  // Keep only the final path component of the template.
+  const char *slash = strrchr(tempbuf, '/');
+  const char *tempbase = slash ? slash + 1 : tempbuf;
+  const size_t baselen = strlen(tempbase);
+  char tempbasebuf[MAX_PATH] = "";
+  if (baselen >= sizeof(tempbasebuf)) {
+    errno = ENAMETOOLONG;
+    return NULL;
+  }
+  memcpy(tempbasebuf, tempbase, baselen + 1);
+
+  // Query the temp directory into a local buffer so a failure leaves the
+  // caller's template untouched.
+  char tempdir[MAX_PATH + 1] = "";
+  const DWORD dirlen = ::GetTempPath(sizeof(tempdir), tempdir);
+  if (dirlen == 0) {
+    errno = ENOENT;
+    return NULL;
+  }
+  if (dirlen >= sizeof(tempdir) || dirlen + baselen >= MAX_PATH) {
+    errno = ENAMETOOLONG;
+    return NULL;
+  }
+
+  // Derive candidate suffixes without touching the global rand() state, and
+  // retry on collisions so that calls made in quick succession still get
+  // distinct directories.
+  const unsigned long seed = ::GetTickCount() ^ (::GetCurrentProcessId() << 10);
+  for (int attempt = 0; attempt < kMaxAttempts; ++attempt) {
+    sprintf(tempbasebuf + baselen - kSuffixLen, "%06lu",
+            (seed + attempt * 7919UL) % 1000000UL);
+    strcpy(tempbuf, tempdir);
+    strcat(tempbuf, tempbasebuf);
+    if (::CreateDirectory(tempbuf, NULL)) {
+      return tempbuf;
+    }
+    const DWORD error = ::GetLastError();
+    if (error != ERROR_ALREADY_EXISTS) {
+      errno = (error == ERROR_PATH_NOT_FOUND) ? ENOENT : EACCES;
+      return NULL;
+    }
+  }
+
+  errno = EEXIST;
+  return NULL;
+}
+#endif
+
 namespace Moses
 {

@ -96,6 +122,9 @@ SetParameter(const std::string& key, const std::string& value)

 int removedirectoryrecursively(const char *dirname)
 {
+#if defined __MINGW32__
+    //TODO(jie): replace this function with boost implementation
+#else
  DIR *dir;
  struct dirent *entry;
  char path[PATH_MAX];
@ -141,13 +170,17 @@ int removedirectoryrecursively(const char *dirname)
   * printing here, see above)
   */
  //printf("(not really) Deleting: %s\n", dirname);
-
+#endif
  return 1;
 }

 void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
 {
+#if defined __MINGW32__
+  char dirName[] = "moses.XXXXXX";
+#else
  char dirName[] = "/tmp/moses.XXXXXX";
+#endif // defined
  char *temp = mkdtemp(dirName);
  UTIL_THROW_IF2(temp == NULL,
 		  "Couldn't create temporary directory " << dirName);
--- a/scripts/ems/example/config.basic
+++ b/scripts/ems/example/config.basic
@ -369,7 +369,7 @@ alignment-symmetrization-method = grow-diag-final-and

 ### create a bilingual concordancer for the model
 #
-#biconcor = $moses-script-dir/ems/biconcor/biconcor
+#biconcor = $moses-bin-dir/biconcor

 ############################################################

--- a/scripts/ems/example/config.factored
+++ b/scripts/ems/example/config.factored
@ -389,7 +389,7 @@ alignment-symmetrization-method = grow-diag-final-and

 ### create a bilingual concordancer for the model
 #
-#biconcor = $moses-script-dir/ems/biconcor/biconcor
+#biconcor = $moses-bin-dir/biconcor

 ### lexicalized reordering: specify orientation type
 # (default: only distance-based reordering model)
--- a/scripts/ems/example/config.hierarchical
+++ b/scripts/ems/example/config.hierarchical
@ -369,7 +369,7 @@ alignment-symmetrization-method = grow-diag-final-and

 ### create a bilingual concordancer for the model
 #
-#biconcor = $moses-script-dir/ems/biconcor/biconcor
+#biconcor = $moses-bin-dir/biconcor

 ### lexicalized reordering: specify orientation type
 # (default: only distance-based reordering model)
--- a/scripts/ems/example/config.syntax
+++ b/scripts/ems/example/config.syntax
@ -373,7 +373,7 @@ alignment-symmetrization-method = grow-diag-final-and

 ### create a bilingual concordancer for the model
 #
-#biconcor = $moses-script-dir/ems/biconcor/biconcor
+#biconcor = $moses-bin-dir/biconcor

 ### lexicalized reordering: specify orientation type
 # (default: only distance-based reordering model)
--- a/scripts/ems/example/config.toy
+++ b/scripts/ems/example/config.toy
@ -353,7 +353,7 @@ alignment-symmetrization-method = grow-diag-final-and

 ### create a bilingual concordancer for the model
 #
-#biconcor = $moses-script-dir/ems/biconcor/biconcor
+#biconcor = $moses-bin-dir/biconcor

 ############################################################

--- a/util/file.cc
+++ b/util/file.cc
@ -17,7 +17,10 @@
 #include <fcntl.h>
 #include <stdint.h>

-#if defined(_WIN32) || defined(_WIN64)
+#if defined __MINGW32__
+#include <windows.h>
+#include <unistd.h>
+#elif defined(_WIN32) || defined(_WIN64)
 #include <windows.h>
 #include <io.h>
 #include <algorithm>
@ -76,7 +79,12 @@ int CreateOrThrow(const char *name) {
 }

 uint64_t SizeFile(int fd) {
-#if defined(_WIN32) || defined(_WIN64)
+#if defined __MINGW32__
+  struct stat sb;
+  int ret = fstat(fd, &sb);
+  if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
+  return sb.st_size;
+#elif defined(_WIN32) || defined(_WIN64)
  __int64 ret = _filelengthi64(fd);
  return (ret == -1) ? kBadSize : ret;
 #else // Not windows.
@ -100,7 +108,9 @@ uint64_t SizeOrThrow(int fd) {
 }

 void ResizeOrThrow(int fd, uint64_t to) {
-#if defined(_WIN32) || defined(_WIN64)
+#if defined __MINGW32__
+    int ret = ftruncate
+#elif defined(_WIN32) || defined(_WIN64)
    errno_t ret = _chsize_s
 #elif defined(OS_ANDROID)
    int ret = ftruncate64
@ -162,7 +172,7 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
 void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
  uint8_t *to = static_cast<uint8_t*>(to_void);
 #if defined(_WIN32) || defined(_WIN64)
-  UTIL_THROW(Exception, "This pread implementation for windows is broken.  Please send me a patch that does not change the file pointer.  Atomically.  Or send me an implementation of pwrite that is allowed to change the file pointer but can be called concurrently with pread.");
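+  // NOTE(review): the guard below was disabled by this change.  Per its own message, the Windows read path may move the file pointer, so confirm that no caller relies on concurrent pread/pwrite.
+  //UTIL_THROW(Exception, "This pread implementation for windows is broken.  Please send me a patch that does not change the file pointer.  Atomically.  Or send me an implementation of pwrite that is allowed to change the file pointer but can be called concurrently with pread.");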
  const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
 #endif
  for (;size ;) {
@ -251,7 +261,9 @@ typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
 // Can't we all just get along?
 void InternalSeek(int fd, int64_t off, int whence) {
  if (
-#if defined(_WIN32) || defined(_WIN64)
+#if defined __MINGW32__
+    (off_t)-1 == lseek(fd, off, whence)
+#elif defined(_WIN32) || defined(_WIN64)
    (__int64)-1 == _lseeki64(fd, off, whence)
 #elif defined(OS_ANDROID)
    (off64_t)-1 == lseek64(fd, off, whence)
--- a/util/file_piece_test.cc
+++ b/util/file_piece_test.cc
@ -1,4 +1,4 @@
-// Tests might fail if you have creative characters in your path.  Sue me.  
+// Tests might fail if you have creative characters in your path.  Sue me.
 #include "util/file_piece.hh"

 #include "util/file.hh"
@ -55,7 +55,7 @@ BOOST_AUTO_TEST_CASE(MMapReadLine) {

 #if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
 /* Apple isn't happy with the popen, fileno, dup.  And I don't want to
- * reimplement popen.  This is an issue with the test.  
+ * reimplement popen.  This is an issue with the test.
 */
 /* read() implementation */
 BOOST_AUTO_TEST_CASE(StreamReadLine) {
@ -67,7 +67,7 @@ BOOST_AUTO_TEST_CASE(StreamReadLine) {

  FILE *catter = popen(popen_args.c_str(), "r");
  BOOST_REQUIRE(catter);
-  
+
  FilePiece test(dup(fileno(catter)), "file_piece.cc", NULL, 1);
  std::string ref_line;
  while (getline(ref, ref_line)) {
@ -107,8 +107,8 @@ BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
 }

 // gzip stream.  Apple doesn't like popen, fileno, dup.  This is an issue with
-// the test.  
-#ifndef __APPLE__
+// the test.
+#if !defined __APPLE__ && !defined __MINGW32__
 BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
  std::fstream ref(FileLocation().c_str(), std::ios::in);

@ -117,7 +117,7 @@ BOOST_AUTO_TEST_CASE(StreamZipReadLine) {

  FILE * catter = popen(command.c_str(), "r");
  BOOST_REQUIRE(catter);
-  
+
  FilePiece test(dup(fileno(catter)), "file_piece.cc.gz", NULL, 1);
  std::string ref_line;
  while (getline(ref, ref_line)) {
--- a/util/read_compressed_test.cc
+++ b/util/read_compressed_test.cc
@ -12,6 +12,22 @@

 #include <stdlib.h>

+#if defined __MINGW32__
+#include <time.h>
+#include <fcntl.h>
+
+#if !defined mkstemp
+// Minimal mkstemp() replacement for MinGW: fills in the template with
+// mktemp() and opens the resulting name for reading and writing.
+// Returns the file descriptor, or -1 if no name could be generated or the
+// file could not be created.
+int mkstemp(char * stemplate)
+{
+    char *filename = mktemp(stemplate);
+    if (filename == NULL)
+        return -1;
+    // O_EXCL: fail rather than open a file that appeared after mktemp()
+    // picked the name.  O_BINARY: the tests write compressed bytes, which
+    // must not go through text-mode newline translation.
+    return open(filename, O_RDWR | O_CREAT | O_EXCL | O_BINARY, 0600);
+}
+#endif
+
+#endif // defined
+
 namespace util {
 namespace {