Merge branch 'master' into hieu

This commit is contained in:
Hieu Hoang 2014-01-16 17:46:21 +00:00
commit 2c66ac995b
31 changed files with 441 additions and 50 deletions

6
.gitignore vendored
View File

@ -58,7 +58,6 @@ scripts/training/phrase-extract/relax-parse
scripts/training/phrase-extract/score
scripts/training/phrase-extract/statistics
scripts/training/symal/symal
scripts/training/train-model.perl
dist
bin
previous.sh
@ -72,3 +71,8 @@ mert/sentence-bleu
.DS_Store
*.pbxuser
*.mode1v3
*.exe
build/
nbproject/

View File

@ -87,7 +87,7 @@ class VocabHandout {
Table table_;
std::size_t double_cutoff_;
util::FakeOFStream word_list_;
};
@ -98,7 +98,7 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
std::size_t operator()(const WordIndex *start) const {
return util::MurmurHashNative(start, size_);
}
private:
const std::size_t size_;
};
@ -106,11 +106,11 @@ class DedupeHash : public std::unary_function<const WordIndex *, bool> {
class DedupeEquals : public std::binary_function<const WordIndex *, const WordIndex *, bool> {
public:
explicit DedupeEquals(std::size_t order) : size_(order * sizeof(WordIndex)) {}
bool operator()(const WordIndex *first, const WordIndex *second) const {
return !memcmp(first, second, size_);
}
}
private:
const std::size_t size_;
};
@ -131,7 +131,7 @@ typedef util::ProbingHashTable<DedupeEntry, DedupeHash, DedupeEquals> Dedupe;
class Writer {
public:
Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
Writer(std::size_t order, const util::stream::ChainPosition &position, void *dedupe_mem, std::size_t dedupe_mem_size)
: block_(position), gram_(block_->Get(), order),
dedupe_invalid_(order, std::numeric_limits<WordIndex>::max()),
dedupe_(dedupe_mem, dedupe_mem_size, &dedupe_invalid_[0], DedupeHash(order), DedupeEquals(order)),
@ -140,7 +140,7 @@ class Writer {
dedupe_.Clear();
assert(Dedupe::Size(position.GetChain().BlockSize() / position.GetChain().EntrySize(), kProbingMultiplier) == dedupe_mem_size);
if (order == 1) {
// Add special words. AdjustCounts is responsible if order != 1.
// Add special words. AdjustCounts is responsible if order != 1.
AddUnigramWord(kUNK);
AddUnigramWord(kBOS);
}
@ -170,16 +170,16 @@ class Writer {
memmove(gram_.begin(), gram_.begin() + 1, sizeof(WordIndex) * (gram_.Order() - 1));
return;
}
// Complete the write.
// Complete the write.
gram_.Count() = 1;
// Prepare the next n-gram.
// Prepare the next n-gram.
if (reinterpret_cast<uint8_t*>(gram_.begin()) + gram_.TotalSize() != static_cast<uint8_t*>(block_->Get()) + block_size_) {
NGram last(gram_);
gram_.NextInMemory();
std::copy(last.begin() + 1, last.end(), gram_.begin());
return;
}
// Block end. Need to store the context in a temporary buffer.
// Block end. Need to store the context in a temporary buffer.
std::copy(gram_.begin() + 1, gram_.end(), buffer_.get());
dedupe_.Clear();
block_->SetValidSize(block_size_);
@ -207,7 +207,7 @@ class Writer {
// Hash table combiner implementation.
Dedupe dedupe_;
// Small buffer to hold existing ngrams when shifting across a block boundary.
// Small buffer to hold existing ngrams when shifting across a block boundary.
boost::scoped_array<WordIndex> buffer_;
const std::size_t block_size_;
@ -223,7 +223,7 @@ std::size_t CorpusCount::VocabUsage(std::size_t vocab_estimate) {
return VocabHandout::MemUsage(vocab_estimate);
}
CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block)
CorpusCount::CorpusCount(util::FilePiece &from, int vocab_write, uint64_t &token_count, WordIndex &type_count, std::size_t entries_per_block)
: from_(from), vocab_write_(vocab_write), token_count_(token_count), type_count_(type_count),
dedupe_mem_size_(Dedupe::Size(entries_per_block, kProbingMultiplier)),
dedupe_mem_(util::MallocOrThrow(dedupe_mem_size_)) {

View File

@ -33,12 +33,12 @@ class Callback {
pay.complete.prob = pay.uninterp.prob + pay.uninterp.gamma * probs_[order_minus_1];
probs_[order_minus_1 + 1] = pay.complete.prob;
pay.complete.prob = log10(pay.complete.prob);
// TODO: this is a hack to skip n-grams that don't appear as context. Pruning will require some different handling.
if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS) {
// TODO: this is a hack to skip n-grams that don't appear as context. Pruning will require some different handling.
if (order_minus_1 < backoffs_.size() && *(gram.end() - 1) != kUNK && *(gram.end() - 1) != kEOS && backoffs_[order_minus_1].Get()) { // check valid pointer at the end
pay.complete.backoff = log10(*static_cast<const float*>(backoffs_[order_minus_1].Get()));
++backoffs_[order_minus_1];
} else {
// Not a context.
// Not a context.
pay.complete.backoff = 0.0;
}
}
@ -52,7 +52,7 @@ class Callback {
};
} // namespace
Interpolate::Interpolate(uint64_t unigram_count, const ChainPositions &backoffs)
Interpolate::Interpolate(uint64_t unigram_count, const ChainPositions &backoffs)
: uniform_prob_(1.0 / static_cast<float>(unigram_count - 1)), backoffs_(backoffs) {}
// perform order-wise interpolation

View File

@ -11,7 +11,11 @@ Config::Config() :
enumerate_vocab(NULL),
unknown_missing(COMPLAIN),
sentence_marker_missing(THROW_UP),
#if defined(_WIN32) || defined(_WIN64)
positive_log_probability(SILENT),
#else
positive_log_probability(THROW_UP),
#endif
unknown_missing_logprob(-100.0),
probing_multiplier(1.5),
building_memory(1073741824ULL), // 1 GB

View File

@ -14,7 +14,10 @@
#include <string>
#include <vector>
#if !defined __MINGW32__
#include <err.h>
#endif
#include <string.h>
#include <stdint.h>

View File

@ -5,7 +5,9 @@
#include <iostream>
#include <string>
#if !defined __MINGW32__
#include <err.h>
#endif
#include "util/file_piece.hh"
@ -17,7 +19,12 @@ class CountOutput : boost::noncopyable {
void AddNGram(const StringPiece &line) {
if (!(file_ << line << '\n')) {
#if defined __MINGW32__
std::cerr<<"Writing counts file failed"<<std::endl;
exit(3);
#else
err(3, "Writing counts file failed");
#endif
}
}
@ -35,7 +42,7 @@ class CountOutput : boost::noncopyable {
class CountBatch {
public:
explicit CountBatch(std::streamsize initial_read)
explicit CountBatch(std::streamsize initial_read)
: initial_read_(initial_read) {
buffer_.reserve(initial_read);
}
@ -68,7 +75,7 @@ class CountBatch {
private:
std::streamsize initial_read_;
// This could have been a std::string but that's less happy with raw writes.
// This could have been a std::string but that's less happy with raw writes.
std::vector<char> buffer_;
};

View File

@ -57,7 +57,7 @@ typedef enum {MODE_COPY, MODE_SINGLE, MODE_MULTIPLE, MODE_UNION, MODE_UNSET} Fil
typedef enum {FORMAT_ARPA, FORMAT_COUNT} Format;
struct Config {
Config() :
Config() :
#ifndef NTHREAD
batch_size(25000),
threads(boost::thread::hardware_concurrency()),
@ -202,7 +202,7 @@ int main(int argc, char *argv[]) {
return 1;
}
}
if (config.mode == lm::MODE_UNSET) {
lm::DisplayHelp(argv[0]);
return 1;
@ -221,7 +221,12 @@ int main(int argc, char *argv[]) {
} else if (!strncmp(cmd_input, "model:", 6)) {
cmd_input += 6;
} else if (strchr(cmd_input, ':')) {
#if defined __MINGW32__
std::cerr << "Specify vocab: or model: before the input file name, not " << cmd_input << std::endl;
exit(1);
#else
errx(1, "Specify vocab: or model: before the input file name, not \"%s\"", cmd_input);
#endif // defined
} else {
std::cerr << "Assuming that " << cmd_input << " is a model file" << std::endl;
}
@ -232,7 +237,12 @@ int main(int argc, char *argv[]) {
} else {
cmd_file.open(cmd_input, std::ios::in);
if (!cmd_file) {
#if defined __MINGW32__
std::cerr << "Could not open input file " << cmd_input << std::endl;
exit(2);
#else
err(2, "Could not open input file %s", cmd_input);
#endif // defined
}
vocab = &cmd_file;
}

View File

@ -4,7 +4,10 @@
#include <iostream>
#include <ctype.h>
#if !defined __MINGW32__
#include <err.h>
#endif
namespace lm {
namespace vocab {
@ -31,7 +34,7 @@ bool IsLineEnd(std::istream &in) {
}// namespace
// Read space separated words in enter separated lines. These lines can be
// very long, so don't read an entire line at a time.
// very long, so don't read an entire line at a time.
unsigned int ReadMultiple(std::istream &in, boost::unordered_map<std::string, std::vector<unsigned int> > &out) {
in.exceptions(std::istream::badbit);
unsigned int sentence = 0;

View File

@ -18,6 +18,12 @@
using namespace std;
#if defined __MINGW32__
#ifndef uint
#define uint uint16_t
#endif // uint
#endif // if
namespace
{

View File

@ -34,6 +34,9 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
: m_toFilter(NULL),
m_fromFilter(NULL)
{
#if defined __MINGW32__
//TODO(jie): replace this function with boost implementation
#else
// Child error signal install
// sigaction is the replacement for the traditional signal() method
struct sigaction action;
@ -119,6 +122,7 @@ PreProcessFilter::PreProcessFilter(const string& filterCommand)
perror("Error: fork failed");
exit(EXIT_FAILURE);
}
#endif // defined
}
string PreProcessFilter::ProcessSentence(const string& sentence)

View File

@ -6,10 +6,14 @@
#include <sys/time.h>
#endif
#if defined __MINGW32__
#include <sys/time.h>
#endif // defined
namespace
{
#if !defined(_WIN32) && !defined(_WIN64)
#if (!defined(_WIN32) && !defined(_WIN64)) || defined __MINGW32__
uint64_t GetMicroSeconds(const struct timeval& tv)
{
return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;

View File

@ -6,6 +6,10 @@
#include <getopt.h>
#include <math.h>
#if defined __MINGW32__
#include <time.h>
#endif // defined
#include "Scorer.h"
#include "ScorerFactory.h"
#include "Timer.h"

264
mingw/Makefile Normal file
View File

@ -0,0 +1,264 @@
# Environment
# Directory holding the MSYS command-line tools used by the recipes below.
# Override on the command line (make MSYS_BIN=...) when MinGW/MSYS is
# installed somewhere else; the default matches the previously hard-coded path.
MSYS_BIN ?= C:\MinGW\msys\1.0\bin
# Boost install prefix; it must contain the include\ and lib\ sub-directories.
# Override with make BOOST_ROOT=... instead of editing this file.
BOOST_ROOT ?= D:\work\moses\dependencies\boost_1_54_0_install
MKDIR=${MSYS_BIN}\mkdir.exe
CP=${MSYS_BIN}\cp.exe
GREP=${MSYS_BIN}\grep.exe
SED=${MSYS_BIN}\sed.exe
RM=${MSYS_BIN}\rm.exe
NM=nm
CCADMIN=CCadmin
RANLIB=ranlib
CC=gcc
CCC=g++
CXX=g++
FC=gfortran
AS=as
AR=ar
# Macros
CND_PLATFORM=MinGW-Windows
CND_DLIB_EXT=dll
CND_CONF=Debug
CND_DISTDIR=dist
CND_BUILDDIR=build
# Object Directory
# All objects, archives and executables are written below this directory.
OBJECTDIR=${CND_BUILDDIR}\${CND_CONF}\${CND_PLATFORM}
# C Compiler Flags
CFLAGS=
# CC Compiler Flags
CCFLAGS=
# C++ flags: debug info, warnings, the KenLM/Moses compile-time limits,
# large-file support, and include paths for the source tree and Boost.
CXXFLAGS=-g -Wall -DKENLM_MAX_ORDER=6 -DMAX_NUM_FACTORS=4 -D_WIN32 -D_FILE_OFFSET_BITS=64 -D_LARGE_FILES -I. -I${BOOST_ROOT}\include
# -DWITH_THREADS
# Fortran Compiler Flags
FFLAGS=
# Assembler Flags
ASFLAGS=
# Link Libraries and Options
LDLIBSOPTIONS=-L${BOOST_ROOT}\lib
.PHONY : all
all: ${OBJECTDIR}\lmplz.exe ${OBJECTDIR}\moses-cmd.exe ${OBJECTDIR}\build_binary.exe \
${OBJECTDIR}\moses-chart-cmd.exe ${OBJECTDIR}\processPhraseTable.exe \
${OBJECTDIR}\processLexicalTable.exe ${OBJECTDIR}\CreateOnDiskPt.exe
${OBJECTDIR}\\%.d: %.cc
${MKDIR} -p ${subst \,/,$(@D)}
- ${RM} -f $@
${CXX} -MM $(CXXFLAGS) $< > $@.temp
${SED} "1s/${*F}.o[ :]*/${subst \,\\\\,${OBJECTDIR}\$*}.o ${subst \,\\\\,${OBJECTDIR}\$*}.d : /g" < $@.temp > $@
- ${RM} -f $@.temp
${OBJECTDIR}\\%.d: %.cpp
${MKDIR} -p ${subst \,/,$(@D)}
- ${RM} -f $@
${CXX} -MM $(CXXFLAGS) $< > $@.temp
${SED} "1s/${*F}.o[ :]*/${subst \,\\\\,${OBJECTDIR}\$*}.o ${subst \,\\\\,${OBJECTDIR}\$*}.d : /g" < $@.temp > $@
- ${RM} -f $@.temp
# libutil
UTIL_CC_SLASH = ${wildcard util/*.cc util/stream/*.cc util/double-conversion/*.cc}
UTIL_CC = ${subst /,\,${UTIL_CC_SLASH}}
UTIL_O = ${addprefix ${OBJECTDIR}\,${UTIL_CC:%.cc=%.o}}
sinclude ${UTIL_O:%.o=%.d}
${UTIL_O}: ${OBJECTDIR}\\%.o: %.cc
${MKDIR} -p ${subst \,/,$(@D)}
$(CXX) $(CXXFLAGS) -c $< -o $@
${OBJECTDIR}\libutil.a: ${UTIL_O}
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${AR} -r -s ${OBJECTDIR}\libutil.a ${UTIL_O}
# libkenlm
KENLM_CC_ALL_SLASH = ${wildcard lm/*.cc lm/builder/*.cc lm/filter/*.cc}
KENLM_CC_ALL = ${subst /,\,${KENLM_CC_ALL_SLASH}}
KENLM_CC = ${filter-out %main.cc %test.cc,${KENLM_CC_ALL}}
KENLM_O = ${addprefix ${OBJECTDIR}\,${KENLM_CC:%.cc=%.o}}
sinclude ${KENLM_O:%.o=%.d}
${KENLM_O}: ${OBJECTDIR}\\%.o: %.cc
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
${OBJECTDIR}\libkenlm.a: ${KENLM_O}
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${AR} -r -s ${OBJECTDIR}\libkenlm.a ${KENLM_O}
#lmplz
sinclude ${OBJECTDIR}\lm\builder\lmplz_main.d
${OBJECTDIR}\lm\builder\lmplz_main.o: ${OBJECTDIR}\\%.o: %.cc
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
${OBJECTDIR}\lmplz.exe: ${OBJECTDIR}\lm\builder\lmplz_main.o ${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libutil.a
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${OBJECTDIR}\lm\builder\lmplz_main.o \
-lkenlm -lutil \
-lboost_system-mt -lboost_filesystem-mt -lboost_program_options-mt -lboost_thread-mt \
-o ${OBJECTDIR}\lmplz.exe
#build_binary
sinclude ${OBJECTDIR}\lm\build_binary_main.d
${OBJECTDIR}\lm\build_binary_main.o: ${OBJECTDIR}\\%.o: %.cc
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
${OBJECTDIR}\build_binary.exe: ${OBJECTDIR}\lm\build_binary_main.o ${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libutil.a
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${OBJECTDIR}\lm\build_binary_main.o \
-lkenlm -lutil \
-lboost_system-mt -lboost_filesystem-mt -lboost_program_options-mt -lboost_thread-mt \
-o ${OBJECTDIR}\build_binary.exe
#libondiskpt
ONDISKPT_CC = ${addprefix OnDiskPt\, OnDiskWrapper.cpp SourcePhrase.cpp TargetPhrase.cpp Word.cpp Phrase.cpp PhraseNode.cpp TargetPhraseCollection.cpp Vocab.cpp OnDiskQuery.cpp}
ONDISKPT_O = ${addprefix ${OBJECTDIR}\,${ONDISKPT_CC:%.cpp=%.o}}
sinclude ${ONDISKPT_O:%.o=%.d}
${ONDISKPT_O}: ${OBJECTDIR}\\%.o: %.cpp
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
${OBJECTDIR}\libondiskpt.a: ${ONDISKPT_O}
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${AR} -r -s ${OBJECTDIR}\libondiskpt.a ${ONDISKPT_O}
#libmoses
MOSES_CC_ALL_SLASH = ${wildcard moses/*.cpp moses/TranslationModel/*.cpp \
moses/TranslationModel/fuzzy-match/*.cpp \
moses/TranslationModel/DynSAInclude/*.cpp \
moses/TranslationModel/RuleTable/*.cpp \
moses/TranslationModel/Scope3Parser/*.cpp \
moses/TranslationModel/CYKPlusParser/*.cpp \
moses/FF/*.cpp \
moses/FF/OSM-Feature/*.cpp \
moses/FF/LexicalReordering/*.cpp\
moses/TranslationModel/CompactPT/*.cpp}
#lm
MOSES_CC_ALL_SLASH += ${addprefix moses\LM\, Backward.cpp BackwardLMState.cpp Base.cpp Implementation.cpp Joint.cpp Ken.cpp MultiFactor.cpp SingleFactor.cpp SkeletonLM.cpp}
MOSES_CC_ALL = ${subst /,\,${MOSES_CC_ALL_SLASH}}
MOSES_CC = ${filter-out %Test.cpp moses\Mock%.cpp,${MOSES_CC_ALL}}
MOSES_O = ${addprefix ${OBJECTDIR}\,${MOSES_CC:%.cpp=%.o}}
sinclude ${MOSES_O:%.o=%.d}
${MOSES_O}: ${OBJECTDIR}\\%.o: %.cpp
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
SEARCH_CC = ${addprefix search\, edge_generator.cc nbest.cc rule.cc vertex.cc}
SEARCH_O = ${addprefix ${OBJECTDIR}\,${SEARCH_CC:%.cc=%.o}}
sinclude ${SEARCH_O:%.o=%.d}
${SEARCH_O}: ${OBJECTDIR}\\%.o: %.cc
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
${OBJECTDIR}\libmoses.a: ${MOSES_O} ${SEARCH_O}
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${AR} -r -s ${OBJECTDIR}\libmoses.a ${MOSES_O} ${SEARCH_O}
#libmosescmd
MOSESCMD_CC = ${addprefix moses-cmd\, IOWrapper.cpp mbr.cpp LatticeMBR.cpp TranslationAnalysis.cpp}
MOSESCMD_O = ${addprefix ${OBJECTDIR}\,${MOSESCMD_CC:%.cpp=%.o}}
sinclude ${MOSESCMD_O:%.o=%.d}
${MOSESCMD_O}: ${OBJECTDIR}\\%.o: %.cpp
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
#moses-cmd
sinclude ${OBJECTDIR}\moses-cmd\Main.d
${OBJECTDIR}\moses-cmd\Main.o: ${OBJECTDIR}\\%.o: %.cpp
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
${OBJECTDIR}\moses-cmd.exe: ${OBJECTDIR}\libmoses.a \
${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libondiskpt.a \
${OBJECTDIR}\libutil.a ${OBJECTDIR}\moses-cmd\Main.o ${MOSESCMD_O}
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${OBJECTDIR}\moses-cmd\Main.o ${MOSESCMD_O} \
-lmoses -lkenlm -londiskpt -lutil \
-lboost_iostreams-mt -lboost_bzip2-mt -lboost_zlib-mt -lboost_system-mt -lboost_filesystem-mt -lboost_thread-mt \
-lz -lbz2 -ldl -lmman \
-o ${OBJECTDIR}\moses-cmd.exe
#moseschartcmd
MOSESCHARTCMD_CC = ${addprefix moses-chart-cmd\, Main.cpp mbr.cpp IOWrapper.cpp TranslationAnalysis.cpp}
MOSESCHARTCMD_O = ${addprefix ${OBJECTDIR}\,${MOSESCHARTCMD_CC:%.cpp=%.o}}
sinclude ${MOSESCHARTCMD_O:%.o=%.d}
${MOSESCHARTCMD_O}: ${OBJECTDIR}\\%.o: %.cpp
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
${OBJECTDIR}\moses-chart-cmd.exe: ${OBJECTDIR}\libmoses.a \
${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libondiskpt.a \
${OBJECTDIR}\libutil.a ${MOSESCHARTCMD_O}
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${MOSESCHARTCMD_O} \
-lmoses -lkenlm -londiskpt -lutil \
-lboost_iostreams-mt -lboost_bzip2-mt -lboost_zlib-mt -lboost_system-mt -lboost_filesystem-mt -lboost_thread-mt \
-lz -lbz2 -ldl -lmman \
-o ${OBJECTDIR}\moses-chart-cmd.exe
#processPhraseTable
PROCESSPHRASETABLE_CC = ${addprefix misc\, GenerateTuples.cpp processPhraseTable.cpp}
PROCESSPHRASETABLE_O = ${addprefix ${OBJECTDIR}\,${PROCESSPHRASETABLE_CC:%.cpp=%.o}}
sinclude ${PROCESSPHRASETABLE_O:%.o=%.d}
${PROCESSPHRASETABLE_O}: ${OBJECTDIR}\\%.o: %.cpp
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
${OBJECTDIR}\processPhraseTable.exe: ${OBJECTDIR}\libmoses.a ${OBJECTDIR}\libutil.a ${PROCESSPHRASETABLE_O}
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${PROCESSPHRASETABLE_O} \
-lmoses -lutil \
-lboost_iostreams-mt -lboost_bzip2-mt -lboost_zlib-mt -lboost_system-mt -lboost_filesystem-mt -lboost_thread-mt \
-lz -lbz2 -ldl -lmman \
-o ${OBJECTDIR}\processPhraseTable.exe
#processLexicalTable
PROCESSLEXTABLE_CC = ${addprefix misc\, processLexicalTable.cpp}
PROCESSLEXTABLE_O = ${addprefix ${OBJECTDIR}\,${PROCESSLEXTABLE_CC:%.cpp=%.o}}
sinclude ${PROCESSLEXTABLE_O:%.o=%.d}
${PROCESSLEXTABLE_O}: ${OBJECTDIR}\\%.o: %.cpp
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
${OBJECTDIR}\processLexicalTable.exe: ${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libmoses.a ${OBJECTDIR}\libutil.a ${OBJECTDIR}\libondiskpt.a ${PROCESSLEXTABLE_O}
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${PROCESSLEXTABLE_O} \
-lmoses -lkenlm -londiskpt -lutil \
-lboost_iostreams-mt -lboost_bzip2-mt -lboost_zlib-mt -lboost_system-mt -lboost_filesystem-mt -lboost_thread-mt \
-lz -lbz2 -ldl -lmman \
-o ${OBJECTDIR}\processLexicalTable.exe
#ondiskpt
sinclude ${OBJECTDIR}\OnDiskPt\Main.d
${OBJECTDIR}\OnDiskPt\Main.o: ${OBJECTDIR}\\%.o: %.cpp
${MKDIR} -p ${subst \,/,$(@D)}
${CXX} ${CXXFLAGS} -c $< -o $@
${OBJECTDIR}\CreateOnDiskPt.exe: ${OBJECTDIR}\OnDiskPt\Main.o ${OBJECTDIR}\libondiskpt.a ${OBJECTDIR}\libmoses.a ${OBJECTDIR}\libkenlm.a ${OBJECTDIR}\libutil.a
${MKDIR} -p ${subst \,/,${OBJECTDIR}}
${CXX} ${LDLIBSOPTIONS} -L ${OBJECTDIR} -static ${OBJECTDIR}\OnDiskPt\Main.o \
-londiskpt -lmoses -lkenlm -lutil \
-lboost_system-mt -lboost_filesystem-mt -lboost_program_options-mt -lboost_thread-mt \
-lz -lbz2 -ldl -lmman \
-o ${OBJECTDIR}\CreateOnDiskPt.exe
.PHONY : clean
clean:

View File

@ -571,7 +571,7 @@ int main(int argc, char** argv)
#ifdef HAVE_PROTOBUF
GOOGLE_PROTOBUF_VERIFY_VERSION;
#endif
// echo command line, if verbose
IFVERBOSE(1) {
TRACE_ERR("command: ");

View File

@ -8,7 +8,7 @@
#include "moses/TargetPhrase.h"
#include "moses/TargetPhraseCollection.h"
#ifdef HAVE_CMPH
#if !defined WIN32 || defined __MINGW32__ || defined HAVE_CMPH
#include "moses/TranslationModel/CompactPT/LexicalReorderingTableCompact.h"
#endif

View File

@ -25,6 +25,10 @@
#include <sstream>
#include <stdexcept>
#if defined __MINGW32__ && defined WITH_THREADS
#include <boost/thread/locks.hpp>
#endif // WITH_THREADS
#include "FeatureVector.h"
#include "util/string_piece_hash.hh"

View File

@ -20,9 +20,15 @@ namespace Moses
{
#ifdef WIN32
#ifdef __MINGW32__
#define OFF_T __int64
#define FTELLO(f) ftello64(f)
#define FSEEKO(file, offset, origin) fseeko64(file, offset, origin)
#else
#define OFF_T __int64
#define FTELLO(file) _ftelli64(file)
#define FSEEKO(file, offset, origin) _fseeki64(file, offset, origin)
#endif
#else
#define OFF_T off_t

View File

@ -405,13 +405,13 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
//cerr << endl;
//draw the sample
float random = log((float)rand()/RAND_MAX);
float frandom = log((float)rand()/RAND_MAX);
size_t position = 1;
float sum = candidateScores[0];
for (; position < candidateScores.size() && sum < random; ++position) {
for (; position < candidateScores.size() && sum < frandom; ++position) {
sum = log_sum(sum,candidateScores[position]);
}
//cerr << "Random: " << random << " Chose " << position-1 << endl;
//cerr << "Random: " << frandom << " Chose " << position-1 << endl;
const Hypothesis* chosen = candidates[position-1];
path.push_back(chosen);
}

View File

@ -37,6 +37,8 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#ifdef WITH_THREADS
#include "moses/ThreadPool.h"
#else
#include <time.h>
#endif
namespace Moses

View File

@ -28,6 +28,10 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <cstdio>
#include <unistd.h>
#ifndef __MMAN_PAGE_SIZE__
#define __MMAN_PAGE_SIZE__ sysconf(_SC_PAGE_SIZE)
#endif
namespace Moses
{
template <class T>
@ -56,25 +60,25 @@ public:
MmapAllocator() throw()
: m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)),
m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
}
MmapAllocator(std::FILE* f_ptr) throw()
: m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
}
MmapAllocator(std::FILE* f_ptr, size_t data_offset) throw()
: m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_data_offset(data_offset), m_fixed(true), m_count(new size_t(0)) {
}
MmapAllocator(std::string fileName) throw()
: m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc(fileno(m_file_ptr)),
m_page_size(sysconf(_SC_PAGE_SIZE)), m_map_size(0), m_data_ptr(0),
m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
}

View File

@ -8,7 +8,7 @@
#include <typeinfo>
#include <stdint.h>
#ifdef WIN32
#if defined WIN32 && !defined __MINGW32__
#define iterate(c, i) for(decltype(c.begin()) i = c.begin(); i != c.end(); ++i)
#define piterate(c, i) for(decltype(c->begin()) i = c->begin(); i != c->end(); ++i)
#define riterate(c, i) for(decltype(c.rbegin()) i = c.rbegin(); i != c.rend(); ++i)

View File

@ -35,6 +35,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <boost/thread/tss.hpp>
#else
#include <boost/scoped_ptr.hpp>
#include <time.h>
#endif
#include "moses/Phrase.h"

View File

@ -49,6 +49,32 @@
using namespace std;
#if defined __MINGW32__ && !defined mkdtemp
#include <windows.h>
#include <errno.h>
char *mkdtemp(char *tempbuf) {
int rand_value = 0;
char* tempbase = NULL;
char tempbasebuf[MAX_PATH] = "";
if (strcmp(&tempbuf[strlen(tempbuf)-6], "XXXXXX")) {
errno = EINVAL;
return NULL;
}
srand((unsigned)time(0));
rand_value = (int)((rand() / ((double)RAND_MAX+1.0)) * 1e6);
tempbase = strrchr(tempbuf, '/');
tempbase = tempbase ? tempbase+1 : tempbuf;
strcpy(tempbasebuf, tempbase);
sprintf(&tempbasebuf[strlen(tempbasebuf)-6], "%d", rand_value);
::GetTempPath(MAX_PATH, tempbuf);
strcat(tempbuf, tempbasebuf);
::CreateDirectory(tempbuf, NULL);
return tempbuf;
}
#endif
namespace Moses
{
@ -96,6 +122,9 @@ SetParameter(const std::string& key, const std::string& value)
int removedirectoryrecursively(const char *dirname)
{
#if defined __MINGW32__
//TODO(jie): replace this function with boost implementation
#else
DIR *dir;
struct dirent *entry;
char path[PATH_MAX];
@ -141,13 +170,17 @@ int removedirectoryrecursively(const char *dirname)
* printing here, see above)
*/
//printf("(not really) Deleting: %s\n", dirname);
#endif
return 1;
}
void PhraseDictionaryFuzzyMatch::InitializeForInput(InputType const& inputSentence)
{
#if defined __MINGW32__
char dirName[] = "moses.XXXXXX";
#else
char dirName[] = "/tmp/moses.XXXXXX";
#endif // defined
char *temp = mkdtemp(dirName);
UTIL_THROW_IF2(temp == NULL,
"Couldn't create temporary directory " << dirName);

View File

@ -369,7 +369,7 @@ alignment-symmetrization-method = grow-diag-final-and
### create a bilingual concordancer for the model
#
#biconcor = $moses-script-dir/ems/biconcor/biconcor
#biconcor = $moses-bin-dir/biconcor
############################################################

View File

@ -389,7 +389,7 @@ alignment-symmetrization-method = grow-diag-final-and
### create a bilingual concordancer for the model
#
#biconcor = $moses-script-dir/ems/biconcor/biconcor
#biconcor = $moses-bin-dir/biconcor
### lexicalized reordering: specify orientation type
# (default: only distance-based reordering model)

View File

@ -369,7 +369,7 @@ alignment-symmetrization-method = grow-diag-final-and
### create a bilingual concordancer for the model
#
#biconcor = $moses-script-dir/ems/biconcor/biconcor
#biconcor = $moses-bin-dir/biconcor
### lexicalized reordering: specify orientation type
# (default: only distance-based reordering model)

View File

@ -373,7 +373,7 @@ alignment-symmetrization-method = grow-diag-final-and
### create a bilingual concordancer for the model
#
#biconcor = $moses-script-dir/ems/biconcor/biconcor
#biconcor = $moses-bin-dir/biconcor
### lexicalized reordering: specify orientation type
# (default: only distance-based reordering model)

View File

@ -353,7 +353,7 @@ alignment-symmetrization-method = grow-diag-final-and
### create a bilingual concordancer for the model
#
#biconcor = $moses-script-dir/ems/biconcor/biconcor
#biconcor = $moses-bin-dir/biconcor
############################################################

View File

@ -17,7 +17,10 @@
#include <fcntl.h>
#include <stdint.h>
#if defined(_WIN32) || defined(_WIN64)
#if defined __MINGW32__
#include <windows.h>
#include <unistd.h>
#elif defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <io.h>
#include <algorithm>
@ -76,7 +79,12 @@ int CreateOrThrow(const char *name) {
}
uint64_t SizeFile(int fd) {
#if defined(_WIN32) || defined(_WIN64)
#if defined __MINGW32__
struct stat sb;
int ret = fstat(fd, &sb);
if (ret == -1 || (!sb.st_size && !S_ISREG(sb.st_mode))) return kBadSize;
return sb.st_size;
#elif defined(_WIN32) || defined(_WIN64)
__int64 ret = _filelengthi64(fd);
return (ret == -1) ? kBadSize : ret;
#else // Not windows.
@ -100,7 +108,9 @@ uint64_t SizeOrThrow(int fd) {
}
void ResizeOrThrow(int fd, uint64_t to) {
#if defined(_WIN32) || defined(_WIN64)
#if defined __MINGW32__
int ret = ftruncate
#elif defined(_WIN32) || defined(_WIN64)
errno_t ret = _chsize_s
#elif defined(OS_ANDROID)
int ret = ftruncate64
@ -162,7 +172,7 @@ std::size_t ReadOrEOF(int fd, void *to_void, std::size_t amount) {
void PReadOrThrow(int fd, void *to_void, std::size_t size, uint64_t off) {
uint8_t *to = static_cast<uint8_t*>(to_void);
#if defined(_WIN32) || defined(_WIN64)
UTIL_THROW(Exception, "This pread implementation for windows is broken. Please send me a patch that does not change the file pointer. Atomically. Or send me an implementation of pwrite that is allowed to change the file pointer but can be called concurrently with pread.");
//UTIL_THROW(Exception, "This pread implementation for windows is broken. Please send me a patch that does not change the file pointer. Atomically. Or send me an implementation of pwrite that is allowed to change the file pointer but can be called concurrently with pread.");
const std::size_t kMaxDWORD = static_cast<std::size_t>(4294967295UL);
#endif
for (;size ;) {
@ -251,7 +261,9 @@ typedef CheckOffT<sizeof(off_t)>::True IgnoredType;
// Can't we all just get along?
void InternalSeek(int fd, int64_t off, int whence) {
if (
#if defined(_WIN32) || defined(_WIN64)
#if defined __MINGW32__
(off_t)-1 == lseek(fd, off, whence)
#elif defined(_WIN32) || defined(_WIN64)
(__int64)-1 == _lseeki64(fd, off, whence)
#elif defined(OS_ANDROID)
(off64_t)-1 == lseek64(fd, off, whence)

View File

@ -1,4 +1,4 @@
// Tests might fail if you have creative characters in your path. Sue me.
// Tests might fail if you have creative characters in your path. Sue me.
#include "util/file_piece.hh"
#include "util/file.hh"
@ -55,7 +55,7 @@ BOOST_AUTO_TEST_CASE(MMapReadLine) {
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__APPLE__)
/* Apple isn't happy with the popen, fileno, dup. And I don't want to
* reimplement popen. This is an issue with the test.
* reimplement popen. This is an issue with the test.
*/
/* read() implementation */
BOOST_AUTO_TEST_CASE(StreamReadLine) {
@ -67,7 +67,7 @@ BOOST_AUTO_TEST_CASE(StreamReadLine) {
FILE *catter = popen(popen_args.c_str(), "r");
BOOST_REQUIRE(catter);
FilePiece test(dup(fileno(catter)), "file_piece.cc", NULL, 1);
std::string ref_line;
while (getline(ref, ref_line)) {
@ -107,8 +107,8 @@ BOOST_AUTO_TEST_CASE(PlainZipReadLine) {
}
// gzip stream. Apple doesn't like popen, fileno, dup. This is an issue with
// the test.
#ifndef __APPLE__
// the test.
#if !defined __APPLE__ && !defined __MINGW32__
BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
std::fstream ref(FileLocation().c_str(), std::ios::in);
@ -117,7 +117,7 @@ BOOST_AUTO_TEST_CASE(StreamZipReadLine) {
FILE * catter = popen(command.c_str(), "r");
BOOST_REQUIRE(catter);
FilePiece test(dup(fileno(catter)), "file_piece.cc.gz", NULL, 1);
std::string ref_line;
while (getline(ref, ref_line)) {

View File

@ -12,6 +12,22 @@
#include <stdlib.h>
#if defined __MINGW32__
#include <time.h>
#include <fcntl.h>
#if !defined mkstemp
/**
 * Fallback mkstemp(), compiled only under the surrounding
 * `__MINGW32__` / `!defined mkstemp` guards.
 *
 * Lets mktemp() fill in the template in place, then creates and opens the
 * resulting file for reading and writing with mode 0600.
 *
 * @param stemplate writable, NUL-terminated file name template; it is
 *                  overwritten with the generated name.
 * @return an open file descriptor, or -1 if no name could be generated or
 *         the file could not be created.
 */
int mkstemp(char * stemplate)
{
  char *filename = mktemp(stemplate);
  // NOTE(review): a NULL return is the failure signal the original checked;
  // an empty name is rejected too since it can never be opened.
  if (filename == NULL || *filename == '\0')
    return -1;
  // O_EXCL turns "the name was taken between mktemp() and open()" into an
  // error instead of silently opening a file created by someone else.
  return open(filename, O_RDWR | O_CREAT | O_EXCL, 0600);
}
#endif
#endif // defined
namespace util {
namespace {