Initial check-in.

This commit is contained in:
= 2013-09-25 00:51:50 +01:00
parent db849ce2f5
commit e3ba4a9890
86 changed files with 61757 additions and 0 deletions

View File

@ -0,0 +1,156 @@
#include "mmsapt.h"
#include <boost/foreach.hpp>
#include <boost/tokenizer.hpp>
namespace Moses
{
using namespace bitext;
using namespace std;
using namespace boost;
// Parse a feature specification line into key/value pairs.
//
// The line is split into tokens at runs of ';' and ' ' (empty tokens are
// dropped). Within a token the key is the text before the first '=' and the
// value is everything after the run of '=' that follows the key, so a value
// may itself contain '='. Leading '=' characters of a token are ignored.
// A token without any '=' is stored mapped to itself (unchanged from the
// previous implementation).
//
// Entries are inserted into / overwritten in params; other entries are left
// alone. A token with no key or no value ("=", "key=") trips an assert.
void
parseLine(std::string const& line, std::map<std::string,std::string> & params)
{
  typedef std::string::size_type pos_t;
  pos_t const none = std::string::npos;
  char const* const token_delims = "; ";
  char const* const kv_delims    = " =";

  pos_t start = line.find_first_not_of(token_delims);
  while (start != none)
    {
      pos_t const stop = line.find_first_of(token_delims, start);
      std::string const t
        = line.substr(start, stop == none ? none : stop - start);

      pos_t const i = t.find_first_not_of(kv_delims);
      assert(i != none); // the token must contain a key
      pos_t const j = t.find_first_of(kv_delims, i + 1);
      if (j == none)
        {
          // bare token without '=': keep the historical key == value entry
          params[t.substr(i)] = t.substr(i);
        }
      else
        {
          pos_t const k = t.find_first_not_of(kv_delims, j + 1);
          assert(k != none); // the token must contain a value
          // substr takes a length, not an end position: the key is [i,j)
          params[t.substr(i, j - i)] = t.substr(k);
        }
      // find_first_not_of returns npos when stop is npos, ending the loop
      start = line.find_first_not_of(token_delims, stop);
    }
}
// Construct from an explicit description plus the specification line.
// Both are forwarded to PhraseDictionary; the line is then parsed by init().
Mmsapt::
Mmsapt(string const& description, string const& line)
  : PhraseDictionary(description,line)
{
  init(line);
}
// Construct from the specification line alone; the description handed to
// PhraseDictionary is the fixed string "Mmsapt".
Mmsapt::
Mmsapt(string const& line)
  : PhraseDictionary("Mmsapt",line)
{
  init(line);
}
// Read this table's settings from the specification line.
//
// Required keys (checked with assert): base, L1, L2.
// Optional keys and their defaults: smooth (.05), max-samples (1000),
// ifactor (0).
// num-features is converted with atoi, so a missing or non-numeric value
// yields 0.
void
Mmsapt::
init(string const& line)
{
  map<string,string> param;
  parseLine(line,param);
  bname = param["base"];
  L1    = param["L1"];
  L2    = param["L2"];
  assert(bname.size());
  assert(L1.size());
  assert(L2.size());
  map<string,string>::const_iterator m;
  m = param.find("smooth");
  lbop_parameter = m != param.end() ? atof(m->second.c_str()) : .05;
  m = param.find("max-samples");
  default_sample_size = m != param.end() ? atoi(m->second.c_str()) : 1000;
  this->m_numScoreComponents = atoi(param["num-features"].c_str());
  // num_features = 0;
  m = param.find("ifactor");
  input_factor = m != param.end() ? atoi(m->second.c_str()) : 0;
  // No key sets output_factor; give the member a defined value instead of
  // leaving it uninitialized.
  output_factor = 0;
}
void
Mmsapt::
Load()
{
bt.open(bname, L1, L2);
size_t num_feats;
num_feats = calc_pfwd.init(0,lbop_parameter);
num_feats = calc_pbwd.init(num_feats,lbop_parameter);
num_feats = calc_lex.init(num_feats, bname + L1 + "-" + L2 + ".lex");
num_feats = apply_pp.init(num_feats);
assert (num_feats == this->m_numScoreComponents);
// cerr << "MMSAPT provides " << num_feats << " features at "
// << __FILE__ << ":" << __LINE__ << endl;
}
// this is not the most efficient way of phrase lookup!
//
// Look up the source phrase src in the bitext and build the collection of
// its target phrases.
// Returns NULL when fewer than src.GetSize() words could be matched in the
// source-side index; otherwise returns a TargetPhraseCollection allocated
// with new here (NOTE(review): presumably the caller takes ownership -
// confirm against the PhraseDictionary contract).
TargetPhraseCollection const*
Mmsapt::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
// Walk the source-side index (bt.I1) word by word.
TSA<Token>::tree_iterator m(bt.I1.get());
for (size_t i = 0; i < src.GetSize(); ++i)
{
// Map the string of the configured input factor to an id via bt.V1.
Factor const* f = src.GetFactor(i,input_factor);
id_type wid = (*bt.V1)[f->ToString()];
// cout << (*bt.V1)[wid] << " ";
// Stop at the first word for which extend() reports failure.
if (!m.extend(wid)) break;
}
#if 0
cout << endl;
Token const* sphrase = m.getToken(0);
for (size_t i = 0; i < m.size(); ++i)
cout << (*bt.V1)[sphrase[i].id()] << " ";
cout << endl;
#endif
sptr<pstats> s;
// Not every source word was matched: nothing to return.
if (m.size() < src.GetSize()) return NULL;
{
// Only the call to bt.lookup() runs under the mutex.
boost::lock_guard<boost::mutex> guard(this->lock);
s = bt.lookup(m);
}
// NOTE(review): s is dereferenced without a null check - confirm that
// bt.lookup() never returns an empty pointer.
PhrasePair pp; pp.init(m.getPid(), *s, this->m_numScoreComponents);
TargetPhraseCollection* ret = new TargetPhraseCollection();
// Target words are always created with the single output factor 0.
vector<FactorType> ofact(1,0);
boost::unordered_map<uint64_t,jstats>::const_iterator t;
// One target phrase per entry in s->trg.
for (t = s->trg.begin(); t != s->trg.end(); ++t)
{
// Load this entry into pp, then run the scorers in the same order in
// which they are initialised in Load().
pp.update(t->first,t->second);
calc_pfwd(bt,pp);
calc_pbwd(bt,pp);
calc_lex (bt,pp);
apply_pp (bt,pp);
// Split the entry's key into sid, off and len; the target words are
// tokens [off, off+len) counted from bt.T2->sntStart(sid).
uint32_t sid,off,len;
parse_pid(t->first,sid,off,len);
size_t stop = off + len;
Token const* x = bt.T2->sntStart(sid);
TargetPhrase* tp = new TargetPhrase();
for (size_t k = off; k < stop; ++k)
{
// Turn each token id back into its string via bt.V2.
StringPiece wrd = (*bt.V2)[x[k].id()];
Word w; w.CreateFromString(Output,ofact,wrd,false);
tp->AddWord(w);
}
// Attach the feature values held in pp.fvals to the new phrase.
tp->GetScoreBreakdown().Assign(this,pp.fvals);
tp->Evaluate(src);
ret->Add(tp);
}
// NOTE(review): presumably this limits/partitions the collection according
// to m_tableLimit - confirm in TargetPhraseCollection.
ret->NthElement(m_tableLimit);
#if 0
sort(ret->begin(), ret->end(), CompareTargetPhrase());
cout << "SOURCE PHRASE: " << src << endl;
size_t i = 0;
for (TargetPhraseCollection::iterator r = ret->begin(); r != ret->end(); ++r)
{
cout << ++i << " " << **r << endl;
}
#endif
return ret;
}
// Chart (SCFG) rule lookup is not available for this phrase table: the call
// never returns and always throws a C string (char const*).
ChartRuleLookupManager*
Mmsapt::
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &)
{
  char const* const message =
    "CreateRuleLookupManager is currently not supported in Moses!";
  throw message;
}
}

View File

@ -0,0 +1,73 @@
// -*- c++ -*-
// Sampling phrase table implementation based on memory-mapped suffix arrays.
// Design and code by Ulrich Germann.
#pragma once
#include <boost/thread.hpp>
#include "moses/generic/sorting/VectorIndexSorter.h"
#include "moses/generic/sampling/Sampling.h"
#include "moses/generic/file_io/ug_stream.h"
#include "moses/mm/ug_mm_ttrack.h"
#include "moses/mm/ug_mm_tsa.h"
#include "moses/mm/tpt_tokenindex.h"
#include "moses/mm/ug_corpus_token.h"
#include "moses/mm/ug_typedefs.h"
#include "moses/mm/tpt_pickler.h"
#include "moses/mm/ug_bitext.h"
#include "moses/mm/ug_lexical_phrase_scorer2.h"
#include "moses/InputFileStream.h"
#include "moses/FactorTypeSet.h"
#include "moses/TargetPhrase.h"
#include <boost/dynamic_bitset.hpp>
#include "moses/TargetPhraseCollection.h"
#include <map>
#include "PhraseDictionary.h"
using namespace std;
namespace Moses
{
using namespace bitext;
// Phrase dictionary that answers lookups from a memory-mapped bitext
// (mmBitext) instead of a precomputed phrase table.
class Mmsapt : public PhraseDictionary
{
typedef L2R_Token<SimpleWordId> Token;
typedef mmBitext<Token> mmbitext;
// The bitext; opened in Load() with bt.open(bname, L1, L2).
mmbitext bt;
// string description;
// Value of the "base" key of the specification line.
string bname;
// Values of the "L1" and "L2" keys. Together with bname they are passed
// to bt.open() and form the lexicon file name bname + L1 + "-" + L2 + ".lex".
string L1;
string L2;
// Value of the "smooth" key (default .05); handed to the init() of
// calc_pfwd and calc_pbwd.
float lbop_parameter;
// Value of the "max-samples" key (default 1000).
size_t default_sample_size;
// size_t num_features;
// Value of the "ifactor" key (default 0): the factor read from each source
// word during lookup.
size_t input_factor;
// Not read by the lookup code, which creates target words with factor 0.
size_t output_factor; // we can actually return entire Tokens!
// built-in feature functions
PScorePfwd<Token> calc_pfwd;
PScorePbwd<Token> calc_pbwd;
PScoreLex<Token> calc_lex; // this one I'd like to see as an external ff eventually
PScorePP<Token> apply_pp; // apply phrase penalty
// Parses the specification line and fills the members above.
void init(string const& line);
// Held around bt.lookup() in GetTargetPhraseCollectionLEGACY(); mutable so
// that the const lookup method can lock it.
mutable boost::mutex lock;
public:
// description and line are forwarded to PhraseDictionary.
Mmsapt(string const& description, string const& line);
// Same, with the fixed description "Mmsapt".
Mmsapt(string const& line);
// Opens the bitext and initialises the built-in scorers.
void
Load();
// Returns the target phrases for src, or NULL if src could not be matched
// completely. A non-NULL result is allocated with new.
TargetPhraseCollection const*
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
//! Create a sentence-specific manager for SCFG rule lookup.
// The implementation always throws; it never returns a manager.
ChartRuleLookupManager*
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &);
private:
};
} // end namespace

View File

@ -0,0 +1,57 @@
// -*- c++ -*-
// (c) 2006,2007,2008 Ulrich Germann
// makes opening files a little more convenient
#include "ug_stream.h"
namespace ugdiss
{
using namespace std;
using namespace boost::iostreams;
// Allocate a filtering_istream with new, set it up for fname through the
// two-argument overload and return it; the caller must delete the stream.
filtering_istream*
open_input_stream(string fname)
{
  filtering_istream* const stream = new filtering_istream();
  open_input_stream(fname, *stream);
  return stream;
}
// Allocate a filtering_ostream with new, set it up for fname through the
// two-argument overload and return it; the caller must delete the stream.
filtering_ostream*
open_output_stream(string fname)
{
  filtering_ostream* const stream = new filtering_ostream();
  open_output_stream(fname, *stream);
  return stream;
}
// Attach fname to the filtering stream in.
// A name ending in ".gz" gets a gzip decompressor, a name ending in ".bz2"
// a bzip2 decompressor; any other name is read unfiltered.
// Compressed files are opened in binary mode so that newline translation on
// platforms that distinguish text and binary streams cannot corrupt the
// compressed data. Uncompressed files are opened in text mode as before.
void
open_input_stream(string fname, filtering_istream& in)
{
  std::ios_base::openmode mode = std::ios_base::in;
  if (fname.size()>3 && fname.substr(fname.size()-3,3)==".gz")
    {
      in.push(gzip_decompressor());
      mode = mode | std::ios_base::binary;
    }
  else if (fname.size() > 4 && fname.substr(fname.size()-4,4)==".bz2")
    {
      in.push(bzip2_decompressor());
      mode = mode | std::ios_base::binary;
    }
  in.push(file_source(fname, mode));
}
// Attach fname to the filtering stream out.
// A name ending in ".gz" or ".gz_" gets a gzip compressor, a name ending in
// ".bz2" or ".bz2_" a bzip2 compressor; any other name is written unfiltered.
// Compressed files are opened in binary mode so that newline translation on
// platforms that distinguish text and binary streams cannot corrupt the
// compressed data. Uncompressed files are opened in text mode as before.
void
open_output_stream(string fname, filtering_ostream& out)
{
  std::ios_base::openmode mode = std::ios_base::out;
  if ((fname.size() > 3 && fname.substr(fname.size()-3,3)==".gz") ||
      (fname.size() > 4 && fname.substr(fname.size()-4,4)==".gz_"))
    {
      out.push(gzip_compressor());
      mode = mode | std::ios_base::binary;
    }
  else if ((fname.size() > 4 && fname.substr(fname.size()-4,4)==".bz2") ||
           (fname.size() > 5 && fname.substr(fname.size()-5,5)==".bz2_"))
    {
      out.push(bzip2_compressor());
      mode = mode | std::ios_base::binary;
    }
  out.push(file_sink(fname, mode));
}
}

View File

@ -0,0 +1,37 @@
// -*- c++ -*-
// (c) 2006,2007,2008 Ulrich Germann
// makes opening files a little more convenient
#ifndef __UG_STREAM_HH
#define __UG_STREAM_HH
#include <boost/iostreams/device/file.hpp>
#include <boost/iostreams/categories.hpp> // input_filter_tag
#include <boost/iostreams/operations.hpp> // get, WOULD_BLOCK
#include <boost/iostreams/copy.hpp> // get, WOULD_BLOCK
#include <boost/iostreams/filtering_stream.hpp>
#include <boost/iostreams/filter/bzip2.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include <iostream>
#include <fstream>
#include <string>
namespace ugdiss
{
using namespace std;
using namespace boost::iostreams;
/** Open an input file that is possibly compressed.
* Decompression filters are added automatically based on the file name:
* gzip for names ending in .gz, bzip2 for names ending in .bz2.
* The one-argument form returns a stream allocated with new (the caller
* must delete it); the two-argument form sets up the caller's stream.
*/
filtering_istream* open_input_stream(string fname);
void open_input_stream(string fname, filtering_istream& in);
// filtering_streambuf<input>* open_input_stream(string fname);
/** Open an output file, compressing it if the file name asks for it:
* gzip for names ending in .gz or .gz_, bzip2 for names ending in .bz2 or
* .bz2_. The one-argument form returns a stream allocated with new (the
* caller must delete it); the two-argument form sets up the caller's stream
* (the parameter is an output stream despite being named "in").
*/
filtering_ostream* open_output_stream(string fname);
void open_output_stream(string fname, filtering_ostream& in);
}
#endif

View File

@ -0,0 +1,46 @@
// -*- c++ -*-
// (c) 2009 Ulrich Germann
// boilerplate code to declutter my usual interpret_args() routine
#include "ug_get_options.h"
#include <fstream>
#include <string>
#include <iostream>
namespace ugdiss
{
using namespace std;
// Collect program options from the command line and, optionally, from a
// config file, and store them in vm.
//
//   ac, av        the arguments of main()
//   o             description of the named options
//   a             description of the positional options
//   vm            receives the parsed values
//   cfgFileParam  name of the option that holds the config file path, or NULL
//                 if config files are not supported
//
// Named command-line options are stored first, then the config file, then
// the positional arguments; per the Boost.Program_options documentation,
// store() does not change values that are already set, so earlier sources
// win. If a config file is named but cannot be opened, an error is printed
// to cerr and the program exits with status 1.
void
get_options(int ac, char* av[], progopts& o, posopts& a, optsmap& vm,
            char const* cfgFileParam)
{
  // only get named parameters from command line
  po::store(po::command_line_parser(ac,av).options(o).run(),vm);

  if (cfgFileParam && vm.count(cfgFileParam))
    {
      string cfgFile = vm[cfgFileParam].as<string>();
      if (!cfgFile.empty())
        {
          // Open first and test the stream: this needs no POSIX access()
          // (and thus no <unistd.h>), has no gap between check and use,
          // and also catches files that exist but cannot be read.
          ifstream cfg(cfgFile.c_str());
          if (!cfg)
            {
              cerr << "Error: cannot find config file '"
                   << cfgFile << "'!" << endl;
              exit(1);
            }
          po::store(po::parse_config_file(cfg,o),vm);
        }
    }

  // process positional args, ignoring those set in the config file
  if (a.max_total_count())
    po::store(po::command_line_parser(ac,av)
              .options(o).positional(a).run(),vm);
  po::notify(vm); // IMPORTANT
}
}

View File

@ -0,0 +1,24 @@
// -*- c++ -*-
// (c) 2009 Ulrich Germann
// boilerplate code to declutter my usual interpret_args() routine
#ifndef __ug_get_options_h
#define __ug_get_options_h
#include <boost/program_options.hpp>
namespace ugdiss
{
// Short names for the Boost.Program_options types used below.
namespace po=boost::program_options;
typedef po::options_description progopts;
typedef po::positional_options_description posopts;
typedef po::variables_map optsmap;
// Parse the command line (ac, av) according to the named options o and the
// positional options a, and store the result in vm.
// If cfgFileParam is non-NULL and the option of that name was given, the
// file it names is parsed as a config file as well; the program exits with
// status 1 if that file is not found.
void
get_options(int ac, char* av[],
progopts & o,
posopts & a,
optsmap & vm,
char const* cfgFileParam=NULL);
}
#endif

41
moses/mm/Jamfile Normal file
View File

@ -0,0 +1,41 @@
# Command-line tools of the mm directory. Each executable is built from a
# single source file and links against the same five dependencies.
exe mtt-build :
mtt-build.cc
$(TOP)/moses/generic//generic
$(TOP)//boost_iostreams
$(TOP)//boost_program_options
$(TOP)/moses/mm//mm
$(TOP)/util//kenutil
;
exe mtt-dump :
mtt-dump.cc
$(TOP)/moses/generic//generic
$(TOP)//boost_iostreams
$(TOP)//boost_program_options
$(TOP)/moses/mm//mm
$(TOP)/util//kenutil
;
exe symal2mam :
symal2mam.cc
$(TOP)/moses/generic//generic
$(TOP)//boost_iostreams
$(TOP)//boost_program_options
$(TOP)/moses/mm//mm
$(TOP)/util//kenutil
;
exe custom-pt :
custom-pt.cc
$(TOP)/moses/generic//generic
$(TOP)//boost_iostreams
$(TOP)//boost_program_options
$(TOP)/moses/mm//mm
$(TOP)/util//kenutil
;
# Install the four tools into $(PREFIX)/bin.
install $(PREFIX)/bin : mtt-build mtt-dump symal2mam custom-pt ;
# The mm target collects every ug_*.cc and tpt_*.cc file of this directory.
fakelib mm : [ glob ug_*.cc tpt_*.cc ] ;

100
moses/mm/Makefile Normal file
View File

@ -0,0 +1,100 @@
# Some systems apparently distinguish between shell
# variables and environment variables. The latter are
# visible to the make utility, the former apparently not,
# so we need to set them if they are not defined yet
# ===============================================================================
# COMPILATION PREFERENCES
# ===============================================================================
# CCACHE: if set to ccache, use ccache to speed up compilation
# OPTI: optimization level
# PROF: profiler switches
CCACHE = ccache
OPTI = 3
EXE_TAG = exe
PROF =
# PROF = -g -pg
# ===============================================================================
SHELL = bash
MAKEFLAGS += --warn-undefined-variables
.DEFAULT_GOAL = all
.SUFFIXES:
# ===============================================================================
# COMPILATION 'LOCALIZATION'
HOST ?= $(shell hostname)
HOSTTYPE ?= $(shell uname -m)
# Root of the Moses source tree; adjust this one variable to relocate the
# build. VPATH is derived from it so the two cannot drift apart.
MOSES_ROOT = ${HOME}/code/moses/master/mosesdecoder
# Object and dependency files go to a per-architecture, per-optimization
# directory.
WDIR = build/${HOSTTYPE}/${OPTI}
VPATH = ${MOSES_ROOT}/
# Compiler flags. CXXFLAGS is a recursively expanded variable, so the
# INCLUDES value assigned further down is picked up when CXXFLAGS is used.
CXXFLAGS = ${PROF} -ggdb -Wall -O${OPTI} ${INCLUDES}
CXXFLAGS += -DMAX_NUM_FACTORS=4
CXXFLAGS += -DKENLM_MAX_ORDER=5
# modirs is computed here but not referenced anywhere else in this Makefile.
modirs := $(addprefix -I,$(shell find ${MOSES_ROOT}/moses ${MOSES_ROOT}/contrib -type d))
CXXFLAGS += -I${MOSES_ROOT}
# Empty by default: extra include flags, an extra bzip2 library name, and a
# suffix appended to every boost library name in REQLIBS.
INCLUDES =
BZLIB =
BOOSTLIBTAG =
# Libraries to link against (names without the -l prefix).
REQLIBS = m z pthread lzma ${BZLIB} \
boost_thread${BOOSTLIBTAG} \
boost_iostreams${BOOSTLIBTAG} \
boost_program_options${BOOSTLIBTAG} \
boost_system${BOOSTLIBTAG} \
boost_filesystem${BOOSTLIBTAG}
# Currently unused ICU libraries. This comment must not end in a backslash:
# make would continue the comment onto the next line and swallow it.
# icuuc icuio icui18n
LIBS = $(addprefix -l, ${REQLIBS} moses)
LIBDIRS = -L${MOSES_ROOT}/lib
# Executables are written to BINDIR; unoptimized builds (OPTI = 0) get the
# file name prefix "debug." so they do not overwrite optimized binaries.
BINDIR = bin
ifeq "$(OPTI)" "0"
BINPREF = debug.
else
BINPREF =
endif
# OBJ2 is initialized here but not referenced anywhere else in this Makefile.
OBJ2 :=
# $(call compile,<source file>) expands to the rule that compiles one source
# file into ${WDIR}/<basename>.o and records the matching .d file in DEP.
# The expansion is meant to be passed to $(eval ...), hence the doubled $$ on
# everything that must only be expanded when the rule runs. -MD -MP makes the
# compiler write the .d dependency file next to the object file.
define compile
DEP += ${WDIR}/$(basename $(notdir $1)).d
${WDIR}/$(basename $(notdir $1)).o : $1 $(wildcard $(basename $1).h)
@echo -e "COMPILING $1"
@mkdir -p $$(@D)
${CXX} ${CXXFLAGS} -MD -MP -c $$(abspath $$<) -o $$@
endef
# Executables built by "all"; each name needs a matching <name>.cc in this
# directory (the alignment converter is symal2mam, as in the Jamfile).
programs = mtt-build mtt-dump symal2mam custom-pt mmlex-build
# These targets name actions, not files; declaring them phony keeps a stray
# file called "all", "clean" or "custom-pt" from masking them.
.PHONY: all clean custom-pt
# Build every program listed in $(programs).
all: $(addprefix ${BINDIR}/${BINPREF}, $(programs))
	@echo $^
# Remove the object and dependency files of the current configuration.
clean:
	rm -f ${WDIR}/*.o ${WDIR}/*.d
# Convenience alias for building just the custom-pt executable.
custom-pt: ${BINDIR}/${BINPREF}custom-pt
	echo $^
# Sources from the moses/generic subdirectories.
INMOGEN = $(wildcard ${MOSES_ROOT}/moses/generic/*/*.cpp)
# Library objects: every .cc file that has a header of the same name, plus
# the objects of the moses/generic sources.
OBJ = $(patsubst %.cc,%.o,$(wildcard $(patsubst %.h,%.cc,$(wildcard *.h))))
OBJ += $(patsubst %.cpp,%.o,${INMOGEN})
# Objects of the .cc files without a header of the same name (not referenced
# anywhere else in this Makefile).
EXE = $(patsubst %.cc,%.o,$(filter-out $(patsubst %.h,%.cc,$(wildcard *.h)),$(wildcard *.cc)))
# Generate one compile rule per source file.
$(foreach cpp,${INMOGEN},$(eval $(call compile,${cpp})))
$(foreach cpp,$(wildcard *.cc),$(eval $(call compile,${cpp})))
# Every program depends on all library objects and on libmoses.a.
$(addprefix ${BINDIR}/${BINPREF}, $(programs)): $(addprefix ${WDIR}/,$(notdir ${OBJ}))
$(addprefix ${BINDIR}/${BINPREF}, $(programs)): ${MOSES_ROOT}/lib/libmoses.a
# Link one executable from its own object file plus all prerequisites added
# above. The output directory is created first, as the compile rule does for
# object files; without this the link fails on a fresh checkout.
${BINDIR}/${BINPREF}%: ${WDIR}/%.o
	echo PREREQS: $<
	@mkdir -p $(@D)
	$(CXX) $(CXXFLAGS) -o $@ $^ ${LIBDIRS} ${LIBS}
.SECONDARY:
-include $(DEP)

View File

@ -0,0 +1,316 @@
build/x86_64/0/tpt_pickler.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.cc \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.h \
/usr/include/c++/4.5/iostream \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
/usr/include/bits/wordsize.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
/usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
/usr/include/c++/4.5/ostream /usr/include/c++/4.5/ios \
/usr/include/c++/4.5/iosfwd /usr/include/c++/4.5/bits/stringfwd.h \
/usr/include/c++/4.5/bits/postypes.h /usr/include/c++/4.5/cwchar \
/usr/include/c++/4.5/cstddef \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
/usr/include/wchar.h /usr/include/stdio.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
/usr/include/bits/wchar.h /usr/include/xlocale.h \
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/char_traits.h \
/usr/include/c++/4.5/bits/stl_algobase.h \
/usr/include/c++/4.5/bits/functexcept.h \
/usr/include/c++/4.5/exception_defines.h \
/usr/include/c++/4.5/bits/cpp_type_traits.h \
/usr/include/c++/4.5/ext/type_traits.h \
/usr/include/c++/4.5/ext/numeric_traits.h \
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
/usr/include/c++/4.5/bits/concept_check.h \
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
/usr/include/c++/4.5/bits/stl_iterator.h \
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/localefwd.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
/usr/include/c++/4.5/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/4.5/cctype \
/usr/include/ctype.h /usr/include/bits/types.h \
/usr/include/bits/typesizes.h /usr/include/endian.h \
/usr/include/bits/endian.h /usr/include/bits/byteswap.h \
/usr/include/c++/4.5/bits/ios_base.h \
/usr/include/c++/4.5/ext/atomicity.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
/usr/include/bits/setjmp.h /usr/include/unistd.h \
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
/usr/include/bits/confname.h /usr/include/getopt.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
/usr/include/c++/4.5/bits/locale_classes.h /usr/include/c++/4.5/string \
/usr/include/c++/4.5/bits/allocator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
/usr/include/c++/4.5/bits/ostream_insert.h \
/usr/include/c++/4.5/cxxabi-forced.h \
/usr/include/c++/4.5/bits/stl_function.h \
/usr/include/c++/4.5/backward/binders.h \
/usr/include/c++/4.5/bits/basic_string.h \
/usr/include/c++/4.5/initializer_list \
/usr/include/c++/4.5/bits/basic_string.tcc \
/usr/include/c++/4.5/bits/locale_classes.tcc \
/usr/include/c++/4.5/streambuf /usr/include/c++/4.5/bits/streambuf.tcc \
/usr/include/c++/4.5/bits/basic_ios.h \
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
/usr/include/wctype.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
/usr/include/c++/4.5/bits/streambuf_iterator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
/usr/include/c++/4.5/bits/locale_facets.tcc \
/usr/include/c++/4.5/bits/basic_ios.tcc \
/usr/include/c++/4.5/bits/ostream.tcc /usr/include/c++/4.5/istream \
/usr/include/c++/4.5/bits/istream.tcc /usr/include/c++/4.5/vector \
/usr/include/c++/4.5/bits/stl_construct.h \
/usr/include/c++/4.5/bits/stl_uninitialized.h \
/usr/include/c++/4.5/bits/stl_vector.h \
/usr/include/c++/4.5/bits/stl_bvector.h \
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/map \
/usr/include/c++/4.5/bits/stl_tree.h /usr/include/c++/4.5/bits/stl_map.h \
/usr/include/c++/4.5/bits/stl_multimap.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
/usr/include/stdint.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/num_read_write.h \
/usr/include/byteswap.h /usr/include/c++/4.5/cassert \
/usr/include/assert.h /usr/include/sys/stat.h /usr/include/bits/stat.h
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.h:
/usr/include/c++/4.5/iostream:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
/usr/include/bits/wordsize.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
/usr/include/features.h:
/usr/include/sys/cdefs.h:
/usr/include/gnu/stubs.h:
/usr/include/gnu/stubs-64.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
/usr/include/c++/4.5/ostream:
/usr/include/c++/4.5/ios:
/usr/include/c++/4.5/iosfwd:
/usr/include/c++/4.5/bits/stringfwd.h:
/usr/include/c++/4.5/bits/postypes.h:
/usr/include/c++/4.5/cwchar:
/usr/include/c++/4.5/cstddef:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
/usr/include/wchar.h:
/usr/include/stdio.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
/usr/include/bits/wchar.h:
/usr/include/xlocale.h:
/usr/include/c++/4.5/exception:
/usr/include/c++/4.5/bits/char_traits.h:
/usr/include/c++/4.5/bits/stl_algobase.h:
/usr/include/c++/4.5/bits/functexcept.h:
/usr/include/c++/4.5/exception_defines.h:
/usr/include/c++/4.5/bits/cpp_type_traits.h:
/usr/include/c++/4.5/ext/type_traits.h:
/usr/include/c++/4.5/ext/numeric_traits.h:
/usr/include/c++/4.5/bits/stl_pair.h:
/usr/include/c++/4.5/bits/move.h:
/usr/include/c++/4.5/bits/concept_check.h:
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
/usr/include/c++/4.5/bits/stl_iterator.h:
/usr/include/c++/4.5/debug/debug.h:
/usr/include/c++/4.5/bits/localefwd.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
/usr/include/c++/4.5/clocale:
/usr/include/locale.h:
/usr/include/bits/locale.h:
/usr/include/c++/4.5/cctype:
/usr/include/ctype.h:
/usr/include/bits/types.h:
/usr/include/bits/typesizes.h:
/usr/include/endian.h:
/usr/include/bits/endian.h:
/usr/include/bits/byteswap.h:
/usr/include/c++/4.5/bits/ios_base.h:
/usr/include/c++/4.5/ext/atomicity.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
/usr/include/pthread.h:
/usr/include/sched.h:
/usr/include/time.h:
/usr/include/bits/sched.h:
/usr/include/bits/time.h:
/usr/include/signal.h:
/usr/include/bits/sigset.h:
/usr/include/bits/pthreadtypes.h:
/usr/include/bits/setjmp.h:
/usr/include/unistd.h:
/usr/include/bits/posix_opt.h:
/usr/include/bits/environments.h:
/usr/include/bits/confname.h:
/usr/include/getopt.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
/usr/include/c++/4.5/bits/locale_classes.h:
/usr/include/c++/4.5/string:
/usr/include/c++/4.5/bits/allocator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
/usr/include/c++/4.5/ext/new_allocator.h:
/usr/include/c++/4.5/new:
/usr/include/c++/4.5/bits/ostream_insert.h:
/usr/include/c++/4.5/cxxabi-forced.h:
/usr/include/c++/4.5/bits/stl_function.h:
/usr/include/c++/4.5/backward/binders.h:
/usr/include/c++/4.5/bits/basic_string.h:
/usr/include/c++/4.5/initializer_list:
/usr/include/c++/4.5/bits/basic_string.tcc:
/usr/include/c++/4.5/bits/locale_classes.tcc:
/usr/include/c++/4.5/streambuf:
/usr/include/c++/4.5/bits/streambuf.tcc:
/usr/include/c++/4.5/bits/basic_ios.h:
/usr/include/c++/4.5/bits/locale_facets.h:
/usr/include/c++/4.5/cwctype:
/usr/include/wctype.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
/usr/include/c++/4.5/bits/streambuf_iterator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
/usr/include/c++/4.5/bits/locale_facets.tcc:
/usr/include/c++/4.5/bits/basic_ios.tcc:
/usr/include/c++/4.5/bits/ostream.tcc:
/usr/include/c++/4.5/istream:
/usr/include/c++/4.5/bits/istream.tcc:
/usr/include/c++/4.5/vector:
/usr/include/c++/4.5/bits/stl_construct.h:
/usr/include/c++/4.5/bits/stl_uninitialized.h:
/usr/include/c++/4.5/bits/stl_vector.h:
/usr/include/c++/4.5/bits/stl_bvector.h:
/usr/include/c++/4.5/bits/vector.tcc:
/usr/include/c++/4.5/map:
/usr/include/c++/4.5/bits/stl_tree.h:
/usr/include/c++/4.5/bits/stl_map.h:
/usr/include/c++/4.5/bits/stl_multimap.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
/usr/include/stdint.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/num_read_write.h:
/usr/include/byteswap.h:
/usr/include/c++/4.5/cassert:
/usr/include/assert.h:
/usr/include/sys/stat.h:
/usr/include/bits/stat.h:

View File

@ -0,0 +1,294 @@
build/x86_64/0/tpt_tightindex.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.cc \
/usr/include/c++/4.5/iostream \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
/usr/include/bits/wordsize.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
/usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
/usr/include/c++/4.5/ostream /usr/include/c++/4.5/ios \
/usr/include/c++/4.5/iosfwd /usr/include/c++/4.5/bits/stringfwd.h \
/usr/include/c++/4.5/bits/postypes.h /usr/include/c++/4.5/cwchar \
/usr/include/c++/4.5/cstddef \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
/usr/include/wchar.h /usr/include/stdio.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
/usr/include/bits/wchar.h /usr/include/xlocale.h \
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/char_traits.h \
/usr/include/c++/4.5/bits/stl_algobase.h \
/usr/include/c++/4.5/bits/functexcept.h \
/usr/include/c++/4.5/exception_defines.h \
/usr/include/c++/4.5/bits/cpp_type_traits.h \
/usr/include/c++/4.5/ext/type_traits.h \
/usr/include/c++/4.5/ext/numeric_traits.h \
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
/usr/include/c++/4.5/bits/concept_check.h \
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
/usr/include/c++/4.5/bits/stl_iterator.h \
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/localefwd.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
/usr/include/c++/4.5/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/4.5/cctype \
/usr/include/ctype.h /usr/include/bits/types.h \
/usr/include/bits/typesizes.h /usr/include/endian.h \
/usr/include/bits/endian.h /usr/include/bits/byteswap.h \
/usr/include/c++/4.5/bits/ios_base.h \
/usr/include/c++/4.5/ext/atomicity.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
/usr/include/bits/setjmp.h /usr/include/unistd.h \
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
/usr/include/bits/confname.h /usr/include/getopt.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
/usr/include/c++/4.5/bits/locale_classes.h /usr/include/c++/4.5/string \
/usr/include/c++/4.5/bits/allocator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
/usr/include/c++/4.5/bits/ostream_insert.h \
/usr/include/c++/4.5/cxxabi-forced.h \
/usr/include/c++/4.5/bits/stl_function.h \
/usr/include/c++/4.5/backward/binders.h \
/usr/include/c++/4.5/bits/basic_string.h \
/usr/include/c++/4.5/initializer_list \
/usr/include/c++/4.5/bits/basic_string.tcc \
/usr/include/c++/4.5/bits/locale_classes.tcc \
/usr/include/c++/4.5/streambuf /usr/include/c++/4.5/bits/streambuf.tcc \
/usr/include/c++/4.5/bits/basic_ios.h \
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
/usr/include/wctype.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
/usr/include/c++/4.5/bits/streambuf_iterator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
/usr/include/c++/4.5/bits/locale_facets.tcc \
/usr/include/c++/4.5/bits/basic_ios.tcc \
/usr/include/c++/4.5/bits/ostream.tcc /usr/include/c++/4.5/istream \
/usr/include/c++/4.5/bits/istream.tcc /usr/include/assert.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.h \
/usr/include/c++/4.5/map /usr/include/c++/4.5/bits/stl_tree.h \
/usr/include/c++/4.5/bits/stl_map.h \
/usr/include/c++/4.5/bits/stl_multimap.h /usr/include/c++/4.5/sstream \
/usr/include/c++/4.5/bits/sstream.tcc \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
/usr/include/stdint.h /usr/include/c++/4.5/cassert
/usr/include/c++/4.5/iostream:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
/usr/include/bits/wordsize.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
/usr/include/features.h:
/usr/include/sys/cdefs.h:
/usr/include/gnu/stubs.h:
/usr/include/gnu/stubs-64.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
/usr/include/c++/4.5/ostream:
/usr/include/c++/4.5/ios:
/usr/include/c++/4.5/iosfwd:
/usr/include/c++/4.5/bits/stringfwd.h:
/usr/include/c++/4.5/bits/postypes.h:
/usr/include/c++/4.5/cwchar:
/usr/include/c++/4.5/cstddef:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
/usr/include/wchar.h:
/usr/include/stdio.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
/usr/include/bits/wchar.h:
/usr/include/xlocale.h:
/usr/include/c++/4.5/exception:
/usr/include/c++/4.5/bits/char_traits.h:
/usr/include/c++/4.5/bits/stl_algobase.h:
/usr/include/c++/4.5/bits/functexcept.h:
/usr/include/c++/4.5/exception_defines.h:
/usr/include/c++/4.5/bits/cpp_type_traits.h:
/usr/include/c++/4.5/ext/type_traits.h:
/usr/include/c++/4.5/ext/numeric_traits.h:
/usr/include/c++/4.5/bits/stl_pair.h:
/usr/include/c++/4.5/bits/move.h:
/usr/include/c++/4.5/bits/concept_check.h:
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
/usr/include/c++/4.5/bits/stl_iterator.h:
/usr/include/c++/4.5/debug/debug.h:
/usr/include/c++/4.5/bits/localefwd.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
/usr/include/c++/4.5/clocale:
/usr/include/locale.h:
/usr/include/bits/locale.h:
/usr/include/c++/4.5/cctype:
/usr/include/ctype.h:
/usr/include/bits/types.h:
/usr/include/bits/typesizes.h:
/usr/include/endian.h:
/usr/include/bits/endian.h:
/usr/include/bits/byteswap.h:
/usr/include/c++/4.5/bits/ios_base.h:
/usr/include/c++/4.5/ext/atomicity.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
/usr/include/pthread.h:
/usr/include/sched.h:
/usr/include/time.h:
/usr/include/bits/sched.h:
/usr/include/bits/time.h:
/usr/include/signal.h:
/usr/include/bits/sigset.h:
/usr/include/bits/pthreadtypes.h:
/usr/include/bits/setjmp.h:
/usr/include/unistd.h:
/usr/include/bits/posix_opt.h:
/usr/include/bits/environments.h:
/usr/include/bits/confname.h:
/usr/include/getopt.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
/usr/include/c++/4.5/bits/locale_classes.h:
/usr/include/c++/4.5/string:
/usr/include/c++/4.5/bits/allocator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
/usr/include/c++/4.5/ext/new_allocator.h:
/usr/include/c++/4.5/new:
/usr/include/c++/4.5/bits/ostream_insert.h:
/usr/include/c++/4.5/cxxabi-forced.h:
/usr/include/c++/4.5/bits/stl_function.h:
/usr/include/c++/4.5/backward/binders.h:
/usr/include/c++/4.5/bits/basic_string.h:
/usr/include/c++/4.5/initializer_list:
/usr/include/c++/4.5/bits/basic_string.tcc:
/usr/include/c++/4.5/bits/locale_classes.tcc:
/usr/include/c++/4.5/streambuf:
/usr/include/c++/4.5/bits/streambuf.tcc:
/usr/include/c++/4.5/bits/basic_ios.h:
/usr/include/c++/4.5/bits/locale_facets.h:
/usr/include/c++/4.5/cwctype:
/usr/include/wctype.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
/usr/include/c++/4.5/bits/streambuf_iterator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
/usr/include/c++/4.5/bits/locale_facets.tcc:
/usr/include/c++/4.5/bits/basic_ios.tcc:
/usr/include/c++/4.5/bits/ostream.tcc:
/usr/include/c++/4.5/istream:
/usr/include/c++/4.5/bits/istream.tcc:
/usr/include/assert.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.h:
/usr/include/c++/4.5/map:
/usr/include/c++/4.5/bits/stl_tree.h:
/usr/include/c++/4.5/bits/stl_map.h:
/usr/include/c++/4.5/bits/stl_multimap.h:
/usr/include/c++/4.5/sstream:
/usr/include/c++/4.5/bits/sstream.tcc:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
/usr/include/stdint.h:
/usr/include/c++/4.5/cassert:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,542 @@
build/x86_64/0/ug_conll_record.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.cc \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
/usr/include/boost/dynamic_bitset.hpp \
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
/usr/include/assert.h /usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
/usr/include/gnu/stubs-64.h /usr/include/c++/4.5/string \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
/usr/include/c++/4.5/bits/stringfwd.h \
/usr/include/c++/4.5/bits/char_traits.h \
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
/usr/include/c++/4.5/bits/functexcept.h \
/usr/include/c++/4.5/exception_defines.h \
/usr/include/c++/4.5/bits/cpp_type_traits.h \
/usr/include/c++/4.5/ext/type_traits.h \
/usr/include/c++/4.5/ext/numeric_traits.h \
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
/usr/include/c++/4.5/bits/concept_check.h \
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
/usr/include/c++/4.5/bits/stl_iterator.h \
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
/usr/include/bits/wchar.h /usr/include/xlocale.h \
/usr/include/c++/4.5/bits/allocator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
/usr/include/c++/4.5/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
/usr/include/endian.h /usr/include/bits/endian.h \
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
/usr/include/c++/4.5/cxxabi-forced.h \
/usr/include/c++/4.5/bits/stl_function.h \
/usr/include/c++/4.5/backward/binders.h \
/usr/include/c++/4.5/bits/basic_string.h \
/usr/include/c++/4.5/ext/atomicity.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
/usr/include/bits/setjmp.h /usr/include/unistd.h \
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
/usr/include/bits/confname.h /usr/include/getopt.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
/usr/include/c++/4.5/initializer_list \
/usr/include/c++/4.5/bits/basic_string.tcc \
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
/usr/include/sys/select.h /usr/include/bits/select.h \
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
/usr/include/c++/4.5/bits/algorithmfwd.h \
/usr/include/c++/4.5/bits/stl_heap.h \
/usr/include/c++/4.5/bits/stl_tempbuf.h \
/usr/include/c++/4.5/bits/stl_construct.h \
/usr/include/c++/4.5/bits/stl_uninitialized.h \
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
/usr/include/c++/4.5/bits/stl_bvector.h \
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
/usr/include/bits/stdio_lim.h \
/usr/include/boost/dynamic_bitset/config.hpp \
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
/usr/include/boost/config/select_compiler_config.hpp \
/usr/include/boost/config/compiler/gcc.hpp \
/usr/include/boost/config/select_stdlib_config.hpp \
/usr/include/boost/config/no_tr1/utility.hpp \
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
/usr/include/boost/config/select_platform_config.hpp \
/usr/include/boost/config/platform/linux.hpp \
/usr/include/boost/config/posix_features.hpp \
/usr/include/boost/config/suffix.hpp \
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
/usr/include/c++/4.5/bits/locale_classes.h \
/usr/include/c++/4.5/bits/locale_classes.tcc \
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
/usr/include/wctype.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
/usr/include/c++/4.5/bits/streambuf.tcc \
/usr/include/c++/4.5/bits/streambuf_iterator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
/usr/include/c++/4.5/bits/locale_facets.tcc \
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
/usr/include/c++/4.5/ctime \
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
/usr/include/c++/4.5/bits/basic_ios.h \
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
/usr/include/c++/4.5/bits/ostream.tcc \
/usr/include/c++/4.5/bits/istream.tcc \
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
/usr/include/c++/4.5/backward/auto_ptr.h \
/usr/include/boost/detail/dynamic_bitset.hpp \
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
/usr/include/c++/4.5/bits/stream_iterator.h \
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
/usr/include/boost/pending/integer_log2.hpp \
/usr/include/boost/shared_ptr.hpp \
/usr/include/boost/smart_ptr/shared_ptr.hpp \
/usr/include/boost/config/no_tr1/memory.hpp \
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
/usr/include/boost/throw_exception.hpp \
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
/usr/include/boost/exception/exception.hpp \
/usr/include/boost/current_function.hpp \
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
/usr/include/c++/4.5/functional \
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
/usr/include/boost/memory_order.hpp \
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
/usr/include/boost/scoped_ptr.hpp \
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
/usr/include/stdint.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
/usr/include/boost/dynamic_bitset.hpp:
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
/usr/include/assert.h:
/usr/include/features.h:
/usr/include/sys/cdefs.h:
/usr/include/bits/wordsize.h:
/usr/include/gnu/stubs.h:
/usr/include/gnu/stubs-64.h:
/usr/include/c++/4.5/string:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
/usr/include/c++/4.5/bits/stringfwd.h:
/usr/include/c++/4.5/bits/char_traits.h:
/usr/include/c++/4.5/bits/stl_algobase.h:
/usr/include/c++/4.5/cstddef:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
/usr/include/c++/4.5/bits/functexcept.h:
/usr/include/c++/4.5/exception_defines.h:
/usr/include/c++/4.5/bits/cpp_type_traits.h:
/usr/include/c++/4.5/ext/type_traits.h:
/usr/include/c++/4.5/ext/numeric_traits.h:
/usr/include/c++/4.5/bits/stl_pair.h:
/usr/include/c++/4.5/bits/move.h:
/usr/include/c++/4.5/bits/concept_check.h:
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
/usr/include/c++/4.5/bits/stl_iterator.h:
/usr/include/c++/4.5/debug/debug.h:
/usr/include/c++/4.5/bits/postypes.h:
/usr/include/c++/4.5/cwchar:
/usr/include/wchar.h:
/usr/include/stdio.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
/usr/include/bits/wchar.h:
/usr/include/xlocale.h:
/usr/include/c++/4.5/bits/allocator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
/usr/include/c++/4.5/ext/new_allocator.h:
/usr/include/c++/4.5/new:
/usr/include/c++/4.5/exception:
/usr/include/c++/4.5/bits/localefwd.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
/usr/include/c++/4.5/clocale:
/usr/include/locale.h:
/usr/include/bits/locale.h:
/usr/include/c++/4.5/iosfwd:
/usr/include/c++/4.5/cctype:
/usr/include/ctype.h:
/usr/include/bits/types.h:
/usr/include/bits/typesizes.h:
/usr/include/endian.h:
/usr/include/bits/endian.h:
/usr/include/bits/byteswap.h:
/usr/include/c++/4.5/bits/ostream_insert.h:
/usr/include/c++/4.5/cxxabi-forced.h:
/usr/include/c++/4.5/bits/stl_function.h:
/usr/include/c++/4.5/backward/binders.h:
/usr/include/c++/4.5/bits/basic_string.h:
/usr/include/c++/4.5/ext/atomicity.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
/usr/include/pthread.h:
/usr/include/sched.h:
/usr/include/time.h:
/usr/include/bits/sched.h:
/usr/include/bits/time.h:
/usr/include/signal.h:
/usr/include/bits/sigset.h:
/usr/include/bits/pthreadtypes.h:
/usr/include/bits/setjmp.h:
/usr/include/unistd.h:
/usr/include/bits/posix_opt.h:
/usr/include/bits/environments.h:
/usr/include/bits/confname.h:
/usr/include/getopt.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
/usr/include/c++/4.5/initializer_list:
/usr/include/c++/4.5/bits/basic_string.tcc:
/usr/include/c++/4.5/stdexcept:
/usr/include/c++/4.5/algorithm:
/usr/include/c++/4.5/utility:
/usr/include/c++/4.5/bits/stl_relops.h:
/usr/include/c++/4.5/bits/stl_algo.h:
/usr/include/c++/4.5/cstdlib:
/usr/include/stdlib.h:
/usr/include/bits/waitflags.h:
/usr/include/bits/waitstatus.h:
/usr/include/sys/types.h:
/usr/include/sys/select.h:
/usr/include/bits/select.h:
/usr/include/sys/sysmacros.h:
/usr/include/alloca.h:
/usr/include/c++/4.5/bits/algorithmfwd.h:
/usr/include/c++/4.5/bits/stl_heap.h:
/usr/include/c++/4.5/bits/stl_tempbuf.h:
/usr/include/c++/4.5/bits/stl_construct.h:
/usr/include/c++/4.5/bits/stl_uninitialized.h:
/usr/include/c++/4.5/vector:
/usr/include/c++/4.5/bits/stl_vector.h:
/usr/include/c++/4.5/bits/stl_bvector.h:
/usr/include/c++/4.5/bits/vector.tcc:
/usr/include/c++/4.5/climits:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
/usr/include/limits.h:
/usr/include/bits/posix1_lim.h:
/usr/include/bits/local_lim.h:
/usr/include/linux/limits.h:
/usr/include/bits/posix2_lim.h:
/usr/include/bits/xopen_lim.h:
/usr/include/bits/stdio_lim.h:
/usr/include/boost/dynamic_bitset/config.hpp:
/usr/include/boost/config.hpp:
/usr/include/boost/config/user.hpp:
/usr/include/boost/config/select_compiler_config.hpp:
/usr/include/boost/config/compiler/gcc.hpp:
/usr/include/boost/config/select_stdlib_config.hpp:
/usr/include/boost/config/no_tr1/utility.hpp:
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
/usr/include/boost/config/select_platform_config.hpp:
/usr/include/boost/config/platform/linux.hpp:
/usr/include/boost/config/posix_features.hpp:
/usr/include/boost/config/suffix.hpp:
/usr/include/boost/detail/workaround.hpp:
/usr/include/c++/4.5/locale:
/usr/include/c++/4.5/bits/locale_classes.h:
/usr/include/c++/4.5/bits/locale_classes.tcc:
/usr/include/c++/4.5/bits/locale_facets.h:
/usr/include/c++/4.5/cwctype:
/usr/include/wctype.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
/usr/include/c++/4.5/bits/ios_base.h:
/usr/include/c++/4.5/streambuf:
/usr/include/c++/4.5/bits/streambuf.tcc:
/usr/include/c++/4.5/bits/streambuf_iterator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
/usr/include/c++/4.5/bits/locale_facets.tcc:
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
/usr/include/c++/4.5/ctime:
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
/usr/include/libintl.h:
/usr/include/c++/4.5/bits/codecvt.h:
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
/usr/include/c++/4.5/istream:
/usr/include/c++/4.5/ios:
/usr/include/c++/4.5/bits/basic_ios.h:
/usr/include/c++/4.5/bits/basic_ios.tcc:
/usr/include/c++/4.5/ostream:
/usr/include/c++/4.5/bits/ostream.tcc:
/usr/include/c++/4.5/bits/istream.tcc:
/usr/include/boost/dynamic_bitset_fwd.hpp:
/usr/include/c++/4.5/memory:
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
/usr/include/c++/4.5/backward/auto_ptr.h:
/usr/include/boost/detail/dynamic_bitset.hpp:
/usr/include/boost/detail/iterator.hpp:
/usr/include/c++/4.5/iterator:
/usr/include/c++/4.5/bits/stream_iterator.h:
/usr/include/boost/static_assert.hpp:
/usr/include/boost/limits.hpp:
/usr/include/c++/4.5/limits:
/usr/include/boost/pending/lowest_bit.hpp:
/usr/include/boost/pending/integer_log2.hpp:
/usr/include/boost/shared_ptr.hpp:
/usr/include/boost/smart_ptr/shared_ptr.hpp:
/usr/include/boost/config/no_tr1/memory.hpp:
/usr/include/boost/assert.hpp:
/usr/include/boost/checked_delete.hpp:
/usr/include/boost/throw_exception.hpp:
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
/usr/include/boost/exception/exception.hpp:
/usr/include/boost/current_function.hpp:
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
/usr/include/boost/detail/sp_typeinfo.hpp:
/usr/include/c++/4.5/typeinfo:
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
/usr/include/c++/4.5/functional:
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
/usr/include/boost/memory_order.hpp:
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
/usr/include/boost/scoped_ptr.hpp:
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
/usr/include/stdint.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
build/x86_64/0/ug_mmbitext.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_mmbitext.cc

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,552 @@
build/x86_64/0/ug_tsa_array_entry.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.cc \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h \
/usr/include/c++/4.5/cassert /usr/include/assert.h \
/usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
/usr/include/gnu/stubs-64.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
/usr/include/boost/dynamic_bitset.hpp \
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
/usr/include/c++/4.5/string \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
/usr/include/c++/4.5/bits/stringfwd.h \
/usr/include/c++/4.5/bits/char_traits.h \
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
/usr/include/c++/4.5/bits/functexcept.h \
/usr/include/c++/4.5/exception_defines.h \
/usr/include/c++/4.5/bits/cpp_type_traits.h \
/usr/include/c++/4.5/ext/type_traits.h \
/usr/include/c++/4.5/ext/numeric_traits.h \
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
/usr/include/c++/4.5/bits/concept_check.h \
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
/usr/include/c++/4.5/bits/stl_iterator.h \
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
/usr/include/bits/wchar.h /usr/include/xlocale.h \
/usr/include/c++/4.5/bits/allocator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
/usr/include/c++/4.5/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
/usr/include/endian.h /usr/include/bits/endian.h \
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
/usr/include/c++/4.5/cxxabi-forced.h \
/usr/include/c++/4.5/bits/stl_function.h \
/usr/include/c++/4.5/backward/binders.h \
/usr/include/c++/4.5/bits/basic_string.h \
/usr/include/c++/4.5/ext/atomicity.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
/usr/include/bits/setjmp.h /usr/include/unistd.h \
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
/usr/include/bits/confname.h /usr/include/getopt.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
/usr/include/c++/4.5/initializer_list \
/usr/include/c++/4.5/bits/basic_string.tcc \
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
/usr/include/sys/select.h /usr/include/bits/select.h \
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
/usr/include/c++/4.5/bits/algorithmfwd.h \
/usr/include/c++/4.5/bits/stl_heap.h \
/usr/include/c++/4.5/bits/stl_tempbuf.h \
/usr/include/c++/4.5/bits/stl_construct.h \
/usr/include/c++/4.5/bits/stl_uninitialized.h \
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
/usr/include/c++/4.5/bits/stl_bvector.h \
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
/usr/include/bits/stdio_lim.h \
/usr/include/boost/dynamic_bitset/config.hpp \
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
/usr/include/boost/config/select_compiler_config.hpp \
/usr/include/boost/config/compiler/gcc.hpp \
/usr/include/boost/config/select_stdlib_config.hpp \
/usr/include/boost/config/no_tr1/utility.hpp \
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
/usr/include/boost/config/select_platform_config.hpp \
/usr/include/boost/config/platform/linux.hpp \
/usr/include/boost/config/posix_features.hpp \
/usr/include/boost/config/suffix.hpp \
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
/usr/include/c++/4.5/bits/locale_classes.h \
/usr/include/c++/4.5/bits/locale_classes.tcc \
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
/usr/include/wctype.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
/usr/include/c++/4.5/bits/streambuf.tcc \
/usr/include/c++/4.5/bits/streambuf_iterator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
/usr/include/c++/4.5/bits/locale_facets.tcc \
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
/usr/include/c++/4.5/ctime \
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
/usr/include/c++/4.5/bits/basic_ios.h \
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
/usr/include/c++/4.5/bits/ostream.tcc \
/usr/include/c++/4.5/bits/istream.tcc \
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
/usr/include/c++/4.5/backward/auto_ptr.h \
/usr/include/boost/detail/dynamic_bitset.hpp \
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
/usr/include/c++/4.5/bits/stream_iterator.h \
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
/usr/include/boost/pending/integer_log2.hpp \
/usr/include/boost/shared_ptr.hpp \
/usr/include/boost/smart_ptr/shared_ptr.hpp \
/usr/include/boost/config/no_tr1/memory.hpp \
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
/usr/include/boost/throw_exception.hpp \
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
/usr/include/boost/exception/exception.hpp \
/usr/include/boost/current_function.hpp \
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
/usr/include/c++/4.5/functional \
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
/usr/include/boost/memory_order.hpp \
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
/usr/include/boost/scoped_ptr.hpp \
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
/usr/include/stdint.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
/home/germann/code/moses/master/mosesdecoder/moses/generic/sampling/Sampling.h
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h:
/usr/include/c++/4.5/cassert:
/usr/include/assert.h:
/usr/include/features.h:
/usr/include/sys/cdefs.h:
/usr/include/bits/wordsize.h:
/usr/include/gnu/stubs.h:
/usr/include/gnu/stubs-64.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
/usr/include/boost/dynamic_bitset.hpp:
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
/usr/include/c++/4.5/string:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
/usr/include/c++/4.5/bits/stringfwd.h:
/usr/include/c++/4.5/bits/char_traits.h:
/usr/include/c++/4.5/bits/stl_algobase.h:
/usr/include/c++/4.5/cstddef:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
/usr/include/c++/4.5/bits/functexcept.h:
/usr/include/c++/4.5/exception_defines.h:
/usr/include/c++/4.5/bits/cpp_type_traits.h:
/usr/include/c++/4.5/ext/type_traits.h:
/usr/include/c++/4.5/ext/numeric_traits.h:
/usr/include/c++/4.5/bits/stl_pair.h:
/usr/include/c++/4.5/bits/move.h:
/usr/include/c++/4.5/bits/concept_check.h:
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
/usr/include/c++/4.5/bits/stl_iterator.h:
/usr/include/c++/4.5/debug/debug.h:
/usr/include/c++/4.5/bits/postypes.h:
/usr/include/c++/4.5/cwchar:
/usr/include/wchar.h:
/usr/include/stdio.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
/usr/include/bits/wchar.h:
/usr/include/xlocale.h:
/usr/include/c++/4.5/bits/allocator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
/usr/include/c++/4.5/ext/new_allocator.h:
/usr/include/c++/4.5/new:
/usr/include/c++/4.5/exception:
/usr/include/c++/4.5/bits/localefwd.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
/usr/include/c++/4.5/clocale:
/usr/include/locale.h:
/usr/include/bits/locale.h:
/usr/include/c++/4.5/iosfwd:
/usr/include/c++/4.5/cctype:
/usr/include/ctype.h:
/usr/include/bits/types.h:
/usr/include/bits/typesizes.h:
/usr/include/endian.h:
/usr/include/bits/endian.h:
/usr/include/bits/byteswap.h:
/usr/include/c++/4.5/bits/ostream_insert.h:
/usr/include/c++/4.5/cxxabi-forced.h:
/usr/include/c++/4.5/bits/stl_function.h:
/usr/include/c++/4.5/backward/binders.h:
/usr/include/c++/4.5/bits/basic_string.h:
/usr/include/c++/4.5/ext/atomicity.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
/usr/include/pthread.h:
/usr/include/sched.h:
/usr/include/time.h:
/usr/include/bits/sched.h:
/usr/include/bits/time.h:
/usr/include/signal.h:
/usr/include/bits/sigset.h:
/usr/include/bits/pthreadtypes.h:
/usr/include/bits/setjmp.h:
/usr/include/unistd.h:
/usr/include/bits/posix_opt.h:
/usr/include/bits/environments.h:
/usr/include/bits/confname.h:
/usr/include/getopt.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
/usr/include/c++/4.5/initializer_list:
/usr/include/c++/4.5/bits/basic_string.tcc:
/usr/include/c++/4.5/stdexcept:
/usr/include/c++/4.5/algorithm:
/usr/include/c++/4.5/utility:
/usr/include/c++/4.5/bits/stl_relops.h:
/usr/include/c++/4.5/bits/stl_algo.h:
/usr/include/c++/4.5/cstdlib:
/usr/include/stdlib.h:
/usr/include/bits/waitflags.h:
/usr/include/bits/waitstatus.h:
/usr/include/sys/types.h:
/usr/include/sys/select.h:
/usr/include/bits/select.h:
/usr/include/sys/sysmacros.h:
/usr/include/alloca.h:
/usr/include/c++/4.5/bits/algorithmfwd.h:
/usr/include/c++/4.5/bits/stl_heap.h:
/usr/include/c++/4.5/bits/stl_tempbuf.h:
/usr/include/c++/4.5/bits/stl_construct.h:
/usr/include/c++/4.5/bits/stl_uninitialized.h:
/usr/include/c++/4.5/vector:
/usr/include/c++/4.5/bits/stl_vector.h:
/usr/include/c++/4.5/bits/stl_bvector.h:
/usr/include/c++/4.5/bits/vector.tcc:
/usr/include/c++/4.5/climits:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
/usr/include/limits.h:
/usr/include/bits/posix1_lim.h:
/usr/include/bits/local_lim.h:
/usr/include/linux/limits.h:
/usr/include/bits/posix2_lim.h:
/usr/include/bits/xopen_lim.h:
/usr/include/bits/stdio_lim.h:
/usr/include/boost/dynamic_bitset/config.hpp:
/usr/include/boost/config.hpp:
/usr/include/boost/config/user.hpp:
/usr/include/boost/config/select_compiler_config.hpp:
/usr/include/boost/config/compiler/gcc.hpp:
/usr/include/boost/config/select_stdlib_config.hpp:
/usr/include/boost/config/no_tr1/utility.hpp:
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
/usr/include/boost/config/select_platform_config.hpp:
/usr/include/boost/config/platform/linux.hpp:
/usr/include/boost/config/posix_features.hpp:
/usr/include/boost/config/suffix.hpp:
/usr/include/boost/detail/workaround.hpp:
/usr/include/c++/4.5/locale:
/usr/include/c++/4.5/bits/locale_classes.h:
/usr/include/c++/4.5/bits/locale_classes.tcc:
/usr/include/c++/4.5/bits/locale_facets.h:
/usr/include/c++/4.5/cwctype:
/usr/include/wctype.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
/usr/include/c++/4.5/bits/ios_base.h:
/usr/include/c++/4.5/streambuf:
/usr/include/c++/4.5/bits/streambuf.tcc:
/usr/include/c++/4.5/bits/streambuf_iterator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
/usr/include/c++/4.5/bits/locale_facets.tcc:
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
/usr/include/c++/4.5/ctime:
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
/usr/include/libintl.h:
/usr/include/c++/4.5/bits/codecvt.h:
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
/usr/include/c++/4.5/istream:
/usr/include/c++/4.5/ios:
/usr/include/c++/4.5/bits/basic_ios.h:
/usr/include/c++/4.5/bits/basic_ios.tcc:
/usr/include/c++/4.5/ostream:
/usr/include/c++/4.5/bits/ostream.tcc:
/usr/include/c++/4.5/bits/istream.tcc:
/usr/include/boost/dynamic_bitset_fwd.hpp:
/usr/include/c++/4.5/memory:
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
/usr/include/c++/4.5/backward/auto_ptr.h:
/usr/include/boost/detail/dynamic_bitset.hpp:
/usr/include/boost/detail/iterator.hpp:
/usr/include/c++/4.5/iterator:
/usr/include/c++/4.5/bits/stream_iterator.h:
/usr/include/boost/static_assert.hpp:
/usr/include/boost/limits.hpp:
/usr/include/c++/4.5/limits:
/usr/include/boost/pending/lowest_bit.hpp:
/usr/include/boost/pending/integer_log2.hpp:
/usr/include/boost/shared_ptr.hpp:
/usr/include/boost/smart_ptr/shared_ptr.hpp:
/usr/include/boost/config/no_tr1/memory.hpp:
/usr/include/boost/assert.hpp:
/usr/include/boost/checked_delete.hpp:
/usr/include/boost/throw_exception.hpp:
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
/usr/include/boost/exception/exception.hpp:
/usr/include/boost/current_function.hpp:
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
/usr/include/boost/detail/sp_typeinfo.hpp:
/usr/include/c++/4.5/typeinfo:
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
/usr/include/c++/4.5/functional:
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
/usr/include/boost/memory_order.hpp:
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
/usr/include/boost/scoped_ptr.hpp:
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
/usr/include/stdint.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
/home/germann/code/moses/master/mosesdecoder/moses/generic/sampling/Sampling.h:

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,546 @@
build/x86_64/0/ug_ttrack_position.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.cc \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h \
/usr/include/c++/4.5/cassert /usr/include/assert.h \
/usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
/usr/include/gnu/stubs-64.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
/usr/include/boost/dynamic_bitset.hpp \
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
/usr/include/c++/4.5/string \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
/usr/include/c++/4.5/bits/stringfwd.h \
/usr/include/c++/4.5/bits/char_traits.h \
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
/usr/include/c++/4.5/bits/functexcept.h \
/usr/include/c++/4.5/exception_defines.h \
/usr/include/c++/4.5/bits/cpp_type_traits.h \
/usr/include/c++/4.5/ext/type_traits.h \
/usr/include/c++/4.5/ext/numeric_traits.h \
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
/usr/include/c++/4.5/bits/concept_check.h \
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
/usr/include/c++/4.5/bits/stl_iterator.h \
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
/usr/include/bits/wchar.h /usr/include/xlocale.h \
/usr/include/c++/4.5/bits/allocator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
/usr/include/c++/4.5/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
/usr/include/endian.h /usr/include/bits/endian.h \
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
/usr/include/c++/4.5/cxxabi-forced.h \
/usr/include/c++/4.5/bits/stl_function.h \
/usr/include/c++/4.5/backward/binders.h \
/usr/include/c++/4.5/bits/basic_string.h \
/usr/include/c++/4.5/ext/atomicity.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
/usr/include/bits/setjmp.h /usr/include/unistd.h \
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
/usr/include/bits/confname.h /usr/include/getopt.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
/usr/include/c++/4.5/initializer_list \
/usr/include/c++/4.5/bits/basic_string.tcc \
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
/usr/include/sys/select.h /usr/include/bits/select.h \
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
/usr/include/c++/4.5/bits/algorithmfwd.h \
/usr/include/c++/4.5/bits/stl_heap.h \
/usr/include/c++/4.5/bits/stl_tempbuf.h \
/usr/include/c++/4.5/bits/stl_construct.h \
/usr/include/c++/4.5/bits/stl_uninitialized.h \
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
/usr/include/c++/4.5/bits/stl_bvector.h \
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
/usr/include/bits/stdio_lim.h \
/usr/include/boost/dynamic_bitset/config.hpp \
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
/usr/include/boost/config/select_compiler_config.hpp \
/usr/include/boost/config/compiler/gcc.hpp \
/usr/include/boost/config/select_stdlib_config.hpp \
/usr/include/boost/config/no_tr1/utility.hpp \
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
/usr/include/boost/config/select_platform_config.hpp \
/usr/include/boost/config/platform/linux.hpp \
/usr/include/boost/config/posix_features.hpp \
/usr/include/boost/config/suffix.hpp \
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
/usr/include/c++/4.5/bits/locale_classes.h \
/usr/include/c++/4.5/bits/locale_classes.tcc \
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
/usr/include/wctype.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
/usr/include/c++/4.5/bits/streambuf.tcc \
/usr/include/c++/4.5/bits/streambuf_iterator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
/usr/include/c++/4.5/bits/locale_facets.tcc \
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
/usr/include/c++/4.5/ctime \
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
/usr/include/c++/4.5/bits/basic_ios.h \
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
/usr/include/c++/4.5/bits/ostream.tcc \
/usr/include/c++/4.5/bits/istream.tcc \
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
/usr/include/c++/4.5/backward/auto_ptr.h \
/usr/include/boost/detail/dynamic_bitset.hpp \
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
/usr/include/c++/4.5/bits/stream_iterator.h \
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
/usr/include/boost/pending/integer_log2.hpp \
/usr/include/boost/shared_ptr.hpp \
/usr/include/boost/smart_ptr/shared_ptr.hpp \
/usr/include/boost/config/no_tr1/memory.hpp \
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
/usr/include/boost/throw_exception.hpp \
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
/usr/include/boost/exception/exception.hpp \
/usr/include/boost/current_function.hpp \
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
/usr/include/c++/4.5/functional \
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
/usr/include/boost/memory_order.hpp \
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
/usr/include/boost/scoped_ptr.hpp \
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
/usr/include/stdint.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h:
/usr/include/c++/4.5/cassert:
/usr/include/assert.h:
/usr/include/features.h:
/usr/include/sys/cdefs.h:
/usr/include/bits/wordsize.h:
/usr/include/gnu/stubs.h:
/usr/include/gnu/stubs-64.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
/usr/include/boost/dynamic_bitset.hpp:
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
/usr/include/c++/4.5/string:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
/usr/include/c++/4.5/bits/stringfwd.h:
/usr/include/c++/4.5/bits/char_traits.h:
/usr/include/c++/4.5/bits/stl_algobase.h:
/usr/include/c++/4.5/cstddef:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
/usr/include/c++/4.5/bits/functexcept.h:
/usr/include/c++/4.5/exception_defines.h:
/usr/include/c++/4.5/bits/cpp_type_traits.h:
/usr/include/c++/4.5/ext/type_traits.h:
/usr/include/c++/4.5/ext/numeric_traits.h:
/usr/include/c++/4.5/bits/stl_pair.h:
/usr/include/c++/4.5/bits/move.h:
/usr/include/c++/4.5/bits/concept_check.h:
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
/usr/include/c++/4.5/bits/stl_iterator.h:
/usr/include/c++/4.5/debug/debug.h:
/usr/include/c++/4.5/bits/postypes.h:
/usr/include/c++/4.5/cwchar:
/usr/include/wchar.h:
/usr/include/stdio.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
/usr/include/bits/wchar.h:
/usr/include/xlocale.h:
/usr/include/c++/4.5/bits/allocator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
/usr/include/c++/4.5/ext/new_allocator.h:
/usr/include/c++/4.5/new:
/usr/include/c++/4.5/exception:
/usr/include/c++/4.5/bits/localefwd.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
/usr/include/c++/4.5/clocale:
/usr/include/locale.h:
/usr/include/bits/locale.h:
/usr/include/c++/4.5/iosfwd:
/usr/include/c++/4.5/cctype:
/usr/include/ctype.h:
/usr/include/bits/types.h:
/usr/include/bits/typesizes.h:
/usr/include/endian.h:
/usr/include/bits/endian.h:
/usr/include/bits/byteswap.h:
/usr/include/c++/4.5/bits/ostream_insert.h:
/usr/include/c++/4.5/cxxabi-forced.h:
/usr/include/c++/4.5/bits/stl_function.h:
/usr/include/c++/4.5/backward/binders.h:
/usr/include/c++/4.5/bits/basic_string.h:
/usr/include/c++/4.5/ext/atomicity.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
/usr/include/pthread.h:
/usr/include/sched.h:
/usr/include/time.h:
/usr/include/bits/sched.h:
/usr/include/bits/time.h:
/usr/include/signal.h:
/usr/include/bits/sigset.h:
/usr/include/bits/pthreadtypes.h:
/usr/include/bits/setjmp.h:
/usr/include/unistd.h:
/usr/include/bits/posix_opt.h:
/usr/include/bits/environments.h:
/usr/include/bits/confname.h:
/usr/include/getopt.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
/usr/include/c++/4.5/initializer_list:
/usr/include/c++/4.5/bits/basic_string.tcc:
/usr/include/c++/4.5/stdexcept:
/usr/include/c++/4.5/algorithm:
/usr/include/c++/4.5/utility:
/usr/include/c++/4.5/bits/stl_relops.h:
/usr/include/c++/4.5/bits/stl_algo.h:
/usr/include/c++/4.5/cstdlib:
/usr/include/stdlib.h:
/usr/include/bits/waitflags.h:
/usr/include/bits/waitstatus.h:
/usr/include/sys/types.h:
/usr/include/sys/select.h:
/usr/include/bits/select.h:
/usr/include/sys/sysmacros.h:
/usr/include/alloca.h:
/usr/include/c++/4.5/bits/algorithmfwd.h:
/usr/include/c++/4.5/bits/stl_heap.h:
/usr/include/c++/4.5/bits/stl_tempbuf.h:
/usr/include/c++/4.5/bits/stl_construct.h:
/usr/include/c++/4.5/bits/stl_uninitialized.h:
/usr/include/c++/4.5/vector:
/usr/include/c++/4.5/bits/stl_vector.h:
/usr/include/c++/4.5/bits/stl_bvector.h:
/usr/include/c++/4.5/bits/vector.tcc:
/usr/include/c++/4.5/climits:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
/usr/include/limits.h:
/usr/include/bits/posix1_lim.h:
/usr/include/bits/local_lim.h:
/usr/include/linux/limits.h:
/usr/include/bits/posix2_lim.h:
/usr/include/bits/xopen_lim.h:
/usr/include/bits/stdio_lim.h:
/usr/include/boost/dynamic_bitset/config.hpp:
/usr/include/boost/config.hpp:
/usr/include/boost/config/user.hpp:
/usr/include/boost/config/select_compiler_config.hpp:
/usr/include/boost/config/compiler/gcc.hpp:
/usr/include/boost/config/select_stdlib_config.hpp:
/usr/include/boost/config/no_tr1/utility.hpp:
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
/usr/include/boost/config/select_platform_config.hpp:
/usr/include/boost/config/platform/linux.hpp:
/usr/include/boost/config/posix_features.hpp:
/usr/include/boost/config/suffix.hpp:
/usr/include/boost/detail/workaround.hpp:
/usr/include/c++/4.5/locale:
/usr/include/c++/4.5/bits/locale_classes.h:
/usr/include/c++/4.5/bits/locale_classes.tcc:
/usr/include/c++/4.5/bits/locale_facets.h:
/usr/include/c++/4.5/cwctype:
/usr/include/wctype.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
/usr/include/c++/4.5/bits/ios_base.h:
/usr/include/c++/4.5/streambuf:
/usr/include/c++/4.5/bits/streambuf.tcc:
/usr/include/c++/4.5/bits/streambuf_iterator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
/usr/include/c++/4.5/bits/locale_facets.tcc:
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
/usr/include/c++/4.5/ctime:
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
/usr/include/libintl.h:
/usr/include/c++/4.5/bits/codecvt.h:
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
/usr/include/c++/4.5/istream:
/usr/include/c++/4.5/ios:
/usr/include/c++/4.5/bits/basic_ios.h:
/usr/include/c++/4.5/bits/basic_ios.tcc:
/usr/include/c++/4.5/ostream:
/usr/include/c++/4.5/bits/ostream.tcc:
/usr/include/c++/4.5/bits/istream.tcc:
/usr/include/boost/dynamic_bitset_fwd.hpp:
/usr/include/c++/4.5/memory:
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
/usr/include/c++/4.5/backward/auto_ptr.h:
/usr/include/boost/detail/dynamic_bitset.hpp:
/usr/include/boost/detail/iterator.hpp:
/usr/include/c++/4.5/iterator:
/usr/include/c++/4.5/bits/stream_iterator.h:
/usr/include/boost/static_assert.hpp:
/usr/include/boost/limits.hpp:
/usr/include/c++/4.5/limits:
/usr/include/boost/pending/lowest_bit.hpp:
/usr/include/boost/pending/integer_log2.hpp:
/usr/include/boost/shared_ptr.hpp:
/usr/include/boost/smart_ptr/shared_ptr.hpp:
/usr/include/boost/config/no_tr1/memory.hpp:
/usr/include/boost/assert.hpp:
/usr/include/boost/checked_delete.hpp:
/usr/include/boost/throw_exception.hpp:
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
/usr/include/boost/exception/exception.hpp:
/usr/include/boost/current_function.hpp:
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
/usr/include/boost/detail/sp_typeinfo.hpp:
/usr/include/c++/4.5/typeinfo:
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
/usr/include/c++/4.5/functional:
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
/usr/include/boost/memory_order.hpp:
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
/usr/include/boost/scoped_ptr.hpp:
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
/usr/include/stdint.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,316 @@
build/x86_64/3/tpt_pickler.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.cc \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.h \
/usr/include/c++/4.5/iostream \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
/usr/include/bits/wordsize.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
/usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
/usr/include/c++/4.5/ostream /usr/include/c++/4.5/ios \
/usr/include/c++/4.5/iosfwd /usr/include/c++/4.5/bits/stringfwd.h \
/usr/include/c++/4.5/bits/postypes.h /usr/include/c++/4.5/cwchar \
/usr/include/c++/4.5/cstddef \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
/usr/include/wchar.h /usr/include/stdio.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
/usr/include/bits/wchar.h /usr/include/xlocale.h \
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/char_traits.h \
/usr/include/c++/4.5/bits/stl_algobase.h \
/usr/include/c++/4.5/bits/functexcept.h \
/usr/include/c++/4.5/exception_defines.h \
/usr/include/c++/4.5/bits/cpp_type_traits.h \
/usr/include/c++/4.5/ext/type_traits.h \
/usr/include/c++/4.5/ext/numeric_traits.h \
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
/usr/include/c++/4.5/bits/concept_check.h \
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
/usr/include/c++/4.5/bits/stl_iterator.h \
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/localefwd.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
/usr/include/c++/4.5/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/4.5/cctype \
/usr/include/ctype.h /usr/include/bits/types.h \
/usr/include/bits/typesizes.h /usr/include/endian.h \
/usr/include/bits/endian.h /usr/include/bits/byteswap.h \
/usr/include/c++/4.5/bits/ios_base.h \
/usr/include/c++/4.5/ext/atomicity.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
/usr/include/bits/setjmp.h /usr/include/unistd.h \
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
/usr/include/bits/confname.h /usr/include/getopt.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
/usr/include/c++/4.5/bits/locale_classes.h /usr/include/c++/4.5/string \
/usr/include/c++/4.5/bits/allocator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
/usr/include/c++/4.5/bits/ostream_insert.h \
/usr/include/c++/4.5/cxxabi-forced.h \
/usr/include/c++/4.5/bits/stl_function.h \
/usr/include/c++/4.5/backward/binders.h \
/usr/include/c++/4.5/bits/basic_string.h \
/usr/include/c++/4.5/initializer_list \
/usr/include/c++/4.5/bits/basic_string.tcc \
/usr/include/c++/4.5/bits/locale_classes.tcc \
/usr/include/c++/4.5/streambuf /usr/include/c++/4.5/bits/streambuf.tcc \
/usr/include/c++/4.5/bits/basic_ios.h \
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
/usr/include/wctype.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
/usr/include/c++/4.5/bits/streambuf_iterator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
/usr/include/c++/4.5/bits/locale_facets.tcc \
/usr/include/c++/4.5/bits/basic_ios.tcc \
/usr/include/c++/4.5/bits/ostream.tcc /usr/include/c++/4.5/istream \
/usr/include/c++/4.5/bits/istream.tcc /usr/include/c++/4.5/vector \
/usr/include/c++/4.5/bits/stl_construct.h \
/usr/include/c++/4.5/bits/stl_uninitialized.h \
/usr/include/c++/4.5/bits/stl_vector.h \
/usr/include/c++/4.5/bits/stl_bvector.h \
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/map \
/usr/include/c++/4.5/bits/stl_tree.h /usr/include/c++/4.5/bits/stl_map.h \
/usr/include/c++/4.5/bits/stl_multimap.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
/usr/include/stdint.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/num_read_write.h \
/usr/include/byteswap.h /usr/include/c++/4.5/cassert \
/usr/include/assert.h /usr/include/sys/stat.h /usr/include/bits/stat.h
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.h:
/usr/include/c++/4.5/iostream:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
/usr/include/bits/wordsize.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
/usr/include/features.h:
/usr/include/sys/cdefs.h:
/usr/include/gnu/stubs.h:
/usr/include/gnu/stubs-64.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
/usr/include/c++/4.5/ostream:
/usr/include/c++/4.5/ios:
/usr/include/c++/4.5/iosfwd:
/usr/include/c++/4.5/bits/stringfwd.h:
/usr/include/c++/4.5/bits/postypes.h:
/usr/include/c++/4.5/cwchar:
/usr/include/c++/4.5/cstddef:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
/usr/include/wchar.h:
/usr/include/stdio.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
/usr/include/bits/wchar.h:
/usr/include/xlocale.h:
/usr/include/c++/4.5/exception:
/usr/include/c++/4.5/bits/char_traits.h:
/usr/include/c++/4.5/bits/stl_algobase.h:
/usr/include/c++/4.5/bits/functexcept.h:
/usr/include/c++/4.5/exception_defines.h:
/usr/include/c++/4.5/bits/cpp_type_traits.h:
/usr/include/c++/4.5/ext/type_traits.h:
/usr/include/c++/4.5/ext/numeric_traits.h:
/usr/include/c++/4.5/bits/stl_pair.h:
/usr/include/c++/4.5/bits/move.h:
/usr/include/c++/4.5/bits/concept_check.h:
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
/usr/include/c++/4.5/bits/stl_iterator.h:
/usr/include/c++/4.5/debug/debug.h:
/usr/include/c++/4.5/bits/localefwd.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
/usr/include/c++/4.5/clocale:
/usr/include/locale.h:
/usr/include/bits/locale.h:
/usr/include/c++/4.5/cctype:
/usr/include/ctype.h:
/usr/include/bits/types.h:
/usr/include/bits/typesizes.h:
/usr/include/endian.h:
/usr/include/bits/endian.h:
/usr/include/bits/byteswap.h:
/usr/include/c++/4.5/bits/ios_base.h:
/usr/include/c++/4.5/ext/atomicity.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
/usr/include/pthread.h:
/usr/include/sched.h:
/usr/include/time.h:
/usr/include/bits/sched.h:
/usr/include/bits/time.h:
/usr/include/signal.h:
/usr/include/bits/sigset.h:
/usr/include/bits/pthreadtypes.h:
/usr/include/bits/setjmp.h:
/usr/include/unistd.h:
/usr/include/bits/posix_opt.h:
/usr/include/bits/environments.h:
/usr/include/bits/confname.h:
/usr/include/getopt.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
/usr/include/c++/4.5/bits/locale_classes.h:
/usr/include/c++/4.5/string:
/usr/include/c++/4.5/bits/allocator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
/usr/include/c++/4.5/ext/new_allocator.h:
/usr/include/c++/4.5/new:
/usr/include/c++/4.5/bits/ostream_insert.h:
/usr/include/c++/4.5/cxxabi-forced.h:
/usr/include/c++/4.5/bits/stl_function.h:
/usr/include/c++/4.5/backward/binders.h:
/usr/include/c++/4.5/bits/basic_string.h:
/usr/include/c++/4.5/initializer_list:
/usr/include/c++/4.5/bits/basic_string.tcc:
/usr/include/c++/4.5/bits/locale_classes.tcc:
/usr/include/c++/4.5/streambuf:
/usr/include/c++/4.5/bits/streambuf.tcc:
/usr/include/c++/4.5/bits/basic_ios.h:
/usr/include/c++/4.5/bits/locale_facets.h:
/usr/include/c++/4.5/cwctype:
/usr/include/wctype.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
/usr/include/c++/4.5/bits/streambuf_iterator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
/usr/include/c++/4.5/bits/locale_facets.tcc:
/usr/include/c++/4.5/bits/basic_ios.tcc:
/usr/include/c++/4.5/bits/ostream.tcc:
/usr/include/c++/4.5/istream:
/usr/include/c++/4.5/bits/istream.tcc:
/usr/include/c++/4.5/vector:
/usr/include/c++/4.5/bits/stl_construct.h:
/usr/include/c++/4.5/bits/stl_uninitialized.h:
/usr/include/c++/4.5/bits/stl_vector.h:
/usr/include/c++/4.5/bits/stl_bvector.h:
/usr/include/c++/4.5/bits/vector.tcc:
/usr/include/c++/4.5/map:
/usr/include/c++/4.5/bits/stl_tree.h:
/usr/include/c++/4.5/bits/stl_map.h:
/usr/include/c++/4.5/bits/stl_multimap.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
/usr/include/stdint.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/num_read_write.h:
/usr/include/byteswap.h:
/usr/include/c++/4.5/cassert:
/usr/include/assert.h:
/usr/include/sys/stat.h:
/usr/include/bits/stat.h:

View File

@ -0,0 +1,294 @@
build/x86_64/3/tpt_tightindex.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.cc \
/usr/include/c++/4.5/iostream \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
/usr/include/bits/wordsize.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
/usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
/usr/include/c++/4.5/ostream /usr/include/c++/4.5/ios \
/usr/include/c++/4.5/iosfwd /usr/include/c++/4.5/bits/stringfwd.h \
/usr/include/c++/4.5/bits/postypes.h /usr/include/c++/4.5/cwchar \
/usr/include/c++/4.5/cstddef \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
/usr/include/wchar.h /usr/include/stdio.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
/usr/include/bits/wchar.h /usr/include/xlocale.h \
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/char_traits.h \
/usr/include/c++/4.5/bits/stl_algobase.h \
/usr/include/c++/4.5/bits/functexcept.h \
/usr/include/c++/4.5/exception_defines.h \
/usr/include/c++/4.5/bits/cpp_type_traits.h \
/usr/include/c++/4.5/ext/type_traits.h \
/usr/include/c++/4.5/ext/numeric_traits.h \
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
/usr/include/c++/4.5/bits/concept_check.h \
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
/usr/include/c++/4.5/bits/stl_iterator.h \
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/localefwd.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
/usr/include/c++/4.5/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/4.5/cctype \
/usr/include/ctype.h /usr/include/bits/types.h \
/usr/include/bits/typesizes.h /usr/include/endian.h \
/usr/include/bits/endian.h /usr/include/bits/byteswap.h \
/usr/include/c++/4.5/bits/ios_base.h \
/usr/include/c++/4.5/ext/atomicity.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
/usr/include/bits/setjmp.h /usr/include/unistd.h \
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
/usr/include/bits/confname.h /usr/include/getopt.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
/usr/include/c++/4.5/bits/locale_classes.h /usr/include/c++/4.5/string \
/usr/include/c++/4.5/bits/allocator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
/usr/include/c++/4.5/bits/ostream_insert.h \
/usr/include/c++/4.5/cxxabi-forced.h \
/usr/include/c++/4.5/bits/stl_function.h \
/usr/include/c++/4.5/backward/binders.h \
/usr/include/c++/4.5/bits/basic_string.h \
/usr/include/c++/4.5/initializer_list \
/usr/include/c++/4.5/bits/basic_string.tcc \
/usr/include/c++/4.5/bits/locale_classes.tcc \
/usr/include/c++/4.5/streambuf /usr/include/c++/4.5/bits/streambuf.tcc \
/usr/include/c++/4.5/bits/basic_ios.h \
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
/usr/include/wctype.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
/usr/include/c++/4.5/bits/streambuf_iterator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
/usr/include/c++/4.5/bits/locale_facets.tcc \
/usr/include/c++/4.5/bits/basic_ios.tcc \
/usr/include/c++/4.5/bits/ostream.tcc /usr/include/c++/4.5/istream \
/usr/include/c++/4.5/bits/istream.tcc /usr/include/assert.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.h \
/usr/include/c++/4.5/map /usr/include/c++/4.5/bits/stl_tree.h \
/usr/include/c++/4.5/bits/stl_map.h \
/usr/include/c++/4.5/bits/stl_multimap.h /usr/include/c++/4.5/sstream \
/usr/include/c++/4.5/bits/sstream.tcc \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
/usr/include/stdint.h /usr/include/c++/4.5/cassert
/usr/include/c++/4.5/iostream:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
/usr/include/bits/wordsize.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
/usr/include/features.h:
/usr/include/sys/cdefs.h:
/usr/include/gnu/stubs.h:
/usr/include/gnu/stubs-64.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
/usr/include/c++/4.5/ostream:
/usr/include/c++/4.5/ios:
/usr/include/c++/4.5/iosfwd:
/usr/include/c++/4.5/bits/stringfwd.h:
/usr/include/c++/4.5/bits/postypes.h:
/usr/include/c++/4.5/cwchar:
/usr/include/c++/4.5/cstddef:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
/usr/include/wchar.h:
/usr/include/stdio.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
/usr/include/bits/wchar.h:
/usr/include/xlocale.h:
/usr/include/c++/4.5/exception:
/usr/include/c++/4.5/bits/char_traits.h:
/usr/include/c++/4.5/bits/stl_algobase.h:
/usr/include/c++/4.5/bits/functexcept.h:
/usr/include/c++/4.5/exception_defines.h:
/usr/include/c++/4.5/bits/cpp_type_traits.h:
/usr/include/c++/4.5/ext/type_traits.h:
/usr/include/c++/4.5/ext/numeric_traits.h:
/usr/include/c++/4.5/bits/stl_pair.h:
/usr/include/c++/4.5/bits/move.h:
/usr/include/c++/4.5/bits/concept_check.h:
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
/usr/include/c++/4.5/bits/stl_iterator.h:
/usr/include/c++/4.5/debug/debug.h:
/usr/include/c++/4.5/bits/localefwd.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
/usr/include/c++/4.5/clocale:
/usr/include/locale.h:
/usr/include/bits/locale.h:
/usr/include/c++/4.5/cctype:
/usr/include/ctype.h:
/usr/include/bits/types.h:
/usr/include/bits/typesizes.h:
/usr/include/endian.h:
/usr/include/bits/endian.h:
/usr/include/bits/byteswap.h:
/usr/include/c++/4.5/bits/ios_base.h:
/usr/include/c++/4.5/ext/atomicity.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
/usr/include/pthread.h:
/usr/include/sched.h:
/usr/include/time.h:
/usr/include/bits/sched.h:
/usr/include/bits/time.h:
/usr/include/signal.h:
/usr/include/bits/sigset.h:
/usr/include/bits/pthreadtypes.h:
/usr/include/bits/setjmp.h:
/usr/include/unistd.h:
/usr/include/bits/posix_opt.h:
/usr/include/bits/environments.h:
/usr/include/bits/confname.h:
/usr/include/getopt.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
/usr/include/c++/4.5/bits/locale_classes.h:
/usr/include/c++/4.5/string:
/usr/include/c++/4.5/bits/allocator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
/usr/include/c++/4.5/ext/new_allocator.h:
/usr/include/c++/4.5/new:
/usr/include/c++/4.5/bits/ostream_insert.h:
/usr/include/c++/4.5/cxxabi-forced.h:
/usr/include/c++/4.5/bits/stl_function.h:
/usr/include/c++/4.5/backward/binders.h:
/usr/include/c++/4.5/bits/basic_string.h:
/usr/include/c++/4.5/initializer_list:
/usr/include/c++/4.5/bits/basic_string.tcc:
/usr/include/c++/4.5/bits/locale_classes.tcc:
/usr/include/c++/4.5/streambuf:
/usr/include/c++/4.5/bits/streambuf.tcc:
/usr/include/c++/4.5/bits/basic_ios.h:
/usr/include/c++/4.5/bits/locale_facets.h:
/usr/include/c++/4.5/cwctype:
/usr/include/wctype.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
/usr/include/c++/4.5/bits/streambuf_iterator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
/usr/include/c++/4.5/bits/locale_facets.tcc:
/usr/include/c++/4.5/bits/basic_ios.tcc:
/usr/include/c++/4.5/bits/ostream.tcc:
/usr/include/c++/4.5/istream:
/usr/include/c++/4.5/bits/istream.tcc:
/usr/include/assert.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.h:
/usr/include/c++/4.5/map:
/usr/include/c++/4.5/bits/stl_tree.h:
/usr/include/c++/4.5/bits/stl_map.h:
/usr/include/c++/4.5/bits/stl_multimap.h:
/usr/include/c++/4.5/sstream:
/usr/include/c++/4.5/bits/sstream.tcc:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
/usr/include/stdint.h:
/usr/include/c++/4.5/cassert:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,542 @@
build/x86_64/3/ug_conll_record.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.cc \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
/usr/include/boost/dynamic_bitset.hpp \
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
/usr/include/assert.h /usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
/usr/include/gnu/stubs-64.h /usr/include/c++/4.5/string \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
/usr/include/c++/4.5/bits/stringfwd.h \
/usr/include/c++/4.5/bits/char_traits.h \
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
/usr/include/c++/4.5/bits/functexcept.h \
/usr/include/c++/4.5/exception_defines.h \
/usr/include/c++/4.5/bits/cpp_type_traits.h \
/usr/include/c++/4.5/ext/type_traits.h \
/usr/include/c++/4.5/ext/numeric_traits.h \
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
/usr/include/c++/4.5/bits/concept_check.h \
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
/usr/include/c++/4.5/bits/stl_iterator.h \
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
/usr/include/bits/wchar.h /usr/include/xlocale.h \
/usr/include/c++/4.5/bits/allocator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
/usr/include/c++/4.5/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
/usr/include/endian.h /usr/include/bits/endian.h \
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
/usr/include/c++/4.5/cxxabi-forced.h \
/usr/include/c++/4.5/bits/stl_function.h \
/usr/include/c++/4.5/backward/binders.h \
/usr/include/c++/4.5/bits/basic_string.h \
/usr/include/c++/4.5/ext/atomicity.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
/usr/include/bits/setjmp.h /usr/include/unistd.h \
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
/usr/include/bits/confname.h /usr/include/getopt.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
/usr/include/c++/4.5/initializer_list \
/usr/include/c++/4.5/bits/basic_string.tcc \
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
/usr/include/sys/select.h /usr/include/bits/select.h \
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
/usr/include/c++/4.5/bits/algorithmfwd.h \
/usr/include/c++/4.5/bits/stl_heap.h \
/usr/include/c++/4.5/bits/stl_tempbuf.h \
/usr/include/c++/4.5/bits/stl_construct.h \
/usr/include/c++/4.5/bits/stl_uninitialized.h \
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
/usr/include/c++/4.5/bits/stl_bvector.h \
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
/usr/include/bits/stdio_lim.h \
/usr/include/boost/dynamic_bitset/config.hpp \
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
/usr/include/boost/config/select_compiler_config.hpp \
/usr/include/boost/config/compiler/gcc.hpp \
/usr/include/boost/config/select_stdlib_config.hpp \
/usr/include/boost/config/no_tr1/utility.hpp \
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
/usr/include/boost/config/select_platform_config.hpp \
/usr/include/boost/config/platform/linux.hpp \
/usr/include/boost/config/posix_features.hpp \
/usr/include/boost/config/suffix.hpp \
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
/usr/include/c++/4.5/bits/locale_classes.h \
/usr/include/c++/4.5/bits/locale_classes.tcc \
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
/usr/include/wctype.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
/usr/include/c++/4.5/bits/streambuf.tcc \
/usr/include/c++/4.5/bits/streambuf_iterator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
/usr/include/c++/4.5/bits/locale_facets.tcc \
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
/usr/include/c++/4.5/ctime \
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
/usr/include/c++/4.5/bits/basic_ios.h \
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
/usr/include/c++/4.5/bits/ostream.tcc \
/usr/include/c++/4.5/bits/istream.tcc \
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
/usr/include/c++/4.5/backward/auto_ptr.h \
/usr/include/boost/detail/dynamic_bitset.hpp \
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
/usr/include/c++/4.5/bits/stream_iterator.h \
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
/usr/include/boost/pending/integer_log2.hpp \
/usr/include/boost/shared_ptr.hpp \
/usr/include/boost/smart_ptr/shared_ptr.hpp \
/usr/include/boost/config/no_tr1/memory.hpp \
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
/usr/include/boost/throw_exception.hpp \
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
/usr/include/boost/exception/exception.hpp \
/usr/include/boost/current_function.hpp \
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
/usr/include/c++/4.5/functional \
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
/usr/include/boost/memory_order.hpp \
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
/usr/include/boost/scoped_ptr.hpp \
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
/usr/include/stdint.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
/usr/include/boost/dynamic_bitset.hpp:
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
/usr/include/assert.h:
/usr/include/features.h:
/usr/include/sys/cdefs.h:
/usr/include/bits/wordsize.h:
/usr/include/gnu/stubs.h:
/usr/include/gnu/stubs-64.h:
/usr/include/c++/4.5/string:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
/usr/include/c++/4.5/bits/stringfwd.h:
/usr/include/c++/4.5/bits/char_traits.h:
/usr/include/c++/4.5/bits/stl_algobase.h:
/usr/include/c++/4.5/cstddef:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
/usr/include/c++/4.5/bits/functexcept.h:
/usr/include/c++/4.5/exception_defines.h:
/usr/include/c++/4.5/bits/cpp_type_traits.h:
/usr/include/c++/4.5/ext/type_traits.h:
/usr/include/c++/4.5/ext/numeric_traits.h:
/usr/include/c++/4.5/bits/stl_pair.h:
/usr/include/c++/4.5/bits/move.h:
/usr/include/c++/4.5/bits/concept_check.h:
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
/usr/include/c++/4.5/bits/stl_iterator.h:
/usr/include/c++/4.5/debug/debug.h:
/usr/include/c++/4.5/bits/postypes.h:
/usr/include/c++/4.5/cwchar:
/usr/include/wchar.h:
/usr/include/stdio.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
/usr/include/bits/wchar.h:
/usr/include/xlocale.h:
/usr/include/c++/4.5/bits/allocator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
/usr/include/c++/4.5/ext/new_allocator.h:
/usr/include/c++/4.5/new:
/usr/include/c++/4.5/exception:
/usr/include/c++/4.5/bits/localefwd.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
/usr/include/c++/4.5/clocale:
/usr/include/locale.h:
/usr/include/bits/locale.h:
/usr/include/c++/4.5/iosfwd:
/usr/include/c++/4.5/cctype:
/usr/include/ctype.h:
/usr/include/bits/types.h:
/usr/include/bits/typesizes.h:
/usr/include/endian.h:
/usr/include/bits/endian.h:
/usr/include/bits/byteswap.h:
/usr/include/c++/4.5/bits/ostream_insert.h:
/usr/include/c++/4.5/cxxabi-forced.h:
/usr/include/c++/4.5/bits/stl_function.h:
/usr/include/c++/4.5/backward/binders.h:
/usr/include/c++/4.5/bits/basic_string.h:
/usr/include/c++/4.5/ext/atomicity.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
/usr/include/pthread.h:
/usr/include/sched.h:
/usr/include/time.h:
/usr/include/bits/sched.h:
/usr/include/bits/time.h:
/usr/include/signal.h:
/usr/include/bits/sigset.h:
/usr/include/bits/pthreadtypes.h:
/usr/include/bits/setjmp.h:
/usr/include/unistd.h:
/usr/include/bits/posix_opt.h:
/usr/include/bits/environments.h:
/usr/include/bits/confname.h:
/usr/include/getopt.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
/usr/include/c++/4.5/initializer_list:
/usr/include/c++/4.5/bits/basic_string.tcc:
/usr/include/c++/4.5/stdexcept:
/usr/include/c++/4.5/algorithm:
/usr/include/c++/4.5/utility:
/usr/include/c++/4.5/bits/stl_relops.h:
/usr/include/c++/4.5/bits/stl_algo.h:
/usr/include/c++/4.5/cstdlib:
/usr/include/stdlib.h:
/usr/include/bits/waitflags.h:
/usr/include/bits/waitstatus.h:
/usr/include/sys/types.h:
/usr/include/sys/select.h:
/usr/include/bits/select.h:
/usr/include/sys/sysmacros.h:
/usr/include/alloca.h:
/usr/include/c++/4.5/bits/algorithmfwd.h:
/usr/include/c++/4.5/bits/stl_heap.h:
/usr/include/c++/4.5/bits/stl_tempbuf.h:
/usr/include/c++/4.5/bits/stl_construct.h:
/usr/include/c++/4.5/bits/stl_uninitialized.h:
/usr/include/c++/4.5/vector:
/usr/include/c++/4.5/bits/stl_vector.h:
/usr/include/c++/4.5/bits/stl_bvector.h:
/usr/include/c++/4.5/bits/vector.tcc:
/usr/include/c++/4.5/climits:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
/usr/include/limits.h:
/usr/include/bits/posix1_lim.h:
/usr/include/bits/local_lim.h:
/usr/include/linux/limits.h:
/usr/include/bits/posix2_lim.h:
/usr/include/bits/xopen_lim.h:
/usr/include/bits/stdio_lim.h:
/usr/include/boost/dynamic_bitset/config.hpp:
/usr/include/boost/config.hpp:
/usr/include/boost/config/user.hpp:
/usr/include/boost/config/select_compiler_config.hpp:
/usr/include/boost/config/compiler/gcc.hpp:
/usr/include/boost/config/select_stdlib_config.hpp:
/usr/include/boost/config/no_tr1/utility.hpp:
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
/usr/include/boost/config/select_platform_config.hpp:
/usr/include/boost/config/platform/linux.hpp:
/usr/include/boost/config/posix_features.hpp:
/usr/include/boost/config/suffix.hpp:
/usr/include/boost/detail/workaround.hpp:
/usr/include/c++/4.5/locale:
/usr/include/c++/4.5/bits/locale_classes.h:
/usr/include/c++/4.5/bits/locale_classes.tcc:
/usr/include/c++/4.5/bits/locale_facets.h:
/usr/include/c++/4.5/cwctype:
/usr/include/wctype.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
/usr/include/c++/4.5/bits/ios_base.h:
/usr/include/c++/4.5/streambuf:
/usr/include/c++/4.5/bits/streambuf.tcc:
/usr/include/c++/4.5/bits/streambuf_iterator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
/usr/include/c++/4.5/bits/locale_facets.tcc:
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
/usr/include/c++/4.5/ctime:
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
/usr/include/libintl.h:
/usr/include/c++/4.5/bits/codecvt.h:
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
/usr/include/c++/4.5/istream:
/usr/include/c++/4.5/ios:
/usr/include/c++/4.5/bits/basic_ios.h:
/usr/include/c++/4.5/bits/basic_ios.tcc:
/usr/include/c++/4.5/ostream:
/usr/include/c++/4.5/bits/ostream.tcc:
/usr/include/c++/4.5/bits/istream.tcc:
/usr/include/boost/dynamic_bitset_fwd.hpp:
/usr/include/c++/4.5/memory:
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
/usr/include/c++/4.5/backward/auto_ptr.h:
/usr/include/boost/detail/dynamic_bitset.hpp:
/usr/include/boost/detail/iterator.hpp:
/usr/include/c++/4.5/iterator:
/usr/include/c++/4.5/bits/stream_iterator.h:
/usr/include/boost/static_assert.hpp:
/usr/include/boost/limits.hpp:
/usr/include/c++/4.5/limits:
/usr/include/boost/pending/lowest_bit.hpp:
/usr/include/boost/pending/integer_log2.hpp:
/usr/include/boost/shared_ptr.hpp:
/usr/include/boost/smart_ptr/shared_ptr.hpp:
/usr/include/boost/config/no_tr1/memory.hpp:
/usr/include/boost/assert.hpp:
/usr/include/boost/checked_delete.hpp:
/usr/include/boost/throw_exception.hpp:
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
/usr/include/boost/exception/exception.hpp:
/usr/include/boost/current_function.hpp:
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
/usr/include/boost/detail/sp_typeinfo.hpp:
/usr/include/c++/4.5/typeinfo:
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
/usr/include/c++/4.5/functional:
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
/usr/include/boost/memory_order.hpp:
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
/usr/include/boost/scoped_ptr.hpp:
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
/usr/include/stdint.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,2 @@
build/x86_64/3/ug_mmbitext.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_mmbitext.cc

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,552 @@
build/x86_64/3/ug_tsa_array_entry.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.cc \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h \
/usr/include/c++/4.5/cassert /usr/include/assert.h \
/usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
/usr/include/gnu/stubs-64.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
/usr/include/boost/dynamic_bitset.hpp \
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
/usr/include/c++/4.5/string \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
/usr/include/c++/4.5/bits/stringfwd.h \
/usr/include/c++/4.5/bits/char_traits.h \
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
/usr/include/c++/4.5/bits/functexcept.h \
/usr/include/c++/4.5/exception_defines.h \
/usr/include/c++/4.5/bits/cpp_type_traits.h \
/usr/include/c++/4.5/ext/type_traits.h \
/usr/include/c++/4.5/ext/numeric_traits.h \
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
/usr/include/c++/4.5/bits/concept_check.h \
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
/usr/include/c++/4.5/bits/stl_iterator.h \
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
/usr/include/bits/wchar.h /usr/include/xlocale.h \
/usr/include/c++/4.5/bits/allocator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
/usr/include/c++/4.5/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
/usr/include/endian.h /usr/include/bits/endian.h \
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
/usr/include/c++/4.5/cxxabi-forced.h \
/usr/include/c++/4.5/bits/stl_function.h \
/usr/include/c++/4.5/backward/binders.h \
/usr/include/c++/4.5/bits/basic_string.h \
/usr/include/c++/4.5/ext/atomicity.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
/usr/include/bits/setjmp.h /usr/include/unistd.h \
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
/usr/include/bits/confname.h /usr/include/getopt.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
/usr/include/c++/4.5/initializer_list \
/usr/include/c++/4.5/bits/basic_string.tcc \
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
/usr/include/sys/select.h /usr/include/bits/select.h \
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
/usr/include/c++/4.5/bits/algorithmfwd.h \
/usr/include/c++/4.5/bits/stl_heap.h \
/usr/include/c++/4.5/bits/stl_tempbuf.h \
/usr/include/c++/4.5/bits/stl_construct.h \
/usr/include/c++/4.5/bits/stl_uninitialized.h \
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
/usr/include/c++/4.5/bits/stl_bvector.h \
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
/usr/include/bits/stdio_lim.h \
/usr/include/boost/dynamic_bitset/config.hpp \
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
/usr/include/boost/config/select_compiler_config.hpp \
/usr/include/boost/config/compiler/gcc.hpp \
/usr/include/boost/config/select_stdlib_config.hpp \
/usr/include/boost/config/no_tr1/utility.hpp \
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
/usr/include/boost/config/select_platform_config.hpp \
/usr/include/boost/config/platform/linux.hpp \
/usr/include/boost/config/posix_features.hpp \
/usr/include/boost/config/suffix.hpp \
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
/usr/include/c++/4.5/bits/locale_classes.h \
/usr/include/c++/4.5/bits/locale_classes.tcc \
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
/usr/include/wctype.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
/usr/include/c++/4.5/bits/streambuf.tcc \
/usr/include/c++/4.5/bits/streambuf_iterator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
/usr/include/c++/4.5/bits/locale_facets.tcc \
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
/usr/include/c++/4.5/ctime \
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
/usr/include/c++/4.5/bits/basic_ios.h \
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
/usr/include/c++/4.5/bits/ostream.tcc \
/usr/include/c++/4.5/bits/istream.tcc \
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
/usr/include/c++/4.5/backward/auto_ptr.h \
/usr/include/boost/detail/dynamic_bitset.hpp \
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
/usr/include/c++/4.5/bits/stream_iterator.h \
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
/usr/include/boost/pending/integer_log2.hpp \
/usr/include/boost/shared_ptr.hpp \
/usr/include/boost/smart_ptr/shared_ptr.hpp \
/usr/include/boost/config/no_tr1/memory.hpp \
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
/usr/include/boost/throw_exception.hpp \
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
/usr/include/boost/exception/exception.hpp \
/usr/include/boost/current_function.hpp \
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
/usr/include/c++/4.5/functional \
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
/usr/include/boost/memory_order.hpp \
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
/usr/include/boost/scoped_ptr.hpp \
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
/usr/include/stdint.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
/home/germann/code/moses/master/mosesdecoder/moses/generic/sampling/Sampling.h
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h:
/usr/include/c++/4.5/cassert:
/usr/include/assert.h:
/usr/include/features.h:
/usr/include/sys/cdefs.h:
/usr/include/bits/wordsize.h:
/usr/include/gnu/stubs.h:
/usr/include/gnu/stubs-64.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
/usr/include/boost/dynamic_bitset.hpp:
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
/usr/include/c++/4.5/string:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
/usr/include/c++/4.5/bits/stringfwd.h:
/usr/include/c++/4.5/bits/char_traits.h:
/usr/include/c++/4.5/bits/stl_algobase.h:
/usr/include/c++/4.5/cstddef:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
/usr/include/c++/4.5/bits/functexcept.h:
/usr/include/c++/4.5/exception_defines.h:
/usr/include/c++/4.5/bits/cpp_type_traits.h:
/usr/include/c++/4.5/ext/type_traits.h:
/usr/include/c++/4.5/ext/numeric_traits.h:
/usr/include/c++/4.5/bits/stl_pair.h:
/usr/include/c++/4.5/bits/move.h:
/usr/include/c++/4.5/bits/concept_check.h:
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
/usr/include/c++/4.5/bits/stl_iterator.h:
/usr/include/c++/4.5/debug/debug.h:
/usr/include/c++/4.5/bits/postypes.h:
/usr/include/c++/4.5/cwchar:
/usr/include/wchar.h:
/usr/include/stdio.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
/usr/include/bits/wchar.h:
/usr/include/xlocale.h:
/usr/include/c++/4.5/bits/allocator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
/usr/include/c++/4.5/ext/new_allocator.h:
/usr/include/c++/4.5/new:
/usr/include/c++/4.5/exception:
/usr/include/c++/4.5/bits/localefwd.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
/usr/include/c++/4.5/clocale:
/usr/include/locale.h:
/usr/include/bits/locale.h:
/usr/include/c++/4.5/iosfwd:
/usr/include/c++/4.5/cctype:
/usr/include/ctype.h:
/usr/include/bits/types.h:
/usr/include/bits/typesizes.h:
/usr/include/endian.h:
/usr/include/bits/endian.h:
/usr/include/bits/byteswap.h:
/usr/include/c++/4.5/bits/ostream_insert.h:
/usr/include/c++/4.5/cxxabi-forced.h:
/usr/include/c++/4.5/bits/stl_function.h:
/usr/include/c++/4.5/backward/binders.h:
/usr/include/c++/4.5/bits/basic_string.h:
/usr/include/c++/4.5/ext/atomicity.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
/usr/include/pthread.h:
/usr/include/sched.h:
/usr/include/time.h:
/usr/include/bits/sched.h:
/usr/include/bits/time.h:
/usr/include/signal.h:
/usr/include/bits/sigset.h:
/usr/include/bits/pthreadtypes.h:
/usr/include/bits/setjmp.h:
/usr/include/unistd.h:
/usr/include/bits/posix_opt.h:
/usr/include/bits/environments.h:
/usr/include/bits/confname.h:
/usr/include/getopt.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
/usr/include/c++/4.5/initializer_list:
/usr/include/c++/4.5/bits/basic_string.tcc:
/usr/include/c++/4.5/stdexcept:
/usr/include/c++/4.5/algorithm:
/usr/include/c++/4.5/utility:
/usr/include/c++/4.5/bits/stl_relops.h:
/usr/include/c++/4.5/bits/stl_algo.h:
/usr/include/c++/4.5/cstdlib:
/usr/include/stdlib.h:
/usr/include/bits/waitflags.h:
/usr/include/bits/waitstatus.h:
/usr/include/sys/types.h:
/usr/include/sys/select.h:
/usr/include/bits/select.h:
/usr/include/sys/sysmacros.h:
/usr/include/alloca.h:
/usr/include/c++/4.5/bits/algorithmfwd.h:
/usr/include/c++/4.5/bits/stl_heap.h:
/usr/include/c++/4.5/bits/stl_tempbuf.h:
/usr/include/c++/4.5/bits/stl_construct.h:
/usr/include/c++/4.5/bits/stl_uninitialized.h:
/usr/include/c++/4.5/vector:
/usr/include/c++/4.5/bits/stl_vector.h:
/usr/include/c++/4.5/bits/stl_bvector.h:
/usr/include/c++/4.5/bits/vector.tcc:
/usr/include/c++/4.5/climits:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
/usr/include/limits.h:
/usr/include/bits/posix1_lim.h:
/usr/include/bits/local_lim.h:
/usr/include/linux/limits.h:
/usr/include/bits/posix2_lim.h:
/usr/include/bits/xopen_lim.h:
/usr/include/bits/stdio_lim.h:
/usr/include/boost/dynamic_bitset/config.hpp:
/usr/include/boost/config.hpp:
/usr/include/boost/config/user.hpp:
/usr/include/boost/config/select_compiler_config.hpp:
/usr/include/boost/config/compiler/gcc.hpp:
/usr/include/boost/config/select_stdlib_config.hpp:
/usr/include/boost/config/no_tr1/utility.hpp:
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
/usr/include/boost/config/select_platform_config.hpp:
/usr/include/boost/config/platform/linux.hpp:
/usr/include/boost/config/posix_features.hpp:
/usr/include/boost/config/suffix.hpp:
/usr/include/boost/detail/workaround.hpp:
/usr/include/c++/4.5/locale:
/usr/include/c++/4.5/bits/locale_classes.h:
/usr/include/c++/4.5/bits/locale_classes.tcc:
/usr/include/c++/4.5/bits/locale_facets.h:
/usr/include/c++/4.5/cwctype:
/usr/include/wctype.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
/usr/include/c++/4.5/bits/ios_base.h:
/usr/include/c++/4.5/streambuf:
/usr/include/c++/4.5/bits/streambuf.tcc:
/usr/include/c++/4.5/bits/streambuf_iterator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
/usr/include/c++/4.5/bits/locale_facets.tcc:
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
/usr/include/c++/4.5/ctime:
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
/usr/include/libintl.h:
/usr/include/c++/4.5/bits/codecvt.h:
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
/usr/include/c++/4.5/istream:
/usr/include/c++/4.5/ios:
/usr/include/c++/4.5/bits/basic_ios.h:
/usr/include/c++/4.5/bits/basic_ios.tcc:
/usr/include/c++/4.5/ostream:
/usr/include/c++/4.5/bits/ostream.tcc:
/usr/include/c++/4.5/bits/istream.tcc:
/usr/include/boost/dynamic_bitset_fwd.hpp:
/usr/include/c++/4.5/memory:
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
/usr/include/c++/4.5/backward/auto_ptr.h:
/usr/include/boost/detail/dynamic_bitset.hpp:
/usr/include/boost/detail/iterator.hpp:
/usr/include/c++/4.5/iterator:
/usr/include/c++/4.5/bits/stream_iterator.h:
/usr/include/boost/static_assert.hpp:
/usr/include/boost/limits.hpp:
/usr/include/c++/4.5/limits:
/usr/include/boost/pending/lowest_bit.hpp:
/usr/include/boost/pending/integer_log2.hpp:
/usr/include/boost/shared_ptr.hpp:
/usr/include/boost/smart_ptr/shared_ptr.hpp:
/usr/include/boost/config/no_tr1/memory.hpp:
/usr/include/boost/assert.hpp:
/usr/include/boost/checked_delete.hpp:
/usr/include/boost/throw_exception.hpp:
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
/usr/include/boost/exception/exception.hpp:
/usr/include/boost/current_function.hpp:
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
/usr/include/boost/detail/sp_typeinfo.hpp:
/usr/include/c++/4.5/typeinfo:
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
/usr/include/c++/4.5/functional:
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
/usr/include/boost/memory_order.hpp:
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
/usr/include/boost/scoped_ptr.hpp:
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
/usr/include/stdint.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
/home/germann/code/moses/master/mosesdecoder/moses/generic/sampling/Sampling.h:

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,546 @@
build/x86_64/3/ug_ttrack_position.o: \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.cc \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h \
/usr/include/c++/4.5/cassert /usr/include/assert.h \
/usr/include/features.h /usr/include/sys/cdefs.h \
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
/usr/include/gnu/stubs-64.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
/usr/include/boost/dynamic_bitset.hpp \
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
/usr/include/c++/4.5/string \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
/usr/include/c++/4.5/bits/stringfwd.h \
/usr/include/c++/4.5/bits/char_traits.h \
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
/usr/include/c++/4.5/bits/functexcept.h \
/usr/include/c++/4.5/exception_defines.h \
/usr/include/c++/4.5/bits/cpp_type_traits.h \
/usr/include/c++/4.5/ext/type_traits.h \
/usr/include/c++/4.5/ext/numeric_traits.h \
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
/usr/include/c++/4.5/bits/concept_check.h \
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
/usr/include/c++/4.5/bits/stl_iterator.h \
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
/usr/include/bits/wchar.h /usr/include/xlocale.h \
/usr/include/c++/4.5/bits/allocator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
/usr/include/c++/4.5/clocale /usr/include/locale.h \
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
/usr/include/endian.h /usr/include/bits/endian.h \
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
/usr/include/c++/4.5/cxxabi-forced.h \
/usr/include/c++/4.5/bits/stl_function.h \
/usr/include/c++/4.5/backward/binders.h \
/usr/include/c++/4.5/bits/basic_string.h \
/usr/include/c++/4.5/ext/atomicity.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
/usr/include/bits/setjmp.h /usr/include/unistd.h \
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
/usr/include/bits/confname.h /usr/include/getopt.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
/usr/include/c++/4.5/initializer_list \
/usr/include/c++/4.5/bits/basic_string.tcc \
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
/usr/include/sys/select.h /usr/include/bits/select.h \
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
/usr/include/c++/4.5/bits/algorithmfwd.h \
/usr/include/c++/4.5/bits/stl_heap.h \
/usr/include/c++/4.5/bits/stl_tempbuf.h \
/usr/include/c++/4.5/bits/stl_construct.h \
/usr/include/c++/4.5/bits/stl_uninitialized.h \
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
/usr/include/c++/4.5/bits/stl_bvector.h \
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
/usr/include/bits/stdio_lim.h \
/usr/include/boost/dynamic_bitset/config.hpp \
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
/usr/include/boost/config/select_compiler_config.hpp \
/usr/include/boost/config/compiler/gcc.hpp \
/usr/include/boost/config/select_stdlib_config.hpp \
/usr/include/boost/config/no_tr1/utility.hpp \
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
/usr/include/boost/config/select_platform_config.hpp \
/usr/include/boost/config/platform/linux.hpp \
/usr/include/boost/config/posix_features.hpp \
/usr/include/boost/config/suffix.hpp \
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
/usr/include/c++/4.5/bits/locale_classes.h \
/usr/include/c++/4.5/bits/locale_classes.tcc \
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
/usr/include/wctype.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
/usr/include/c++/4.5/bits/streambuf.tcc \
/usr/include/c++/4.5/bits/streambuf_iterator.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
/usr/include/c++/4.5/bits/locale_facets.tcc \
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
/usr/include/c++/4.5/ctime \
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
/usr/include/c++/4.5/bits/basic_ios.h \
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
/usr/include/c++/4.5/bits/ostream.tcc \
/usr/include/c++/4.5/bits/istream.tcc \
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
/usr/include/c++/4.5/backward/auto_ptr.h \
/usr/include/boost/detail/dynamic_bitset.hpp \
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
/usr/include/c++/4.5/bits/stream_iterator.h \
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
/usr/include/boost/pending/integer_log2.hpp \
/usr/include/boost/shared_ptr.hpp \
/usr/include/boost/smart_ptr/shared_ptr.hpp \
/usr/include/boost/config/no_tr1/memory.hpp \
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
/usr/include/boost/throw_exception.hpp \
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
/usr/include/boost/exception/exception.hpp \
/usr/include/boost/current_function.hpp \
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
/usr/include/c++/4.5/functional \
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
/usr/include/boost/memory_order.hpp \
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
/usr/include/boost/scoped_ptr.hpp \
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
/usr/include/stdint.h \
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h:
/usr/include/c++/4.5/cassert:
/usr/include/assert.h:
/usr/include/features.h:
/usr/include/sys/cdefs.h:
/usr/include/bits/wordsize.h:
/usr/include/gnu/stubs.h:
/usr/include/gnu/stubs-64.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
/usr/include/boost/dynamic_bitset.hpp:
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
/usr/include/c++/4.5/string:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
/usr/include/c++/4.5/bits/stringfwd.h:
/usr/include/c++/4.5/bits/char_traits.h:
/usr/include/c++/4.5/bits/stl_algobase.h:
/usr/include/c++/4.5/cstddef:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
/usr/include/c++/4.5/bits/functexcept.h:
/usr/include/c++/4.5/exception_defines.h:
/usr/include/c++/4.5/bits/cpp_type_traits.h:
/usr/include/c++/4.5/ext/type_traits.h:
/usr/include/c++/4.5/ext/numeric_traits.h:
/usr/include/c++/4.5/bits/stl_pair.h:
/usr/include/c++/4.5/bits/move.h:
/usr/include/c++/4.5/bits/concept_check.h:
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
/usr/include/c++/4.5/bits/stl_iterator.h:
/usr/include/c++/4.5/debug/debug.h:
/usr/include/c++/4.5/bits/postypes.h:
/usr/include/c++/4.5/cwchar:
/usr/include/wchar.h:
/usr/include/stdio.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
/usr/include/bits/wchar.h:
/usr/include/xlocale.h:
/usr/include/c++/4.5/bits/allocator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
/usr/include/c++/4.5/ext/new_allocator.h:
/usr/include/c++/4.5/new:
/usr/include/c++/4.5/exception:
/usr/include/c++/4.5/bits/localefwd.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
/usr/include/c++/4.5/clocale:
/usr/include/locale.h:
/usr/include/bits/locale.h:
/usr/include/c++/4.5/iosfwd:
/usr/include/c++/4.5/cctype:
/usr/include/ctype.h:
/usr/include/bits/types.h:
/usr/include/bits/typesizes.h:
/usr/include/endian.h:
/usr/include/bits/endian.h:
/usr/include/bits/byteswap.h:
/usr/include/c++/4.5/bits/ostream_insert.h:
/usr/include/c++/4.5/cxxabi-forced.h:
/usr/include/c++/4.5/bits/stl_function.h:
/usr/include/c++/4.5/backward/binders.h:
/usr/include/c++/4.5/bits/basic_string.h:
/usr/include/c++/4.5/ext/atomicity.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
/usr/include/pthread.h:
/usr/include/sched.h:
/usr/include/time.h:
/usr/include/bits/sched.h:
/usr/include/bits/time.h:
/usr/include/signal.h:
/usr/include/bits/sigset.h:
/usr/include/bits/pthreadtypes.h:
/usr/include/bits/setjmp.h:
/usr/include/unistd.h:
/usr/include/bits/posix_opt.h:
/usr/include/bits/environments.h:
/usr/include/bits/confname.h:
/usr/include/getopt.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
/usr/include/c++/4.5/initializer_list:
/usr/include/c++/4.5/bits/basic_string.tcc:
/usr/include/c++/4.5/stdexcept:
/usr/include/c++/4.5/algorithm:
/usr/include/c++/4.5/utility:
/usr/include/c++/4.5/bits/stl_relops.h:
/usr/include/c++/4.5/bits/stl_algo.h:
/usr/include/c++/4.5/cstdlib:
/usr/include/stdlib.h:
/usr/include/bits/waitflags.h:
/usr/include/bits/waitstatus.h:
/usr/include/sys/types.h:
/usr/include/sys/select.h:
/usr/include/bits/select.h:
/usr/include/sys/sysmacros.h:
/usr/include/alloca.h:
/usr/include/c++/4.5/bits/algorithmfwd.h:
/usr/include/c++/4.5/bits/stl_heap.h:
/usr/include/c++/4.5/bits/stl_tempbuf.h:
/usr/include/c++/4.5/bits/stl_construct.h:
/usr/include/c++/4.5/bits/stl_uninitialized.h:
/usr/include/c++/4.5/vector:
/usr/include/c++/4.5/bits/stl_vector.h:
/usr/include/c++/4.5/bits/stl_bvector.h:
/usr/include/c++/4.5/bits/vector.tcc:
/usr/include/c++/4.5/climits:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
/usr/include/limits.h:
/usr/include/bits/posix1_lim.h:
/usr/include/bits/local_lim.h:
/usr/include/linux/limits.h:
/usr/include/bits/posix2_lim.h:
/usr/include/bits/xopen_lim.h:
/usr/include/bits/stdio_lim.h:
/usr/include/boost/dynamic_bitset/config.hpp:
/usr/include/boost/config.hpp:
/usr/include/boost/config/user.hpp:
/usr/include/boost/config/select_compiler_config.hpp:
/usr/include/boost/config/compiler/gcc.hpp:
/usr/include/boost/config/select_stdlib_config.hpp:
/usr/include/boost/config/no_tr1/utility.hpp:
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
/usr/include/boost/config/select_platform_config.hpp:
/usr/include/boost/config/platform/linux.hpp:
/usr/include/boost/config/posix_features.hpp:
/usr/include/boost/config/suffix.hpp:
/usr/include/boost/detail/workaround.hpp:
/usr/include/c++/4.5/locale:
/usr/include/c++/4.5/bits/locale_classes.h:
/usr/include/c++/4.5/bits/locale_classes.tcc:
/usr/include/c++/4.5/bits/locale_facets.h:
/usr/include/c++/4.5/cwctype:
/usr/include/wctype.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
/usr/include/c++/4.5/bits/ios_base.h:
/usr/include/c++/4.5/streambuf:
/usr/include/c++/4.5/bits/streambuf.tcc:
/usr/include/c++/4.5/bits/streambuf_iterator.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
/usr/include/c++/4.5/bits/locale_facets.tcc:
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
/usr/include/c++/4.5/ctime:
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
/usr/include/libintl.h:
/usr/include/c++/4.5/bits/codecvt.h:
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
/usr/include/c++/4.5/istream:
/usr/include/c++/4.5/ios:
/usr/include/c++/4.5/bits/basic_ios.h:
/usr/include/c++/4.5/bits/basic_ios.tcc:
/usr/include/c++/4.5/ostream:
/usr/include/c++/4.5/bits/ostream.tcc:
/usr/include/c++/4.5/bits/istream.tcc:
/usr/include/boost/dynamic_bitset_fwd.hpp:
/usr/include/c++/4.5/memory:
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
/usr/include/c++/4.5/backward/auto_ptr.h:
/usr/include/boost/detail/dynamic_bitset.hpp:
/usr/include/boost/detail/iterator.hpp:
/usr/include/c++/4.5/iterator:
/usr/include/c++/4.5/bits/stream_iterator.h:
/usr/include/boost/static_assert.hpp:
/usr/include/boost/limits.hpp:
/usr/include/c++/4.5/limits:
/usr/include/boost/pending/lowest_bit.hpp:
/usr/include/boost/pending/integer_log2.hpp:
/usr/include/boost/shared_ptr.hpp:
/usr/include/boost/smart_ptr/shared_ptr.hpp:
/usr/include/boost/config/no_tr1/memory.hpp:
/usr/include/boost/assert.hpp:
/usr/include/boost/checked_delete.hpp:
/usr/include/boost/throw_exception.hpp:
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
/usr/include/boost/exception/exception.hpp:
/usr/include/boost/current_function.hpp:
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
/usr/include/boost/detail/sp_typeinfo.hpp:
/usr/include/c++/4.5/typeinfo:
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
/usr/include/c++/4.5/functional:
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
/usr/include/boost/memory_order.hpp:
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
/usr/include/boost/scoped_ptr.hpp:
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
/usr/include/stdint.h:
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:

187
moses/mm/custom-pt.cc Normal file
View File

@ -0,0 +1,187 @@
// build a phrase table for the given input
// #include "ug_lexical_phrase_scorer2.h"
#include <stdint.h>
#include <string>
#include <vector>
#include <cassert>
#include <iomanip>
#include <algorithm>
#include "moses/generic/sorting/VectorIndexSorter.h"
#include "moses/generic/sampling/Sampling.h"
#include "moses/generic/file_io/ug_stream.h"
#include <boost/math/distributions/binomial.hpp>
#include <boost/unordered_map.hpp>
#include <boost/foreach.hpp>
#include "ug_mm_ttrack.h"
#include "ug_mm_tsa.h"
#include "tpt_tokenindex.h"
#include "ug_corpus_token.h"
#include "ug_typedefs.h"
#include "tpt_pickler.h"
#include "ug_bitext.h"
#include "ug_lexical_phrase_scorer2.h"
using namespace std;
using namespace ugdiss;
using namespace Moses;
using namespace Moses::bitext;
#define CACHING_THRESHOLD 1000
#define lbop boost::math::binomial_distribution<>::find_lower_bound_on_p
size_t mctr=0,xctr=0;
typedef L2R_Token<SimpleWordId> Token;
typedef mmBitext<Token> mmbitext;
mmbitext bt;
float lbsmooth = .005;
PScorePfwd<Token> calc_pfwd;
PScorePbwd<Token> calc_pbwd;
PScoreLex<Token> calc_lex;
PScoreWP<Token> apply_wp;
vector<float> fweights;
void
nbest_phrasepairs(uint64_t const pid1,
pstats const& ps,
vector<PhrasePair> & nbest)
{
boost::unordered_map<uint64_t,jstats>::const_iterator m;
vector<size_t> idx(nbest.size());
size_t i=0;
for (m = ps.trg.begin();
m != ps.trg.end() && i < nbest.size();
++m)
{
// cout << m->second.rcnt() << " " << ps.good << endl;
if ((m->second.rcnt() < 3) && (m->second.rcnt() * 100 < ps.good))
continue;
nbest[i].init(pid1,ps,5);
nbest[i].update(m->first,m->second);
calc_pfwd(bt, nbest[i]);
calc_pbwd(bt, nbest[i]);
calc_lex(bt, nbest[i]);
apply_wp(bt, nbest[i]);
nbest[i].eval(fweights);
idx[i] = i;
++i;
}
// cout << i << " " << nbest.size() << endl;
if (i < nbest.size())
{
// cout << "Resizing from " << nbest.size() << " to " << i << endl;
nbest.resize(i);
idx.resize(i);
}
VectorIndexSorter<PhrasePair> sorter(nbest,greater<PhrasePair>());
if (m != ps.trg.end())
{
make_heap(idx.begin(),idx.end(),sorter);
PhrasePair cand;
cand.init(pid1,ps,5);
for (; m != ps.trg.end(); ++m)
{
if ((m->second.rcnt() < 3) && (m->second.rcnt() * 100 < ps.good))
continue;
cand.update(m->first,m->second);
calc_pfwd(bt, cand);
calc_pbwd(bt, cand);
calc_lex(bt, cand);
apply_wp(bt, cand);
cand.eval(fweights);
if (cand < nbest[idx[0]]) continue;
pop_heap(idx.begin(),idx.end(),sorter);
nbest[idx.back()] = cand;
push_heap(idx.begin(),idx.end(),sorter);
}
}
sort(nbest.begin(),nbest.end(),greater<PhrasePair>());
}
// Driver: reads sentences from stdin and, for every source phrase found in
// the bitext's source-side index, prints the phrase with its counts followed
// by its scored phrase pairs.
// In the active (#else) configuration the corpus location and language pair
// are hard-wired; the only command line argument is the optional sample size.
int main(int argc, char* argv[])
{
  // assert(argc == 4);
#if 0
  string base = argv[1];
  string L1   = argv[2];
  string L2   = argv[3];
  size_t max_samples = argc > 4 ? atoi(argv[4]) : 0;
#else
  string base = "/fs/syn5/germann/exp/sapt/crp/trn/mm/";
  string L1   = "de";
  string L2   = "en";
  size_t max_samples = 1000;
  if (argc > 1)
    max_samples = atoi(argv[1]);
#endif

  // the base name must end in '/' or '.' before file suffixes are appended
  char const last = *base.rbegin();
  if (!(last == '/' || last == '.'))
    base += ".";

  // feature weights: 1 for the first feature, .25 for the remaining four
  fweights.resize(5,.25);
  fweights[0] = 1;

  bt.open(base,L1,L2);
  bt.setDefaultSampleSize(max_samples);

  // each scorer's init() receives the value returned by the previous one
  size_t offset = calc_pfwd.init(0,.05);
  offset = calc_pbwd.init(offset,.05);
  offset = calc_lex.init(offset,base+L1+"-"+L2+".lex");
  offset = apply_wp.init(offset);

  string line;
  while (getline(cin,line))
    {
      vector<id_type> snt;
      bt.V1->fillIdSeq(line,snt);
      size_t const slen = snt.size();

      // Pass 1: call bt.prep() for every phrase of the sentence that can be
      // found in the source index (presumably so the lookups in pass 2 find
      // their statistics prepared -- confirm against mmBitext).
      for (size_t start = 0; start < slen; ++start)
        {
          TSA<Token>::tree_iterator m(bt.I1.get());
          size_t k = start;
          while (k < slen && m.extend(snt[k]))
            {
              bt.prep(m);
              ++k;
            }
        }

      // Pass 2: look up each phrase and print it with its phrase pairs.
      for (size_t start = 0; start < slen; ++start)
        {
          TSA<Token>::tree_iterator m(bt.I1.get());
          size_t k = start;
          while (k < slen && m.extend(snt[k]))
            {
              uint64_t const spid = m.getPid();
              sptr<pstats> s = bt.lookup(m);

              // source phrase, then good/sample_cnt/raw_cnt
              for (size_t j = start; j <= k; ++j)
                cout << (*bt.V1)[snt[j]] << " ";
              cout << s->good       << "/"
                   << s->sample_cnt << "/"
                   << s->raw_cnt    << endl;

              // one slot per distinct target phrase, i.e. nothing is cut off
              vector<PhrasePair> nbest(s->trg.size());
              nbest_phrasepairs(spid, *s, nbest);

              BOOST_FOREACH(PhrasePair const& pp, nbest)
                {
                  // decode the target phrase id into sentence/offset/length
                  uint32_t sid,off,len;
                  parse_pid(pp.p2,sid,off,len);
                  uint32_t const stop = off + len;
                  Token const* o = bt.T2->sntStart(sid);

                  cout << " " << setw(6) << pp.score << " ";
                  for (uint32_t pos = off; pos < stop; ++pos)
                    cout << (*bt.V2)[o[pos].id()] << " ";
                  cout << pp.joint << "/"
                       << pp.raw1  << "/"
                       << pp.raw2  << " |";
                  BOOST_FOREACH(float f, pp.fvals)
                    cout << " " << f;
                  cout << endl;
                }
              ++k;
            }
        }
    }
  exit(0);
}

192
moses/mm/mmlex-build.cc Normal file
View File

@ -0,0 +1,192 @@
// -*- c++ -*-
// Program to extract word cooccurrence counts from a memory-mapped word-aligned bitext
// stores the counts lexicon in the format for mm2dTable<uint32_t> (ug_mm_2d_table.h)
// (c) 2010-2012 Ulrich Germann
#include <queue>
#include <iomanip>
#include <vector>
#include <iterator>
#include <sstream>
#include <boost/program_options.hpp>
#include <boost/dynamic_bitset.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/foreach.hpp>
#include <boost/math/distributions/binomial.hpp>
#include "moses/generic/program_options/ug_get_options.h"
// #include "ug_translation_finder.h"
// #include "ug_sorters.h"
// #include "ug_corpus_sampling.h"
#include "ug_mm_2d_table.h"
#include "ug_mm_ttrack.h"
#include "ug_corpus_token.h"
using namespace std;
using namespace ugdiss;
using namespace boost::math;
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> LEX_t;
typedef SimpleWordId Token;
vector<uint32_t> m1; // marginals L1
vector<uint32_t> m2; // marginals L2
id_type first_rare_id=500;
vector<vector<uint32_t> > JFREQ; // joint count table for frequent L1 words
vector<map<id_type,uint32_t> > JRARE; // joint count table for rare L1 words
mmTtrack<Token> T1,T2;
mmTtrack<char> Tx;
TokenIndex V1,V2;
string bname,cfgFile,L1,L2,oname;
// DECLARATIONS
void interpret_args(int ac, char* av[]);
// Add the word cooccurrence counts of sentence pair /sid/ to the global
// joint count tables.
// Every alignment link (r,c) read from Tx increments the cell for the L1 word
// at position r and the L2 word at position c. Words without any link are
// counted against id 0 on the other side. L1 ids below first_rare_id are
// counted in JFREQ, all others in JRARE.
void
processSentence(id_type sid)
{
  Token const* const src  = T1.sntStart(sid);
  Token const* const trg  = T2.sntStart(sid);
  char  const*       cur  = Tx.sntStart(sid);
  char  const* const stop = Tx.sntEnd(sid);

  // bit i stays set for as long as position i has not been seen in a link
  bitvector unlinked1(T1.sntLen(sid)), unlinked2(T2.sntLen(sid));
  unlinked1.set();
  unlinked2.set();

  // count links
  ushort row,col;
  while (cur < stop)
    {
      cur = binread(cur,row);
      cur = binread(cur,col);
      unlinked1.reset(row);
      unlinked2.reset(col);
      id_type const id1 = src[row].id();
      uint32_t& cell = (id1 < first_rare_id
                        ? JFREQ[id1][trg[col].id()]
                        : JRARE[id1][trg[col].id()]);
      ++cell;
    }

  // count unaligned words
  for (size_t pos = unlinked1.find_first();
       pos < unlinked1.size();
       pos = unlinked1.find_next(pos))
    {
      id_type const id1 = src[pos].id();
      uint32_t& cell = (id1 < first_rare_id ? JFREQ[id1][0] : JRARE[id1][0]);
      ++cell;
    }
  for (size_t pos = unlinked2.find_first();
       pos < unlinked2.size();
       pos = unlinked2.find_next(pos))
    ++JFREQ[0][trg[pos].id()];
}
// Count word cooccurrences over the whole bitext and write them to /ofname/.
//
// Output, in the order written:
//   - offset of the row index (filepos_type; written as 0 first and patched
//     at the very end)
//   - number of rows (m1.size()) and number of columns (m2.size()) as id_type
//   - for each row, its non-zero cells: column id (numwrite) followed by the
//     raw uint32_t count
//   - the row index: m1.size()+1 cumulative cell counts
//   - the raw marginal counts m1 and m2
void
makeTable(string ofname)
{
  ofstream out(ofname.c_str());
  filepos_type idxOffset=0;

  // at least first_rare_id rows, so that JFREQ covers all "frequent" ids
  m1.resize(max(first_rare_id,V1.getNumTokens()),0);
  m2.resize(V2.getNumTokens(),0);
  JFREQ.resize(first_rare_id,vector<uint32_t>(m2.size(),0));
  JRARE.resize(m1.size());
  for (size_t sid = 0; sid < T1.size(); ++sid)
    processSentence(sid);

  // One entry per row plus a closing entry. This must follow m1.size(), not
  // the vocabulary size: the loops below write index[id1] for every
  // id1 < m1.size() and finally index[m1.size()], which would run past the
  // end of the vector if the L1 vocabulary is smaller than first_rare_id.
  vector<id_type> index(m1.size()+1,0);

  numwrite(out,idxOffset); // blank for the time being
  numwrite(out,id_type(m1.size()));
  numwrite(out,id_type(m2.size()));

  id_type cellCount=0;
  id_type stop = min(first_rare_id,id_type(m1.size()));

  // rows of frequent L1 words (dense count vectors)
  for (id_type id1 = 0; id1 < stop; ++id1)
    {
      index[id1] = cellCount;
      vector<uint32_t> const& v = JFREQ[id1];
      for (id_type id2 = 0; id2 < id_type(v.size()); ++id2)
        {
          if (!v[id2]) continue;
          cellCount++;
          numwrite(out,id2);
          out.write(reinterpret_cast<char const*>(&v[id2]),sizeof(uint32_t));
          m1[id1] += v[id2];
          m2[id2] += v[id2];
        }
    }

  // rows of rare L1 words (sparse maps)
  for (id_type id1 = stop; id1 < id_type(m1.size()); ++id1)
    {
      index[id1] = cellCount;
      map<id_type,uint32_t> const& M = JRARE[id1];
      for (map<id_type,uint32_t>::const_iterator m = M.begin(); m != M.end(); ++m)
        {
          if (m->second == 0) continue;
          cellCount++;
          numwrite(out,m->first);
          // the counts are uint32_t, so size the write accordingly
          out.write(reinterpret_cast<char const*>(&m->second),sizeof(uint32_t));
          m1[id1] += m->second;
          m2[m->first] += m->second;
        }
    }
  index[m1.size()] = cellCount;

  idxOffset = out.tellp();
  for (size_t i = 0; i < index.size(); ++i)
    numwrite(out,index[i]);
  out.write(reinterpret_cast<char const*>(&m1[0]),m1.size()*sizeof(uint32_t));
  out.write(reinterpret_cast<char const*>(&m2[0]),m2.size()*sizeof(uint32_t));

  // re-write the file header
  out.seekp(0);
  numwrite(out,idxOffset);
  out.close();
}
// Entry point: parse the command line, open the two corpus tracks, the word
// alignment track and the two vocabularies, then build the lexicon file.
int
main(int argc, char* argv[])
{
  interpret_args(argc,argv);

  // file suffixes are appended directly, so the base name has to end in a
  // separator ('/' or '.')
  char const last = *bname.rbegin();
  bool const has_separator = (last == '/' || last == '.');
  if (!has_separator) bname += '.';

  string const pair_tag = L1 + "-" + L2;
  T1.open(bname + L1 + ".mct");
  T2.open(bname + L2 + ".mct");
  Tx.open(bname + pair_tag + ".mam");
  V1.open(bname + L1 + ".tdx");
  V2.open(bname + L2 + ".tdx");

  makeTable(oname);
  exit(0);
}
// Parse the command line (and the optional config file) into the globals
// bname, L1, L2, oname and cfgFile.
// Prints the usage message and exits if --help is given or if any of the
// required arguments (base name, both language tags, output file) is missing.
void
interpret_args(int ac, char* av[])
{
  namespace po=boost::program_options;
  po::variables_map vm;
  po::options_description o("Options");
  po::options_description h("Hidden Options");
  po::positional_options_description a;

  o.add_options()
    ("help,h", "print this message")
    ("cfg,f", po::value<string>(&cfgFile),"config file")
    ("oname,o", po::value<string>(&oname),"output file name")
    ;

  // hidden options: filled from the positional arguments
  h.add_options()
    ("bname", po::value<string>(&bname), "base name")
    ("L1", po::value<string>(&L1),"L1 tag")
    ("L2", po::value<string>(&L2),"L2 tag")
    ;
  a.add("bname",1);
  a.add("L1",1);
  a.add("L2",1);

  get_options(ac,av,h.add(o),a,vm,"cfg");

  // The language tags are as mandatory as the base name: without them main()
  // would try to open files such as "<base>..mct".
  bool const incomplete = (bname.empty() || L1.empty() ||
                           L2.empty()    || oname.empty());
  if (vm.count("help") || incomplete)
    {
      cout << "usage:\n\t" << av[0] << " <basename> <L1 tag> <L2 tag> -o <output file>\n" << endl;
      cout << o << endl;
      exit(0);
    }
}

494
moses/mm/mtt-build.cc Normal file
View File

@ -0,0 +1,494 @@
// -*- c++ -*-
// Converts a corpus in text format (plain text, one sentence per line) or
// conll format or treetagger output format (which one is automatically
// recognized based on the number of fields per line) into memory-mapped
// format. (c) 2007-2013 Ulrich Germann
#include <boost/program_options.hpp>
#include <boost/program_options/options_description.hpp>
#include <boost/program_options/parsers.hpp>
#include <boost/program_options/variables_map.hpp>
#include <boost/iostreams/device/mapped_file.hpp>
#include <iostream>
#include <fstream>
#include <sstream>
#include <iomanip>
#include <vector>
#include <string>
#include <sys/types.h>
#include <sys/wait.h>
#include "ug_conll_record.h"
#include "tpt_tokenindex.h"
#include "ug_mm_ttrack.h"
#include "tpt_pickler.h"
#include "ug_deptree.h"
#include "moses/generic/sorting/VectorIndexSorter.h"
#include "ug_im_tsa.h"
using namespace std;
using namespace ugdiss;
using namespace Moses;
namespace po=boost::program_options;
int with_pfas;
int with_dcas;
int with_sfas;
bool incremental = false; // build / grow vocabs automatically
bool is_conll = false; // text or conll format?
bool quiet = false; // no progress reporting
string vocabBase; // base name for existing vocabs that should be used
string baseName; // base name for all files
string tmpFile, mttFile; /* name of temporary / actual track file
* (.mtt for Conll format, .mct for plain text)
*/
string UNK;
TokenIndex SF; // surface form
TokenIndex LM; // lemma
TokenIndex PS; // part of speech
TokenIndex DT; // dependency type
void interpret_args(int ac, char* av[]);
// Returns /p/ if it is below /limit/, and 1 otherwise (1 is the id that
// open_vocab() asserts for the UNK token).
inline uchar rangeCheck(int p, int limit)
{
  if (p >= limit) return 1;
  return p;
}
// Look up the id of word /w/ in vocabulary /T/.
// If the lookup yields id 1 for a word that is not the UNK label itself, a
// warning is printed and assert(0) fires; in builds without assertions the
// id 1 is returned.
id_type
get_id(TokenIndex const& T, string const& w)
{
  id_type const id = T[w];
  if (id != 1 || w == UNK)
    return id;

  cerr << "Warning! Unkown vocabulary item '" << w << "', but "
       << "incremental mode (-i) is not set." << endl;
  assert(0);
  return id;
}
// Prepare vocabulary /T/: load it from /fname/ if that file exists, otherwise
// start from a vocabulary that only has its UNK label set. In incremental
// mode the vocabulary is switched to dynamic. Afterwards "NULL" must have
// id 0 and the UNK label id 1.
void
open_vocab(TokenIndex& T, string fname)
{
  bool const file_exists = (access(fname.c_str(), F_OK) == 0);
  if (file_exists)
    {
      T.open(fname,UNK);
      assert(T[UNK] == 1);
    }
  else
    {
      T.setUnkLabel(UNK);
    }

  if (incremental)
    T.setDynamic(true);

  assert(T["NULL"] == 0);
  assert(T[UNK] == 1);
}
// Initialize /v/ with one (token string, 0) entry for every id in /T/.
void
ini_cnt_vec(TokenIndex const& T, vector<pair<string,size_t> > & v)
{
  v.resize(T.totalVocabSize());
  for (size_t id = 0; id < T.totalVocabSize(); ++id)
    {
      pair<string,size_t>& entry = v[id];
      entry.first  = T[id];
      entry.second = 0;
    }
}
// Write vocabulary /T/ to file /fname/ with renumbered token ids.
//
// fname: output file name
// T:     the vocabulary; it is closed by this function
// n2o:   maps new ids to old ids, i.e. the token that gets new id i is
//        T[n2o[i]]
void
write_tokenindex(string fname, TokenIndex& T, vector<id_type> const& n2o)
{
  if (!quiet) cerr << "Writing " << fname << endl;

  // pair every token string with its new id
  vector<pair<string,uint32_t> > v(n2o.size());
  for (id_type i = 0; i < n2o.size(); ++i)
    {
      v[i].first  = T[n2o[i]];
      v[i].second = i;
    }
  T.close();

  // entries are handed over ordered by token string
  sort(v.begin(),v.end());
  write_tokenindex_to_disk(v, fname, UNK);
}
// Parse the command line and open the vocabularies needed for the input
// format: a single surface form vocabulary for plain text, four separate
// vocabularies for tagged (conll) input.
void init(int argc, char* argv[])
{
  interpret_args(argc,argv);

  if (!is_conll)
    {
      open_vocab(SF, vocabBase+".tdx");   // surface form
      return;
    }

  open_vocab(SF, vocabBase+".tdx.sfo");   // surface form
  open_vocab(LM, vocabBase+".tdx.lem");   // lemma
  open_vocab(PS, vocabBase+".tdx.pos");   // part-of-speech
  open_vocab(DT, vocabBase+".tdx.drl");   // dependency type
}
// Fill /rec/ from the whitespace-separated fields /w/ of one input line.
//   3 fields:  treetagger output (form, tag, lemma)
//   8+ fields: CONLL format
// For any other number of fields /rec/ is left untouched.
void fill_rec(Conll_Record& rec, vector<string> const& w)
{
  size_t const nfields = w.size();

  if (nfields == 3) // treetagger output
    {
      // without a known lemma, the surface form doubles as the lemma
      string const& lemma = (w[2] == "<UNKNOWN>") ? w[0] : w[2];
      rec.sform  = get_id(SF, w[0]);
      rec.lemma  = get_id(LM, lemma);
      rec.majpos = rangeCheck(get_id(PS, w[1]), 256);
      rec.minpos = rangeCheck(get_id(PS, w[1]), 256);
      rec.dtype  = 0;
      rec.parent = -1;
      return;
    }

  if (nfields >= 8) // CONLL format
    {
      int const id  = atoi(w[0].c_str());
      int const gov = atoi(w[6].c_str());
      rec.sform  = get_id(SF, w[1]);
      rec.lemma  = get_id(LM, w[2]);
      rec.majpos = rangeCheck(get_id(PS, w[3]), 256);
      rec.minpos = rangeCheck(get_id(PS, w[4]), 256);
      rec.dtype  = get_id(DT, w[7]);
      // the parent is stored relative to the token's own position;
      // a governor of 0 is stored as 0
      if (gov)
        rec.parent = gov - id;
      else
        rec.parent = 0;
    }
}
// Progress report on stderr: a dot for every 10,000 sentences, and a new
// line showing the count in thousands for every 100,000 sentences.
void log_progress(size_t ctr)
{
  if (ctr % 10000 != 0)
    return;                 // nothing to report

  if (ctr % 100000 != 0)
    {
      cerr << ".";
      return;
    }

  // start a new status line, except for the very first one
  if (ctr != 0)
    cerr << endl;
  cerr << setw(12) << ctr / 1000 << "K sentences processed ";
}
size_t
process_plain_input(ostream& out, vector<id_type> & s_index)
{
id_type totalWords = 0;
string line,w;
while (getline(cin,line))
{
istringstream buf(line);
if (!quiet) log_progress(s_index.size());
s_index.push_back(totalWords);
while (buf>>w)
{
numwrite(out,get_id(SF,w));
++totalWords;
}
}
s_index.push_back(totalWords);
return totalWords;
}
size_t
process_tagged_input(ostream& out,
vector<id_type> & s_index,
vector<id_type> & p_index)
{
string line;
Conll_Record rec;
bool new_sent = true;
bool new_par = true;
id_type totalWords = 0;
while (getline(cin,line))
{
vector<string> w; string f; istringstream buf(line);
while (buf>>f) w.push_back(f);
if (w.size() == 0 || (w[0].size() >= 4 && w[0].substr(0,4) == "SID="))
new_sent = true;
else if (w.size() == 1 && w[0] == "<P>")
new_par = new_sent = true;
if (w.size() < 3) continue;
if (!quiet && new_sent) log_progress(s_index.size());
if (new_sent) { s_index.push_back(totalWords); new_sent = false; }
if (new_par) { p_index.push_back(totalWords); new_par = false; }
fill_rec(rec,w);
out.write(reinterpret_cast<char const*>(&rec),sizeof(rec));
++totalWords;
}
s_index.push_back(totalWords);
return totalWords;
}
size_t
numberize()
{
ofstream out(tmpFile.c_str());
filepos_type startIdx=0;
id_type idxSize=0,totalWords=0;
numwrite(out,startIdx); // place holder, to be filled at the end
numwrite(out,idxSize); // place holder, to be filled at the end
numwrite(out,totalWords); // place holder, to be filled at the end
vector<id_type> s_index, p_index;
if(is_conll)
totalWords = process_tagged_input(out,s_index,p_index);
else
totalWords = process_plain_input(out,s_index);
vector<id_type> const* index = &s_index;
if (p_index.size() && p_index.back())
{
p_index.push_back(totalWords);
index = &p_index;
}
if (!quiet)
cerr << endl << "Writing index ... (" << index->size() << " chunks) ";
startIdx = out.tellp();
for (size_t i = 0; i < index->size(); i++) numwrite(out,(*index)[i]);
out.seekp(0);
idxSize = index->size();
numwrite(out, startIdx);
numwrite(out, idxSize - 1);
numwrite(out, totalWords);
out.close();
if (!quiet) cerr << "done" << endl;
return totalWords;
}
vector<id_type> smap,lmap,pmap,dmap;
// Compute the inverse of the mapping /from/: afterwards to[from[i]] == i for
// every position i. /to/ is resized to the size of /from/.
void
invert(vector<id_type> const& from, vector<id_type> & to)
{
  size_t const n = from.size();
  to.resize(n);
  for (size_t pos = 0; pos != n; ++pos)
    to[from[pos]] = pos;
}
// Orders the ids of newly added items by their occurrence counts in /cnt/
// while leaving existing token ids alone: /xmap/ starts out as the identity
// mapping over all ids in /V/, and only the part behind the first two ids and
// the previously known vocabulary is sorted.
void
conservative_sort(TokenIndex     const & V,
                  vector<size_t> const & cnt,
                  vector<id_type>      & xmap)
{
  xmap.resize(V.totalVocabSize());
  id_type next = 0;
  for (vector<id_type>::iterator it = xmap.begin(); it != xmap.end(); ++it)
    *it = next++;

  id_type const first_new = max(id_type(2),V.knownVocabSize());
  VectorIndexSorter<size_t,greater<size_t>, id_type> by_count(cnt);
  sort(xmap.begin()+first_new, xmap.end(), by_count);
}
// Reassign the token ids in the corpus track (the temporary file) based on
// the id maps created by conservative_sort.
// The track is memory-mapped and its token data is overwritten in place
// through the mapping. The resulting maps are left in the globals smap, lmap,
// pmap and dmap.
void remap()
{
if (!quiet) cerr << "Remapping ids ... ";
filepos_type idxOffset;
id_type totalWords, idxSize;
boost::iostreams::mapped_file mtt(tmpFile);
// read the file header; afterwards p points at the first token
char const* p = mtt.data();
p = numread(p,idxOffset);
p = numread(p,idxSize);
p = numread(p,totalWords);
if (is_conll)
{
// occurrence counts per id, one vector per vocabulary
vector<size_t> sf(SF.totalVocabSize(), 0);
vector<size_t> lm(LM.totalVocabSize(), 0);
vector<size_t> ps(PS.totalVocabSize(), 0);
vector<size_t> dt(DT.totalVocabSize(), 0);
// the token data is treated as an array of Conll_Record and modified in
// place (NOTE(review): relies on the mapping being writable -- confirm)
Conll_Record* w = reinterpret_cast<Conll_Record*>(const_cast<char*>(p));
for (size_t i = 0; i < totalWords; ++i)
{
++sf.at(w[i].sform);
++lm.at(w[i].lemma);
// major and minor POS share one vocabulary and one count vector
++ps.at(w[i].majpos);
++ps.at(w[i].minpos);
++dt.at(w[i].dtype);
}
conservative_sort(SF,sf,smap);
conservative_sort(LM,lm,lmap);
conservative_sort(PS,ps,pmap);
conservative_sort(DT,dt,dmap);
// the maps built above are indexed by position and hold the old id found
// there; inverting them yields old id -> new id for rewriting the track
vector<id_type> smap_i(smap.size()); invert(smap,smap_i);
vector<id_type> lmap_i(lmap.size()); invert(lmap,lmap_i);
vector<id_type> pmap_i(pmap.size()); invert(pmap,pmap_i);
vector<id_type> dmap_i(dmap.size()); invert(dmap,dmap_i);
for (size_t i = 0; i < totalWords; ++i)
{
w[i].sform = smap_i[w[i].sform];
w[i].lemma = lmap_i[w[i].lemma];
w[i].majpos = pmap_i[w[i].majpos];
w[i].minpos = pmap_i[w[i].minpos];
w[i].dtype = dmap_i[w[i].dtype];
}
}
else
{
// plain text track: the token data is an array of surface form ids
vector<size_t> sf(SF.totalVocabSize(), 0);
id_type* w = reinterpret_cast<id_type*>(const_cast<char*>(p));
for (size_t i = 0; i < totalWords; ++i) ++sf.at(w[i]);
conservative_sort(SF,sf,smap);
vector<id_type> smap_i(smap.size()); invert(smap,smap_i);
for (size_t i = 0; i < totalWords; ++i) w[i] = smap_i[w[i]];
}
mtt.close();
if (!quiet) cerr << "done." << endl;
}
void save_vocabs()
{
string vbase = baseName;
if (is_conll)
{
if (SF.totalVocabSize() > SF.knownVocabSize())
write_tokenindex(vbase+".tdx.sfo",SF,smap);
if (LM.totalVocabSize() > LM.knownVocabSize())
write_tokenindex(vbase+".tdx.lem",LM,lmap);
if (PS.totalVocabSize() > PS.knownVocabSize())
write_tokenindex(vbase+".tdx.pos",PS,pmap);
if (DT.totalVocabSize() > DT.knownVocabSize())
write_tokenindex(vbase+".tdx.drl",DT,dmap);
}
else if (SF.totalVocabSize() > SF.knownVocabSize())
write_tokenindex(vbase+".tdx",SF,smap);
}
// Forks a child process that builds a suffix array over the token
// track in /infile/ and saves it to /outfile/.
//
// In the parent, returns the pid of the child; the caller is expected
// to wait() for it. The child never returns: it exits once the array
// has been written. If fork() fails the whole program is aborted with
// an error message (previously the -1 was converted to size_t and
// returned as if it were a valid child pid).
template<typename Token>
size_t
build_mmTSA(string infile, string outfile)
{
  pid_t const pid = fork();
  if (pid < 0)
    {
      cerr << "FATAL ERROR: fork() failed while trying to build "
           << outfile << endl;
      exit(1);
    }
  if (pid > 0) return pid; // parent

  // child: index every position of the track
  mmTtrack<Token> T(infile);
  bdBitset filter;
  filter.resize(T.size(),true);
  imTSA<Token> S(&T,filter,(quiet?NULL:&cerr));
  S.save_as_mm_tsa(outfile);
  exit(0);
}
// Builds the suffix (.sfa) and/or prefix (.pfa) arrays for a plain
// text corpus, each in its own child process, and waits for them.
//
// Returns true if every child terminated normally with exit status 0.
bool
build_plaintext_tsas()
{
  typedef L2R_Token<SimpleWordId> L2R;
  typedef R2L_Token<SimpleWordId> R2L;

  // Count the children actually started. with_sfas / with_pfas are
  // integer options, so summing their values (as was done before)
  // yields the wrong count for anything other than 0 or 1.
  size_t children = 0;
  if (with_sfas) { build_mmTSA<L2R>(tmpFile, baseName + ".sfa"); ++children; }
  if (with_pfas) { build_mmTSA<R2L>(tmpFile, baseName + ".pfa"); ++children; }

  bool ok = true;
  while (children--)
    {
      int status = 0;
      if (wait(&status) < 0 || !WIFEXITED(status) || WEXITSTATUS(status) != 0)
        ok = false;
    }
  return ok;
}
// Builds the requested index arrays for a CoNLL corpus. For each
// enabled array type (suffix, prefix, dependency chain) three arrays
// are built -- over surface forms, lemmas and minor POS tags -- each
// in its own child process. Returns after all children have finished.
void build_conll_tsas()
{
  string bn = baseName;
  string mtt = tmpFile;

  // Count the children actually started. The with_* switches are
  // integer options, so deriving the count from their sum (as was done
  // before) is wrong for values other than 0 or 1.
  size_t children = 0;
  if (with_sfas)
    {
      build_mmTSA<L2R_Token<Conll_Sform> >(mtt,bn+".sfa-sform");
      build_mmTSA<L2R_Token<Conll_Lemma> >(mtt,bn+".sfa-lemma");
      build_mmTSA<L2R_Token<Conll_MinPos> >(mtt,bn+".sfa-minpos");
      children += 3;
    }
  if (with_pfas)
    {
      build_mmTSA<R2L_Token<Conll_Sform> >(mtt,bn+".pfa-sform");
      build_mmTSA<R2L_Token<Conll_Lemma> >(mtt,bn+".pfa-lemma");
      build_mmTSA<R2L_Token<Conll_MinPos> >(mtt,bn+".pfa-minpos");
      children += 3;
    }
  if (with_dcas)
    {
      build_mmTSA<ConllBottomUpToken<Conll_Sform> >(mtt,bn+".dca-sform");
      build_mmTSA<ConllBottomUpToken<Conll_Lemma> >(mtt,bn+".dca-lemma");
      build_mmTSA<ConllBottomUpToken<Conll_MinPos> >(mtt,bn+".dca-minpos");
      children += 3;
    }
  while (children--) wait(NULL);
}
int main(int argc, char* argv[])
{
init(argc,argv);
numberize();
if (SF.totalVocabSize() > SF.knownVocabSize() ||
LM.totalVocabSize() > LM.knownVocabSize() ||
PS.totalVocabSize() > PS.knownVocabSize() ||
DT.totalVocabSize() > DT.knownVocabSize())
{
remap();
save_vocabs();
}
if (is_conll) build_conll_tsas();
else build_plaintext_tsas();
if (!quiet) cerr << endl;
rename(tmpFile.c_str(),mttFile.c_str());
}
void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
po::options_description o("Options");
o.add_options()
("help,h", "print this message")
("quiet,q", po::bool_switch(&quiet),
"don't print progress information")
("incremental,i", po::bool_switch(&incremental),
"incremental mode; rewrites vocab files!")
("vocab-base,v", po::value<string>(&vocabBase),
"base name of various vocabularies")
("output,o", po::value<string>(&baseName),
"base file name of the resulting file(s)")
("sfa,s", po::value<int>(&with_sfas)->default_value(1),
"also build suffix arrays")
("pfa,p", po::value<int>(&with_pfas)
->default_value(0)->implicit_value(1),
"also build prefix arrays")
("dca,d", po::value<int>(&with_dcas)
->default_value(0)->implicit_value(1),
"also build dependency chain arrays")
("conll,c", po::bool_switch(&is_conll),
"corpus is in CoNLL format (default: plain text)")
("unk,u", po::value<string>(&UNK)->default_value("UNK"),
"label for unknown tokens")
// ("map,m", po::value<string>(&vmap),
// "map words to word classes for indexing")
;
po::options_description h("Hidden Options");
h.add_options()
;
h.add(o);
po::positional_options_description a;
a.add("output",1);
po::store(po::command_line_parser(ac,av)
.options(h)
.positional(a)
.run(),vm);
po::notify(vm);
if (vm.count("help") || !vm.count("output"))
{
cout << "\nusage:\n\t cat <corpus> | " << av[0]
<< " [options] <output .mtt file>" << endl;
cout << o << endl;
exit(0);
}
mttFile = baseName + (is_conll ? ".mtt" : ".mct");
tmpFile = mttFile + "_";
}

156
moses/mm/mtt-dump.cc Normal file
View File

@ -0,0 +1,156 @@
// -*- c++ -*-
// (c) 2008-2010 Ulrich Germann
#include <boost/program_options.hpp>
#include <iomanip>
#include "tpt_typedefs.h"
#include "ug_mm_ttrack.h"
#include "tpt_tokenindex.h"
#include "ug_deptree.h"
#include "ug_corpus_token.h"
using namespace std;
using namespace ugdiss;
namespace po = boost::program_options;
string bname,mtt,mct;
vector<string> range;
typedef L2R_Token<Conll_Sform> Token;
TokenIndex SF,LM,PS,DT;
mmTtrack<Token> MTT;
mmTtrack<SimpleWordId> MCT;
bool sform;
bool have_mtt, have_mct;
bool with_sids;
void
interpret_args(int ac, char* av[])
{
po::variables_map vm;
po::options_description o("Options");
o.add_options()
("help,h", "print this message")
("numbers,n", po::bool_switch(&with_sids), "print sentence ids as first token")
("sform,s", po::bool_switch(&sform), "sform only")
;
po::options_description h("Hidden Options");
h.add_options()
("bname", po::value<string>(&bname), "base name")
("range", po::value<vector<string> >(&range), "range")
;
po::positional_options_description a;
a.add("bname",1);
a.add("range",-1);
po::store(po::command_line_parser(ac,av)
.options(h.add(o))
.positional(a)
.run(),vm);
po::notify(vm); // IMPORTANT
if (vm.count("help") || bname.empty())
{
cout << "usage:\n\t"
<< av[0] << " track name [<range>]\n"
<< endl;
cout << o << endl;
exit(0);
}
mtt = bname+".mtt";
mct = bname+".mct";
}
void
printRangeMTT(size_t start, size_t stop)
{
for (;start < stop; start++)
{
size_t i = 0;
Token const* t = MTT.sntStart(start);
Token const* e = MTT.sntEnd(start);
if (with_sids) cout << start << " ";
for (;t < e; ++t)
{
#if 0
uchar const* x = reinterpret_cast<uchar const*>(t);
cout << *reinterpret_cast<id_type const*>(x) << " ";
cout << *reinterpret_cast<id_type const*>(x+4) << " ";
cout << int(*(x+8)) << " ";
cout << int(*(x+9)) << " ";
cout << *reinterpret_cast<short const*>(x+10) << endl;
#endif
if (!sform)
{
cout << setw(2) << right << ++i << " ";
cout << setw(30) << right << SF[t->sform] << " ";
cout << setw(4) << right << PS[t->majpos] << " ";
cout << setw(4) << right << PS[t->minpos] << " ";
cout << setw(30) << left << LM[t->lemma] << " ";
cout << i+t->parent << " ";
cout << DT[t->dtype] << endl;
}
else cout << SF[t->id()] << " ";
}
cout << endl;
}
}
void
printRangeMCT(size_t start, size_t stop)
{
for (;start < stop; start++)
{
SimpleWordId const* t = MCT.sntStart(start);
SimpleWordId const* e = MCT.sntEnd(start);
if (with_sids) cout << start << " ";
while (t < e) cout << SF[(t++)->id()] << " ";
cout << endl;
}
}
int
main(int argc, char*argv[])
{
interpret_args(argc,argv);
have_mtt = !access(mtt.c_str(),F_OK);
have_mct = !have_mtt && !access(mct.c_str(),F_OK);
if (!have_mtt && !have_mct)
{
cerr << "FATAL ERROR: neither " << mtt << " nor " << mct << " exit." << endl;
exit(1);
}
if (have_mtt)
{
SF.open(bname+".tdx.sfo"); SF.iniReverseIndex();
LM.open(bname+".tdx.lem"); LM.iniReverseIndex();
PS.open(bname+".tdx.pos"); PS.iniReverseIndex();
DT.open(bname+".tdx.drl"); DT.iniReverseIndex();
MTT.open(mtt);
}
else
{
sform = true;
SF.open(bname+".tdx"); SF.iniReverseIndex();
MCT.open(mct);
}
if (!range.size())
have_mtt ? printRangeMTT(0, MTT.size()) : printRangeMCT(0, MCT.size());
else
{
for (size_t i = 0; i < range.size(); i++)
{
istringstream buf(range[i]);
size_t first,last; uchar c;
buf>>first;
if (buf.peek() == '-') buf>>c>>last;
else last = first;
if (have_mtt && last < MTT.size())
printRangeMTT(first,last+1);
else if (last < MCT.size())
printRangeMCT(first,last+1);
}
}
}

77
moses/mm/mtt.count.cc Normal file
View File

@ -0,0 +1,77 @@
// Look up phrases read from stdin (one per line) and print their raw occurrence counts
#include "ug_mm_ttrack.h"
#include "ug_mm_tsa.h"
#include "tpt_tokenindex.h"
#include "ug_corpus_token.h"
#include <string>
#include <vector>
#include <cassert>
#include <boost/unordered_map.hpp>
#include <boost/foreach.hpp>
#include <iomanip>
#include "ug_typedefs.h"
#include "tpt_pickler.h"
#include "moses/generic/sorting/VectorIndexSorter.h"
#include "moses/generic/sampling/Sampling.h"
#include "moses/generic/file_io/ug_stream.h"
#include <algorithm>
#include "moses/generic/program_options/ug_get_options.h"
using namespace std;
using namespace ugdiss;
using namespace Moses;
typedef L2R_Token<SimpleWordId> Token;
typedef mmTSA<Token>::tree_iterator iter;
typedef boost::unordered_map<pair<size_t,size_t>,size_t> phrase_counter_t;
#define CACHING_THRESHOLD 1000
mmTtrack<Token> T; // token tracks
TokenIndex V; // vocabs
mmTSA<Token> I; // suffix arrays
void interpret_args(int ac, char* av[]);
string bname;
bool echo;
int main(int argc, char* argv[])
{
interpret_args(argc,argv);
T.open(bname+".mct");
V.open(bname+".tdx"); V.iniReverseIndex();
I.open(bname+".sfa",&T);
string line;
while (getline(cin,line))
{
vector<id_type> phr;
V.fillIdSeq(line,phr);
TSA<Token>::tree_iterator m(&I);
size_t i = 0;
while (i < phr.size() && m.extend(phr[i])) ++i;
if (echo) cout << line << ": ";
if (i < phr.size()) cout << 0 << endl;
else cout << m.rawCnt() << endl;
}
exit(0);
}
void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
po::variables_map vm;
po::options_description o("Options");
po::options_description h("Hidden Options");
po::positional_options_description a;
o.add_options()
("help,h", "print this message")
("echo,e", po::bool_switch(&echo), "repeat lookup phrases")
;
h.add_options()
("bname", po::value<string>(&bname), "base name")
;
a.add("bname",1);
get_options(ac,av,h.add(o),a,vm);
}

66
moses/mm/num_read_write.h Normal file
View File

@ -0,0 +1,66 @@
// -*- c++ -*-
// (c) 2006,2007,2008 Ulrich Germann
#ifndef __num_read_write_hh
#define __num_read_write_hh
#include <stdint.h>
#include <cstring>
#include <iostream>
#include <endian.h>
#include <byteswap.h>
#include "tpt_typedefs.h"
namespace ugdiss {
/// Writes the unsigned integer /x/ to /out/ as sizeof(uintNumber) raw
/// bytes in little-endian order, independent of the host byte order.
template<typename uintNumber>
void
numwrite(std::ostream& out, uintNumber const& x)
{
#if __BYTE_ORDER == __BIG_ENDIAN
  // big-endian host: swap the bytes before writing
  uintNumber swapped = x;
  if (sizeof(uintNumber) == 2) swapped = bswap_16(x);
  else if (sizeof(uintNumber) == 4) swapped = bswap_32(x);
  else if (sizeof(uintNumber) == 8) swapped = bswap_64(x);
  out.write(reinterpret_cast<char*>(&swapped),sizeof(swapped));
#else
  // little-endian host: the in-memory layout already is the file format
  char const* const raw = reinterpret_cast<char const*>(&x);
  out.write(raw,sizeof(uintNumber));
#endif
}
/// Reads sizeof(uintNumber) raw little-endian bytes from /in/ into
/// /x/, converting to the host byte order where necessary.
template<typename uintNumber>
void
numread(std::istream& in, uintNumber& x)
{
  char* const raw = reinterpret_cast<char*>(&x);
  in.read(raw,sizeof(uintNumber));
#if __BYTE_ORDER == __BIG_ENDIAN
  // big-endian host: the bytes just read are in the wrong order
  if (sizeof(uintNumber) == 2) x = bswap_16(x);
  else if (sizeof(uintNumber) == 4) x = bswap_32(x);
  else if (sizeof(uintNumber) == 8) x = bswap_64(x);
#endif
}
/// Reads an unsigned integer stored as sizeof(uintNumber) raw
/// little-endian bytes at /src/ into /x/.
/// @return pointer to the first byte after the number
template<typename uintNumber>
char const*
numread(char const* src, uintNumber& x)
{
  // ATTENTION: THIS NEEDS TO BE VERIFIED FOR BIG-ENDIAN MACHINES!!!
  // Copy the bytes instead of dereferencing a casted pointer: /src/
  // may point anywhere into a buffer or mapped file and need not be
  // suitably aligned for uintNumber.
  std::memcpy(&x,src,sizeof(uintNumber));
#if __BYTE_ORDER == __BIG_ENDIAN
  switch (sizeof(uintNumber))
    {
    case 2: x = bswap_16(x); break;
    case 4: x = bswap_32(x); break;
    case 8: x = bswap_64(x); break;
    default: break;
    }
#endif
  return src+sizeof(uintNumber);
}
} // end of namespace ugdiss
#endif

View File

@ -0,0 +1,166 @@
#ifndef __ug_bitext_base_h
#define __ug_bitext_base_h
// Abstract word-aligned bitext class
// Written by Ulrich Germann
#include <string>
#include <vector>
#include <cassert>
#include <iomanip>
#include <algorithm>
#include <boost/unordered_map.hpp>
#include <boost/foreach.hpp>
#include <boost/thread.hpp>
#include "moses/generic/sorting/VectorIndexSorter.h"
#include "moses/generic/sampling/Sampling.h"
#include "moses/generic/file_io/ug_stream.h"
#include "ug_typedefs.h"
#include "ug_mm_ttrack.h"
#include "ug_mm_tsa.h"
#include "tpt_tokenindex.h"
#include "ug_corpus_token.h"
#include "tpt_pickler.h"
using namespace ugdiss;
using namespace std;
namespace Moses {
typedef L2R_Token<SimpleWordId> Token;
typedef mmTSA<Token>::tree_iterator iter;
class bitext_base
{
public:
typedef mmTSA<Token>::tree_iterator iter;
class pstats; // one-sided phrase statistics
class jstats; // phrase pair ("joint") statistics
class agenda
{
boost::mutex lock;
boost::condition_variable ready;
class job;
class worker;
list<job> joblist;
vector<sptr<boost::thread> > workers;
bool shutdown;
size_t doomed;
public:
bitext_base const& bitext;
agenda(bitext_base const& bitext);
~agenda();
void add_workers(int n);
sptr<pstats> add_job(mmbitext::iter const& phrase,
size_t const max_samples);
bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
bool & fwd, sptr<bitext_base::pstats> & stats);
};
// stores the list of unfinished jobs;
// maintains a pool of workers and assigns the jobs to them
agenda* ag;
mmTtrack<char> Tx; // word alignments
mmTtrack<Token> T1,T2; // token tracks
TokenIndex V1,V2; // vocabs
mmTSA<Token> I1,I2; // suffix arrays
/// given the source phrase sid[start:stop]
// find the possible start (s1 .. s2) and end (e1 .. e2)
// points of the target phrase; if non-NULL, store word
// alignments in *core_alignment. If /flip/, source phrase is
// L2.
bool
find_trg_phr_bounds
(size_t const sid, size_t const start, size_t const stop,
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
vector<uchar> * core_alignment, bool const flip) const;
boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
private:
sptr<pstats>
prep2(iter const& phrase);
public:
mmbitext();
~mmbitext();
void open(string const base, string const L1, string const L2);
sptr<pstats> lookup(iter const& phrase);
void prep(iter const& phrase);
};
// "joint" (i.e., phrase pair) statistics
class
mmbitext::
jstats
{
uint32_t my_rcnt; // unweighted count
float my_wcnt; // weighted count
vector<pair<size_t, vector<uchar> > > my_aln;
boost::mutex lock;
public:
jstats();
jstats(jstats const& other);
uint32_t rcnt() const;
float wcnt() const;
vector<pair<size_t, vector<uchar> > > const & aln() const;
void add(float w, vector<uchar> const& a);
};
struct
mmbitext::
pstats
{
boost::mutex lock; // for parallel gathering of stats
boost::condition_variable ready; // consumers can wait for this data structure to be ready.
size_t raw_cnt; // (approximate) raw occurrence count
size_t sample_cnt; // number of instances selected during sampling
size_t good; // number of selected instances with valid word alignments
size_t sum_pairs;
// size_t snt_cnt;
// size_t sample_snt;
size_t in_progress; // keeps track of how many threads are currently working on this
boost::unordered_map<uint64_t, jstats> trg;
pstats();
// vector<phrase> nbest;
// void select_nbest(size_t const N=10);
void release();
void register_worker();
void add(mmbitext::iter const& trg_phrase, float const w, vector<uchar> const& a);
};
class
mmbitext::
agenda::
worker
{
agenda& ag;
public:
worker(agenda& a);
void operator()();
};
class
mmbitext::
agenda::
job
{
public:
char const* next;
char const* stop;
size_t max_samples;
size_t ctr;
size_t len;
bool fwd;
sptr<mmbitext::pstats> stats;
bool step(uint64_t & sid, uint64_t & offset);
};
}
#endif

291
moses/mm/symal2mam.cc Normal file
View File

@ -0,0 +1,291 @@
// -*- c++ -*-
// program to convert GIZA-style alignments into memory-mapped format
// (c) 2010 Ulrich Germann
// Reads from stdin a file with alternating lines: sentence lengths and symal output.
// We need the sentence lengths for sanity checks, because GIZA alignment might skip
// sentences. If --skip, we skip such sentence pairs, otherwise, we leave the word
// alignment matrix blank.
#include "ug_mm_ttrack.h"
#include "ug_deptree.h"
#include "tpt_tokenindex.h"
#include "tpt_pickler.h"
#include "moses/generic/program_options/ug_get_options.h"
#include "moses/generic/file_io/ug_stream.h"
#include <iostream>
#include <string>
#include <sstream>
#include <boost/program_options.hpp>
#include <boost/scoped_ptr.hpp>
#include "util/exception.hh"
#include "util/check.hh"
// NOTE TO SELF:
/* Program to filter out sentences that GIZA will skip or truncate,
* i.e. sentences longer than 100 words or sentence pairs with a length
*/
using namespace std;
using namespace ugdiss;
ofstream t1out,t2out,mam;
int len1=0,len2=0;
size_t lineCtr=0,sid=0;
bool conll=false;
bool skip=false;
bool debug=false;
TokenIndex V1;
string mtt1name,mtt2name,o1name,o2name,mamname,cfgFile;
string dataFormat,A3filename;
void
interpret_args(int ac, char* av[])
{
namespace po=boost::program_options;
po::variables_map vm;
po::options_description o("Options");
po::options_description h("Hidden Options");
po::positional_options_description a;
o.add_options()
("help,h", "print this message")
("cfg,f", po::value<string>(&cfgFile),"config file")
("a3", po::value<string>(&A3filename), "name of A3 file (for sanity checks)")
("o1", po::value<string>(&o1name), "name of output file for track 1")
("o2", po::value<string>(&o2name), "name of output file for track 2")
("skip", "skip sentence pairs without word alignment (requires --o1 and --o2)")
("debug,d", "debug mode")
("t1", po::value<string>(&mtt1name), "file name of L1 mapped token track")
("t2", po::value<string>(&mtt2name), "file name of L2 mapped token track")
("format,F", po::value<string>(&dataFormat)->default_value("plain"), "data format (plain or conll)")
;
h.add_options()
("mamname", po::value<string>(&mamname), "name of output file for mam")
;
a.add("mamname",1);
get_options(ac,av,h.add(o),a,vm,"cfg");
skip = vm.count("skip");
debug = vm.count("debug");
if (vm.count("help") || mamname.empty())
{
cout << "usage:\n"
<< "\t\n"
<< "\t ... | " << av[0]
<< " <.mam file> \n" << endl;
cout << o << endl;
cout << "If an A3 file is given (as produced by (m)giza), symal2mam performs\n"
<< "a sanity check to make sure that sentence lengths match." << endl;
exit(0);
}
conll = dataFormat == "conll";
if (!conll and dataFormat != "plain")
{
cerr << "format must be 'conll' or 'plain'" << endl;
exit(1);
}
if (skip && (o1name.empty() || o2name.empty()))
{
cerr << "--skip requires --o1 and --o2" << endl;
exit(1);
}
}
// Copies the raw bytes of sentence /sid/ of track /T/ to /dest/.
template<typename track_t>
void
copySentence(track_t const& T, size_t sid, ostream& dest)
{
  char const* const first = reinterpret_cast<char const*>(T.sntStart(sid));
  char const* const beyond = reinterpret_cast<char const*>(T.sntEnd(sid));
  dest.write(first,beyond-first);
}
// Converts one line of symal output ("a-b a-b ...") into its binary
// representation and appends it to /out/. Alignment points are checked
// against the global sentence lengths len1/len2 where those are known
// (non-zero). Returns the write position of /out/ after the line.
size_t
procSymalLine(string const& line, ostream& out)
{
  istringstream buf(line);
  ushort a,b; char dash;
  for (;;)
    {
      if (!(buf>>a>>dash>>b)) break;
      bool const a_bad = len1 && a >= len1;
      bool const b_bad = len2 && b >= len2;
      if (debug && (a_bad || b_bad))
        {
          cerr << a << "-" << b << " " << len1 << "/" << len2 << endl;
        }
      assert(len1 == 0 || a<len1);
      assert(len2 == 0 || b<len2);
      binwrite(out,a);
      binwrite(out,b);
    }
  return out.tellp();
}
// Completes a .mam file: appends the index (file positions in /idx/,
// made relative to the end of the header), then fills in the header
// fields at the start of the file and closes it.
void finiMAM(ofstream& out, vector<id_type>& idx, id_type numTok)
{
  // size of the header = position of the first data byte
  id_type offset = sizeof(filepos_type)+2*sizeof(id_type);
  filepos_type idxStart = out.tellp();
  for (size_t k = 0; k < idx.size(); ++k)
    numwrite(out,idx[k]-offset);

  out.seekp(0);
  numwrite(out,idxStart);
  numwrite(out,id_type(idx.size()-1));
  numwrite(out,numTok);
  out.close();
}
// Completes a token track file: appends the sentence index /idx/ as
// is, then fills in the header fields at the start of the file (index
// position, number of index entries minus one, token count) and closes it.
void
finalize(ofstream& out, vector<id_type> const& idx, id_type tokenCount)
{
  filepos_type idxStart = out.tellp();
  for (vector<id_type>::const_iterator e = idx.begin(); e != idx.end(); ++e)
    numwrite(out,*e);

  id_type idxSize = idx.size();
  out.seekp(0);
  numwrite(out,idxStart);
  numwrite(out,idxSize-1);
  numwrite(out,tokenCount);
  out.close();
}
// Reads the next three-line record from the A3 file /in/ and extracts
// the source and target sentence lengths from its first line (the
// numbers following "source length " and "target length ").
//
// Returns true immediately, leaving the check values untouched, if no
// A3 file was specified. Returns false if the expected fields cannot
// be found (including at end of file).
bool getCheckValues(istream& in, int& check1, int& check2)
{
  if (A3filename.empty()) return true;
  string line;
  getline(in,line);

  // Test for npos BEFORE skipping over the label: npos + 14 wraps
  // around to 13, which used to pass for a "found" position.
  size_t p1 = line.find("source length ");
  if (p1 == string::npos) return false;
  p1 += 14;
  size_t p2 = line.find("target length ",p1);
  if (p2 == string::npos) return false;
  // cout << line << endl;
  // cout << line.substr(p1,p2-p1) << endl;
  check1 = atoi(line.substr(p1,p2-p1).c_str());

  p1 = p2+14;
  p2 = line.find("alignment ",p1);
  if (p2 == string::npos) return false;
  check2 = atoi(line.substr(p1,p2-p1).c_str());

  // skip the remaining two lines of the record
  getline(in,line);
  getline(in,line);
  return true;
}
// Conversion without sanity checks: every line read from stdin becomes
// one entry of the .mam file. Prints the number of index entries.
void
go()
{
  vector<id_type> idxm;
  idxm.reserve(10000000);
  idxm.push_back(mam.tellp());

  size_t ctr=0; // lines processed; only maintained in debug mode
  string line;
  while(getline(cin,line))
    {
      idxm.push_back(procSymalLine(line,mam));
      if (!debug) continue;
      if (++ctr%100000==0)
        cerr << ctr/1000 << "K lines processed" << endl;
    }

  finiMAM(mam,idxm,0);
  cout << idxm.size() << endl;
}
// Conversion with sanity checks: walks through the sentence pairs of
// the two token tracks and compares their lengths with the lengths
// recorded in the A3 file before consuming the corresponding line of
// symal output from stdin. With --skip, sentence pairs whose lengths do
// not match are dropped and the remaining sentences are copied to the
// --o1 / --o2 tracks; without it, such pairs get an empty alignment.
// Prints the number of index entries written to the .mam file.
template<typename TKN>
void
go(string t1name, string t2name, string A3filename)
{
typedef mmTtrack<TKN> track_t;
track_t T1(t1name),T2(t2name);
filtering_istream A3file; open_input_stream(A3filename,A3file);
string line; int check1=-1,check2=-1;
// idx1/idx2: cumulative token counts of the output tracks;
// idxm: file positions in the .mam file
vector<id_type> idx1(1,0),idx2(1,0),idxm(1,mam.tellp());
size_t tokenCount1=0,tokenCount2=0;
// NOTE: this lineCtr shadows the global variable of the same name
size_t skipCtr=0,lineCtr=0;
if (!getCheckValues(A3file,check1,check2))
UTIL_THROW(util::Exception, "Mismatch in input files!");
// NOTE(review): T2 is indexed with sentence ids bounded by T1.size();
// this assumes both tracks hold the same number of sentences -- confirm.
for (sid = 0; sid < T1.size(); ++sid)
{
len1 = T1.sntLen(sid);
len2 = T2.sntLen(sid);
if (debug)
cerr << "[" << lineCtr << "] "
<< len1 << " (" << check1 << ") / "
<< len2 << " (" << check2 << ")" << endl;
// negative check values mean "no check value available"
if ((check1 >=0 && check1!=len1) ||
(check2 >=0 && check2!=len2))
{
if (skip)
{
cerr << "[" << ++skipCtr << "] skipping "
<< check1 << "/" << check2 << " vs. "
<< len1 << "/" << len2
<< " at line " << lineCtr << endl;
}
else
{
// keep the sentence pair, but with an empty alignment
idxm.push_back(mam.tellp());
}
// for over-long sentences, one line of symal output and one A3
// record are consumed and discarded here
if (len1 > 100 || len2 > 100)
{
getline(cin,line);
getCheckValues(A3file,check1,check2);
lineCtr++;
}
continue;
}
if (skip)
{
idx1.push_back(tokenCount1 += len1);
copySentence(T1,sid,t1out);
idx2.push_back(tokenCount2 += len2);
copySentence(T2,sid,t2out);
}
if (!getline(cin,line))
UTIL_THROW(util::Exception, "Too few lines in symal input!");
lineCtr++;
idxm.push_back(procSymalLine(line,mam));
if (debug) cerr << "[" << lineCtr << "] "
<< check1 << " (" << len1 <<") "
<< check2 << " (" << len2 <<") "
<< line << endl;
// fetch the check values for the next sentence pair
getCheckValues(A3file,check1,check2);
}
if (skip)
{
finalize(t1out,idx1,tokenCount1);
finalize(t2out,idx2,tokenCount2);
}
finiMAM(mam,idxm,0);
cout << idxm.size() << endl;
}
// Opens /fname/ for writing and reserves space for the file header
// (index start, index size, token count); the actual values are
// written once the rest of the file is complete.
void
initialize(ofstream& out, string const& fname)
{
  out.open(fname.c_str());
  filepos_type const no_index_start = filepos_type(0);
  id_type const no_count = id_type(0);
  numwrite(out,no_index_start); // place holder for index start
  numwrite(out,no_count); // place holder for index size
  numwrite(out,no_count); // place holder for token count
}
// Program entry point: opens the output files and runs the conversion,
// with sanity checks against the token tracks if an A3 file was given.
int main(int argc, char* argv[])
{
  interpret_args(argc,argv);
  if (skip)
    {
      // filtered copies of the two token tracks
      initialize(t1out,o1name);
      initialize(t2out,o2name);
    }
  initialize(mam,mamname);

  if (A3filename.empty())
    {
      go();
      return 0;
    }
  if (conll) go<Conll_Record>(mtt1name,mtt2name,A3filename);
  else go<id_type>(mtt1name,mtt2name,A3filename);
  return 0;
}

405
moses/mm/tpt_pickler.cc Normal file
View File

@ -0,0 +1,405 @@
// -*- c++ -*-
// (c) 2006,2007,2008 Ulrich Germann
#include "tpt_pickler.h"
#include <sys/stat.h>
#include <cassert>
#ifdef CYGWIN
#define stat64 stat
#endif
namespace ugdiss
{
using namespace std;
// Returns the size of file /fname/ in bytes, or 0 if the file cannot
// be stat'ed (e.g. because it does not exist). The result of stat64()
// used to be ignored, so a failure yielded an indeterminate value.
uint64_t
getFileSize(const std::string& fname)
{
  struct stat64 buf;
  if (stat64(fname.c_str(),&buf) != 0) return 0;
  return buf.st_size;
}
// Writes /data/ in a variable-length encoding: seven payload bits per
// byte, least significant group first; the high bit is set on the
// last byte only.
template <typename T>
void
binwrite_unsigned_integer(std::ostream& out, T data)
{
  for (; data >= 128; data = data >> 7)
    out.put(data%128);
  char const last = data;
  out.put(last|char(-128)); // set the 'stop' bit
}
// Reads an unsigned integer in the variable-length encoding produced
// by binwrite_unsigned_integer(): seven payload bits per byte, least
// significant group first, high bit set on the last byte.
//
// At most as many bytes are consumed as a value of type T can need.
// Errors can be checked on /in/ after the call.
template<typename T>
void
binread_unsigned_integer(std::istream& in, T& data)
{
  in.clear();
  data = 0;
  for (unsigned shift = 0; shift < 8*sizeof(T); shift += 7)
    {
      char c = 0; // stays 0 if the read fails
      in.get(c);
      // Test the stop bit on the unsigned byte value: whether plain
      // char is signed is implementation-defined, so "c < 0" is not
      // portable.
      unsigned char const byte = static_cast<unsigned char>(c);
      data = static_cast<T>(data + (T(byte & 0x7f) << shift));
      if (byte & 0x80) return;
    }
}
// binwrite() overloads for the unsigned integer types; all of them
// forward to binwrite_unsigned_integer().
void
binwrite(std::ostream& out, unsigned char data)
{
binwrite_unsigned_integer(out, data);
}
void
binwrite(std::ostream& out, unsigned short data)
{
binwrite_unsigned_integer(out, data);
}
void
binwrite(std::ostream& out, unsigned long data)
{
binwrite_unsigned_integer(out, data);
}
void
binwrite(std::ostream& out, unsigned long long data)
{
binwrite_unsigned_integer(out, data);
}
// Exactly one of the following two overloads is compiled, depending on
// the word size.
// NOTE(review): presumably this is because size_t coincides with
// unsigned long on 64-bit and with unsigned int on 32-bit targets, so
// defining both would clash with an overload above -- confirm.
#if __WORDSIZE == 64
void
binwrite(std::ostream& out, unsigned int data)
{
binwrite_unsigned_integer(out, data);
}
#else
void
binwrite(std::ostream& out, size_t data)
{
binwrite_unsigned_integer(out, data);
}
#endif
// Reads an unsigned short in variable-length encoding (seven payload
// bits per byte, least significant group first, high bit set on the
// last byte); consumes at most three bytes. Errors can be checked on
// /in/ after the call.
void
binread(std::istream& in, unsigned short& data)
{
  assert(sizeof(data)==2);
  in.clear();
  data = 0;
  for (unsigned shift = 0; shift <= 14; shift += 7)
    {
      char c = 0; // stays 0 if the read fails
      in.get(c);
      // test the stop bit on the unsigned byte value; "c < 0" only
      // works where plain char is signed
      unsigned char const byte = static_cast<unsigned char>(c);
      data += uint16_t(byte & 0x7f) << shift;
      if (byte & 0x80) return;
    }
}
// Reads an unsigned int in variable-length encoding (seven payload
// bits per byte, least significant group first, high bit set on the
// last byte); consumes at most five bytes. Errors can be checked on
// /in/ after the call.
void
binread(std::istream& in, unsigned int& data)
{
  assert(sizeof(data) == 4);
  in.clear();
  data = 0;
  for (unsigned shift = 0; shift <= 28; shift += 7)
    {
      char c = 0; // stays 0 if the read fails
      in.get(c);
      // test the stop bit on the unsigned byte value; "c < 0" only
      // works where plain char is signed
      unsigned char const byte = static_cast<unsigned char>(c);
      data += uint32_t(byte & 0x7f) << shift;
      if (byte & 0x80) return;
    }
}
// Reads an unsigned long in variable-length encoding (seven payload
// bits per byte, least significant group first, high bit set on the
// last byte); consumes at most as many bytes as an unsigned long can
// need (five for 32 bits, ten for 64 bits). Errors can be checked on
// /in/ after the call.
void
binread(std::istream& in, unsigned long& data)
{
#if __WORDSIZE == 32
  assert(sizeof(unsigned long)==4);
#else
  assert(sizeof(unsigned long)==8);
#endif
  data = 0;
  for (unsigned shift = 0; shift < 8*sizeof(unsigned long); shift += 7)
    {
      char c = 0; // stays 0 if the read fails
      in.get(c);
      // test the stop bit on the unsigned byte value; "c < 0" only
      // works where plain char is signed
      unsigned char const byte = static_cast<unsigned char>(c);
      data += static_cast<unsigned long>(byte & 0x7f) << shift;
      if (byte & 0x80) return;
    }
}
// Reads an unsigned long long in variable-length encoding (seven
// payload bits per byte, least significant group first, high bit set
// on the last byte); consumes at most ten bytes. Errors can be checked
// on /in/ after the call.
void
binread(std::istream& in, unsigned long long& data)
{
  assert(sizeof(unsigned long long)==8);
  data = 0;
  for (unsigned shift = 0; shift <= 63; shift += 7)
    {
      char c = 0; // stays 0 if the read fails
      in.get(c);
      // test the stop bit on the unsigned byte value; "c < 0" only
      // works where plain char is signed
      unsigned char const byte = static_cast<unsigned char>(c);
      data += static_cast<unsigned long long>(byte & 0x7f) << shift;
      if (byte & 0x80) return;
    }
}
// writing and reading strings ...

// A string is stored as its length (in the integer encoding used by
// binwrite) followed by its raw bytes.
void
binwrite(std::ostream& out, std::string const& s)
{
  size_t nbytes = s.size();
  ugdiss::binwrite(out,nbytes);
  out.write(s.data(),nbytes);
}
// Reads a string stored as its length followed by its raw bytes.
// /s/ is left untouched if the length cannot be read; if the stream
// ends early, /s/ holds the bytes that could be read.
//
// The bytes are read straight into the string: the variable-length
// stack array used before is not standard C++, could overflow the
// stack for large lengths, and cut the string off at the first
// embedded NUL byte.
void
binread(std::istream& in, std::string& s)
{
  size_t len = 0;
  ugdiss::binread(in,len);
  if (!in) return;
  s.resize(len);
  if (len == 0) return;
  in.read(&s[0],len);
  size_t const got = static_cast<size_t>(in.gcount());
  if (got < len) s.resize(got);
}
// Writes the in-memory representation of /x/ as is.
void
binwrite(std::ostream& out, float x)
{
  // IMPORTANT: this is not robust against the big/little endian
  // issue.
  char const* const raw = reinterpret_cast<char const*>(&x);
  out.write(raw,sizeof(x));
}
// Reads sizeof(float) raw bytes from /in/ straight into /x/.
void
binread(std::istream& in, float& x)
{
  // IMPORTANT: this is not robust against the big/little endian
  // issue.
  char* const raw = reinterpret_cast<char*>(&x);
  in.read(raw,sizeof(float));
}
// Decodes a uint16_t in variable-length encoding (seven payload bits
// per byte, least significant group first, high bit set on the last
// byte) from memory at /p/.
// @return pointer to the first byte after the number
template<>
char const*
binread<uint16_t>(char const* p, uint16_t& buf)
{
  buf = 0;
  for (unsigned shift = 0; shift <= 14; shift += 7)
    {
      // test the stop bit on the unsigned byte value; "*p < 0" only
      // works where plain char is signed
      unsigned char const byte = static_cast<unsigned char>(*p++);
      buf += uint16_t(byte & 0x7f) << shift;
      if (byte & 0x80) return p;
    }
  // three bytes without a stop bit: not a valid uint16_t
  assert(false && "binread<uint16_t>: missing stop bit");
  return p;
}
// Decodes a uint32_t in variable-length encoding (seven payload bits
// per byte, least significant group first, high bit set on the last
// byte) from memory at /p/.
// @return pointer to the first byte after the number
template<>
char const*
binread<uint32_t>(char const* p, uint32_t& buf)
{
  buf = 0;
  for (unsigned shift = 0; shift <= 28; shift += 7)
    {
      // test the stop bit on the unsigned byte value; "*p < 0" only
      // works where plain char is signed
      unsigned char const byte = static_cast<unsigned char>(*p++);
      buf += uint32_t(byte & 0x7f) << shift;
      if (byte & 0x80) return p;
    }
  // five bytes without a stop bit: not a valid uint32_t
  assert(false && "binread<uint32_t>: missing stop bit");
  return p;
}
// Decodes a filepos_type in variable-length encoding (seven payload
// bits per byte, least significant group first, high bit set on the
// last byte) from memory at /p/; reads at most ten bytes.
// @return pointer to the first byte after the number
template<>
char const*
binread<filepos_type>(char const* p, filepos_type& buf)
{
  buf = 0;
  for (unsigned shift = 0; shift <= 63; shift += 7)
    {
      // test the stop bit on the unsigned byte value; "*p < 0" only
      // works where plain char is signed
      unsigned char const byte = static_cast<unsigned char>(*p++);
      buf += filepos_type(byte & 0x7f) << shift;
      if (byte & 0x80) return p;
    }
  // ten bytes without a stop bit: not a valid number
  assert(false && "binread<filepos_type>: missing stop bit");
  return p;
}
// Reads a float stored as raw bytes (host byte order) at /p/.
// @return pointer to the first byte after the number
template<>
char const*
binread<float>(char const* p, float& buf)
{
  // copy the bytes: /p/ need not be suitably aligned for a float, so
  // it must not be dereferenced through a casted pointer
  std::memcpy(&buf,p,sizeof(float));
  return p+sizeof(float);
}
} // end namespace ugdiss

207
moses/mm/tpt_pickler.h Normal file
View File

@ -0,0 +1,207 @@
// -*- c++ -*-
// (c) 2006,2007,2008 Ulrich Germann
#ifndef __Pickler
#define __Pickler
#include<iostream>
#include<string>
#include<vector>
#include<map>
#include "tpt_typedefs.h"
#include "num_read_write.h"
#include <cassert>
namespace ugdiss
{
/// Utility method placed here for lack of a better place
/// @return the size of file fname.
uint64_t getFileSize(const std::string& fname);
/**
* The following functions write and read data in a compact binary
* representation. Write and read errors can be checked directly
* on the ostream object after the function call, so no return value is
* necessary.*/
// binwrite: one overload per supported value type; /data/ is passed by value
// (by const reference for std::string).
void binwrite(std::ostream& out, char data);
void binwrite(std::ostream& out, unsigned char data);
void binwrite(std::ostream& out, unsigned short data);
void binwrite(std::ostream& out, unsigned int data);
void binwrite(std::ostream& out, unsigned long data);
void binwrite(std::ostream& out, size_t data);
void binwrite(std::ostream& out, unsigned long long data);
void binwrite(std::ostream& out, std::string const& data);
void binwrite(std::ostream& out, float data);
// binread: counterparts of the binwrite overloads above; the value read is
// stored in /data/.
void binread(std::istream& in, char &data);
void binread(std::istream& in, unsigned char &data);
void binread(std::istream& in, unsigned short &data);
void binread(std::istream& in, unsigned int &data);
void binread(std::istream& in, unsigned long &data);
void binread(std::istream& in, size_t &data);
void binread(std::istream& in, unsigned long long &data);
void binread(std::istream& in, std::string &data);
void binread(std::istream& in, float &data);
// write/read: a second family of stream I/O functions that return the stream.
// NOTE(review): presumably these use a plain fixed-width layout rather than
// the compact binwrite/binread encoding -- confirm against tpt_pickler.cc.
std::ostream& write(std::ostream& out, char x);
std::ostream& write(std::ostream& out, unsigned char x);
std::ostream& write(std::ostream& out, short x);
std::ostream& write(std::ostream& out, unsigned short x);
std::ostream& write(std::ostream& out, long x);
std::ostream& write(std::ostream& out, size_t x);
std::ostream& write(std::ostream& out, float x);
std::istream& read(std::istream& in, char& x);
std::istream& read(std::istream& in, unsigned char& x);
std::istream& read(std::istream& in, short& x);
std::istream& read(std::istream& in, unsigned short& x);
std::istream& read(std::istream& in, long& x);
std::istream& read(std::istream& in, size_t& x);
std::istream& read(std::istream& in, float& x);
// In-memory readers: take a read position /p/ and return a char const*.
// The pair and vector definitions further down in this header return the
// position after the last element they read.
// Pointer-destination overload; defined below as a forward to the
// reference overload.
template<typename WHATEVER>
char const*
binread(char const* p, WHATEVER* buf);
// Generic numeric reader; only declared in this header.
template<typename numtype>
char const*
binread(char const* p, numtype& buf);
// Forward declarations of the container helpers defined below.
template<typename K, typename V>
void binwrite(std::ostream& out, std::pair<K,V> const& data);
template<typename K, typename V>
void binread(std::istream& in, std::pair<K,V>& data);
template<typename K, typename V>
char const* binread(char const* p, std::pair<K,V>& data);
template<typename V>
char const* binread(char const* p, std::vector<V>& v);
/// Read a std::pair from memory: first the .first member, then .second.
/// @param p    read position
/// @param data destination pair
/// @return position after the second member
template<typename K, typename V>
char const* binread(char const* p, std::pair<K,V>& data)
{
#ifdef VERIFY_TIGHT_PACKING
  assert(p);
#endif
  char const* const afterFirst = binread(p, data.first);
  return binread(afterFirst, data.second);
}
/// Read a std::vector from memory: an element count (size_t) followed by
/// that many elements.  /v/ is resized to the count before the elements
/// are read in place.
/// @param p read position
/// @param v destination vector
/// @return position after the last element
template<typename V>
char const* binread(char const* p, std::vector<V>& v)
{
#ifdef VERIFY_TIGHT_PACKING
  assert(p);
#endif
  size_t count;
  p = binread(p, count);
  v.resize(count);
  for (typename std::vector<V>::iterator it = v.begin(); it != v.end(); ++it)
    p = binread(p, *it);
  return p;
}
/// Convenience wrapper: read a T from /in/ via read(in,T&) and return it
/// by value.  T must be default-constructible.
template<typename T>
T read(std::istream& in)
{
  T value;
  read(in, value);
  return value;
}
/// Convenience wrapper: read a T from /in/ via binread(in,T&) and return it
/// by value.  T must be default-constructible.
template<typename T>
T binread(std::istream& in)
{
  T value;
  binread(in, value);
  return value;
}
/// Write a std::vector: the element count first, then every element in
/// order, each via the matching binwrite overload.
template<typename T>
void
binwrite(std::ostream& out, std::vector<T> const& data)
{
  typedef typename std::vector<T>::const_iterator iter_t;
  binwrite(out, data.size());
  for (iter_t it = data.begin(); it != data.end(); ++it)
    binwrite(out, *it);
}
/// Read a std::vector from a stream: an element count (size_t) followed by
/// that many elements.  /data/ is resized to the count and filled in place.
template<typename T>
void
binread(std::istream& in, std::vector<T>& data)
{
  size_t count;
  binread(in, count);
  data.resize(count);
  for (typename std::vector<T>::iterator it = data.begin();
       it != data.end(); ++it)
    binread(in, *it);
}
/// Read a std::map from a stream: an entry count (size_t) followed by that
/// many key/value pairs.  Any previous content of /data/ is discarded.
template<typename K, typename V>
void
binread(std::istream& in, std::map<K,V>& data)
{
  size_t remaining;
  K key;
  V value;
  binread(in, remaining);
  // The original author reports that this clear() is needed even when
  // /data/ is expected to be empty already; the reason is not known.
  data.clear();
  while (remaining > 0)
    {
      binread(in, key);
      binread(in, value);
      data[key] = value;
      --remaining;
    }
}
/// Write a std::map: the entry count first, then key and value of every
/// entry in the map's iteration order.
template<typename K, typename V>
void
binwrite(std::ostream& out, std::map<K,V> const& data)
{
  typedef typename std::map<K,V>::const_iterator iter_t;
  binwrite(out, data.size());
  iter_t it = data.begin();
  while (it != data.end())
    {
      binwrite(out, it->first);
      binwrite(out, it->second);
      ++it;
    }
}
/// Write a std::pair: .first followed by .second.
template<typename K, typename V>
void
binwrite(std::ostream& out, std::pair<K,V> const& data)
{
  K const& head = data.first;
  V const& tail = data.second;
  binwrite(out, head);
  binwrite(out, tail);
}
/// Read a std::pair from a stream: .first followed by .second.
template<typename K, typename V>
void
binread(std::istream& in, std::pair<K,V>& data)
{
  K& head = data.first;
  V& tail = data.second;
  binread(in, head);
  binread(in, tail);
}
/// Pointer-destination overload: reads into the object /buf/ points to by
/// forwarding to the reference overload.  /buf/ must not be null.
/// @return whatever the reference overload returns for *buf
template<typename WHATEVER>
char const*
binread(char const* p, WHATEVER* buf)
{
#ifdef VERIFY_TIGHT_PACKING
  assert(p);
#endif
  WHATEVER& target = *buf;
  return binread(p, target);
}
template<typename numtype>
char const*
binread(char const* p, numtype& buf);
} // end namespace ugdiss
#endif

633
moses/mm/tpt_tightindex.cc Normal file
View File

@ -0,0 +1,633 @@
// -*- c++ -*-
// (c) 2007,2008 Ulrich Germann
/* Functions for writing indices tightly (use only the bytes you need).
* The first bit indicates whether a byte belongs to a key or a value.
* The remaining 7 bits are part of the respective integer value.
* (c) 2007 Ulrich Germann
*/
//
// ugTightIndex.cc
//
// Made by Ulrich Germann
// Login <germann@germann-laptop>
//
// Started on Tue Jul 17 15:09:33 2007 Ulrich Germann
// Started on Tue Jul 17 15:09:33 2007 Ulrich Germann
//
#include <iostream>
#include <assert.h>
#include "tpt_tightindex.h"
namespace ugdiss
{
// std::string bitpattern(unsigned int s)
// {
// std::ostringstream out;
// size_t bit=1;
// for (size_t i = 31; i > 0; i--)
// out << (s&(bit<<i) ? 1 : 0);
// out << ((s&1) ? 1 : 0) ;
// return out.str();
// }
// std::string bitpattern(char c)
// {
// std::ostringstream out;
// out << ((c&-128) ? 1 : 0);
// // out << ".";
// out << ((c&64) ? 1 : 0);
// out << ((c&32) ? 1 : 0);
// out << ((c&16) ? 1 : 0);
// out << ((c&8) ? 1 : 0);
// out << ((c&4) ? 1 : 0);
// out << ((c&2) ? 1 : 0);
// out << ((c&1) ? 1 : 0);
// return out.str();
// }
// std::string bitpattern(unsigned char c)
// {
// std::ostringstream out;
// out << ((c&128) ? 1 : 0);
// out << ((c&64) ? 1 : 0);
// out << ((c&32) ? 1 : 0);
// out << ((c&16) ? 1 : 0);
// out << ((c&8) ? 1 : 0);
// out << ((c&4) ? 1 : 0);
// out << ((c&2) ? 1 : 0);
// out << ((c&1) ? 1 : 0);
// return out.str();
// }
// #define LOG_WRITE_ACTIVITY
// Write a number into a tight index, 7 bits per byte, least significant
// group first.
// @param out  destination stream
// @param data the number to write
// @param flag value of the top bit of every byte written; this bit is what
//             distinguishes key bytes from value bytes in the index
void tightwrite(std::ostream& out, uint64_t data, bool flag)
{
#ifdef LOG_WRITE_ACTIVITY
  size_t bytes_written=1;
  std::cerr << "starting at file position " << out.tellp()
            << ": tightwrite " << data;
  std::cerr << (flag ? " with flag 1 " : " with flag 0 ");
#endif
  // top bit set for flagged bytes, clear otherwise
  char const marker = flag ? char(-128) : char(0);
  for (; data >= 128; data >>= 7)
    {
      char const unit = char(data & 127) | marker;
      out.put(unit);
#ifdef LOG_WRITE_ACTIVITY
      bytes_written++;
#endif
    }
  // final (most significant) group
  char const last = char(data & 127) | marker;
  out.put(last);
#ifdef LOG_WRITE_ACTIVITY
  std::cerr << " in " << bytes_written << " bytes" << std::endl;
#endif
}
// For the code below: does it make a difference if I hard-code the
// unraveled loop or does code optimization by the compiler take care
// of that?
#define DEBUG_TIGHTREAD 0
// Read one number (a key or a value) from a tight index stream.
// filepos_type must be at least as large as count_type.
//
// The first byte decides the kind: bytes >= 128 carry the flag bit, bytes
// < 128 do not.  Reading continues, 7 bits per byte and least significant
// group first, for as long as the next byte is of the same kind and the
// read position is before /stop/.
//
// @param in   input stream; at least one byte must be available
// @param stop upper bound for the read position; pos_type(0) means
//             "whatever the stream buffer currently has available".
//             In either case the bound is capped by in_avail().
// @return the decoded number
//
// The 7-bit groups are widened to filepos_type before shifting.  The
// previous size_t casts dropped the high groups on platforms where size_t
// is 32 bits wide.
filepos_type
tightread(std::istream& in, std::ios::pos_type stop)
{
  assert(in.rdbuf()->in_avail() > 0);
  filepos_type data = 0;
  short int bitshift = 7;
  int pos = in.tellg(); // only used for the diagnostic message below
#if DEBUG_TIGHTREAD
  if (debug)
    cerr << bitpattern(uint(in.peek())) << " " << in.peek()
         << " pos=" << in.tellg() << "\n";
#endif
  int buf = in.get();
  if (stop == std::ios::pos_type(0))
    stop = size_t(in.tellg())+in.rdbuf()->in_avail();
  else
    stop = std::min(size_t(stop),size_t(in.tellg())+in.rdbuf()->in_avail());
  if (buf < 0) // get() failed
    std::cerr << "number read: " << buf << " " << pos << " "
              << in.tellg() << std::endl;
  assert (buf>=0);
  if (buf >= 128) // flag bit is 1
    {
      data = buf-128; // unset the bit
      while (in.tellg() < stop && in.peek() >= 128)
        {
#if DEBUG_TIGHTREAD
          if (debug)
            cerr << bitpattern(uint(in.peek())) << " " << in.peek();
#endif
          data += filepos_type(in.get()-128)<<bitshift;
          bitshift += 7;
#if DEBUG_TIGHTREAD
          if (debug)
            cerr << " " << data << " pos=" << in.tellg() << std::endl;
#endif
        }
    }
  else // flag bit is 0
    {
      data = buf;
      while (in.tellg() < stop && in.peek() < 128)
        {
#if DEBUG_TIGHTREAD
          if (debug)
            cerr << bitpattern(uint(in.peek())) << " " << in.peek();
#endif
          data += filepos_type(in.get())<<bitshift;
          bitshift += 7;
#if DEBUG_TIGHTREAD
          if (debug)
            cerr << " " << data << " pos=" << in.tellg() << "\n";
#endif
        }
    }
  return data;
}
#define DEBUG_TIGHTFIND 0
#if DEBUG_TIGHTFIND
bool debug=true;
#endif
// Seek /in/ to the middle of [start,stop) and then move forward to the
// beginning of the next index entry.
// @return false if end-of-file is detected while skipping key bytes,
//         true otherwise
bool
tightfind_midpoint(std::istream& in, filepos_type start, filepos_type stop)
{
in.seekg((start+stop)/2);
// Jump approximately to the middle. Since we might land in the
// middle of a number, we need to find the start of the next
// [index key/file offset] pair first. Bytes belonging to an index
// key have the leftmost bit set to 0, bytes belonging to a file
// offset have it set to 1
// if we landed in the middle of an index key, skip to the end of it
// Note: in.get() consumes the byte it tests, so the byte that ends this
// loop (the first byte >= 128) has already been read when the loop exits.
while (static_cast<filepos_type>(in.tellg()) < stop && in.get() < 128)
{
#if DEBUG_TIGHTFIND
if (debug)
{
in.unget();
char c = in.get();
std::cerr << in.tellg() << " skipped key byte " << c << std::endl;
}
#endif
if (in.eof()) return false;
}
// Also skip the associated file offset:
// (peek() is used here so that the first byte < 128, i.e. the first byte
// of the next key, is left in the stream)
while (static_cast<filepos_type>(in.tellg()) < stop && in.peek() >= 128)
{
#if DEBUG_TIGHTFIND
int r = in.get();
if (debug)
std::cerr << in.tellg() << " skipped value byte " << r
<< " next is " << in.peek()
<< std::endl;
#else
in.get();
#endif
}
return true;
}
// In-memory variant: return the start of the index entry at or before the
// midpoint of [start,stop).
//
// Starting at the midpoint, step back over flagged bytes (top bit set,
// i.e. file offset bytes) and then over unflagged bytes (index key bytes).
// If that stops on a flagged byte of the preceding entry, the entry begins
// one byte further on; otherwise the scan has reached /start/.
//
// The flag is tested with a bit mask rather than with "*mp < 0": the sign
// test only works where plain char is signed and never fires on platforms
// with unsigned char (e.g. Linux on ARM or PowerPC).
char const*
tightfind_midpoint(char const* const start,
                   char const* const stop)
{
  char const* mp = start + (stop - start)/2;
  while ((*mp & 0x80) != 0 && mp > start) --mp; // back over offset bytes
  while ((*mp & 0x80) == 0 && mp > start) --mp; // back over key bytes
  return ((*mp & 0x80) != 0) ? mp + 1 : mp;
}
// Scan the entries in [start,stop) front to back for /key/.
// Each stored key carries flag bits in its lowest FLAGBITS bits; they are
// shifted away before the comparison.  The scan stops at the first stored
// key that is not smaller than /key/.
// NOTE(review): this finds /key/ only if entries are stored in ascending
// key order -- presumably guaranteed by the index writer; confirm.
// @param flags on success, receives the flag bits of the matching entry
// @return true if /key/ was found; the stream is then left where
//         tightread() stopped after reading the matching key
bool
linear_search(std::istream& in, filepos_type start, filepos_type stop,
id_type key, unsigned char& flags)
{ // performs a linear search in the range
in.seekg(start);
#if DEBUG_TIGHTFIND
if (debug) std::cerr << in.tellg() << " ";
#endif
// ATTENTION! The bitshift operations below are important:
// We use some of the bits in the key value to store additional
// information about what and where node information is stored.
id_type foo;
for(foo = tightread(in,stop);
(foo>>FLAGBITS) < key;
foo = tightread(in,stop))
{
// skip the value associated with key /foo/
while (static_cast<filepos_type>(in.tellg()) < stop
&& in.peek() >= 128) in.get();
#if DEBUG_TIGHTFIND
if (debug)
std::cerr << (foo>>FLAGBITS) << " [" << key << "] "
<< in.tellg() << std::endl;
#endif
// end of range reached without seeing a key >= /key/
if (in.tellg() == std::ios::pos_type(stop))
return false; // not found
}
#if DEBUG_TIGHTFIND
if (debug && (foo>>FLAGBITS)==key)
std::cerr << "found entry for " << key << std::endl;
std::cerr << "current file position is " << in.tellg()
<< " (value read: " << key << std::endl;
#endif
assert(static_cast<filepos_type>(in.tellg()) < stop);
if ((foo>>FLAGBITS)==key)
{
// the flags live in the low byte of the stored key; keep only FLAGMASK
flags = (foo%256);
flags &= FLAGMASK;
return true;
}
else
return false;
}
// Recursive binary search for /key/ among the index entries stored in the
// byte range [start,stop) of /in/.  Falls back to linear_search() when the
// range is short or when the midpoint search ran to the end of the range.
// @param flags on success, receives the flag bits stored with the key
// @return true if /key/ was found
bool
tightfind(std::istream& in, filepos_type start, filepos_type stop,
id_type key, unsigned char& flags)
{
// returns true if the value is found
#if DEBUG_TIGHTFIND
if (debug)
std::cerr << "looking for " << key
<< " in range [" << start << ":" << stop << "]" << std::endl;
#endif
if (start==stop) return false;
assert(stop>start);
if ((start+1)==stop) return false; // list is empty
unsigned int const granularity = sizeof(filepos_type)*5;
// granularity: point where we should switch to linear search,
// because otherwise we might skip over the entry we are looking for
// because we land right in the middle of it.
if (stop > start + granularity)
if (!tightfind_midpoint(in,start,stop))
return false; // something went wrong (empty index)
if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop))
{ // If the search range is very short, tightfind_midpoint might skip the
// entry we are looking for. In this case, we can afford a linear
// search
return linear_search(in,start,stop,key,flags);
}
// perform binary search
// curpos: start of the entry at the midpoint; it becomes the exclusive
// upper bound if we have to continue in the lower half
filepos_type curpos = in.tellg();
id_type foo = tightread(in,stop);
id_type tmpid = foo>>FLAGBITS; // strip the flag bits before comparing
if (tmpid == key)
{
flags = foo%256;
flags &= FLAGMASK;
#if DEBUG_TIGHTFIND
if (debug) std::cerr << "found entry for " << key << std::endl;
#endif
return true; // done, found
}
else if (tmpid > key)
{ // look in the lower half
#if DEBUG_TIGHTFIND
if (debug) std::cerr << foo << " > " << key << std::endl;
#endif
return tightfind(in,start,curpos,key,flags);
}
else
{ // look in the upper half
while (static_cast<filepos_type>(in.tellg()) < stop
&& in.rdbuf()->in_avail() > 0 // is that still necessary???
&& in.peek() >= 128)
in.get(); // skip associated value
// nothing left after the midpoint entry: key is not in the index
if (in.rdbuf()->in_avail() == 0 || in.tellg() == std::ios::pos_type(stop))
return false;
#if DEBUG_TIGHTFIND
if (debug) std::cerr << foo << " < " << key << std::endl;
#endif
return tightfind(in,in.tellg(),stop,key,flags);
}
}
// In-memory variant: recursive binary search for /key/ among the index
// entries stored in [start,stop).
// @param flags on success, receives the flag bits stored with the key
// @return pointer to the first byte after the matching key, or NULL if
//         /key/ is not in the range
char const*
tightfind(char const* const start,
          char const* const stop,
          id_type key,
          unsigned char& flags)
{
  if (start==stop) return NULL;
  assert(stop>start);
  if ((start+1)==stop) return NULL; // list is empty
  char const* p = tightfind_midpoint(start,stop);
  // if ids can be larger than 67,108,864 on 32-bit machines
  // (i.e., 2**(28-flagbits)), dest must be declared as uint64_t
  size_t foo;
  char const* after = tightread(p,stop,foo);
  id_type tmpId = foo>>FLAGBITS; // strip the flag bits before comparing
  if (tmpId == key)
    {
      flags = foo%256;
      flags &= FLAGMASK;
      return after;
    }
  else if (tmpId > key)
    { // look in the lower half
      return tightfind(start,p,key,flags);
    }
  else
    { // look in the upper half
      // Skip the value bytes (top bit set) that belong to the midpoint key.
      // The bound is checked BEFORE dereferencing: tightread() may return
      // /stop/, and the old "*after<0 && ++after < stop" read one byte past
      // the range in that case.  The bit mask also makes the test
      // independent of whether plain char is signed.
      while (after < stop && (*after & 0x80) != 0) ++after;
      if (after >= stop) return NULL;
      return tightfind(after,stop,key,flags);
    }
}
// In-memory variant without flag bits: recursive binary search for /key/
// among the index entries stored in [start,stop).  Stored keys are compared
// to /key/ as they are, without shifting.
// @return pointer to the first byte after the matching key, or NULL if
//         /key/ is not in the range
char const*
tightfind_noflags(char const* const start,
                  char const* const stop,
                  id_type key)
{
  if (start==stop) return NULL;
  assert(stop>start);
  if ((start+1)==stop) return NULL; // list is empty
  char const* p = tightfind_midpoint(start,stop);
  // if ids can be larger than 67,108,864 on 32-bit machines
  // (i.e., 2**(28-flagbits)), dest must be declared as uint64_t
  size_t foo;
  char const* after = tightread(p,stop,foo);
  if (foo == key)
    return after;
  else if (foo > key)
    { // look in the lower half
      return tightfind_noflags(start,p,key);
    }
  else
    { // look in the upper half
      // Skip the value bytes (top bit set) that belong to the midpoint key.
      // The bound is checked BEFORE dereferencing: tightread() may return
      // /stop/, and the old "*after<0 && ++after < stop" read one byte past
      // the range in that case.  The bit mask also makes the test
      // independent of whether plain char is signed.
      while (after < stop && (*after & 0x80) != 0) ++after;
      if (after >= stop) return NULL;
      return tightfind_noflags(after,stop,key);
    }
}
bool
linear_search_noflags(std::istream& in, filepos_type start,
filepos_type stop, id_type key)
{ // performs a linear search in the range
std::ios::pos_type mystop = stop;
in.seekg(start);
id_type foo;
for(foo = tightread(in,stop); foo < key; foo = tightread(in,stop))
{
// skip the value associated with key /foo/
while (in.tellg() < mystop && in.peek() >= 128)
in.get();
if (in.tellg() == mystop)
return false; // not found
}
assert(in.tellg() < mystop);
return (foo==key);
}
// Recursive binary search for /key/ among the index entries stored in the
// byte range [start,stop) of /in/; keys are stored without flag bits.
// Falls back to linear_search_noflags() when the range is short or when the
// midpoint search ran to the end of the range.
// @return true if /key/ was found
bool
tightfind_noflags(std::istream& in, filepos_type start,
filepos_type stop, id_type key)
{
// returns true if the value is found
if (start==stop) return false;
assert(stop>start);
if ((start+1)==stop) return false; // list is empty
// granularity: point where we should switch to linear search,
// because otherwise we might skip over the entry we are looking for
// because we land right in the middle of it.
unsigned int const granularity = sizeof(filepos_type)*5;
// UG: why 5? we should be able to get away with less!
if (stop > start + granularity)
if (!tightfind_midpoint(in,start,stop))
return false; // something went wrong (empty index)
// If the search range is very short, tightfind_midpoint might skip the
// entry we are looking for. In this case, we can afford a linear
// search
if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop))
return linear_search_noflags(in,start,stop,key);
// Otherwise, perform binary search
// curpos: start of the entry at the midpoint; it becomes the exclusive
// upper bound if we have to continue in the first half
filepos_type curpos = in.tellg();
id_type foo = tightread(in,stop);
if (foo == key)
return true; // done, found
else if (foo > key) // search first half
return tightfind_noflags(in,start,curpos,key);
else // search second half
{
std::ios::pos_type mystop = stop;
while (in.tellg() < mystop
&& in.rdbuf()->in_avail() > 0 // is that still necessary???
&& in.peek() >= 128)
in.get(); // skip associated value
// nothing left after the midpoint entry: key is not in the index
if (in.rdbuf()->in_avail() == 0 || in.tellg() == mystop)
return false;
return tightfind_noflags(in,in.tellg(),stop,key);
}
}
// Same idea as tightwrite, but with storage units of two bytes: each unit
// carries 15 payload bits (least significant group first) and /flag/ in its
// top bit.  Units are written in the host's in-memory representation of
// short int.
void tightwrite2(std::ostream& out, size_t data, bool flag)
{
  size_t const marker = flag ? 32768 : 0; // top bit of a 16-bit unit
  for (;;)
    {
      short int unit = (data % 32768) + marker;
      out.write(reinterpret_cast<char*>(&unit), 2);
      if (data < 32768) // = 2^15: that was the last group
        break;
      data >>= 15;
    }
}
// Read a number of up to 10 bytes (7 payload bits each, least significant
// group first) from memory into a 64-bit destination.
//
// The top bit of the first byte decides the kind of number; reading stops
// at /stop/ or at the first byte whose top bit differs.  After the tenth
// byte the function returns without looking at the byte that follows.
//
// The top bit is tested with a mask instead of "*start < 0", so the result
// does not depend on whether plain char is signed on the target platform.
//
// @param start first byte of the number; must be readable
// @param stop  non-inclusive end of the read range
// @param dest  receives the decoded value
// @return first position after the bytes consumed
char const*
tightread8(char const* start,
           char const* stop,
           uint64_t& dest)
{
  bool const flagged = (*start & 0x80) != 0;
  dest = uint64_t(*start & 0x7f);
  for (int shift = 7; shift <= 63; shift += 7)
    {
      if (++start == stop || ((*start & 0x80) != 0) != flagged)
        return start;
      dest += uint64_t(*start & 0x7f) << shift;
    }
  // all ten bytes were consumed; /start/ is still on the last one
  assert(start<stop);
  return ++start;
}
// Read a number of up to 5 bytes (7 payload bits each, least significant
// group first) from memory into a 32-bit destination.
//
// The top bit of the first byte decides the kind of number; reading stops
// at /stop/ or at the first byte whose top bit differs.  After the fifth
// byte the function returns without looking at the byte that follows.
//
// The top bit is tested with a mask instead of "*start < 0", so the result
// does not depend on whether plain char is signed on the target platform.
//
// @param start first byte of the number; must be readable
// @param stop  non-inclusive end of the read range
// @param dest  receives the decoded value
// @return first position after the bytes consumed
char const*
tightread4(char const* start,
           char const* stop,
           uint32_t& dest)
{
  bool const flagged = (*start & 0x80) != 0;
  dest = uint32_t(*start & 0x7f);
  for (int shift = 7; shift <= 28; shift += 7)
    {
      if (++start == stop || ((*start & 0x80) != 0) != flagged)
        return start;
      dest += uint32_t(*start & 0x7f) << shift;
    }
  // all five bytes were consumed; /start/ is still on the last one
  assert(start<stop);
  return ++start;
}
// Read a number of up to 3 bytes (7 payload bits each, least significant
// group first) from memory into a 16-bit destination; bits that do not fit
// into 16 bits are lost.
//
// The top bit of the first byte decides the kind of number; reading stops
// at /stop/ or at the first byte whose top bit differs.  After the third
// byte the function returns without looking at the byte that follows.
//
// The top bit is tested with a mask instead of "*start < 0", so the result
// does not depend on whether plain char is signed on the target platform.
//
// @param start first byte of the number; must be readable
// @param stop  non-inclusive end of the read range
// @param dest  receives the decoded value
// @return first position after the bytes consumed
char const*
tightread2(char const* start,
           char const* stop,
           uint16_t& dest)
{
  bool const flagged = (*start & 0x80) != 0;
  dest = uint16_t(*start & 0x7f);
  for (int shift = 7; shift <= 14; shift += 7)
    {
      if (++start == stop || ((*start & 0x80) != 0) != flagged)
        return start;
      dest += uint32_t(*start & 0x7f) << shift;
    }
  // all three bytes were consumed; /start/ is still on the last one
  assert(start<stop);
  return ++start;
}
} // end namespace ugdiss

177
moses/mm/tpt_tightindex.h Normal file
View File

@ -0,0 +1,177 @@
// -*- c++ -*-
// (c) 2007,2008 Ulrich Germann
/* Functions for writing indices tightly (use only the bytes you need).
* The first bit indicates whether a byte belongs to a key or a value.
* The remaining 7 bits are part of the respective integer value.
*/
#ifndef __ugTightIndex
#define __ugTightIndex
#include <map>
#include <iostream>
#include <sstream>
#include "tpt_typedefs.h"
#include <cassert>
// using namespace std;
#ifndef uchar
#endif
#define FLAGBITS 2
#define FLAGMASK (uchar(3))
#define HAS_VALUE_MASK (uchar(2))
#define HAS_CHILD_MASK (uchar(1))
extern bool debug;
namespace ugdiss
{
// void tightwritex(iostream& out, size_t data, bool flag);
void
tightwrite(std::ostream& out, uint64_t data, bool flag);
filepos_type
tightread(std::istream& in, std::ios::pos_type stop);
bool
tightfind(std::istream& in,
filepos_type start,
filepos_type stop,
id_type key,
unsigned char& flags);
bool
tightfind_noflags(std::istream& in,
filepos_type start,
filepos_type stop,
id_type key);
char const*
tightfind(char const* const start,
char const* const stop,
id_type key,
unsigned char& flags);
char const*
tightfind_noflags(char const* const start,
char const* const stop,
id_type key);
/** Move the read position of istream /in/ to the first entry after the
* midpoint of file position range [start,stop) in a 'tight' index.
* @param in the data input stream
* @param start start of the search range
* @param stop end of the search range
* @return true if no errors occurred
*/
bool
tightfind_midpoint(std::istream& in, filepos_type start, filepos_type stop);
// the bitpattern functions below are for debugging
// They return a string showing the bits of the argument value
// std::string bitpattern(unsigned int s);
// std::string bitpattern(unsigned char c);
// std::string bitpattern(char c);
/** Read a number from a tight index directly from a memory location.
* @param start start of read range
* @param stop non-inclusive end of read range
* @param dest destination
* @return first memory position after the number
*/
char const*
tightread2(char const* start, char const* stop, uint16_t& dest);
char const*
tightread4(char const* start, char const* stop, uint32_t& dest);
char const*
tightread8(char const* start, char const* stop, uint64_t& dest);
/** Read a number from a tight index in memory into a destination of
 *  arbitrary type, dispatching on the size of that type to tightread2,
 *  tightread4 or tightread8.  /dest/ is reinterpreted as the unsigned
 *  integer type of the same size.  Any other size trips the assertion
 *  and, with assertions disabled, yields NULL.
 */
template<typename numType>
char const*
tightread(char const* start, char const* stop, numType& dest)
{
  switch (sizeof(numType))
    {
    case 2:
      return tightread2(start,stop,reinterpret_cast<uint16_t&>(dest));
    case 4:
      return tightread4(start,stop,reinterpret_cast<uint32_t&>(dest));
    case 8:
      return tightread8(start,stop,reinterpret_cast<uint64_t&>(dest));
    default:
      break;
    }
  assert(0);
  return NULL;
}
// char const*
// tightread(char const* start, char const* stop, uint64_t& dest);
// char const*
// tightread(char const* start, char const* stop, filepos_type& dest);
#if 0
template<typename dtype>
char const*
tightread(char const* start,
char const* stop,
dtype& dest)
{
static char bitmask=127;
dest = 0;
if (*start < 0)
{
dest = (*start)&bitmask;
if (++start==stop || *start >= 0) return start;
dest += dtype((*start)&bitmask)<<7;
if (++start==stop || *start >= 0) return start;
dest += dtype((*start)&bitmask)<<14;
if (++start==stop || *start >= 0) return start;
dest += dtype((*start)&bitmask)<<21;
if (++start==stop || *start >= 0) return start;
dest += dtype((*start)&bitmask)<<28;
if (++start==stop || *start >= 0) return start;
assert(sizeof(dtype) > 4);
dest += dtype((*start)&bitmask)<<35;
if (++start==stop || *start >= 0) return start;
dest += dtype((*start)&bitmask)<<42;
if (++start==stop || *start >= 0) return start;
dest += dtype((*start)&bitmask)<<49;
if (++start==stop || *start >= 0) return start;
dest += dtype((*start)&bitmask)<<56;
if (++start==stop || *start >= 0) return start;
dest += dtype((*start)&bitmask)<<63;
}
else
{
dest = *start;
if (++start==stop || *start < 0) return start;
dest += dtype(*start)<<7;
if (++start==stop || *start < 0) return start;
dest += dtype(*start)<<14;
if (++start==stop || *start < 0) return start;
dest += dtype(*start)<<21;
if (++start==stop || *start < 0) return start;
dest += dtype(*start)<<28;
if (++start==stop || *start < 0) return start;
assert(sizeof(dtype) > 4);
dest += dtype(*start)<<35;
if (++start==stop || *start < 0) return start;
dest += dtype(*start)<<42;
if (++start==stop || *start < 0) return start;
dest += dtype(*start)<<49;
if (++start==stop || *start < 0) return start;
dest += dtype(*start)<<56;
if (++start==stop || *start < 0) return start;
dest += dtype(*start)<<63;
}
assert(start<stop);
return ++start;
}
#endif
}
#endif

386
moses/mm/tpt_tokenindex.cc Normal file
View File

@ -0,0 +1,386 @@
// -*- c++ -*-
// (c) 2007-2013 Ulrich Germann
#include <sstream>
#include <string.h>
#include <algorithm>
#include <iostream>
#include <stdexcept>
#include <boost/pool/pool_alloc.hpp>
#include "tpt_tokenindex.h"
using namespace std;
namespace ugdiss
{
// Construct an empty, non-dynamic index.
// @param unkToken label reported for unknown ids
//
// Every scalar member is given a defined value here.  Previously /dynamic/,
// /startIdx/, /endIdx/ and /comp.base/ were left uninitialized although
// operator[] and setDynamic() read them, so using an index before open()
// was undefined behaviour.
TokenIndex::
TokenIndex(string unkToken)
  : ridx(0)
  , unkLabel(unkToken)
  , unkId(1)
  , numTokens(0)
  , lock(new boost::mutex())
  , dynamic(false)
  , startIdx(NULL)
  , endIdx(NULL)
{
  comp.base = NULL;
}
#if 0
TokenIndex::
TokenIndex(string fname, string unkToken,bool dyna)
: ridx(0),unkLabel(unkToken)
{
this->open(fname,unkToken,dyna);
};
#endif
// Memory-map the token index file /fname/ and set up the lookup tables.
//
// File layout as read here: a 4-byte token count, an id_type UNK id,
// /count/ Entry records, then the string data the Entry offsets refer to.
//
// @param fname    path of the index file
// @param unkToken if non-empty, the UNK id is looked up under this label;
//                 if the label is not in the index, the UNK id becomes the
//                 number of tokens.  If empty, the id stored in the file
//                 is used.
// @param dyna     enable dynamic assignment of ids to unknown tokens
// @throws std::runtime_error if the file does not exist, cannot be opened,
//         or is too small for its own header or index table
//
// NOTE(review): /unkLabel/ is not updated from /unkToken/ here -- confirm
// whether that is intended.
void
TokenIndex::
open(string fname, string unkToken,bool dyna)
{
  if (access(fname.c_str(),F_OK))
    {
      ostringstream msg;
      msg << "TokenIndex::open: File '" << fname << "' does not exist.";
      throw std::runtime_error(msg.str().c_str());
    }
  file.open(fname);
  if (!file.is_open())
    {
      // Used to be assert(0), which is compiled out under NDEBUG and then
      // let the code below dereference an unmapped file.
      ostringstream msg;
      msg << "Error opening file " << fname;
      throw std::runtime_error(msg.str().c_str());
    }
  size_t const headerSize = 4 + sizeof(id_type);
  if (file.size() < headerSize)
    {
      ostringstream msg;
      msg << "TokenIndex::open: File '" << fname
          << "' is too small to contain a token index header.";
      throw std::runtime_error(msg.str().c_str());
    }
  this->numTokens = *(reinterpret_cast<uint32_t const*>(file.data()));
  unkId = *(reinterpret_cast<id_type const*>(file.data()+4));
  if ((file.size() - headerSize) / sizeof(Entry) < numTokens)
    {
      ostringstream msg;
      msg << "TokenIndex::open: File '" << fname
          << "' is too small for the " << numTokens
          << " entries announced in its header.";
      throw std::runtime_error(msg.str().c_str());
    }
  startIdx = reinterpret_cast<Entry const*>(file.data()+headerSize);
  endIdx = startIdx + numTokens;
  // string data starts right behind the entry table
  comp.base = reinterpret_cast<char const*>(endIdx);
  if (!unkToken.empty())
    {
      Entry const* bla = lower_bound(startIdx,endIdx,unkToken.c_str(),comp);
      unkId = ((bla < endIdx && unkToken == comp.base+bla->offset)
               ? bla->id
               : numTokens);
    }
  this->dynamic=dyna;
  if (dyna)
    {
      this->str2idExtra.reset(new map<string,id_type>());
      this->newWords.reset(new vector<string>());
    }
}
void
TokenIndex::
close()
{
file.close();
}
// Start with a null string base instead of leaving the pointer
// indeterminate; TokenIndex::open() sets the real base address.
TokenIndex::
CompFunc::
CompFunc()
  : base(NULL)
{}
// Ordering predicate for lower_bound: true if the string that entry /A/
// refers to (at base + A.offset) sorts before /w/ according to strcmp.
bool
TokenIndex::
CompFunc::
operator()(Entry const& A, char const* w)
{
  char const* const entryString = base + A.offset;
  return strcmp(entryString, w) < 0;
}
// Map a token string to its id.
// - Empty, non-dynamic index: "NULL" maps to 0, everything else to the
//   UNK id.
// - Token found in the static index: its stored id.
// - Otherwise: the UNK id, or -- in dynamic mode -- a newly assigned id
//   (number of pre-existing tokens + number of tokens added so far).
//   Dynamic additions are serialized with the instance mutex.
id_type
TokenIndex::
operator[](char const* p) const
{
  // The original expression "strcmp(p,"NULL") && unkId" is a boolean, so it
  // produced 1 rather than unkId whenever unkId was neither 0 nor 1.
  if (startIdx==endIdx && !dynamic)
    return strcmp(p,"NULL") ? unkId : id_type(0);
  Entry const* bla = lower_bound(startIdx,endIdx,p,comp);
  if (bla != endIdx && !strcmp(comp.base+bla->offset,p))
    return bla->id;
  if (!dynamic) return unkId;
  boost::lock_guard<boost::mutex> lk(*this->lock);
  // stuff below is new as of 2011-01-30, for dynamic adding of unknown items
  // IMPORTANT: numTokens is currently not changed; it is the number of
  // PRE-EXISTING TOKENS, not including dynamically added items
  map<string,id_type>::value_type newItem(p,str2idExtra->size()+numTokens);
  pair<map<string,id_type>::iterator,bool> foo = str2idExtra->insert(newItem);
  if (foo.second) // it actually is a new item
    newWords->push_back(foo.first->first);
  return foo.first->second;
}
// std::string convenience overload; forwards to the C-string lookup.
id_type
TokenIndex::
operator[](string const& w) const
{
  char const* const raw = w.c_str();
  return (*this)[raw];
}
// Build the id -> string table from the static index.
// The table starts out with one NULL slot per entry and grows whenever an
// entry's id lies beyond the current end, so slots of ids that no entry
// uses remain NULL.
vector<char const*>
TokenIndex::
reverseIndex() const
{
  size_t const entryCount = endIdx-startIdx;
  vector<char const*> table(entryCount,NULL);
  Entry const* e = startIdx;
  while (e != endIdx)
    {
      if (table.size() <= e->id)
        table.resize(e->id+1);
      table[e->id] = comp.base+e->offset;
      ++e;
    }
  return table;
}
// Map an id back to its token string (const version).
// Requires that the reverse index has been built beforehand; otherwise an
// error is printed and the process is terminated.
// Ids beyond the reverse index resolve to dynamically added words when the
// index is dynamic, and to the UNK label otherwise.
char const* const
TokenIndex::
operator[](id_type id) const
{
  if (ridx.empty())
    {
      cerr << "FATAL ERROR: You need to call iniReverseIndex() "
           << "on the TokenIndex class before using operator[](id_type id)."
           << endl;
      assert(0);
      exit(1);
    }
  size_t const known = ridx.size();
  if (id < known)
    return ridx[id];
  boost::lock_guard<boost::mutex> lk(*this->lock);
  bool const isNewWord = dynamic && id < known+newWords->size();
  if (isNewWord)
    return (*newWords)[id-known].c_str();
  return unkLabel.c_str();
}
// Build the reverse (id -> string) index unless it is already non-empty.
void
TokenIndex::
iniReverseIndex()
{
  if (!ridx.empty()) return;
  ridx = reverseIndex();
}
// Map an id back to its token string (non-const version).
// Builds the reverse index on first use.  Ids beyond the reverse index
// resolve to dynamically added words when the index is dynamic, and to the
// UNK label otherwise.
char const* const
TokenIndex::
operator[](id_type id)
{
  if (ridx.empty())
    ridx = reverseIndex();
  size_t const known = ridx.size();
  if (id < known)
    return ridx[id];
  boost::lock_guard<boost::mutex> lk(*this->lock);
  bool const isNewWord = dynamic && id < known+newWords->size();
  if (isNewWord)
    return (*newWords)[id-known].c_str();
  return unkLabel.c_str();
}
// Render a sequence of ids as their token strings separated by single
// blanks.  Builds the reverse index first if necessary.
string
TokenIndex::
toString(vector<id_type> const& v)
{
  if (ridx.empty())
    ridx = reverseIndex();
  ostringstream buf;
  char const* separator = "";
  for (vector<id_type>::const_iterator it = v.begin(); it != v.end(); ++it)
    {
      buf << separator << (*this)[*it];
      separator = " ";
    }
  return buf.str();
}
// Render a sequence of ids as their token strings separated by single
// blanks (const version).  The reverse index must have been built already.
string
TokenIndex::
toString(vector<id_type> const& v) const
{
  assert (ridx.size());
  ostringstream buf;
  char const* separator = "";
  for (vector<id_type>::const_iterator it = v.begin(); it != v.end(); ++it)
    {
      buf << separator << (*this)[*it];
      separator = " ";
    }
  return buf.str();
}
// Render the ids in [start,stop) as their token strings separated by single
// blanks.  Builds the reverse index first if necessary.  An empty or
// inverted range yields an empty string.
string
TokenIndex::
toString(id_type const* start, id_type const* const stop)
{
  if (ridx.empty())
    ridx = reverseIndex();
  ostringstream buf;
  for (id_type const* cur = start; cur < stop; ++cur)
    {
      if (cur != start) buf << " ";
      buf << (*this)[*cur];
    }
  return buf.str();
}
// Render the ids in [start,stop) as their token strings separated by single
// blanks (const version).  The reverse index must have been built already.
// An empty or inverted range yields an empty string.
string
TokenIndex::
toString(id_type const* start, id_type const* const stop) const
{
  assert (ridx.size());
  ostringstream buf;
  for (id_type const* cur = start; cur < stop; ++cur)
    {
      if (cur != start) buf << " ";
      buf << (*this)[*cur];
    }
  return buf.str();
}
// Split /line/ at whitespace and return the id of every token, in order.
vector<id_type>
TokenIndex::
toIdSeq(string const& line) const
{
  vector<id_type> ids;
  istringstream in(line);
  for (string token; in >> token; )
    ids.push_back((*this)[token]);
  return ids;
}
/// Replace the content of /v/ with the ids of the whitespace-separated
/// tokens in /line/.
/// @return false if any token received an id of 0 or 1, true otherwise.
/// NOTE(review): the test is against the literal 1, not against unkId --
/// presumably ids 0 and 1 are reserved for NULL and UNK; confirm.
bool
TokenIndex::
fillIdSeq(string const& line, vector<id_type> & v) const
{
  v.clear();
  bool sawLowId = false;
  istringstream in(line);
  string token;
  while (in >> token)
    {
      v.push_back((*this)[token]);
      if (!(v.back() > 1))
        sawLowId = true;
    }
  return !sawLowId;
}
// Number of tokens in the static index (dynamically added words are not
// included).
id_type
TokenIndex::
getNumTokens() const
{
  return this->numTokens;
}
// Id currently used for unknown tokens.
id_type
TokenIndex::
getUnkId() const
{
  return this->unkId;
}
// Label used for unknown tokens.  The pointer refers to the internal
// unkLabel string of this object.
char const* const
TokenIndex::
getUnkToken() const
{
  char const* const label = unkLabel.c_str();
  return label;
}
// Size of the static vocabulary; same value as ksize().
id_type
TokenIndex::
knownVocabSize() const
{
  return this->numTokens;
}
// Size of the static ("known") vocabulary.
id_type
TokenIndex::
ksize() const
{
  return this->numTokens;
}
// Total vocabulary size including dynamically added words; long-named
// alias of tsize().
id_type
TokenIndex::
totalVocabSize() const
{
  return this->tsize();
}
// Total vocabulary size: the static tokens plus, if the container for
// dynamically added words exists, the number of words in it.
id_type
TokenIndex::
tsize() const
{
  if (newWords != NULL)
    return numTokens+newWords->size();
  return numTokens;
}
// Write a token index file.
//
// Layout written: 4-byte entry count, id_type UNK id, one
// (4-byte string offset, id_type id) record per token in the order given,
// then all token strings, each terminated by a zero byte.
//
// @param tok      (token, id) pairs, in the order they are to be stored
// @param ofile    path of the output file
// @param unkToken label of the UNK token; if it is not among /tok/, the
//                 UNK id written is tok.size()
// @throws std::runtime_error if the file cannot be opened or written
//
// The file is opened in binary mode because the content is raw binary
// data, and stream errors are reported instead of being silently ignored.
void
write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
                         string const& ofile, string const& unkToken)
{
  typedef pair<uint32_t,id_type> IndexEntry; // offset and id
  // Write token strings to a buffer, keep track of offsets
  vector<IndexEntry> index(tok.size());
  ostringstream data;
  id_type unkId = tok.size();
  for (size_t i = 0; i < tok.size(); i++)
    {
      if (tok[i].first == unkToken)
        unkId = tok[i].second;
      index[i].first = data.tellp(); // offset of string
      index[i].second = tok[i].second; // respective ID
      data<<tok[i].first<<char(0); // write string to buffer
    }
  // Now write the actual file
  ofstream out(ofile.c_str(), ios::out | ios::binary);
  if (!out)
    throw std::runtime_error("write_tokenindex_to_disk: cannot open '"
                             + ofile + "' for writing.");
  uint32_t vsize = index.size(); // how many vocab items?
  out.write(reinterpret_cast<char*>(&vsize),4);
  out.write(reinterpret_cast<char*>(&unkId),sizeof(id_type));
  for (size_t i = 0; i < index.size(); i++)
    {
      out.write(reinterpret_cast<char*>(&index[i].first),4);
      out.write(reinterpret_cast<char*>(&index[i].second),sizeof(id_type));
    }
  out<<data.str();
  out.flush();
  if (!out)
    throw std::runtime_error("write_tokenindex_to_disk: error while writing '"
                             + ofile + "'.");
}
// Write the complete vocabulary (static tokens plus dynamically added
// words) to /fname/ as a token index file.  Tokens are looked up by id,
// sorted, and handed to write_tokenindex_to_disk().
void
TokenIndex::
write(string fname)
{
  typedef pair<string,uint32_t> Token; // token and id
  id_type const total = totalVocabSize();
  vector<Token> tok;
  tok.reserve(total);
  for (id_type id = 0; id < total; ++id)
    tok.push_back(Token((*this)[id],id));
  sort(tok.begin(),tok.end());
  write_tokenindex_to_disk(tok,fname,unkLabel);
}
bool
TokenIndex::
isDynamic() const
{
return dynamic;
}
// Switch dynamic id assignment on or off.
// When switching on, the containers for dynamically added words are created
// if they do not exist yet, and "NULL" and the UNK label are looked up once
// so that both have an id.
// @return the previous setting
bool
TokenIndex::
setDynamic(bool on)
{
  bool const previous = dynamic;
  if (on && !this->str2idExtra)
    {
      this->str2idExtra.reset(new map<string,id_type>());
      this->newWords.reset(new vector<string>());
    }
  dynamic = on;
  if (!on)
    return previous;
  (*this)["NULL"];
  (*this)[unkLabel];
  return previous;
}
// Make /unk/ the UNK label; the UNK id becomes whatever id a lookup of
// /unk/ yields.
void
TokenIndex::
setUnkLabel(string unk)
{
  id_type const id = (*this)[unk];
  unkLabel = unk;
  unkId = id;
}
}

168
moses/mm/tpt_tokenindex.h Normal file
View File

@ -0,0 +1,168 @@
// -*- c++ -*-
// TO DO (12.01.2011):
//
// - Vocab items should be stored in order of ids, so that we can determine
//   their length by computing V[id+1] - V[id] instead of using strlen.
//
// (c) 2007,2008 Ulrich Germann
#ifndef __ugTokenIndex_hh
#define __ugTokenIndex_hh
#include <iostream>
#include <sstream>
#include <fstream>
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/iostreams/stream.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/scoped_ptr.hpp>
#include <boost/thread.hpp>
#include "tpt_typedefs.h"
#include <vector>
#include <map>
using namespace std;
namespace bio=boost::iostreams;
namespace ugdiss
{
// Vocabulary index that maps token strings to numeric ids and back.
// The declared interface supports opening an index file, string->id and
// id->string lookup, conversion between id sequences and text, and an
// optional "dynamic" mode (see the members below).
class TokenIndex
{
/** Reverse index: maps from ID to char const* */
vector<char const*> ridx;
/** Label for the UNK token */
string unkLabel;
id_type unkId,numTokens;
/// New 2013-09-02: thread-safe
boost::scoped_ptr<boost::mutex> lock;
// NEW 2011-01-30: dynamic adding of unknown items
bool dynamic; // dynamically assign a new word id to unknown items?
boost::shared_ptr<map<string,id_type> > str2idExtra;
boost::shared_ptr<vector<string> > newWords;
// The use of pointers to external items is a bit of a bad hack
// in terms of the semantics of TokenIndex const: since external items
// are changed, the TokenIndex instance remains unchanged and const works,
// even though in reality the underlying object on the conceptual level
// *IS* changed. This means that dynamic TokenIndex instances are not
// thread-safe!
public:
/** string->ID lookup works via binary search in a vector of Entry instances */
class Entry
{
public:
uint32_t offset;
id_type id;
};
/** Comparison function object used for Entry instances */
class CompFunc
{
public:
char const* base;
CompFunc();
bool operator()(Entry const& A, char const* w);
};
bio::mapped_file_source file;
Entry const* startIdx;
Entry const* endIdx;
CompFunc comp;
TokenIndex(string unkToken="UNK");
// TokenIndex(string fname,string unkToken="UNK",bool dyna=false);
void open(string fname,string unkToken="UNK",bool dyna=false);
void close();
// id_type unkId,numTokens;
// string -> id lookup
id_type operator[](char const* w) const;
id_type operator[](string const& w) const;
// id -> string lookup
char const* const operator[](id_type id) const;
char const* const operator[](id_type id);
vector<char const*> reverseIndex() const;
// render a sequence of ids as a string
string toString(vector<id_type> const& v);
string toString(vector<id_type> const& v) const;
string toString(id_type const* start, id_type const* const stop);
string toString(id_type const* start, id_type const* const stop) const;
// convert a line of text into a sequence of ids
vector<id_type> toIdSeq(string const& line) const;
bool fillIdSeq(string const& line, vector<id_type> & v) const;
void iniReverseIndex();
id_type getNumTokens() const;
id_type getUnkId() const;
// the following two functions are deprecated; use ksize() and tsize() instead
id_type knownVocabSize() const; // return size of known (fixed) vocabulary
id_type totalVocabSize() const; // total of known and dynamically added items
id_type ksize() const; // shorthand for knownVocabSize();
id_type tsize() const; // shorthand for totalVocabSize();
char const* const getUnkToken() const;
void write(string fname); // write TokenIndex to a new file
bool isDynamic() const;
bool setDynamic(bool onoff); // returns the previous setting
void setUnkLabel(string unk);
};
// Write a token index file.
// @param tok      (token string, id) pairs; callers in this file sort them
//                 by string before passing them in
// @param ofile    name of the output file
// @param unkToken string of the UNK token; the id of the matching entry is
//                 stored in the file header
void
write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
string const& ofile, string const& unkToken);
/** Ordering for (word, count) pairs: higher counts first, ties broken
 *  alphabetically; the UNK word always sorts after every other word. */
class compWords
{
  string unk;
public:
  compWords(string _unk) : unk(_unk) {};

  bool
  operator()(pair<string,size_t> const& A,
             pair<string,size_t> const& B) const
  {
    // special treatment of the UNK token: it never precedes anything
    if (A.first == unk) return false;
    if (B.first == unk) return true;
    // more frequent words go first ...
    if (A.second != B.second)
      return A.second > B.second;
    // ... and equally frequent words are ordered by their string
    return A.first < B.first;
  }
};
// Build a token index file from a word->count map.
// Ids are assigned in order of decreasing frequency (high frequency, low
// id; see compWords), then the (token,id) pairs are re-sorted by token
// string and handed to write_tokenindex_to_disk().
// @param ofile    name of the output file
// @param M        map-like container whose entries expose the word as
//                 /first/ and its count as /second/
// @param unkToken string of the UNK token
template<class MYMAP>
void
mkTokenIndex(string ofile,MYMAP const& M,string unkToken)
{
  typedef pair<string,size_t>   WordCount; // token and frequency
  typedef pair<string,uint32_t> Token;     // token and id

  // collect the entries of M so that they can be sorted by frequency
  vector<WordCount> byFreq;
  byFreq.reserve(M.size());
  for (typename MYMAP::const_iterator m = M.begin(); m != M.end(); ++m)
    byFreq.push_back(WordCount(m->first, m->second));
  sort(byFreq.begin(), byFreq.end(), compWords(unkToken));

  // the rank in the frequency-sorted list becomes the id
  vector<Token> tok;
  tok.reserve(byFreq.size());
  for (size_t id = 0; id < byFreq.size(); ++id)
    tok.push_back(Token(byFreq[id].first, id));

  // re-sort in alphabetical order before writing
  sort(tok.begin(), tok.end());
  write_tokenindex_to_disk(tok, ofile, unkToken);
}
}
#endif

15
moses/mm/tpt_typedefs.h Normal file
View File

@ -0,0 +1,15 @@
// -*- c++ -*-
// Basic type definitions for code related to tightly packed tries
// (c) 2006-2012 Ulrich Germann
#ifndef __tpt_typedefs_h
#define __tpt_typedefs_h
#include <stdint.h>
// Fixed-width integer aliases shared by the code in namespace ugdiss.
namespace ugdiss
{
typedef uint32_t id_type; // 32-bit unsigned id
typedef uint32_t count_type; // 32-bit unsigned count
typedef uint64_t filepos_type; // 64-bit unsigned file position
typedef unsigned char uchar;
}
#endif

176
moses/mm/ug_bitext.cc Normal file
View File

@ -0,0 +1,176 @@
//-*- c++-mode -*-
#include "ug_bitext.h"
#include <algorithm>
#include <boost/math/distributions/binomial.hpp>
using namespace ugdiss;
using namespace std;
namespace Moses
{
namespace bitext
{
// Start with all counters at zero and no worker registered.
pstats::
pstats()
  : raw_cnt(0),
    sample_cnt(0),
    good(0),
    sum_pairs(0),
    in_progress(0)
{
}
void
pstats::
register_worker()
{
this->lock.lock();
++this->in_progress;
this->lock.unlock();
}
void
pstats::
release()
{
this->lock.lock();
if (this->in_progress-- == 1) // last one - >we're done
this->ready.notify_all();
this->lock.unlock();
}
void
pstats::
add(uint64_t pid, float const w,
vector<uchar> const& a,
uint32_t const cnt2)
{
this->lock.lock();
jstats& entry = this->trg[pid];
this->lock.unlock();
entry.add(w,a,cnt2);
if (this->good < entry.rcnt())
{
this->lock.lock();
UTIL_THROW(util::Exception, "more joint counts than good counts!"
<< entry.rcnt() << "/" << this->good);
}
}
// All counts start at zero; room for one alignment is reserved up front.
jstats::
jstats()
  : my_rcnt(0),
    my_wcnt(0),
    my_cnt2(0)
{
  my_aln.reserve(1);
}
// Copy constructor: copies all counts and the alignment list. The mutex is
// not copied; the new object gets its own, default-constructed mutex.
// The original forgot to copy my_cnt2, leaving it uninitialized in the copy.
jstats::
jstats(jstats const& other)
  : my_rcnt(other.rcnt())
  , my_wcnt(other.wcnt())
  , my_cnt2(other.cnt2())
  , my_aln(other.aln())
{
}
// Add one observation to these joint statistics (guarded by this->lock).
// @param w    weight added to the weighted count
// @param a    word alignment of the observation; ignored if empty
// @param cnt2 value added to my_cnt2
void
jstats::
add(float w, vector<uchar> const& a, uint32_t const cnt2)
{
boost::lock_guard<boost::mutex> lk(this->lock);
my_rcnt += 1;
my_wcnt += w;
my_cnt2 += cnt2;
if (a.size())
{
// linear search for an identical alignment seen before
size_t i = 0;
while (i < my_aln.size() && my_aln[i].second != a) ++i;
if (i == my_aln.size())
my_aln.push_back(pair<size_t,vector<uchar> >(1,a));
else
my_aln[i].first++;
// NOTE(review): presumably meant to keep the most frequent alignment at
// my_aln[0] (PhrasePair::update() reads aln()[0]). Confirm that i/2 is the
// intended parent index (a binary heap's parent is (i-1)/2) and that the
// prefix [0,i) is a valid heap when push_heap is called.
if (my_aln[i].first > my_aln[i/2].first)
push_heap(my_aln.begin(),my_aln.begin()+i+1);
}
}
uint32_t
jstats::
rcnt() const
{ return my_rcnt; }
float
jstats::
wcnt() const
{ return my_wcnt; }
uint32_t
jstats::
cnt2() const
{ return my_cnt2; }
// Read-only access to the list of (count, alignment) pairs.
vector<pair<size_t, vector<uchar> > > const&
jstats::
aln() const
{
  return this->my_aln;
}
// Phrase pairs are ordered by their score.
bool
PhrasePair::
operator<(PhrasePair const& other) const
{
  return other.score > this->score;
}
// Phrase pairs are ordered by their score.
bool
PhrasePair::
operator>(PhrasePair const& other) const
{
  return other.score < this->score;
}
// Default constructor: zero all ids, counts and the score so that a freshly
// constructed PhrasePair never exposes indeterminate values (init() and
// update() each set only a subset of the members).
PhrasePair::PhrasePair()
  : p1(0), p2(0)
  , raw1(0), raw2(0), sample1(0), sample2(0), good1(0), good2(0), joint(0)
  , mono(0), swap(0), left(0), right(0)
  , score(0)
{}
void
PhrasePair::
init(uint64_t const pid1, pstats const& ps, size_t const numfeats)
{
p1 = pid1;
raw1 = ps.raw_cnt;
sample1 = ps.sample_cnt;
sample2 = 0;
good1 = ps.good;
good2 = 0;
fvals.resize(numfeats);
}
float
lbop(size_t const tries, size_t const succ, float const confidence)
{
return
boost::math::binomial_distribution<>::
find_lower_bound_on_p(tries, succ, confidence);
}
void
PhrasePair::
update(uint64_t const pid2, jstats const& js)
{
p2 = pid2;
raw2 = js.cnt2();
joint = js.rcnt();
assert(js.aln().size());
if (js.aln().size())
aln = js.aln()[0].second;
}
// Compute the score as the dot product of the weights /w/ and the feature
// values, store it in this->score and return it.
// /w/ must have as many entries as fvals.
float
PhrasePair::
eval(vector<float> const& w)
{
  assert(w.size() == this->fvals.size());
  this->score = 0;
  vector<float>::const_iterator f = this->fvals.begin();
  for (vector<float>::const_iterator wi = w.begin(); wi != w.end(); ++wi, ++f)
    this->score += (*wi) * (*f);
  return this->score;
}
}
}

992
moses/mm/ug_bitext.h Normal file
View File

@ -0,0 +1,992 @@
//-*- c++ -*-
#ifndef __ug_bitext_h
#define __ug_bitext_h
// Implementations of word-aligned bitext.
// Written by Ulrich Germann
//
// mmBitext: static, memory-mapped bitext
// imBitext: dynamic, in-memory bitext
//
// things we can do to speed up things:
// - set up threads at startup time that force the
// data in to memory sequentially
//
// - use multiple agendas for better load balancing and to avoid
// competition for locks
#include <string>
#include <vector>
#include <cassert>
#include <iomanip>
#include <algorithm>
#include <boost/unordered_map.hpp>
#include <boost/foreach.hpp>
#include <boost/thread.hpp>
#include "moses/generic/sorting/VectorIndexSorter.h"
#include "moses/generic/sampling/Sampling.h"
#include "moses/generic/file_io/ug_stream.h"
#include "moses/Util.h"
#include "util/exception.hh"
#include "util/check.hh"
#include "ug_typedefs.h"
#include "ug_mm_ttrack.h"
#include "ug_im_ttrack.h"
#include "ug_mm_tsa.h"
#include "ug_im_tsa.h"
#include "tpt_tokenindex.h"
#include "ug_corpus_token.h"
#include "tpt_pickler.h"
#include "ug_lexical_phrase_scorer2.h"
using namespace ugdiss;
using namespace std;
namespace Moses {
namespace bitext
{
using namespace ugdiss;
template<typename TKN> class Bitext;
// Split a 64-bit phrase id into its three components:
//   bits 32..63 -> sid, bits 16..31 -> off, bits 0..15 -> len
template<typename sid_t, typename off_t, typename len_t>
void
parse_pid(uint64_t const pid, sid_t & sid,
          off_t & off, len_t& len)
{
  uint64_t const low16 = 0xFFFF;
  len = pid & low16;
  off = (pid >> 16) & low16;
  sid = pid >> 32;
}
// Lower bound on the success probability of a binomial distribution given
// /tries/ trials, /succ/ successes and the parameter /confidence/
// (forwarded to boost's find_lower_bound_on_p; see ug_bitext.cc).
float
lbop(size_t const tries, size_t const succ,
float const confidence);
// "joint" (i.e., phrase pair) statistics
// Counts and alignments collected for one phrase pair. add() takes the
// internal mutex; the accessors read the members without locking.
class
jstats
{
boost::mutex lock; // taken by add()
uint32_t my_rcnt; // unweighted count
float my_wcnt; // weighted count
uint32_t my_cnt2; // sum of the cnt2 values passed to add()
vector<pair<size_t, vector<uchar> > > my_aln; // (count, alignment) pairs
public:
jstats();
jstats(jstats const& other);
uint32_t rcnt() const;
uint32_t cnt2() const; // raw target phrase occurrence count
float wcnt() const;
vector<pair<size_t, vector<uchar> > > const & aln() const;
void add(float w, vector<uchar> const& a, uint32_t const cnt2);
};
// Statistics gathered for one source phrase, including the joint statistics
// of every target phrase observed with it (/trg/). Several worker threads
// may fill one instance concurrently; /lock/ guards the counters and
// /ready/ is signalled by release() when the last registered worker is done.
struct
pstats
{
  boost::mutex lock;               // for parallel gathering of stats
  boost::condition_variable ready; // consumers can wait for this data structure to be ready.

  size_t raw_cnt;     // (approximate) raw occurrence count
  size_t sample_cnt;  // number of instances selected during sampling
  size_t good;        // number of selected instances with valid word alignments
  size_t sum_pairs;
  size_t in_progress; // keeps track of how many threads are currently working on this

  // Joint statistics per target phrase id. (No 'typename' here: this is
  // not a template, and the keyword is ill-formed outside templates before
  // C++11.)
  boost::unordered_map<uint64_t, jstats> trg;

  pstats();
  void release();          // decrement in_progress; notify /ready/ on the last one
  void register_worker();  // increment in_progress
  size_t count_workers() { return in_progress; }

  void add(uint64_t const pid, float const w,
           vector<uchar> const& a, uint32_t const cnt2);
};
// A scored phrase pair: phrase ids, counts, feature values and a word
// alignment. init() fills the side-1 fields, update() the side-2/joint
// fields, eval() computes /score/ from /fvals/.
class
PhrasePair
{
public:
uint64_t p1, p2; // phrase ids (see parse_pid)
uint32_t raw1,raw2,sample1,sample2,good1,good2,joint;
uint32_t mono,swap,left,right;
vector<float> fvals; // feature values, filled in by the PhraseScorer classes
vector<uchar> aln; // word alignment, set by update()
// float avlex12,avlex21; // average lexical probs (Moses std)
// float znlex1,znlex2; // zens-ney lexical smoothing
// float colex1,colex2; // based on raw lexical occurrences
float score; // set by eval()
PhrasePair();
bool operator<(PhrasePair const& other) const;
bool operator>(PhrasePair const& other) const;
void init(uint64_t const pid1, pstats const& ps,
size_t const numfeats);
void update(uint64_t const pid2, jstats const& js);
float eval(vector<float> const& w);
};
// Abstract base class of the phrase pair scorers. Each scorer writes
// num_feats consecutive values into PhrasePair::fvals, starting at /index/.
template<typename Token>
class
PhraseScorer
{
protected:
  int index;     // position of this scorer's first value in PhrasePair::fvals
  int num_feats; // number of values this scorer provides

  // Give both members a defined value; derived classes set them in their
  // constructors / init() functions.
  PhraseScorer() : index(0), num_feats(0) {}
public:
  // Polymorphic base class: make deletion through a base pointer safe.
  virtual ~PhraseScorer() {}

  // Compute this scorer's feature value(s) for /pp/ and store them in pp.fvals.
  virtual
  void
  operator()(Bitext<Token> const& pt, PhrasePair& pp) const = 0;

  // Number of feature values this scorer provides.
  int
  fcnt() const { return num_feats; }
};
// Scorer providing one feature: log of lbop(good1, joint, conf).
template<typename Token>
class
PScorePfwd : public PhraseScorer<Token>
{
  float conf; // confidence parameter passed to lbop()
public:
  PScorePfwd()
  {
    this->num_feats = 1;
  }

  // Set the feature offset /i/ and the confidence /c/.
  // @return the first feature index after this scorer's features
  int
  init(int const i, float const c)
  {
    this->index = i;
    conf = c;
    return this->index + this->num_feats;
  }

  void
  operator()(Bitext<Token> const& bt, PhrasePair& pp) const
  {
    // report phrase pairs with more joint than good source counts
    bool const inconsistent = pp.joint > pp.good1;
    if (inconsistent)
      {
        cerr << bt.toString(pp.p1,0) << " ::: " << bt.toString(pp.p2,1) << endl;
        cerr << pp.joint << "/" << pp.good1 << "/" << pp.raw2 << endl;
      }
    float const p = lbop(pp.good1, pp.joint, conf);
    pp.fvals[this->index] = log(p);
  }
};
// Scorer providing one feature: log of lbop(max(raw2,joint), joint, conf).
template<typename Token>
class
PScorePbwd : public PhraseScorer<Token>
{
  float conf; // confidence parameter passed to lbop()
public:
  PScorePbwd()
  {
    this->num_feats = 1;
  }

  // Set the feature offset /i/ and the confidence /c/.
  // @return the first feature index after this scorer's features
  int
  init(int const i, float const c)
  {
    this->index = i;
    conf = c;
    return this->index + this->num_feats;
  }

  void
  operator()(Bitext<Token> const& pt, PhrasePair& pp) const
  {
    // the number of trials is never smaller than the number of successes
    uint32_t const tries = max(pp.raw2, pp.joint);
    float const p = lbop(tries, pp.joint, conf);
    pp.fvals[this->index] = log(p);
  }
};
// Scorer providing two features, both computed by LexicalPhraseScorer2::score()
// from the tokens of the two phrases and the word alignment pp.aln.
template<typename Token>
class
PScoreLex : public PhraseScorer<Token>
{
LexicalPhraseScorer2<Token> scorer;
public:
PScoreLex() { this->num_feats = 2; }
// Open the scorer's data file /fname/ and set the feature offset /i/.
// Returns the first feature index after this scorer's features.
int
init(int const i, string const& fname)
{
scorer.open(fname);
this->index = i;
return i + this->num_feats;
}
void
operator()(Bitext<Token> const& bt, PhrasePair& pp) const
{
// decode sentence id, offset and length of both phrases
uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0;
parse_pid(pp.p1, sid1, off1, len1);
parse_pid(pp.p2, sid2, off2, len2);
#if 0
Token const* t1 = bt.T1->sntStart(sid1);
for (size_t i = off1; i < off1 + len1; ++i)
cout << (*bt.V1)[t1[i].id()] << " ";
cout << __FILE__ << ":" << __LINE__ << endl;
Token const* t2 = bt.T2->sntStart(sid2);
for (size_t i = off2; i < off2 + len2; ++i)
cout << (*bt.V2)[t2[i].id()] << " ";
cout << __FILE__ << ":" << __LINE__ << endl;
BOOST_FOREACH (int a, pp.aln)
cout << a << " " ;
cout << __FILE__ << ":" << __LINE__ << "\n" << endl;
#endif
// the two results go to fvals[index] and fvals[index+1]
scorer.score(bt.T1->sntStart(sid1)+off1,0,len1,
bt.T2->sntStart(sid2)+off2,0,len2,
pp.aln, pp.fvals[this->index],
pp.fvals[this->index+1]);
}
};
/// Word penalty: one feature, the length component of the target phrase id
template<typename Token>
class
PScoreWP : public PhraseScorer<Token>
{
public:
  PScoreWP() { this->num_feats = 1; }

  // Set the feature offset /i/.
  // @return the first feature index after this scorer's feature
  int
  init(int const i)
  {
    this->index = i;
    return this->index + this->num_feats;
  }

  void
  operator()(Bitext<Token> const& bt, PhrasePair& pp) const
  {
    uint32_t sentence = 0, offset = 0, length = 0;
    parse_pid(pp.p2, sentence, offset, length);
    pp.fvals[this->index] = length; // only the length is needed
  }
};
/// Phrase penalty: one feature with the constant value 1
template<typename Token>
class
PScorePP : public PhraseScorer<Token>
{
public:
  PScorePP() { this->num_feats = 1; }

  // Set the feature offset /i/.
  // @return the first feature index after this scorer's feature
  int
  init(int const i)
  {
    this->index = i;
    return this->index + this->num_feats;
  }

  void
  operator()(Bitext<Token> const& bt, PhrasePair& pp) const
  {
    pp.fvals[this->index] = 1;
  }
};
// Word-aligned bitext: two token tracks (T1, T2) with their vocabularies
// (V1, V2) and suffix array indices (I1, I2), plus a track of word
// alignments (Tx). Phrase statistics are gathered by sampling; the work is
// done by the worker threads of an /agenda/ and results obtained with the
// default sample size are kept in cache1/cache2.
template<typename TKN>
class Bitext
{
mutable boost::mutex lock;
public:
typedef TKN Token;
typedef typename TSA<Token>::tree_iterator iter;
class agenda;
// stores the list of unfinished jobs;
// maintains a pool of workers and assigns the jobs to them
// to be done: work with multiple agendas for faster lookup
// (multiplex jobs); not sure if an agenda having more than
// four or so workers is efficient, because workers get into
// each other's way.
mutable sptr<agenda> ag;
sptr<Ttrack<char> > const Tx; // word alignments
sptr<Ttrack<Token> > const T1; // token track
sptr<Ttrack<Token> > const T2; // token track
sptr<TokenIndex> const V1; // vocab
sptr<TokenIndex> const V2; // vocab
sptr<TSA<Token> > const I1; // indices
sptr<TSA<Token> > const I2; // indices
/// given the source phrase sid[start:stop]
// find the possible start (s1 .. s2) and end (e1 .. e2)
// points of the target phrase; if non-NULL, store word
// alignments in *core_alignment. If /flip/, source phrase is
// L2.
bool
find_trg_phr_bounds
(size_t const sid, size_t const start, size_t const stop,
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
vector<uchar> * core_alignment, bool const flip) const;
// caches of phrase statistics, keyed by phrase id: cache1 for phrases
// looked up in I1, cache2 for all others (see prep2)
mutable boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
private:
size_t default_sample_size;
// schedule (or fetch from the cache) the statistics for /phrase/
sptr<pstats>
prep2(iter const& phrase, size_t const max_sample) const;
public:
// The Bitext stores the raw pointers passed in here in shared pointers.
Bitext(Ttrack<Token>* const t1,
Ttrack<Token>* const t2,
Ttrack<char>* const tx,
TokenIndex* const v1,
TokenIndex* const v2,
TSA<Token>* const i1,
TSA<Token>* const i2,
size_t const max_sample=5000);
virtual void open(string const base, string const L1, string const L2) = 0;
// lookup() schedules the statistics and waits until they are complete
sptr<pstats> lookup(iter const& phrase) const;
sptr<pstats> lookup(iter const& phrase, size_t const max_sample) const;
// prep() only schedules the statistics and does not wait
void prep(iter const& phrase) const;
void setDefaultSampleSize(size_t const max_samples);
size_t getDefaultSampleSize() const;
// render the phrase with id /pid/ as text; /isL2/ selects the language side
string toString(uint64_t pid, int isL2) const;
};
// Render the phrase identified by /pid/ as a space-separated string.
// @param pid  phrase id (sentence id, offset, length; see parse_pid)
// @param isL2 non-zero: use track T2 and vocabulary V2, else T1 and V1
template<typename Token>
string
Bitext<Token>::
toString(uint64_t pid, int isL2) const
{
  uint32_t sid, off, len;
  parse_pid(pid, sid, off, len);
  Token const* const first = (isL2 ? T2 : T1)->sntStart(sid) + off;
  Token const* const last  = first + len;
  TokenIndex const& V = isL2 ? *V2 : *V1;
  ostringstream buf;
  for (Token const* t = first; t < last; ++t)
    {
      if (t != first) buf << " ";
      buf << V[t->id()];
    }
  return buf.str();
}
// Sample size used when the caller does not specify one.
template<typename Token>
size_t
Bitext<Token>::
getDefaultSampleSize() const
{
  return this->default_sample_size;
}
// Change the default sample size. Both caches are emptied when the value
// actually changes; calling with the current value does nothing.
template<typename Token>
void
Bitext<Token>::
setDefaultSampleSize(size_t const max_samples)
{
  if (max_samples == default_sample_size)
    return; // nothing to do
  cache1.clear();
  cache2.clear();
  default_sample_size = max_samples;
}
// Store the given tracks, vocabularies and indices in the corresponding
// shared pointer members and set the default sample size.
template<typename Token>
Bitext<Token>::
Bitext(Ttrack<Token>* const t1,
       Ttrack<Token>* const t2,
       Ttrack<char>*  const tx,
       TokenIndex*    const v1,
       TokenIndex*    const v2,
       TSA<Token>*    const i1,
       TSA<Token>*    const i2,
       size_t const max_sample)
  : Tx(tx),
    T1(t1),
    T2(t2),
    V1(v1),
    V2(v2),
    I1(i1),
    I2(i2),
    default_sample_size(max_sample)
{
}
// agenda is a pool of jobs
// plus the worker threads that process them. Each job samples occurrences
// of one phrase and collects statistics in a pstats object.
template<typename Token>
class
Bitext<Token>::
agenda
{
boost::mutex lock; // guards joblist, workers and doomed
class job
{
boost::mutex lock; // guards the job's read position and worker count
friend class agenda;
public:
size_t workers; // how many workers are working on this job?
sptr<TSA<Token> const> root; // root of the underlying suffix array
char const* next; // next position to read from
char const* stop; // end of index range
size_t max_samples; // how many samples to extract at most
size_t ctr; /* # of phrase occurrences considered so far
* # of samples chosen is stored in stats->good */
size_t len; // phrase length
bool fwd; // if true, source phrase is L1
sptr<pstats> stats; // stores statistics collected during sampling
bool step(uint64_t & sid, uint64_t & offset); // select another occurrence
bool done() const;
job(typename TSA<Token>::tree_iterator const& m,
sptr<TSA<Token> > const& r, size_t maxsmpl, bool isfwd);
};
// function object executed by each worker thread
class
worker
{
agenda& ag;
public:
worker(agenda& a) : ag(a) {}
void operator()();
};
list<sptr<job> > joblist;
vector<sptr<boost::thread> > workers;
bool shutdown; // set by the destructor; get_job() then returns no more jobs
size_t doomed; // number of workers that should terminate on their next get_job()
public:
Bitext<Token> const& bt;
agenda(Bitext<Token> const& bitext);
~agenda();
void add_workers(int n);
sptr<pstats>
add_job(typename TSA<Token>::tree_iterator const& phrase,
size_t const max_samples);
sptr<job> get_job();
};
// Select the next occurrence of the phrase to be processed.
// On success, /sid/ and /offset/ are set to the values read from the index
// range [next,stop) and true is returned; false means there is nothing left
// to do for this job. The job's mutex is held for the whole call.
template<typename Token>
bool
Bitext<Token>::
agenda::
job::
step(uint64_t & sid, uint64_t & offset)
{
boost::lock_guard<boost::mutex> jguard(lock);
// max_samples == 0: no sampling, every occurrence is selected
if ((max_samples == 0) && (next < stop))
{
next = root->readSid(next,stop,sid);
next = root->readOffset(next,stop,offset);
boost::lock_guard<boost::mutex> sguard(stats->lock);
// raw_cnt is only approximate: bump it if it equals ctr
if (stats->raw_cnt == ctr) ++stats->raw_cnt;
stats->sample_cnt++;
return true;
}
else
{
// NOTE(review): stats->good is read here without holding stats->lock
while (next < stop && stats->good < max_samples)
{
next = root->readSid(next,stop,sid);
next = root->readOffset(next,stop,offset);
{
boost::lock_guard<boost::mutex> sguard(stats->lock);
if (stats->raw_cnt == ctr) ++stats->raw_cnt;
// select this occurrence if a random number drawn with the count of
// remaining occurrences (raw_cnt - ctr) as argument is below the number
// of samples still wanted
size_t rnum = randInt(stats->raw_cnt - ctr++);
if (rnum < max_samples - stats->good)
{
stats->sample_cnt++;
return true;
}
}
}
return false;
}
}
// Adjust the size of the worker pool by /n/ (may be negative).
// The target size is the current number of workers minus those already
// doomed, plus /n/, but at least 1. Finished threads are removed from the
// pool first; surplus workers are marked as doomed, missing ones are started.
template<typename Token>
void
Bitext<Token>::
agenda::
add_workers(int n)
{
static boost::posix_time::time_duration nodelay(0,0,0,0);
boost::lock_guard<boost::mutex> guard(this->lock);
int target = max(1, int(n + workers.size() - this->doomed));
// house keeping: remove all workers that have finished
for (size_t i = 0; i < workers.size(); )
{
// timed_join with zero delay: true if the thread has already terminated
if (workers[i]->timed_join(nodelay))
{
// swap-and-pop; i is not advanced so the swapped-in entry is checked too
if (i + 1 < workers.size())
workers[i].swap(workers.back());
workers.pop_back();
}
else ++i;
}
// cerr << workers.size() << "/" << target << " active" << endl;
if (int(workers.size()) > target)
this->doomed = workers.size() - target;
else
while (int(workers.size()) < target)
{
sptr<boost::thread> w(new boost::thread(worker(*this)));
workers.push_back(w);
}
}
// Main loop of a worker thread: fetch jobs from the agenda until get_job()
// returns an empty pointer. For every occurrence selected by job::step(),
// determine the possible target phrase boundaries and add one observation
// per extractable target phrase to the job's statistics.
template<typename Token>
void
Bitext<Token>::
agenda::
worker::
operator()()
{
size_t s1=0, s2=0, e1=0, e2=0;
uint64_t sid=0, offset=0; // of the source phrase
while(sptr<job> j = ag.get_job())
{
j->stats->register_worker();
vector<uchar> aln;
while (j->step(sid,offset))
{
aln.clear();
// skip occurrences for which no consistent target phrase exists;
// the alignment is only collected for forward jobs
if (!ag.bt.find_trg_phr_bounds
(sid, offset, offset + j->len, s1, s2, e1, e2,
j->fwd?&aln:NULL, !j->fwd))
continue;
j->stats->lock.lock();
j->stats->good += 1;
j->stats->sum_pairs += (s2-s1+1)*(e2-e1+1);
j->stats->lock.unlock();
// shift every second alignment entry by the number of fringe positions
// s2-s1 (the loop starts at entry 1 for forward jobs)
for (size_t k = j->fwd ? 1 : 0; k < aln.size(); k += 2)
aln[k] += s2 - s1;
// o: start of the sentence on the other language side
Token const* o = (j->fwd ? ag.bt.T2 : ag.bt.T1)->sntStart(sid);
// each of the (s2-s1+1)*(e2-e1+1) candidate phrases gets an equal share
float sample_weight = 1./((s2-s1+1)*(e2-e1+1));
for (size_t s = s1; s <= s2; ++s)
{
// look up the shortest candidate starting at s (it ends at e1) ...
sptr<iter> b = (j->fwd ? ag.bt.I2 : ag.bt.I1)->find(o+s,e1-s);
if (!b || b->size() < e1 -s)
UTIL_THROW(util::Exception, "target phrase not found");
// assert(b);
// ... and extend it token by token up to e2
for (size_t i = e1; i <= e2; ++i)
{
j->stats->add(b->getPid(),sample_weight,aln,b->approxOccurrenceCount());
if (i < e2)
{
#ifndef NDEBUG
bool ok = b->extend(o[i].id());
assert(ok);
#else
b->extend(o[i].id());
// cerr << "boo" << endl;
#endif
}
}
// the next start position is one further right: undo one step of the shift
if (j->fwd && s < s2)
for (size_t k = j->fwd ? 1 : 0; k < aln.size(); k += 2)
--aln[k];
}
// j->stats->lock.unlock();
}
j->stats->release();
}
}
// Set up a sampling job for the phrase that the tree iterator /m/ points to.
// @param m       iterator of the phrase; provides the index range to read,
//                the phrase length and the approximate occurrence count
// @param r       stored as the job's /root/
// @param maxsmpl maximum number of samples (0: see step())
// @param isfwd   stored as the job's /fwd/ flag
template<typename Token>
Bitext<Token>::
agenda::
job::
job(typename TSA<Token>::tree_iterator const& m,
    sptr<TSA<Token> > const& r, size_t maxsmpl, bool isfwd)
  : workers(0),
    root(r),
    next(m.lower_bound(-1)),
    stop(m.upper_bound(-1)),
    max_samples(maxsmpl),
    ctr(0),
    len(m.size()),
    fwd(isfwd),
    stats(new pstats())
{
  // seed the raw count with the iterator's estimate
  stats->raw_cnt = m.approxOccurrenceCount();
}
// Create a job for /phrase/, append it to the job list and return the
// pstats object that the job fills in.
// The job's stats are registered once here, so in_progress stays above zero
// until get_job() finds the job done and calls release() on it.
template<typename Token>
sptr<pstats>
Bitext<Token>::
agenda::
add_job(typename TSA<Token>::tree_iterator const& phrase,
size_t const max_samples)
{
static boost::posix_time::time_duration nodelay(0,0,0,0);
// forward job if the phrase comes from index I1; the job then gets I2 as root
bool fwd = phrase.root == bt.I1.get();
sptr<job> j(new job(phrase, fwd ? bt.I2 : bt.I1, max_samples, fwd));
j->stats->register_worker();
boost::unique_lock<boost::mutex> lk(this->lock);
joblist.push_back(j);
// The list was empty before this job was added: check for worker threads
// that have terminated and either drop them (if workers are doomed) or
// replace them with fresh threads.
if (joblist.size() == 1)
{
size_t i = 0;
while (i < workers.size())
{
if (workers[i]->timed_join(nodelay))
{
if (doomed)
{
if (i+1 < workers.size())
workers[i].swap(workers.back());
workers.pop_back();
--doomed;
}
else
workers[i++] = sptr<boost::thread>(new boost::thread(worker(*this)));
}
else ++i;
}
}
return j->stats;
}
// Hand out the next job to a worker. An empty pointer tells the worker to
// terminate: it is returned on shutdown, when the worker is doomed, or when
// the job list is empty.
template<typename Token>
sptr<typename Bitext<Token>::agenda::job>
Bitext<Token>::
agenda::
get_job()
{
// cerr << workers.size() << " workers on record" << endl;
sptr<job> ret;
// NOTE(review): shutdown is read before the agenda lock is taken
if (this->shutdown) return ret;
// add_workers(0);
boost::unique_lock<boost::mutex> lock(this->lock);
if (this->doomed)
{
--this->doomed;
return ret;
}
// Remove finished jobs (releasing their stats) and skip jobs that already
// have 4 or more workers; stop at the first job that can take another one.
typename list<sptr<job> >::iterator j = joblist.begin();
while (j != joblist.end())
{
if ((*j)->done())
{
(*j)->stats->release();
joblist.erase(j++);
}
else if ((*j)->workers >= 4)
{
++j;
}
else break;
}
if (joblist.size())
{
// if every remaining job is fully staffed, fall back to the first one
ret = j == joblist.end() ? joblist.front() : *j;
boost::lock_guard<boost::mutex> jguard(ret->lock);
++ret->workers;
}
return ret;
}
// Bitext whose tracks and indices are mmTtrack / mmTSA instances
// (see the constructor); open() loads them from files.
template<typename TKN>
class mmBitext : public Bitext<TKN>
{
public:
void open(string const base, string const L1, string L2);
mmBitext();
};
// Create empty mmTtrack / mmTSA / TokenIndex components; they are filled
// by open(). The default sample size of Bitext is used.
// (The stray ';' after the function body was removed: an empty declaration
// at namespace scope is ill-formed before C++11.)
template<typename TKN>
mmBitext<TKN>::
mmBitext()
  : Bitext<TKN>(new mmTtrack<TKN>(),
                new mmTtrack<TKN>(),
                new mmTtrack<char>(),
                new TokenIndex(),
                new TokenIndex(),
                new mmTSA<TKN>(),
                new mmTSA<TKN>())
{}
// Open all components of the bitext. File names are built from /base/ and
// the language tags:
//   base+L1+".mct", base+L2+".mct"  token tracks
//   base+L1+"-"+L2+".mam"           word alignments
//   base+L1+".tdx", base+L2+".tdx"  vocabularies
//   base+L1+".sfa", base+L2+".sfa"  suffix arrays
template<typename TKN>
void
mmBitext<TKN>::
open(string const base, string const L1, string L2)
{
// NOTE(review): the base-class pointers are downcast with reinterpret_cast;
// this relies on the objects having been created as mm* instances by the
// constructor of this class — confirm a static_cast would not be safer.
mmTtrack<TKN>& t1 = *reinterpret_cast<mmTtrack<TKN>*>(this->T1.get());
mmTtrack<TKN>& t2 = *reinterpret_cast<mmTtrack<TKN>*>(this->T2.get());
mmTtrack<char>& tx = *reinterpret_cast<mmTtrack<char>*>(this->Tx.get());
t1.open(base+L1+".mct");
t2.open(base+L2+".mct");
tx.open(base+L1+"-"+L2+".mam");
this->V1->open(base+L1+".tdx"); this->V1->iniReverseIndex();
this->V2->open(base+L2+".tdx"); this->V2->iniReverseIndex();
mmTSA<TKN>& i1 = *reinterpret_cast<mmTSA<TKN>*>(this->I1.get());
mmTSA<TKN>& i2 = *reinterpret_cast<mmTSA<TKN>*>(this->I2.get());
// the suffix arrays are opened last and receive their token track
i1.open(base+L1+".sfa", this->T1.get());
i2.open(base+L2+".sfa", this->T2.get());
// both sides must contain the same number of sentences
assert(this->T1->size() == this->T2->size());
}
// Bitext whose tracks and indices are imTtrack / imTSA instances
// (see the constructor).
// NOTE(review): open() is declared here, but the only definition in this
// file is commented out.
template<typename TKN>
class imBitext : public Bitext<TKN>
{
public:
void open(string const base, string const L1, string L2);
imBitext();
};
// Create empty imTtrack / imTSA / TokenIndex components.
// The default sample size of Bitext is used.
template<typename TKN>
imBitext<TKN>::
imBitext()
  : Bitext<TKN>(new imTtrack<TKN>(),  // t1
                new imTtrack<TKN>(),  // t2
                new imTtrack<char>(), // tx
                new TokenIndex(),     // v1
                new TokenIndex(),     // v2
                new imTSA<TKN>(),     // i1
                new imTSA<TKN>())     // i2
{
}
// template<typename TKN>
// void
// imBitext<TKN>::
// open(string const base, string const L1, string L2)
// {
// mmTtrack<TKN>& t1 = *reinterpret_cast<mmTtracuk<TKN>*>(this->T1.get());
// mmTtrack<TKN>& t2 = *reinterpret_cast<mmTtrack<TKN>*>(this->T2.get());
// mmTtrack<char>& tx = *reinterpret_cast<mmTtrack<char>*>(this->Tx.get());
// t1.open(base+L1+".mct");
// t2.open(base+L2+".mct");
// tx.open(base+L1+"-"+L2+".mam");
// cerr << "DADA" << endl;
// this->V1->open(base+L1+".tdx"); this->V1->iniReverseIndex();
// this->V2->open(base+L2+".tdx"); this->V2->iniReverseIndex();
// mmTSA<TKN>& i1 = *reinterpret_cast<mmTSA<TKN>*>(this->I1.get());
// mmTSA<TKN>& i2 = *reinterpret_cast<mmTSA<TKN>*>(this->I2.get());
// i1.open(base+L1+".sfa", this->T1.get());
// i2.open(base+L2+".sfa", this->T2.get());
// assert(this->T1->size() == this->T2->size());
// }
// Determine the target phrase boundaries for the source span [start,stop)
// of sentence /sid/ from the word alignment.
// On success: s2 is the leftmost and e1-1 the rightmost target position
// aligned to the source span; [s1,s2) and [e1,e2) are the adjacent
// positions that are not aligned to a word outside the source span, i.e.,
// possible start points are s1..s2 and possible end points e1..e2.
// Returns false if the source span has no alignment links, or if a target
// position between the leftmost and rightmost link is aligned to a word
// outside the source span.
// If /core_alignment/ is non-NULL it receives the alignment links as a flat
// sequence of position pairs relative to /start/ and the leftmost aligned
// target position.
// If /flip/ is set, the roles of the two fields of each alignment link are
// swapped and T1 (instead of T2) provides the target sentence length.
template<typename Token>
bool
Bitext<Token>::
find_trg_phr_bounds(size_t const sid, size_t const start, size_t const stop,
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
vector<uchar>* core_alignment, bool const flip) const
{
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
// a word on the core_alignment:
// since fringe words ([s1,...,s2),[e1,..,e2) if s1 < s2, or e1 < e2, respectively)
// are by definition unaligned, we store only the core alignment in *core_alignment
// it is up to the calling function to shift alignment points over for start positions
// of extracted phrases that start with a fringe word
// forbidden: target positions aligned to source words outside [start,stop)
bitvector forbidden((flip ? T1 : T2)->sntLen(sid));
size_t src,trg;
size_t lft = forbidden.size(); // leftmost target position linked to the span
size_t rgt = 0; // rightmost target position linked to the span
// alignment links of the span, always indexed by the L1 position
vector<vector<ushort> > aln((*T1).sntLen(sid));
char const* p = Tx->sntStart(sid);
char const* x = Tx->sntEnd(sid);
// cerr << "flip = " << flip << " " << __FILE__ << ":" << __LINE__ << endl;
// read the alignment links of the sentence, two numbers per link
while (p < x)
{
if (flip) { p = binread(p,trg); assert(p<x); p = binread(p,src); }
else { p = binread(p,src); assert(p<x); p = binread(p,trg); }
if (src < start || src >= stop)
forbidden.set(trg);
else
{
lft = min(lft,trg);
rgt = max(rgt,trg);
if (core_alignment)
{
if (flip) aln[trg].push_back(src);
else aln[src].push_back(trg);
}
}
}
// the target span must not contain a position linked to the outside
for (size_t i = lft; i <= rgt; ++i)
if (forbidden[i])
return false;
// extend to the left and right over positions that are not forbidden
s2 = lft; for (s1 = s2; s1 && !forbidden[s1-1]; --s1);
e1 = rgt+1; for (e2 = e1; e2 < forbidden.size() && !forbidden[e2]; ++e2);
// no alignment link at all for the source span
if (lft > rgt) return false;
if (core_alignment)
{
core_alignment->clear();
if (flip)
{
for (size_t i = lft; i <= rgt; ++i)
{
sort(aln[i].begin(),aln[i].end());
BOOST_FOREACH(ushort x, aln[i])
{
core_alignment->push_back(i-lft);
core_alignment->push_back(x-start);
}
}
}
else
{
for (size_t i = start; i < stop; ++i)
{
BOOST_FOREACH(ushort x, aln[i])
{
core_alignment->push_back(i-start);
core_alignment->push_back(x-lft);
}
}
}
#if 0
// if (e1 - s1 > 3)
{
lock_guard<mutex> guard(this->lock);
Token const* t1 = T1->sntStart(sid);
Token const* t2 = T2->sntStart(sid);
cout << "[" << start << ":" << stop << "] => ["
<< s1 << ":" << s2 << ":"
<< e1 << ":" << e2 << "]" << endl;
for (size_t k = start; k < stop; ++k)
cout << k-start << "." << (*V1)[t1[k].id()] << " ";
cout << endl;
for (size_t k = s1; k < e2;)
{
if (k == s2) cout << "[";
cout << int(k)-int(s2) << "." << (*V2)[t2[k].id()];
if (++k == e1) cout << "] ";
else cout << " ";
}
cout << endl;
for (size_t k = 0; k < core_alignment->size(); k += 2)
cout << int((*core_alignment)[k]) << "-" << int((*core_alignment)[k+1]) << " ";
cout << "\n" << __FILE__ << ":" << __LINE__ << endl;
}
#endif
}
return lft <= rgt;
}
// Schedule the collection of statistics for /phrase/ with the default
// sample size; the result of prep2() is discarded and the call does not
// wait for the statistics to be completed.
template<typename Token>
void
Bitext<Token>::
prep(iter const& phrase) const
{
  size_t const sample_size = this->default_sample_size;
  this->prep2(phrase, sample_size);
}
// Return the statistics object for /phrase/, scheduling a sampling job if
// necessary. The returned object may still be under construction; see
// lookup() for a blocking variant.
// @param phrase     iterator identifying the phrase
// @param max_sample maximum number of samples; results are cached only if
//                   this equals the default sample size
template<typename Token>
sptr<pstats>
Bitext<Token>::
prep2(iter const& phrase, size_t const max_sample) const
{
  // Serialize the lazy creation of the agenda and all access to the caches.
  // The original had the locking commented out (and written as unnamed
  // temporaries, which would not have held the mutex anyway), so concurrent
  // callers could create two agendas or modify a cache at the same time.
  // The worker threads never take this mutex, so holding it while jobs are
  // added cannot deadlock with them.
  boost::lock_guard<boost::mutex> guard(this->lock);
  if (!ag)
    {
      ag.reset(new agenda(*this));
      ag->add_workers(20);
    }
  typedef boost::unordered_map<uint64_t,sptr<pstats> > pcache_t;
  sptr<pstats> ret;
  if (max_sample == this->default_sample_size)
    {
      uint64_t pid = phrase.getPid();
      // phrases looked up in I1 are cached in cache1, all others in cache2
      pcache_t & cache(phrase.root == &(*this->I1) ? cache1 : cache2);
      pcache_t::value_type entry(pid,sptr<pstats>());
      pair<pcache_t::iterator,bool> foo = cache.emplace(entry);
      // newly inserted: no job for this phrase yet, so create one
      if (foo.second) foo.first->second = ag->add_job(phrase, max_sample);
      ret = foo.first->second;
    }
  else ret = ag->add_job(phrase, max_sample);
  return ret;
}
// Return the statistics for /phrase/ using the default sample size.
// Blocks until no worker is registered on the statistics object any more.
template<typename Token>
sptr<pstats>
Bitext<Token>::
lookup(iter const& phrase) const
{
// NOTE(review): this creates an unnamed temporary guard that is destroyed
// at the end of this statement, so the mutex is NOT held during the rest
// of the function.
boost::lock_guard<boost::mutex>(this->lock);
sptr<pstats> ret;
ret = prep2(phrase, this->default_sample_size);
assert(ret);
// wait until release() has brought in_progress down to zero
boost::unique_lock<boost::mutex> lock(ret->lock);
while (ret->in_progress)
ret->ready.wait(lock);
return ret;
}
// Return the statistics for /phrase/ using at most /max_sample/ samples.
// Blocks until no worker is registered on the statistics object any more.
template<typename Token>
sptr<pstats>
Bitext<Token>::
lookup(iter const& phrase, size_t const max_sample) const
{
// NOTE(review): this creates an unnamed temporary guard that is destroyed
// at the end of this statement, so the mutex is NOT held during the rest
// of the function.
boost::lock_guard<boost::mutex>(this->lock);
sptr<pstats> ret = prep2(phrase, max_sample);
// wait until release() has brought in_progress down to zero
boost::unique_lock<boost::mutex> lock(ret->lock);
while (ret->in_progress)
ret->ready.wait(lock);
return ret;
}
// Set the shutdown flag (under the agenda lock), then wait for all worker
// threads on record to terminate.
template<typename Token>
Bitext<Token>::
agenda::
~agenda()
{
  {
    boost::lock_guard<boost::mutex> guard(this->lock);
    this->shutdown = true;
  }
  size_t w = 0;
  while (w < workers.size())
    {
      workers[w]->join();
      ++w;
    }
}
// Create an agenda for /thebitext/ with no jobs and no workers;
// workers are started by add_workers().
template<typename Token>
Bitext<Token>::
agenda::
agenda(Bitext<Token> const& thebitext)
  : shutdown(false),
    doomed(0),
    bt(thebitext)
{
}
// A job is done when the whole index range has been read, or when a sample
// limit is set and at least that many good samples have been collected.
template<typename Token>
bool
Bitext<Token>::
agenda::
job::
done() const
{
  bool const quota_filled = max_samples && stats->good >= max_samples;
  if (quota_filled)
    return true;
  return next == stop;
}
} // end of namespace bitext
} // end of namespace moses
#endif

View File

@ -0,0 +1,57 @@
// -*- c++ -*-
// (c) 2007-2012 Ulrich Germann
// Token class for dependency trees, where the linear order
// of tokens is defined as going up a dependency chain
// Include guard. The original tested __ug_conll_bottom_up_token_h but
// defined __ug_conll_bottok_up_token_h, so the guard never took effect and
// a second inclusion would have redefined the class.
#ifndef __ug_conll_bottom_up_token_h
#define __ug_conll_bottom_up_token_h
#include "ug_typedefs.h"
namespace ugdiss
{
  using namespace std;

  // Wrapper around a token type T whose next() follows T::up(), i.e. the
  // linear order of tokens goes up the dependency chain. T must provide
  // up(int) and cmp(T const&).
  template<typename T>
  class ConllBottomUpToken : public T
  {
  public:
    typedef T Token;
    ConllBottomUpToken() : T() {};
    ConllBottomUpToken(id_type id) : T(id) {};

    // the token /length/ steps up the chain (see definition below)
    ConllBottomUpToken const* next(int length=1) const;

    // both stop() variants always return NULL
    template<typename TTRACK_TYPE>
    ConllBottomUpToken const* stop(TTRACK_TYPE const& C, id_type sid) const
    {
      return NULL;
    };

    ConllBottomUpToken const*
    stop(ConllBottomUpToken const* seqStart,
         ConllBottomUpToken const* seqEnd) const
    {
      return NULL;
    };

    // all comparisons are delegated to T::cmp()
    bool operator<(T const& other) const { return this->cmp(other) < 0; }
    bool operator>(T const& other) const { return this->cmp(other) > 0; }
    bool operator==(T const& other) const { return this->cmp(other) == 0; }
    bool operator!=(T const& other) const { return this->cmp(other) != 0; }

    // true if /o/ is this token or is reached by repeatedly following up()
    bool reachable(T const* o)
    {
      for (T const* x = this; x; x = reinterpret_cast<T const*>(x->up()))
        if (x == o) return true;
      return false;
    }
  };

  // next(): the result of T::up(length), viewed as a ConllBottomUpToken
  template<typename T>
  ConllBottomUpToken<T> const*
  ConllBottomUpToken<T>::
  next(int length) const
  {
    return reinterpret_cast<ConllBottomUpToken<T> const*>(this->up(length));
  }
} // end of namespace ugdiss
#endif

View File

@ -0,0 +1,20 @@
#include "ug_conll_record.h"
namespace ugdiss
{
// Return a copy of this record with its fields translated through the
// lookup tables in /m/. A field is copied unchanged if its table is
// missing (m too short) or NULL. Table assignment:
//   m[0] sform, m[1] lemma, m[2] majpos and minpos, m[3] dtype,
//   m[4..6] info[0..2]
// The parent field is always copied unchanged.
Conll_Record
Conll_Record::
remap(vector<id_type const*> const& m) const
{
  size_t const n = m.size();
  Conll_Record ret;

  ret.sform = this->sform;
  if (n > 0 && m[0]) ret.sform = m[0][this->sform];

  ret.lemma = this->lemma;
  if (n > 1 && m[1]) ret.lemma = m[1][this->lemma];

  // major and minor part of speech share one table
  ret.majpos = this->majpos;
  ret.minpos = this->minpos;
  if (n > 2 && m[2])
    {
      ret.majpos = m[2][this->majpos];
      ret.minpos = m[2][this->minpos];
    }

  ret.dtype = this->dtype;
  if (n > 3 && m[3]) ret.dtype = m[3][this->dtype];

  // info[k] uses table m[4+k]
  for (size_t k = 0; k < 3; ++k)
    {
      ret.info[k] = this->info[k];
      if (n > 4 + k && m[4 + k])
        ret.info[k] = m[4 + k][this->info[k]];
    }

  ret.parent = this->parent;
  return ret;
}
}

View File

@ -0,0 +1,70 @@
#ifndef __ug_conll_record_h
#define __ug_conll_record_h
#include "ug_typedefs.h"
// Base class for dependency tree corpora with POS and Lemma annotations
namespace ugdiss
{
using namespace std;
// One token of a dependency-parsed corpus. Parent links are stored as
// offsets relative to the record itself (up() and isDescendentOf() follow
// them with pointer arithmetic), so the records of a sentence are expected
// to sit next to each other in memory.
class
Conll_Record
{
public:
id_type sform; // surface form
id_type lemma; // lemma
uchar majpos; // major part of speech
uchar minpos; // minor part of speech
short parent; // offset (in records) from this record to its parent; 0 = no parent
uchar dtype; // dependency type
uchar info[3]; /* additional information (depends on the part of speech)
* a place holder for the time being, to ensure proper
* alignment in memory */
// sets every field to 0
Conll_Record();
// follows the parent link /length/ times; NULL if a record without parent
// is reached before that
Conll_Record const* up(int length=1) const;
// field-by-field copy
Conll_Record& operator=(Conll_Record const& other);
// true if /other/ is this record or one of its ancestors
bool isDescendentOf(Conll_Record const* other) const;
// virtual bool operator==(Conll_Record const& other) const;
// virtual bool operator<(Conll_Record const& other) const;
// returns a copy with the ID fields translated through the tables in m
// (slot 0: sform, 1: lemma, 2: majpos/minpos, 3: dtype, 4-6: info[0..2])
Conll_Record remap(vector<id_type const*> const& m) const;
#if 0
/** constructor for conversion from CONLL-style text format
* @parameter SF Vocabulary for surface form
* @parameter LM Vocabulary for lemma
* @parameter PS Vocabulary for part-of-speech
* @parameter DT Vocabulary for dependency type
*/
Conll_Record(string const& line,
TokenIndex const& SF, TokenIndex const& LM,
TokenIndex const& PS, TokenIndex const& DT);
/** store the record as-is to disk (for memory-mapped reading later) */
void store(ostream& out);
#endif
};
// View a Conll_Record pointer as a pointer to the token type T. This is a
// plain reinterpret_cast; nothing is checked.
template<typename T>
T const* as(Conll_Record const* p)
{
  typedef T const* TargetPtr;
  return reinterpret_cast<TargetPtr>(p);
}
template<typename T>
T const* up(T const* p,int length=1)
{
return as<T>(p->up(length));
}
// This is for contiguous word sequences extracted from longer sequences:
// parent offsets that point outside the subsequence [start,stop) are reset
// to 0 (no parent).
void
fixParse(Conll_Record* start, Conll_Record* stop);
} // end of namespace ugdiss
#endif

View File

@ -0,0 +1,44 @@
#include "ug_corpus_token.h"
// Simple wrapper around integer IDs for use with the Ctrack and TSA template classes.
// (c) 2007-2009 Ulrich Germann
namespace ugdiss
{
// Read-only access to the wrapped ID.
id_type const&
SimpleWordId::
id() const
{
  return this->theID;
}
// Three-way comparison by ID: -1 if this ID is smaller, 0 if equal, 1 if
// larger.
int
SimpleWordId::
cmp(SimpleWordId const& other) const
{
  if (theID < other.theID) return -1;
  if (theID == other.theID) return 0;
  return 1;
}
// Wrap the given ID.
SimpleWordId::
SimpleWordId(id_type const& id)
  : theID(id)
{}
// Two SimpleWordIds are equal iff they wrap the same ID.
bool
SimpleWordId::
operator==(SimpleWordId const& other) const
{
  return this->cmp(other) == 0;
}
// Translate the wrapped ID through the lookup table in slot 0 of m.
// If there is no such table (m is empty or m[0] is NULL) the ID is returned
// unchanged; this mirrors the size check that Conll_Record::remap() applies
// to each of its table slots.
id_type
SimpleWordId::
remap(vector<id_type const*> const& m) const
{
  // m[0] must not be touched on an empty vector
  if (m.empty() || !m[0]) return theID;
  return m[0][theID];
}
}

View File

@ -0,0 +1,97 @@
// -*- c++ -*-
// This code is part of the re-factorization of the earlier non-template implementation of "corpus tracks"
// and suffix and prefix arrays over them as template classes.
// (c) 2007-2009 Ulrich Germann
#ifndef __ug_corpus_token_h
#define __ug_corpus_token_h
// This file defines a few simple token classes for use with the Ttrack/TSA template classes
// - SimpleWordId is a simple wrapper around an integer ID
// - L2R_Token defines next() for building suffix arrays
// - R2L_Token defines next() for building prefix arrays
#include "tpt_typedefs.h"
#include "ug_ttrack_base.h"
namespace ugdiss
{
/** Simple wrapper around id_type for use with the Ttrack/TSA template classes */
class SimpleWordId
{
// the wrapped ID
id_type theID;
public:
// wraps /id/
SimpleWordId(id_type const& id);
// returns the wrapped ID
id_type const& id() const;
// three-way comparison by ID: -1 / 0 / 1
int cmp(SimpleWordId const& other) const;
// true iff both wrap the same ID
bool operator==(SimpleWordId const& other) const;
// returns the ID translated through the table m[0]; the ID itself if
// m[0] is NULL
id_type remap(vector<id_type const*> const& m) const;
};
/** Token class for suffix arrays: the successor of a token is the token
 *  stored after it in memory */
template<typename T>
class
L2R_Token : public T
{
public:
  typedef T Token;

  L2R_Token() : T() {}
  L2R_Token(id_type id) : T(id) {}

  /** @return the token n positions further on */
  L2R_Token const* next(int n=1) const
  {
    return this + n;
  }

  /** return a pointer to the end of a sentence; used as a stopping criterion
   *  during comparison of suffixes; see Ttrack::cmp() */
  template<typename TTRACK_TYPE>
  L2R_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
  {
    typedef L2R_Token<T> Self;
    return reinterpret_cast<Self const*>(C.sntEnd(sid));
  }

  /** stopping criterion for an explicit sequence [seqStart,seqEnd): its end */
  L2R_Token const* stop(L2R_Token const* seqStart, L2R_Token const* seqEnd) const
  {
    return seqEnd;
  }

  // relational operators, all expressed through T::cmp()
  bool operator<(T const& other) const
  {
    int const c = this->cmp(other);
    return c < 0;
  }
  bool operator>(T const& other) const
  {
    int const c = this->cmp(other);
    return c > 0;
  }
  bool operator==(T const& other) const
  {
    int const c = this->cmp(other);
    return c == 0;
  }
  bool operator!=(T const& other) const
  {
    int const c = this->cmp(other);
    return c != 0;
  }
};
/** Token class for prefix arrays: the successor of a token is the token
 *  stored before it in memory */
template<typename T>
class
R2L_Token : public T
{
public:
  typedef T Token;

  R2L_Token() : T() {}
  R2L_Token(id_type id) : T(id) {}

  /** @return the token n positions back */
  R2L_Token const* next(int n = 1) const
  {
    return this - n;
  }

  /** stopping criterion within a track: the position just before the start
   *  of sentence /sid/ */
  template<typename TTRACK_TYPE>
  R2L_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
  {
    typedef R2L_Token<T> Self;
    return reinterpret_cast<Self const*>(C.sntStart(sid) - 1);
  }

  /** stopping criterion for an explicit sequence [seqStart,seqEnd): the
   *  position just before seqStart */
  R2L_Token const* stop(R2L_Token const* seqStart, R2L_Token const* seqEnd) const
  {
    assert(seqStart);
    return seqStart - 1;
  }

  // relational operators, all expressed through T::cmp()
  bool operator<(T const& other) const
  {
    int const c = this->cmp(other);
    return c < 0;
  }
  bool operator>(T const& other) const
  {
    int const c = this->cmp(other);
    return c > 0;
  }
  bool operator==(T const& other) const
  {
    int const c = this->cmp(other);
    return c == 0;
  }
  bool operator!=(T const& other) const
  {
    int const c = this->cmp(other);
    return c != 0;
  }
};
}
#endif

323
moses/mm/ug_deptree.cc Normal file
View File

@ -0,0 +1,323 @@
#include <sstream>
#include "ug_deptree.h"
#include "tpt_tokenindex.h"
using namespace std;
namespace ugdiss
{
// Follow the parent offsets upwards from this record. Returns true as soon
// as /other/ is met (a record counts as its own descendant), false when a
// record without parent is reached first.
bool
Conll_Record::
isDescendentOf(Conll_Record const* other) const
{
  Conll_Record const* node = this;
  for (;;)
    {
      if (node == other) return true;
      if (node->parent == 0) return false;
      node += node->parent;
    }
}
// Field-by-field assignment.
Conll_Record&
Conll_Record::
operator=(Conll_Record const& o)
{
  // word-level annotation
  this->sform  = o.sform;
  this->lemma  = o.lemma;
  this->majpos = o.majpos;
  this->minpos = o.minpos;
  // tree structure
  this->parent = o.parent;
  this->dtype  = o.dtype;
  // auxiliary bytes
  for (int i = 0; i < 3; ++i)
    this->info[i] = o.info[i];
  return *this;
}
// Default constructor: every field, including the three info bytes, is 0.
Conll_Record::
Conll_Record()
  : sform(0),lemma(0),majpos(0),minpos(0),parent(0),dtype(0)
{
  for (int i = 0; i < 3; ++i)
    info[i] = 0;
}
// Default constructor: a zeroed record.
Conll_AllFields::
Conll_AllFields()
  : Conll_Record()
{}
// Lexicographic three-way comparison over all fields, in the order
// sform, lemma, majpos, minpos, dtype, info[0..2], parent.
int
Conll_AllFields::
cmp(Conll_Record const& other) const
{
  if (sform < other.sform) return -1;
  if (sform > other.sform) return 1;

  if (lemma < other.lemma) return -1;
  if (lemma > other.lemma) return 1;

  if (majpos < other.majpos) return -1;
  if (majpos > other.majpos) return 1;

  if (minpos < other.minpos) return -1;
  if (minpos > other.minpos) return 1;

  if (dtype < other.dtype) return -1;
  if (dtype > other.dtype) return 1;

  for (int i = 0; i < 3; ++i)
    {
      if (info[i] < other.info[i]) return -1;
      if (info[i] > other.info[i]) return 1;
    }

  if (parent < other.parent) return -1;
  if (parent > other.parent) return 1;
  return 0;
}
// Default constructor: a zeroed record.
Conll_WildCard::
Conll_WildCard()
  : Conll_Record()
{}
// A wild card compares equal to every record.
int
Conll_WildCard::
cmp(Conll_Record const& other) const
{
  int const alwaysEqual = 0;
  return alwaysEqual;
}
#if 1
// Equality over all fields (sform, lemma, majpos, minpos, parent, dtype and
// the three info bytes). Bails out at the first field that differs.
bool
Conll_AllFields::
operator==(Conll_AllFields const& other) const
{
  if (sform != other.sform) return false;
  if (lemma != other.lemma) return false;
  if (majpos != other.majpos) return false;
  if (minpos != other.minpos) return false;
  if (parent != other.parent) return false;
  if (dtype != other.dtype) return false;
  for (int i = 0; i < 3; ++i)
    if (info[i] != other.info[i]) return false;
  return true;
}
#endif
#if 0
// (Disabled via #if 0.) Builds a record from one whitespace-separated line:
// id, surface form, lemma, two POS tags, one ignored column, governor id,
// dependency relation. Strings are mapped to IDs via the given vocabularies.
Conll_Record::
Conll_Record(string const& line,
TokenIndex const& SF, TokenIndex const& LM,
TokenIndex const& PS, TokenIndex const& DT)
{
string surf,lem,pos1,pos2,dummy,drel;
short id,gov;
istringstream buf(line);
buf >> id >> surf >> lem >> pos1 >> pos2 >> dummy >> gov >> drel;
sform = SF[surf];
lemma = LM[lem];
// POS and dependency-type IDs are stored in single bytes (uchar fields)
if (PS[pos1] > 255 || PS[pos2] > 255 || DT[drel] > 255)
{
cerr << "error at this line:\n" << line << endl;
exit(1);
}
// NOTE(review): rangeCheck() is not defined in this file; presumably it
// enforces the same < 256 limit as the test above -- confirm
majpos = rangeCheck(PS[pos1],256);
minpos = rangeCheck(PS[pos2],256);
dtype = rangeCheck(DT[drel],256);
// the parent is stored as an offset relative to this token; a governor id
// of 0 yields offset 0
parent = gov ? gov-id : 0;
info[0]=info[1]=info[2]=0;
}
// (Disabled via #if 0.) Writes the raw bytes of this record, sizeof(*this)
// of them, to /out/.
void
Conll_Record::
store(ostream& out)
{
out.write(reinterpret_cast<char const*>(this),sizeof(*this));
}
#endif
#if 1
// Follow the parent offset /length/ times. Returns NULL if a record without
// parent (offset 0) is reached before /length/ steps have been taken, and
// this record itself if length <= 0.
Conll_Record const*
Conll_Record::up(int length) const
{
  Conll_Record const* node = this;
  for (int step = 0; step < length; ++step)
    {
      if (node->parent == 0) return NULL;
      node += node->parent;
    }
  return node;
}
#endif
// Default constructors of the token views below: each one simply builds a
// zeroed Conll_Record.
Conll_Sform::
Conll_Sform()
  : Conll_Record()
{}

Conll_MinPos::
Conll_MinPos()
  : Conll_Record()
{}

Conll_MinPos_Lemma::
Conll_MinPos_Lemma()
  : Conll_Record()
{}

Conll_Lemma::
Conll_Lemma()
  : Conll_Record()
{}
// A zeroed record whose lemma is set to _id.
Conll_Lemma::
Conll_Lemma(id_type _id)
  : Conll_Record()
{
  lemma = _id;
}
// A zeroed record whose minor part of speech is set to _id (the value is
// stored in a uchar field).
Conll_MinPos::
Conll_MinPos(id_type _id)
  : Conll_Record()
{
  minpos = _id;
}
// The ID of this token view is its minor part of speech.
id_type
Conll_MinPos::
id() const
{
  id_type const key = this->minpos;
  return key;
}
// Default constructor: a zeroed record. The class declaration announces this
// constructor, so it is defined here next to its sibling; without a
// definition any default-constructed Conll_MajPos fails to link.
Conll_MajPos::
Conll_MajPos()
  : Conll_Record()
{}

// A zeroed record whose major part of speech is set to _id (the value is
// stored in a uchar field).
Conll_MajPos::
Conll_MajPos(id_type _id)
  : Conll_Record()
{
  this->majpos = _id;
}
// The ID of this token view is its major part of speech.
id_type
Conll_MajPos::
id() const
{
  id_type const key = this->majpos;
  return key;
}
// The ID of this token view is its minor part of speech (the lemma only
// takes part in cmp()).
id_type
Conll_MinPos_Lemma::
id() const
{
  id_type const key = this->minpos;
  return key;
}
// Three-way comparison by major part of speech only.
int
Conll_MajPos::
cmp(Conll_Record const& other) const
{
  if (this->majpos < other.majpos) return -1;
  if (this->majpos > other.majpos) return 1;
  return 0;
}
// Three-way comparison by minor part of speech only.
int
Conll_MinPos::
cmp(Conll_Record const& other) const
{
  if (this->minpos < other.minpos) return -1;
  if (this->minpos > other.minpos) return 1;
  return 0;
}
// Three-way comparison by minor part of speech first, lemma second. A value
// of 0 on either side makes that field match anything, i.e. the field is
// skipped in the comparison.
int
Conll_MinPos_Lemma::
cmp(Conll_Record const& other) const
{
  bool const bothPosSet = this->minpos != 0 && other.minpos != 0;
  if (bothPosSet && this->minpos != other.minpos)
    return this->minpos < other.minpos ? -1 : 1;

  bool const bothLemmaSet = this->lemma != 0 && other.lemma != 0;
  if (bothLemmaSet && this->lemma != other.lemma)
    return this->lemma < other.lemma ? -1 : 1;

  return 0;
}
// The ID of this token view is its lemma.
id_type
Conll_Lemma::
id() const
{
  id_type const key = this->lemma;
  return key;
}
// Three-way comparison by lemma only.
int
Conll_Lemma::
cmp(Conll_Record const& other) const
{
#if 0
  // debugging aid: print the lemma chains up to the root for both operands
  for (Conll_Record const* x = this; x; x = x->parent ? x+x->parent : NULL)
    cout << (x!=this?".":"") << x->lemma;
  cout << " <=> ";
  for (Conll_Record const* x = &other; x; x = x->parent ? x+x->parent : NULL)
    cout << (x!=&other?".":"") << x->lemma;
  cout << (this->lemma < other.lemma ? -1 : this->lemma > other.lemma ? 1 : 0);
  cout << endl;
#endif
  if (this->lemma < other.lemma) return -1;
  if (this->lemma > other.lemma) return 1;
  return 0;
}
// A zeroed record whose surface form is set to _id.
Conll_Sform::
Conll_Sform(id_type _id)
  : Conll_Record()
{
  sform = _id;
}
// The ID of this token view is its surface form.
id_type
Conll_Sform::
id() const
{
  id_type const key = this->sform;
  return key;
}
// Three-way comparison by surface form only.
int
Conll_Sform::
cmp(Conll_Record const& other) const
{
  if (this->sform < other.sform) return -1;
  if (this->sform > other.sform) return 1;
  return 0;
}
#if 0
// (Disabled via #if 0.) Builds one DTNode per record in [first,last) and
// links every node to its parent node and into the parent's child list.
// NOTE(review): the class name dpSnt does not occur in the header shown
// with this file; presumably this is the old name of DependencyTree -- confirm
dpSnt::
dpSnt(Conll_Record const* first, Conll_Record const* last)
{
// reserve up front: the parent/children pointers taken below point into w
w.reserve(last-first);
for (Conll_Record const* x = first; x < last; ++x)
w.push_back(DTNode(x));
for (size_t i = 0; i < w.size(); i++)
{
short p = w[i].rec->parent;
if (p != 0)
{
// the parent offset must stay within the sentence
if (p > 0) assert(i+p < w.size());
else assert(i >= size_t(-p));
w[i].parent = &(w[i+p]);
w[i].parent->children.push_back(&(w[i]));
}
}
}
#endif
/** @return true if at most one record in [start,stop) has no parent or a
 *  parent outside [start,stop), i.e., if the linear sequence of
 *  /Conll_Record/s forms a single connected tree structure */
bool
isCoherent(Conll_Record const* const start, Conll_Record const* const stop)
{
  int dangling = 0; // records whose parent is missing or outside the span
  for (Conll_Record const* rec = start; rec < stop; ++rec)
    {
      Conll_Record const* const head = rec->up();
      bool const inside = head && head >= start && head < stop;
      if (!inside && ++dangling > 1)
        return false;
    }
  return true;
}
// For contiguous word sequences extracted from longer sequences: every
// parent offset that leads outside [start,stop) is reset to 0 (no parent).
void
fixParse(Conll_Record* start, Conll_Record* stop)
{
  int const len = stop-start;
  for (int pos = 0; pos < len; ++pos)
    {
      Conll_Record& rec = start[pos];
      int const head = pos + rec.parent; // position of the parent in the span
      if (head < 0 || head >= len)
        rec.parent = 0;
    }
}
}

217
moses/mm/ug_deptree.h Normal file
View File

@ -0,0 +1,217 @@
// -*- c++ -*-
// (c) 2007-2012 Ulrich Germann
// Stuff related to dependency trees
#ifndef __ug_deptree_h
#define __ug_deptree_h
#include <string>
#include <iostream>
#include "tpt_tokenindex.h"
#include "ug_ttrack_base.h"
#include "ug_conll_record.h"
#include "ug_conll_bottom_up_token.h"
#include "ug_typedefs.h"
using namespace std;
namespace ugdiss
{
// Fills the vector v with pointers to the internal root r_x for the
// stretch [start,x] for all x: start <= x < stop. If the stretch
// is incoherent, r_x is NULL
// Requires a non-empty span (asserted). T must expose a /parent/ member
// holding a relative offset.
template<typename T>
void
fill_L2R_roots(T const* start,T const* stop, vector<T const*>& v)
{
assert(stop>start);
v.resize(stop-start);
// the one-token stretch [start,start] is its own root
v[0] = start;
// isR[i] is set while token i is considered a root of the current stretch
bitvector isR(v.size());
// root[i]: where the parent offset of token i points
vector<T const*> root(v.size());
isR.set(0);
root[0] = start+start->parent;
for (T const* x = start+1; x < stop; ++x)
{
size_t p = x-start;
root[p] = x+x->parent;
// tokens flagged so far whose parent is x are no longer roots
for (size_t i = isR.find_first(); i < isR.size(); i = isR.find_next(i))
if (root[i]==x)
isR.reset(i);
// NOTE(review): the test below is against the full span [start,stop), not
// the current stretch [start,x]; a token whose parent lies further right
// inside the span is therefore never flagged -- confirm this is intended
// NOTE(review): a token with parent == 0 has root[p] == x, which is inside
// the span, so it is not flagged here either -- confirm
if (root[p] < start || root[p] >= stop)
isR.set(x-start);
// exactly one flagged token: that is the root; otherwise NULL
v[p] = (isR.count()==1) ? start+isR.find_first() : NULL;
}
}
// Return the root of the tree if the span [start,stop) constitutes a tree,
// NULL otherwise. The root is the one token whose parent (as given by up())
// is missing or lies outside the span; with more than one such token the
// span is not a tree. The span is expected to contain at least one such
// token (asserted).
template<typename T>
T const*
findInternalRoot(T const* start, T const* stop)
{
  T const* candidate = NULL;
  int dangling = 0; // tokens whose parent is missing or outside the span
  for (T const* tok = start; tok < stop; ++tok)
    {
      if (dangling > 1) break;
      T const* const head = reinterpret_cast<T const*>(tok->up());
      bool const inside = head && head >= start && head < stop;
      if (inside) continue;
      ++dangling;
      candidate = tok;
    }
  assert(dangling);
  if (dangling != 1) return NULL;
  return candidate;
}
// Return the governor of the tree given by [start,stop) if the span
// constitutes a tree, NULL otherwise. The governor is the common target of
// all parent links that leave the span; two different targets mean the span
// is not a tree. The span is expected to contain at least one token whose
// parent is missing or outside the span (asserted).
// NOTE(review): while the governor seen so far is NULL (a token without
// parent), a later token with a different outside parent replaces it
// without being counted as a second root -- confirm this is intended.
template<typename T>
T const*
findExternalRoot(T const* start, T const* stop)
{
  T const* governor = NULL;
  int distinct = 0; // number of different governors seen (capped logic below)
  for (T const* tok = start; tok < stop; ++tok)
    {
      if (distinct > 1) break;
      T const* const head = reinterpret_cast<T const*>(tok->up());
      bool const inside = head && head >= start && head < stop;
      if (inside) continue;
      if (governor && head != governor)
        {
          ++distinct;
          continue;
        }
      governor = head;
      if (distinct == 0) distinct = 1;
    }
  assert(distinct);
  if (distinct != 1) return NULL;
  return governor;
}
// Convenience overload: the internal root of the token sequence stored in v
// (see the pointer-range overload), or NULL if v is empty.
template<typename T>
T const*
findInternalRoot(vector<T> const& v)
{
  // Check for emptiness first: neither begin() of an empty vector nor end()
  // of any vector may be dereferenced to obtain an address.
  if (v.empty()) return NULL;
  T const* const first = &v[0];
  return findInternalRoot<T>(first, first + v.size());
}
#if 1
// A node of a dependency tree: one corpus record plus explicit links to the
// parent node and the child nodes.
class DTNode
{
public:
Conll_Record const* rec; // pointer to the corpus record (Conll_Record) for this node
DTNode* parent; // pointer to my parent
vector<DTNode*> children; // children (in the order they appear in the sentence)
DTNode(Conll_Record const* p);
};
/** A parsed sentence */
class
DependencyTree
{
public:
vector<DTNode> w; // the nodes of the sentence
// takes the range of records [first,last)
// NOTE(review): presumably builds one node per record, like the disabled
// dpSnt constructor in ug_deptree.cc -- confirm
DependencyTree(Conll_Record const* first, Conll_Record const* last);
};
#endif
// Token view of a Conll_Record that is identified and compared by its lemma.
class
Conll_Lemma : public Conll_Record
{
public:
Conll_Lemma();
// zeroed record with lemma set to _id
Conll_Lemma(id_type _id);
// returns the lemma
id_type id() const;
// three-way comparison (-1/0/1) by lemma only
int cmp(Conll_Record const& other) const;
};
// Token view of a Conll_Record that is identified and compared by its
// surface form.
class
Conll_Sform : public Conll_Record
{
public:
Conll_Sform();
// zeroed record with sform set to _id
Conll_Sform(id_type _id);
// returns the surface form
id_type id() const;
// three-way comparison (-1/0/1) by surface form only
int cmp(Conll_Record const& other) const;
};
// Token view of a Conll_Record that is identified and compared by its major
// part of speech.
class
Conll_MajPos : public Conll_Record
{
public:
Conll_MajPos();
// zeroed record with majpos set to _id
Conll_MajPos(id_type _id);
// returns the major part of speech
id_type id() const;
// three-way comparison (-1/0/1) by major part of speech only
int cmp(Conll_Record const& other) const;
};
// Token view of a Conll_Record that is identified and compared by its minor
// part of speech.
class
Conll_MinPos : public Conll_Record
{
public:
Conll_MinPos();
// zeroed record with minpos set to _id
Conll_MinPos(id_type _id);
// returns the minor part of speech
id_type id() const;
// three-way comparison (-1/0/1) by minor part of speech only
int cmp(Conll_Record const& other) const;
};
// Token view of a Conll_Record that is compared by minor part of speech and
// lemma.
class
Conll_MinPos_Lemma : public Conll_Record
{
public:
Conll_MinPos_Lemma();
// returns the minor part of speech
id_type id() const;
// three-way comparison by minor part of speech, then lemma; a field that
// is 0 on either side is skipped
int cmp(Conll_Record const& other) const;
};
// Token view of a Conll_Record that is compared on all of its fields.
class
Conll_AllFields : public Conll_Record
{
public:
Conll_AllFields();
// lexicographic three-way comparison over sform, lemma, majpos, minpos,
// dtype, info[0..2], parent (in this order)
int cmp(Conll_Record const& other) const;
// true iff all fields are equal
bool operator==(Conll_AllFields const& other) const;
};
// Token view of a Conll_Record that matches any record.
class
Conll_WildCard : public Conll_Record
{
public:
Conll_WildCard();
// always returns 0, i.e. compares equal to every record
int cmp(Conll_Record const& other) const;
};
/** @return true if the linear sequence of /Conll_Record/s is coherent,
* i.e., a proper connected tree structure: at most one record in
* [start,stop) may have no parent or a parent outside [start,stop) */
bool
isCoherent(Conll_Record const* start, Conll_Record const* const stop);
/** @return the root node of the tree covering the span [start,stop), if the
 *  span is coherent; NULL otherwise (also for an empty span). The root is
 *  the single token whose parent, as given by up(), is missing or outside
 *  the span. */
template<typename T>
T const* topNode(T const* start , T const* stop)
{
  T const* top = NULL;
  for (T const* tok = start; tok < stop; ++tok)
    {
      T const* const head = reinterpret_cast<T const*>(tok->up());
      bool const inside = head && head >= start && head < stop;
      if (inside) continue;
      if (top != NULL) return NULL; // second candidate: not a single tree
      top = tok;
    }
  return top;
}
}
#endif

330
moses/mm/ug_im_tsa.h Normal file
View File

@ -0,0 +1,330 @@
// -*- c++ -*-
// (c) 2007-2009 Ulrich Germann. All rights reserved.
#ifndef _ug_im_tsa_h
#define _ug_im_tsa_h
// TO DO:
// - multi-threaded sorting during TSA construction (currently painfully slow!)
#include <iostream>
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/dynamic_bitset.hpp>
#include "tpt_tightindex.h"
#include "tpt_tokenindex.h"
#include "ug_tsa_base.h"
#include "tpt_pickler.h"
namespace ugdiss
{
using namespace std;
namespace bio=boost::iostreams;
//-----------------------------------------------------------------------
// In-memory token sequence array: an array of corpus positions
// (sentence id, offset), grouped by the ID of the token at each position
// and sorted within each group.
template<typename TOKEN>
class imTSA : public TSA<TOKEN>
{
// a corpus position; its /sid/ and /offset/ members are used below
typedef typename Ttrack<TOKEN>::Position cpos;
public:
class tree_iterator;
friend class tree_iterator;
private:
vector<cpos> sufa; // stores the actual array
vector<filepos_type> index; /* top-level index into regions in sufa
* (for faster access) */
private:
// pointer to the entry at relative position /ratio/ within [a,z)
char const*
index_jump(char const* a, char const* z, float ratio) const;
// start of the region of entries for token ID /id/; NULL if out of range
char const*
getLowerBound(id_type id) const;
// end of the region of entries for token ID /id/; NULL if out of range
char const*
getUpperBound(id_type id) const;
public:
// creates an empty array without corpus
imTSA();
// builds the array over the sentences of *c selected by /filt/; progress
// messages go to *log if log is not NULL
imTSA(Ttrack<TOKEN> const* c, bdBitset const& filt, ostream* log = NULL);
count_type
sntCnt(char const* p, char const * const q) const;
// number of entries in [p,q)
count_type
rawCnt(char const* p, char const * const q) const;
// sids: number of distinct sentence ids in [p,q); raw: number of entries
void
getCounts(char const* p, char const * const q,
count_type& sids, count_type& raw) const;
// read the sentence id of the entry at p; p is returned unchanged
char const*
readSid(char const* p, char const* q, id_type& sid) const;
char const*
readSid(char const* p, char const* q, uint64_t& sid) const;
// read the offset of the entry at p; returns the position of the next entry
char const*
readOffset(char const* p, char const* q, uint16_t& offset) const;
char const*
readOffset(char const* p, char const* q, uint64_t& offset) const;
void
sanityCheck() const;
// writes the array and its index to the file /fname/
void
save_as_mm_tsa(string fname) const;
};
// Iterator over an imTSA; adds nothing to TSA<TOKEN>::tree_iterator except
// a constructor that accepts a pointer to an imTSA.
template<typename TOKEN>
class
imTSA<TOKEN>::
tree_iterator : public TSA<TOKEN>::tree_iterator
{
public:
tree_iterator(imTSA<TOKEN> const* s);
};
// Hands the array on to the base-class iterator. imTSA<TOKEN> derives from
// TSA<TOKEN>, so the pointer is converted with an ordinary derived-to-base
// static_cast; a reinterpret_cast would bypass the pointer adjustment the
// language performs for such conversions.
template<typename TOKEN>
imTSA<TOKEN>::
tree_iterator::
tree_iterator(imTSA<TOKEN> const* s)
  : TSA<TOKEN>::tree_iterator(static_cast<TSA<TOKEN> const*>(s))
{}
/** Jump to the entry at relative position /ratio/ within the range [a,z).
 *  The in-memory array consists of fixed-size cpos entries, so the jump is
 *  plain pointer arithmetic on whole entries.
 *  @param a start of the range (must not lie behind z)
 *  @param z end of the range
 *  @param ratio relative position, 0 <= ratio < 1
 *  @return pointer to the entry a + floor(ratio * number of entries)
 */
template<typename TOKEN>
char const*
imTSA<TOKEN>::
index_jump(char const* a, char const* z, float ratio) const
{
  assert(ratio >= 0 && ratio < 1);
  cpos const* xa = reinterpret_cast<cpos const*>(a);
  cpos const* xz = reinterpret_cast<cpos const*>(z);
  assert(xa <= xz);
  // size_t rather than int: the entry count of a large array need not fit
  // into an int
  size_t const skip = static_cast<size_t>(ratio*(xz-xa));
  return reinterpret_cast<char const*>(xa+skip);
}
// Default constructor: an empty array that is not attached to any corpus.
template<typename TOKEN>
imTSA<TOKEN>::
imTSA()
{
  // no corpus ...
  this->corpus     = NULL;
  this->corpusSize = 0;
  // ... and no array data
  this->data       = NULL;
  this->startArray = NULL;
  this->endArray   = NULL;
  this->indexSize  = 0;
  this->BitSetCachingThreshold = 4096;
}
// Build an array from all the tokens in the sentences in *c that are
// specified in filter.
// @param c      the corpus track (must not be NULL)
// @param filter bit i set: include sentence i
// @param log    if not NULL, progress messages are written to *log
// Sentences with slimit (65536) or more tokens are skipped.
template<typename TOKEN>
imTSA<TOKEN>::
imTSA(Ttrack<TOKEN> const* c, bdBitset const& filter, ostream* log)
{
  assert(c);
  this->corpus = c;

  // In the first iteration over the corpus, we obtain word counts.
  // They allow us to
  // a. allocate the exact amount of memory we need
  // b. place tokens into the right 'section' in the array, based on
  //    the ID of the first token in the sequence. We can then sort
  //    each section separately.

  if (log) *log << "counting tokens ... ";
  int slimit = 65536;
  // slimit=65536 is the upper bound of what we can fit into a ushort which
  // we currently use for the offset. Actually, due to (memory) word
  // alignment in the memory, using a ushort instead of a uint32_t might not
  // even make a difference.

  vector<count_type> wcnt; // word counts
  sufa.resize(c->count_tokens(wcnt,filter,slimit,log));
  if (log) *log << sufa.size() << "." << endl;
  // exit(1);

  // we use a second vector that keeps track for each ID of the current insertion
  // position in the array
  vector<count_type> tmp(wcnt.size(),0);
  for (size_t i = 1; i < wcnt.size(); ++i)
    tmp[i] = tmp[i-1] + wcnt[i-1];

  // Now dump all token positions into the right place in sufa
  this->corpusSize = 0;
  for (id_type sid = filter.find_first();
       sid < filter.size();
       sid = filter.find_next(sid))
    {
      TOKEN const* k = c->sntStart(sid);
      TOKEN const* const stop = c->sntEnd(sid);
      if (stop - k >= slimit) continue;
      this->corpusSize++;
      for (ushort p=0; k < stop; ++p,++k)
        {
          id_type wid = k->id();
          cpos& entry = sufa[tmp[wid]++];
          entry.sid = sid;
          entry.offset = p;
          assert(p < c->sntLen(sid));
        }
    }

  // Now sort the array
  if (log) *log << "sorting ...." << endl;
  index.resize(wcnt.size()+1,0);
  typename ttrack::Position::LESS<Ttrack<TOKEN> > sorter(c);
  for (size_t i = 0; i < wcnt.size(); i++)
    {
      if (log && wcnt[i] > 5000)
        *log << "sorting " << wcnt[i]
             << " entries starting with id " << i << "." << endl;
      index[i+1] = index[i]+wcnt[i];
      assert(index[i+1]==tmp[i]); // sanity check
      if (wcnt[i]>1)
        sort(sufa.begin()+index[i],sufa.begin()+index[i+1],sorter);
    }

  // Publish the array boundaries. The addresses are computed from the first
  // element plus the size: dereferencing end() (or begin() of an empty
  // vector) just to take an address is not allowed.
  cpos const* const first = sufa.empty() ? NULL : &sufa[0];
  this->startArray = reinterpret_cast<char const*>(first);
  this->endArray = reinterpret_cast<char const*>(first + sufa.size());
  this->numTokens = sufa.size();
  this->indexSize = this->index.size();
#if 1
  // Sanity check during code development. Can be removed once the thing is stable.
  typename vector<cpos>::iterator m = sufa.begin();
  for (size_t i = 0; i < wcnt.size(); i++)
    {
      for (size_t k = 0; k < wcnt[i]; ++k,++m)
        {
          assert(c->getToken(*m)->id()==i);
          assert(m->offset < c->sntLen(m->sid));
        }
    }
#endif
} // end of imTSA constructor (corpus,filter,quiet)
// ----------------------------------------------------------------------
// Start of the region of array entries that begin with token ID /id/.
// @return pointer to the first entry of the region, NULL if /id/ is not
//         covered by the index
template<typename TOKEN>
char const*
imTSA<TOKEN>::
getLowerBound(id_type id) const
{
  if (id >= this->index.size())
    return NULL;
  // index[id] may equal sufa.size() (the last index slot does), so the
  // address is computed by pointer arithmetic instead of sufa[...], which
  // must not be applied to the one-past-the-end position.
  cpos const* const base = this->sufa.empty() ? NULL : &(this->sufa[0]);
  return reinterpret_cast<char const*>(base + this->index[id]);
}
// End of the region of array entries that begin with token ID /id/.
// @return pointer just past the last entry of the region, NULL if /id/ is
//         not covered by the index
template<typename TOKEN>
char const*
imTSA<TOKEN>::
getUpperBound(id_type id) const
{
  if (id+1 >= this->index.size())
    return NULL;
  // For the highest token ID index[id+1] equals sufa.size(), so the address
  // is computed by pointer arithmetic instead of sufa[...], which must not
  // be applied to the one-past-the-end position.
  cpos const* const base = this->sufa.empty() ? NULL : &(this->sufa[0]);
  return reinterpret_cast<char const*>(base + this->index[id+1]);
}
// Read the sentence id of the array entry at p into /sid/. The position is
// returned unchanged; q is not used.
template<typename TOKEN>
char const*
imTSA<TOKEN>::
readSid(char const* p, char const* q, id_type& sid) const
{
  cpos const* const entry = reinterpret_cast<cpos const*>(p);
  sid = entry->sid;
  return p;
}
// Read the sentence id of the array entry at p into a 64-bit /sid/. The
// position is returned unchanged; q is not used.
template<typename TOKEN>
char const*
imTSA<TOKEN>::
readSid(char const* p, char const* q, uint64_t& sid) const
{
  cpos const* const entry = reinterpret_cast<cpos const*>(p);
  sid = entry->sid;
  return p;
}
// Read the offset of the array entry at p into /offset/ and return the
// position of the following entry; q is not used.
template<typename TOKEN>
char const*
imTSA<TOKEN>::
readOffset(char const* p, char const* q, uint16_t& offset) const
{
  cpos const* const entry = reinterpret_cast<cpos const*>(p);
  offset = entry->offset;
  return p + sizeof(cpos);
}
// Read the offset of the array entry at p into a 64-bit /offset/ and return
// the position of the following entry; q is not used.
template<typename TOKEN>
char const*
imTSA<TOKEN>::
readOffset(char const* p, char const* q, uint64_t& offset) const
{
  cpos const* const entry = reinterpret_cast<cpos const*>(p);
  offset = entry->offset;
  return p + sizeof(cpos);
}
// Number of array entries in [p,q).
template<typename TOKEN>
count_type
imTSA<TOKEN>::
rawCnt(char const* p, char const* const q) const
{
  typedef cpos const* entry_ptr;
  return reinterpret_cast<entry_ptr>(q) - reinterpret_cast<entry_ptr>(p);
}
// Count the array entries in [p,q).
// @param sids receives the number of distinct sentence ids among the entries
// @param raw  receives the number of entries
template<typename TOKEN>
void
imTSA<TOKEN>::
getCounts(char const* p, char const* const q,
          count_type& sids, count_type& raw) const
{
  cpos const* xp = reinterpret_cast<cpos const*>(p);
  cpos const* const xq = reinterpret_cast<cpos const*>(q);
  raw = xq-xp;

  // one bit per sentence of the corpus; set for every sentence seen
  bdBitset check(this->corpus->size());
  for (; xp < xq; ++xp)
    {
      id_type const sid = xp->sid;
      check.set(sid);
    }
  sids = check.count();
}
// Write the array to the file /fname/.
// File layout as produced below:
//   - position of the index (filled in at the very end)
//   - number of index entries
//   - for each region of the index: the (sid, offset) pairs of its entries,
//     written with tightwrite()
//   - the index: start position of each region relative to the first one,
//     plus one final entry marking the end of the last region
// NOTE(review): a failure to open or write the file goes unnoticed here;
// decide on an error policy with the callers.
template<typename TOKEN>
void
imTSA<TOKEN>::
save_as_mm_tsa(string fname) const
{
  // binary mode: the file holds raw numbers, so no newline translation may
  // be applied by the stream
  ofstream out(fname.c_str(), ios::out | ios::binary);
  filepos_type idxStart(0);
  id_type idxSize(index.size());
  numwrite(out,idxStart); // place holder, overwritten below
  numwrite(out,idxSize);

  vector<filepos_type> mmIndex;
  mmIndex.reserve(this->index.size());
  for (size_t i = 1; i < this->index.size(); i++)
    {
      mmIndex.push_back(out.tellp());
      for (size_t k = this->index[i-1]; k < this->index[i]; ++k)
        {
          tightwrite(out,sufa[k].sid,0);
          tightwrite(out,sufa[k].offset,1);
        }
    }
  mmIndex.push_back(out.tellp());

  idxStart = out.tellp();
  for (size_t i = 0; i < mmIndex.size(); i++)
    numwrite(out,mmIndex[i]-mmIndex[0]);

  // go back and record where the index starts
  out.seekp(0);
  numwrite(out,idxStart);
  out.close();
}
}
#endif

431
moses/mm/ug_im_ttrack.d Normal file
View File

@ -0,0 +1,431 @@
ug_im_ttrack.o: tpt/ug_im_ttrack.cc tpt/ug_im_ttrack.h \
/u/germann/opt64/include/boost/shared_ptr.hpp \
/u/germann/opt64/include/boost/smart_ptr/shared_ptr.hpp \
/u/germann/opt64/include/boost/config.hpp \
/u/germann/opt64/include/boost/config/user.hpp \
/u/germann/opt64/include/boost/config/select_compiler_config.hpp \
/u/germann/opt64/include/boost/config/compiler/gcc.hpp \
/u/germann/opt64/include/boost/config/select_stdlib_config.hpp \
/u/germann/opt64/include/boost/config/no_tr1/utility.hpp \
/u/germann/opt64/include/boost/config/stdlib/libstdcpp3.hpp \
/u/germann/opt64/include/boost/config/select_platform_config.hpp \
/u/germann/opt64/include/boost/config/platform/linux.hpp \
/u/germann/opt64/include/boost/config/posix_features.hpp \
/u/germann/opt64/include/boost/config/suffix.hpp \
/u/germann/opt64/include/boost/config/no_tr1/memory.hpp \
/u/germann/opt64/include/boost/assert.hpp \
/u/germann/opt64/include/boost/checked_delete.hpp \
/u/germann/opt64/include/boost/throw_exception.hpp \
/u/germann/opt64/include/boost/exception/detail/attribute_noreturn.hpp \
/u/germann/opt64/include/boost/detail/workaround.hpp \
/u/germann/opt64/include/boost/exception/exception.hpp \
/u/germann/opt64/include/boost/current_function.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/shared_count.hpp \
/u/germann/opt64/include/boost/smart_ptr/bad_weak_ptr.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/sp_counted_base.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/sp_has_sync.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
/u/germann/opt64/include/boost/detail/sp_typeinfo.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/sp_convertible.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/spinlock_pool.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/spinlock.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/spinlock_sync.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/yield_k.hpp \
/u/germann/opt64/include/boost/memory_order.hpp \
/u/germann/opt64/include/boost/smart_ptr/detail/operator_bool.hpp \
tpt/tpt_typedefs.h tpt/tpt_error.h tpt/tpt_tokenindex.h \
/u/germann/opt64/include/boost/iostreams/device/mapped_file.hpp \
/u/germann/opt64/include/boost/iostreams/close.hpp \
/u/germann/opt64/include/boost/iostreams/categories.hpp \
/u/germann/opt64/include/boost/iostreams/flush.hpp \
/u/germann/opt64/include/boost/iostreams/detail/dispatch.hpp \
/u/germann/opt64/include/boost/iostreams/detail/select.hpp \
/u/germann/opt64/include/boost/type_traits/is_base_and_derived.hpp \
/u/germann/opt64/include/boost/type_traits/intrinsics.hpp \
/u/germann/opt64/include/boost/type_traits/config.hpp \
/u/germann/opt64/include/boost/type_traits/is_class.hpp \
/u/germann/opt64/include/boost/type_traits/is_union.hpp \
/u/germann/opt64/include/boost/type_traits/remove_cv.hpp \
/u/germann/opt64/include/boost/type_traits/broken_compiler_spec.hpp \
/u/germann/opt64/include/boost/mpl/aux_/lambda_support.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/lambda.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/ttp.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/msvc.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/gcc.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/workaround.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/ctps.hpp \
/u/germann/opt64/include/boost/type_traits/detail/cv_traits_impl.hpp \
/u/germann/opt64/include/boost/type_traits/detail/type_trait_def.hpp \
/u/germann/opt64/include/boost/type_traits/detail/template_arity_spec.hpp \
/u/germann/opt64/include/boost/mpl/int.hpp \
/u/germann/opt64/include/boost/mpl/int_fwd.hpp \
/u/germann/opt64/include/boost/mpl/aux_/adl_barrier.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/adl.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/intel.hpp \
/u/germann/opt64/include/boost/mpl/aux_/nttp_decl.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/nttp.hpp \
/u/germann/opt64/include/boost/mpl/aux_/integral_wrapper.hpp \
/u/germann/opt64/include/boost/mpl/integral_c_tag.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/static_constant.hpp \
/u/germann/opt64/include/boost/mpl/aux_/static_cast.hpp \
/u/germann/opt64/include/boost/preprocessor/cat.hpp \
/u/germann/opt64/include/boost/preprocessor/config/config.hpp \
/u/germann/opt64/include/boost/mpl/aux_/template_arity_fwd.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessor/params.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/preprocessor.hpp \
/u/germann/opt64/include/boost/preprocessor/comma_if.hpp \
/u/germann/opt64/include/boost/preprocessor/punctuation/comma_if.hpp \
/u/germann/opt64/include/boost/preprocessor/control/if.hpp \
/u/germann/opt64/include/boost/preprocessor/control/iif.hpp \
/u/germann/opt64/include/boost/preprocessor/logical/bool.hpp \
/u/germann/opt64/include/boost/preprocessor/facilities/empty.hpp \
/u/germann/opt64/include/boost/preprocessor/punctuation/comma.hpp \
/u/germann/opt64/include/boost/preprocessor/repeat.hpp \
/u/germann/opt64/include/boost/preprocessor/repetition/repeat.hpp \
/u/germann/opt64/include/boost/preprocessor/debug/error.hpp \
/u/germann/opt64/include/boost/preprocessor/detail/auto_rec.hpp \
/u/germann/opt64/include/boost/preprocessor/tuple/eat.hpp \
/u/germann/opt64/include/boost/preprocessor/inc.hpp \
/u/germann/opt64/include/boost/preprocessor/arithmetic/inc.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/overload_resolution.hpp \
/u/germann/opt64/include/boost/type_traits/detail/type_trait_undef.hpp \
/u/germann/opt64/include/boost/type_traits/detail/bool_trait_def.hpp \
/u/germann/opt64/include/boost/type_traits/integral_constant.hpp \
/u/germann/opt64/include/boost/mpl/bool.hpp \
/u/germann/opt64/include/boost/mpl/bool_fwd.hpp \
/u/germann/opt64/include/boost/mpl/integral_c.hpp \
/u/germann/opt64/include/boost/mpl/integral_c_fwd.hpp \
/u/germann/opt64/include/boost/type_traits/detail/bool_trait_undef.hpp \
/u/germann/opt64/include/boost/type_traits/detail/ice_and.hpp \
/u/germann/opt64/include/boost/type_traits/detail/ice_not.hpp \
/u/germann/opt64/include/boost/type_traits/detail/yes_no_type.hpp \
/u/germann/opt64/include/boost/type_traits/is_same.hpp \
/u/germann/opt64/include/boost/type_traits/is_convertible.hpp \
/u/germann/opt64/include/boost/type_traits/is_array.hpp \
/u/germann/opt64/include/boost/type_traits/add_reference.hpp \
/u/germann/opt64/include/boost/type_traits/is_reference.hpp \
/u/germann/opt64/include/boost/type_traits/is_lvalue_reference.hpp \
/u/germann/opt64/include/boost/type_traits/is_rvalue_reference.hpp \
/u/germann/opt64/include/boost/type_traits/ice.hpp \
/u/germann/opt64/include/boost/type_traits/detail/ice_or.hpp \
/u/germann/opt64/include/boost/type_traits/detail/ice_eq.hpp \
/u/germann/opt64/include/boost/type_traits/is_arithmetic.hpp \
/u/germann/opt64/include/boost/type_traits/is_integral.hpp \
/u/germann/opt64/include/boost/type_traits/is_float.hpp \
/u/germann/opt64/include/boost/type_traits/is_void.hpp \
/u/germann/opt64/include/boost/type_traits/is_abstract.hpp \
/u/germann/opt64/include/boost/static_assert.hpp \
/u/germann/opt64/include/boost/mpl/eval_if.hpp \
/u/germann/opt64/include/boost/mpl/if.hpp \
/u/germann/opt64/include/boost/mpl/aux_/value_wknd.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/integral.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/eti.hpp \
/u/germann/opt64/include/boost/mpl/aux_/na_spec.hpp \
/u/germann/opt64/include/boost/mpl/lambda_fwd.hpp \
/u/germann/opt64/include/boost/mpl/void_fwd.hpp \
/u/germann/opt64/include/boost/mpl/aux_/na.hpp \
/u/germann/opt64/include/boost/mpl/aux_/na_fwd.hpp \
/u/germann/opt64/include/boost/mpl/aux_/lambda_arity_param.hpp \
/u/germann/opt64/include/boost/mpl/aux_/arity.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/dtp.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessor/enum.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \
/u/germann/opt64/include/boost/mpl/limits/arity.hpp \
/u/germann/opt64/include/boost/preprocessor/logical/and.hpp \
/u/germann/opt64/include/boost/preprocessor/logical/bitand.hpp \
/u/germann/opt64/include/boost/preprocessor/identity.hpp \
/u/germann/opt64/include/boost/preprocessor/facilities/identity.hpp \
/u/germann/opt64/include/boost/preprocessor/empty.hpp \
/u/germann/opt64/include/boost/preprocessor/arithmetic/add.hpp \
/u/germann/opt64/include/boost/preprocessor/arithmetic/dec.hpp \
/u/germann/opt64/include/boost/preprocessor/control/while.hpp \
/u/germann/opt64/include/boost/preprocessor/list/fold_left.hpp \
/u/germann/opt64/include/boost/preprocessor/list/detail/fold_left.hpp \
/u/germann/opt64/include/boost/preprocessor/control/expr_iif.hpp \
/u/germann/opt64/include/boost/preprocessor/list/adt.hpp \
/u/germann/opt64/include/boost/preprocessor/detail/is_binary.hpp \
/u/germann/opt64/include/boost/preprocessor/detail/check.hpp \
/u/germann/opt64/include/boost/preprocessor/logical/compl.hpp \
/u/germann/opt64/include/boost/preprocessor/list/fold_right.hpp \
/u/germann/opt64/include/boost/preprocessor/list/detail/fold_right.hpp \
/u/germann/opt64/include/boost/preprocessor/list/reverse.hpp \
/u/germann/opt64/include/boost/preprocessor/control/detail/while.hpp \
/u/germann/opt64/include/boost/preprocessor/tuple/elem.hpp \
/u/germann/opt64/include/boost/preprocessor/arithmetic/sub.hpp \
/u/germann/opt64/include/boost/mpl/identity.hpp \
/u/germann/opt64/include/boost/mpl/void.hpp \
/u/germann/opt64/include/boost/iostreams/traits.hpp \
/u/germann/opt64/include/boost/iostreams/detail/bool_trait_def.hpp \
/u/germann/opt64/include/boost/iostreams/detail/template_params.hpp \
/u/germann/opt64/include/boost/preprocessor/control/expr_if.hpp \
/u/germann/opt64/include/boost/preprocessor/repetition/enum_params.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/wide_streams.hpp \
/u/germann/opt64/include/boost/iostreams/detail/is_iterator_range.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/disable_warnings.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/enable_warnings.hpp \
/u/germann/opt64/include/boost/iostreams/detail/select_by_size.hpp \
/u/germann/opt64/include/boost/preprocessor/iteration/local.hpp \
/u/germann/opt64/include/boost/preprocessor/slot/slot.hpp \
/u/germann/opt64/include/boost/preprocessor/slot/detail/def.hpp \
/u/germann/opt64/include/boost/preprocessor/iteration/detail/local.hpp \
/u/germann/opt64/include/boost/iostreams/detail/wrap_unwrap.hpp \
/u/germann/opt64/include/boost/iostreams/detail/enable_if_stream.hpp \
/u/germann/opt64/include/boost/utility/enable_if.hpp \
/u/germann/opt64/include/boost/iostreams/traits_fwd.hpp \
/u/germann/opt64/include/boost/ref.hpp \
/u/germann/opt64/include/boost/utility/addressof.hpp \
/u/germann/opt64/include/boost/mpl/or.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/use_preprocessed.hpp \
/u/germann/opt64/include/boost/mpl/aux_/nested_type_wknd.hpp \
/u/germann/opt64/include/boost/mpl/aux_/include_preprocessed.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/compiler.hpp \
/u/germann/opt64/include/boost/preprocessor/stringize.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \
/u/germann/opt64/include/boost/range/iterator_range.hpp \
/u/germann/opt64/include/boost/range/iterator_range_core.hpp \
/u/germann/opt64/include/boost/iterator/iterator_traits.hpp \
/u/germann/opt64/include/boost/detail/iterator.hpp \
/u/germann/opt64/include/boost/iterator/iterator_facade.hpp \
/u/germann/opt64/include/boost/iterator.hpp \
/u/germann/opt64/include/boost/iterator/interoperable.hpp \
/u/germann/opt64/include/boost/iterator/detail/config_def.hpp \
/u/germann/opt64/include/boost/iterator/detail/config_undef.hpp \
/u/germann/opt64/include/boost/iterator/detail/facade_iterator_category.hpp \
/u/germann/opt64/include/boost/iterator/iterator_categories.hpp \
/u/germann/opt64/include/boost/mpl/placeholders.hpp \
/u/germann/opt64/include/boost/mpl/arg.hpp \
/u/germann/opt64/include/boost/mpl/arg_fwd.hpp \
/u/germann/opt64/include/boost/mpl/aux_/na_assert.hpp \
/u/germann/opt64/include/boost/mpl/assert.hpp \
/u/germann/opt64/include/boost/mpl/not.hpp \
/u/germann/opt64/include/boost/mpl/aux_/yes_no.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/arrays.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/pp_counter.hpp \
/u/germann/opt64/include/boost/mpl/aux_/arity_spec.hpp \
/u/germann/opt64/include/boost/mpl/aux_/arg_typedef.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \
/u/germann/opt64/include/boost/mpl/and.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \
/u/germann/opt64/include/boost/type_traits/is_const.hpp \
/u/germann/opt64/include/boost/detail/indirect_traits.hpp \
/u/germann/opt64/include/boost/type_traits/is_function.hpp \
/u/germann/opt64/include/boost/type_traits/detail/false_result.hpp \
/u/germann/opt64/include/boost/type_traits/detail/is_function_ptr_helper.hpp \
/u/germann/opt64/include/boost/type_traits/is_pointer.hpp \
/u/germann/opt64/include/boost/type_traits/is_member_pointer.hpp \
/u/germann/opt64/include/boost/type_traits/is_member_function_pointer.hpp \
/u/germann/opt64/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \
/u/germann/opt64/include/boost/type_traits/is_volatile.hpp \
/u/germann/opt64/include/boost/type_traits/remove_reference.hpp \
/u/germann/opt64/include/boost/type_traits/remove_pointer.hpp \
/u/germann/opt64/include/boost/iterator/detail/enable_if.hpp \
/u/germann/opt64/include/boost/implicit_cast.hpp \
/u/germann/opt64/include/boost/type_traits/add_const.hpp \
/u/germann/opt64/include/boost/type_traits/add_pointer.hpp \
/u/germann/opt64/include/boost/type_traits/remove_const.hpp \
/u/germann/opt64/include/boost/type_traits/is_pod.hpp \
/u/germann/opt64/include/boost/type_traits/is_scalar.hpp \
/u/germann/opt64/include/boost/type_traits/is_enum.hpp \
/u/germann/opt64/include/boost/mpl/always.hpp \
/u/germann/opt64/include/boost/mpl/apply.hpp \
/u/germann/opt64/include/boost/mpl/apply_fwd.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \
/u/germann/opt64/include/boost/mpl/apply_wrap.hpp \
/u/germann/opt64/include/boost/mpl/aux_/has_apply.hpp \
/u/germann/opt64/include/boost/mpl/has_xxx.hpp \
/u/germann/opt64/include/boost/mpl/aux_/type_wrapper.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/has_xxx.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/msvc_typename.hpp \
/u/germann/opt64/include/boost/preprocessor/array/elem.hpp \
/u/germann/opt64/include/boost/preprocessor/array/data.hpp \
/u/germann/opt64/include/boost/preprocessor/array/size.hpp \
/u/germann/opt64/include/boost/preprocessor/repetition/enum_trailing_params.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/has_apply.hpp \
/u/germann/opt64/include/boost/mpl/aux_/msvc_never_true.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \
/u/germann/opt64/include/boost/mpl/lambda.hpp \
/u/germann/opt64/include/boost/mpl/bind.hpp \
/u/germann/opt64/include/boost/mpl/bind_fwd.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/bind.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \
/u/germann/opt64/include/boost/mpl/next.hpp \
/u/germann/opt64/include/boost/mpl/next_prior.hpp \
/u/germann/opt64/include/boost/mpl/aux_/common_name_wknd.hpp \
/u/germann/opt64/include/boost/mpl/protect.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \
/u/germann/opt64/include/boost/mpl/aux_/full_lambda.hpp \
/u/germann/opt64/include/boost/mpl/quote.hpp \
/u/germann/opt64/include/boost/mpl/aux_/has_type.hpp \
/u/germann/opt64/include/boost/mpl/aux_/config/bcc.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \
/u/germann/opt64/include/boost/mpl/aux_/template_arity.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \
/u/germann/opt64/include/boost/range/functions.hpp \
/u/germann/opt64/include/boost/range/begin.hpp \
/u/germann/opt64/include/boost/range/config.hpp \
/u/germann/opt64/include/boost/range/iterator.hpp \
/u/germann/opt64/include/boost/range/mutable_iterator.hpp \
/u/germann/opt64/include/boost/range/detail/extract_optional_type.hpp \
/u/germann/opt64/include/boost/range/const_iterator.hpp \
/u/germann/opt64/include/boost/range/end.hpp \
/u/germann/opt64/include/boost/range/detail/implementation_help.hpp \
/u/germann/opt64/include/boost/range/detail/common.hpp \
/u/germann/opt64/include/boost/range/detail/sfinae.hpp \
/u/germann/opt64/include/boost/range/size.hpp \
/u/germann/opt64/include/boost/range/difference_type.hpp \
/u/germann/opt64/include/boost/range/distance.hpp \
/u/germann/opt64/include/boost/range/empty.hpp \
/u/germann/opt64/include/boost/range/rbegin.hpp \
/u/germann/opt64/include/boost/range/reverse_iterator.hpp \
/u/germann/opt64/include/boost/iterator/reverse_iterator.hpp \
/u/germann/opt64/include/boost/utility.hpp \
/u/germann/opt64/include/boost/utility/base_from_member.hpp \
/u/germann/opt64/include/boost/preprocessor/repetition/enum_binary_params.hpp \
/u/germann/opt64/include/boost/preprocessor/tuple/rem.hpp \
/u/germann/opt64/include/boost/preprocessor/repetition/repeat_from_to.hpp \
/u/germann/opt64/include/boost/utility/binary.hpp \
/u/germann/opt64/include/boost/preprocessor/control/deduce_d.hpp \
/u/germann/opt64/include/boost/preprocessor/seq/cat.hpp \
/u/germann/opt64/include/boost/preprocessor/seq/fold_left.hpp \
/u/germann/opt64/include/boost/preprocessor/seq/seq.hpp \
/u/germann/opt64/include/boost/preprocessor/seq/elem.hpp \
/u/germann/opt64/include/boost/preprocessor/seq/size.hpp \
/u/germann/opt64/include/boost/preprocessor/seq/transform.hpp \
/u/germann/opt64/include/boost/preprocessor/arithmetic/mod.hpp \
/u/germann/opt64/include/boost/preprocessor/arithmetic/detail/div_base.hpp \
/u/germann/opt64/include/boost/preprocessor/comparison/less_equal.hpp \
/u/germann/opt64/include/boost/preprocessor/logical/not.hpp \
/u/germann/opt64/include/boost/next_prior.hpp \
/u/germann/opt64/include/boost/noncopyable.hpp \
/u/germann/opt64/include/boost/iterator/iterator_adaptor.hpp \
/u/germann/opt64/include/boost/range/rend.hpp \
/u/germann/opt64/include/boost/range/algorithm/equal.hpp \
/u/germann/opt64/include/boost/range/concepts.hpp \
/u/germann/opt64/include/boost/concept_check.hpp \
/u/germann/opt64/include/boost/concept/assert.hpp \
/u/germann/opt64/include/boost/concept/detail/general.hpp \
/u/germann/opt64/include/boost/concept/detail/backward_compatibility.hpp \
/u/germann/opt64/include/boost/concept/detail/has_constraints.hpp \
/u/germann/opt64/include/boost/type_traits/conversion_traits.hpp \
/u/germann/opt64/include/boost/concept/usage.hpp \
/u/germann/opt64/include/boost/concept/detail/concept_def.hpp \
/u/germann/opt64/include/boost/preprocessor/seq/for_each_i.hpp \
/u/germann/opt64/include/boost/preprocessor/repetition/for.hpp \
/u/germann/opt64/include/boost/preprocessor/repetition/detail/for.hpp \
/u/germann/opt64/include/boost/preprocessor/seq/enum.hpp \
/u/germann/opt64/include/boost/concept/detail/concept_undef.hpp \
/u/germann/opt64/include/boost/iterator/iterator_concepts.hpp \
/u/germann/opt64/include/boost/limits.hpp \
/u/germann/opt64/include/boost/range/value_type.hpp \
/u/germann/opt64/include/boost/range/detail/misc_concept.hpp \
/u/germann/opt64/include/boost/range/iterator_range_io.hpp \
/u/germann/opt64/include/boost/range/iterator_range_core.hpp \
/u/germann/opt64/include/boost/iostreams/detail/streambuf.hpp \
/u/germann/opt64/include/boost/iostreams/operations_fwd.hpp \
/u/germann/opt64/include/boost/iostreams/detail/adapter/non_blocking_adapter.hpp \
/u/germann/opt64/include/boost/iostreams/detail/ios.hpp \
/u/germann/opt64/include/boost/iostreams/read.hpp \
/u/germann/opt64/include/boost/iostreams/char_traits.hpp \
/u/germann/opt64/include/boost/iostreams/detail/char_traits.hpp \
/u/germann/opt64/include/boost/iostreams/seek.hpp \
/u/germann/opt64/include/boost/integer_traits.hpp \
/u/germann/opt64/include/boost/iostreams/positioning.hpp \
/u/germann/opt64/include/boost/cstdint.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/codecvt.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/fpos.hpp \
/u/germann/opt64/include/boost/iostreams/write.hpp \
/u/germann/opt64/include/boost/iostreams/concepts.hpp \
/u/germann/opt64/include/boost/iostreams/detail/default_arg.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/auto_link.hpp \
/u/germann/opt64/include/boost/config/auto_link.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/dyn_link.hpp \
/u/germann/opt64/include/boost/iostreams/detail/path.hpp \
/u/germann/opt64/include/boost/type.hpp \
/u/germann/opt64/include/boost/config/abi_prefix.hpp \
/u/germann/opt64/include/boost/config/abi_suffix.hpp \
/u/germann/opt64/include/boost/iostreams/stream.hpp \
/u/germann/opt64/include/boost/iostreams/constants.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/overload_resolution.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/gcc.hpp \
/u/germann/opt64/include/boost/iostreams/detail/forward.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/limits.hpp \
/u/germann/opt64/include/boost/iostreams/detail/push_params.hpp \
/u/germann/opt64/include/boost/iostreams/detail/iostream.hpp \
/u/germann/opt64/include/boost/iostreams/stream_buffer.hpp \
/u/germann/opt64/include/boost/iostreams/detail/streambuf/direct_streambuf.hpp \
/u/germann/opt64/include/boost/iostreams/detail/error.hpp \
/u/germann/opt64/include/boost/iostreams/detail/execute.hpp \
/u/germann/opt64/include/boost/utility/result_of.hpp \
/u/germann/opt64/include/boost/preprocessor/iteration/iterate.hpp \
/u/germann/opt64/include/boost/preprocessor/repetition/enum_shifted_params.hpp \
/u/germann/opt64/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \
/u/germann/opt64/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \
/u/germann/opt64/include/boost/preprocessor/slot/detail/shared.hpp \
/u/germann/opt64/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \
/u/germann/opt64/include/boost/utility/detail/result_of_iterate.hpp \
/u/germann/opt64/include/boost/iostreams/detail/functional.hpp \
/u/germann/opt64/include/boost/iostreams/detail/optional.hpp \
/u/germann/opt64/include/boost/type_traits/aligned_storage.hpp \
/u/germann/opt64/include/boost/aligned_storage.hpp \
/u/germann/opt64/include/boost/type_traits/alignment_of.hpp \
/u/germann/opt64/include/boost/type_traits/detail/size_t_trait_def.hpp \
/u/germann/opt64/include/boost/mpl/size_t.hpp \
/u/germann/opt64/include/boost/mpl/size_t_fwd.hpp \
/u/germann/opt64/include/boost/type_traits/detail/size_t_trait_undef.hpp \
/u/germann/opt64/include/boost/type_traits/type_with_alignment.hpp \
/u/germann/opt64/include/boost/preprocessor/list/for_each_i.hpp \
/u/germann/opt64/include/boost/preprocessor/tuple/to_list.hpp \
/u/germann/opt64/include/boost/preprocessor/list/transform.hpp \
/u/germann/opt64/include/boost/preprocessor/list/append.hpp \
/u/germann/opt64/include/boost/type_traits/alignment_of.hpp \
/u/germann/opt64/include/boost/iostreams/detail/streambuf/linked_streambuf.hpp \
/u/germann/opt64/include/boost/iostreams/operations.hpp \
/u/germann/opt64/include/boost/iostreams/imbue.hpp \
/u/germann/opt64/include/boost/iostreams/input_sequence.hpp \
/u/germann/opt64/include/boost/iostreams/optimal_buffer_size.hpp \
/u/germann/opt64/include/boost/iostreams/output_sequence.hpp \
/u/germann/opt64/include/boost/iostreams/detail/streambuf/indirect_streambuf.hpp \
/u/germann/opt64/include/boost/iostreams/detail/adapter/concept_adapter.hpp \
/u/germann/opt64/include/boost/iostreams/detail/call_traits.hpp \
/u/germann/opt64/include/boost/iostreams/detail/config/unreachable_return.hpp \
/u/germann/opt64/include/boost/iostreams/device/null.hpp \
/u/germann/opt64/include/boost/iostreams/detail/buffer.hpp \
/u/germann/opt64/include/boost/iostreams/checked_operations.hpp \
/u/germann/opt64/include/boost/iostreams/get.hpp \
/u/germann/opt64/include/boost/iostreams/put.hpp \
/u/germann/opt64/include/boost/iostreams/detail/double_object.hpp \
/u/germann/opt64/include/boost/call_traits.hpp \
/u/germann/opt64/include/boost/detail/call_traits.hpp \
/u/germann/opt64/include/boost/iostreams/detail/push.hpp \
/u/germann/opt64/include/boost/iostreams/detail/adapter/range_adapter.hpp \
/u/germann/opt64/include/boost/iostreams/pipeline.hpp \
/u/germann/opt64/include/boost/iostreams/detail/resolve.hpp \
/u/germann/opt64/include/boost/detail/is_incrementable.hpp \
/u/germann/opt64/include/boost/iostreams/detail/adapter/mode_adapter.hpp \
/u/germann/opt64/include/boost/iostreams/detail/adapter/output_iterator_adapter.hpp \
/u/germann/opt64/include/boost/iostreams/detail/is_dereferenceable.hpp \
/u/germann/opt64/include/boost/iostreams/device/array.hpp \
tpt/ug_ttrack_base.h /u/germann/opt64/include/boost/dynamic_bitset.hpp \
/u/germann/opt64/include/boost/dynamic_bitset/dynamic_bitset.hpp \
/u/germann/opt64/include/boost/dynamic_bitset/config.hpp \
/u/germann/opt64/include/boost/dynamic_bitset_fwd.hpp \
/u/germann/opt64/include/boost/detail/dynamic_bitset.hpp \
/u/germann/opt64/include/boost/pending/lowest_bit.hpp \
/u/germann/opt64/include/boost/pending/integer_log2.hpp \
tpt/ug_ttrack_position.h /h/116/germann/diss/code/basic/ug_vocab.h \
/h/116/germann/diss/code/tpt/tpt_typedefs.h \
/h/116/germann/diss/code/tpt/ugdiss_typedefs.h \
/u/germann/opt64/include/boost/scoped_ptr.hpp \
/u/germann/opt64/include/boost/smart_ptr/scoped_ptr.hpp \
/h/116/germann/diss/code/tpt/tplm.h \
/h/116/germann/diss/code/tpt/tpt_tokenindex.h \
/h/116/germann/diss/code/tpt/tpt_typedefs.h \
/h/116/germann/diss/code/tpt/tpt_pickler.h \
/h/116/germann/diss/code/tpt/num_read_write.h \
/h/116/germann/diss/code/tpt/tpt_tightindex.h \
/h/116/germann/diss/code/tpt/ug_mm_2d_table.h \
/h/116/germann/diss/code/tpt/ugdiss_typedefs.h tpt/tpt_pickler.h

145
moses/mm/ug_im_ttrack.h Normal file
View File

@ -0,0 +1,145 @@
// -*- c++-mode -*-
// In-memory corpus track
// (c) 2006-2012 Ulrich Germann.
#ifndef __ug_im_ttrack
#define __ug_im_ttrack
#include <string>
#include <iostream>
#include <boost/shared_ptr.hpp>
#include <boost/unordered_map.hpp>
#include "tpt_typedefs.h"
#include "tpt_tokenindex.h"
#include "ug_ttrack_base.h"
#include "tpt_tokenindex.h"
// #include "ug_vocab.h"
namespace ugdiss
{
using namespace std;
namespace bio=boost::iostreams;
/** In-memory corpus track: a Ttrack whose sentences are kept as a
 *  vector of token vectors (one inner vector per sentence) behind a
 *  shared pointer.
 */
template<typename Token=id_type>
class imTtrack : public Ttrack<Token>
{
private:
size_t numToks; // value reported by numTokens()
boost::shared_ptr<vector<vector<Token> > > myData; // pointer to corpus data
public:
/** adopt (share) existing corpus data */
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d);
/** read whitespace-tokenized text from in, one sentence per line,
 *  mapping words to IDs via V; progress is reported to log if non-NULL */
imTtrack(istream& in, TokenIndex const& V, ostream* log);
/** create an empty corpus */
imTtrack();
// imTtrack(istream& in, Vocab& V);
/** return pointer to beginning of sentence */
Token const* sntStart(size_t sid) const;
/** return pointer to end of sentence */
Token const* sntEnd(size_t sid) const;
/** return size of corpus (in number of sentences) */
size_t size() const;
/** return size of corpus (in number of words) */
size_t numTokens() const;
/** return the ID of the sentence whose storage contains t;
 *  the number of sentences is returned if there is none */
id_type findSid(Token const* t) const;
};
/** Return a pointer to the first token of sentence @p sid,
 *  or NULL if that sentence is empty.
 *  @pre sid < size() (asserted)
 */
template<typename Token>
Token const*
imTtrack<Token>::
sntStart(size_t sid) const
{
  assert(sid < size());
  vector<Token> const& snt = (*myData)[sid];
  if (snt.empty())
    return NULL;
  return &snt.front();
}
/** Return a pointer to the end of sentence @p sid,
 *  or NULL if that sentence is empty.
 *  @pre sid < size() (asserted)
 *
 *  NOTE(review): the pointer returned is the address of the last token
 *  itself (vector::back()), not one past it. Confirm that this is what
 *  users of the Ttrack interface expect from sntEnd().
 */
template<typename Token>
Token const*
imTtrack<Token>::
sntEnd(size_t sid) const
{
  assert(sid < size());
  vector<Token> const& snt = (*myData)[sid];
  if (snt.empty())
    return NULL;
  return &snt.back();
}
/** Return the size of the corpus in number of sentences, i.e. the
 *  number of inner vectors held by myData.
 */
template<typename Token>
size_t
imTtrack<Token>::
size() const
{
  // myData is a shared_ptr to the sentence vector, so the container
  // must be reached through operator->; the pointer itself has no size()
  return myData->size();
}
/** Return the size of the corpus in number of words (tokens),
 *  as recorded in numToks.
 */
template<typename Token>
size_t
imTtrack<Token>::
numTokens() const
{
  return this->numToks;
}
/** Build the corpus from text.
 *  Every line of @p in becomes one sentence (empty lines become empty
 *  sentences); its whitespace-separated words are mapped to IDs via @p V.
 *  @param in  input text, one sentence per line
 *  @param V   vocabulary; entries 0 .. knownVocabSize()-1 are used
 *  @param log if non-NULL, a progress message is written there after
 *             every million input lines
 *
 *  NOTE(review): the default argument for @p log is supplied here, on the
 *  out-of-class definition. For member functions of class templates the
 *  C++ standard expects default arguments on the in-class declaration;
 *  consider moving it there.
 */
template<typename Token>
imTtrack<Token>::
imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL)
{
  myData.reset(new vector<vector<Token> >());
  numToks = 0;

  // string -> ID lookup table for the known vocabulary
  boost::unordered_map<string,id_type> H;
  for (id_type i = 0; i < V.knownVocabSize(); ++i)
    H[V[i]] = i;

  string line, w;
  size_t linectr = 0;
  while (getline(in,line))
    {
      myData->push_back(vector<Token>());
      vector<Token>& snt = myData->back();
      if (log && ++linectr%1000000==0)
        *log << linectr/1000000 << "M lines of input processed" << endl;
      istringstream buf(line);
      // operator[] value-initializes the entry for a word that is not in
      // the table yet, so such words receive a value-initialized id_type
      while (buf>>w)
        snt.push_back(Token(H[w]));
      numToks += snt.size();
    }
}
/** Create an empty corpus: no sentences and a token count of zero. */
template<typename Token>
imTtrack<Token>::
imTtrack()
  : numToks(0) // must be set explicitly; numTokens() reads it
{
  myData.reset(new vector<vector<Token> >());
}
/** Adopt existing corpus data (shared, not copied).
 *  The token count is computed from the data at construction time.
 *  @param d sentences to share; a null pointer is stored as is and
 *           yields a token count of zero
 */
template<typename Token>
imTtrack<Token>::
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d)
  : numToks(0), myData(d)
{
  if (!myData) return;
  for (size_t i = 0; i < myData->size(); ++i)
    numToks += (*myData)[i].size();
}
/** Find the sentence whose storage contains the token at @p t.
 *  Sentences are scanned linearly in order; empty sentences are skipped.
 *  @return the ID of the first sentence whose first and last token
 *          addresses bracket @p t, or the number of sentences if there
 *          is no such sentence
 */
template<typename Token>
id_type
imTtrack<Token>::
findSid(Token const* t) const
{
  id_type sid = 0;
  while (sid < myData->size())
    {
      vector<Token> const& snt = (*myData)[sid];
      if (!snt.empty() && &snt.front() <= t && t <= &snt.back())
        return sid;
      ++sid;
    }
  return sid;
}
}
#endif

View File

@ -0,0 +1,159 @@
// -*- c++ -*-
// lexical phrase scorer, version 1
// written by Ulrich Germann
#ifndef __ug_lexical_phrase_scorer_h
#define __ug_lexical_phrase_scorer_h
#include "ug_stream.h"
#include "tpt_tokenindex.h"
#include <string>
#include <boost/unordered_map.hpp>
#include "tpt_pickler.h"
using namespace std;
namespace ugdiss
{
// Lexical phrase scorer backed by two word-level lexicon tables that are
// read from text files. TKN must provide an id() member; the ids are used
// to index the tables.
template<typename TKN>
class
LexicalPhraseScorer1
{
// per-word row of a lexicon table: maps a word id to a float value
typedef boost::unordered_map<id_type, float> inner_map_t;
// filled by open() from the "<L1>-given-<L2>" file; indexed [L1 id][L2 id]
vector<inner_map_t> L1_given_L2;
// filled by open() from the "<L2>-given-<L1>" file; indexed [L2 id][L1 id]
vector<inner_map_t> L2_given_L1;
// read "word1 word2 value" triples from fname into lex[V1[word1]][V2[word2]]
void load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
vector<inner_map_t> & lex);
public:
// load both lexicon tables; the file names are derived from bname, L1 and L2
void open(string const& bname, string const& L1, string const& L2,
TokenIndex & V1, TokenIndex & V2);
// score the phrase pair snt1[s1,e1) / snt2[s2,e2); aln holds the word
// alignment as a flat sequence of (position in snt1, position in snt2)
// pairs; results are returned in fwd_score and bwd_score
void score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
vector<ushort> aln, float & fwd_score, float& bwd_score);
// same, but the alignment is read with binread() from the byte range
// [aln_start,aln_end)
void score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
char const* const aln_start, char const* const aln_end,
float & fwd_score, float& bwd_score);
// return lex[s][t], or 1.0 if s is out of range or t has no entry
float permissive_lookup(vector<inner_map_t> const& lex,
id_type const s, id_type const t) const;
};
/** Load a lexicon table from a text file.
 *  The file is read as a sequence of "word1 word2 value" triples; each
 *  triple is stored as lex[V1[word1]][V2[word2]] = value. The file name
 *  is echoed to stdout before the file is opened.
 *  @param fname file to read (opened via open_input_stream)
 *  @param V1    vocabulary for the first column
 *  @param V2    vocabulary for the second column
 *  @param lex   table to fill; resized to V1.ksize() first and grown
 *               whenever a first-column id lies beyond its end
 */
template<typename TKN>
void
LexicalPhraseScorer1<TKN>::
load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
          vector<inner_map_t> & lex)
{
  boost::iostreams::filtering_istream in;
  cout << fname << endl;
  open_input_stream(fname,in);
  lex.resize(V1.ksize());

  string first, second;
  float value;
  while (in >> first >> second >> value)
    {
      id_type const row = V1[first];
      // make sure the row exists before writing to it
      if (row >= lex.size())
        lex.resize(static_cast<size_t>(row) + 1);
      lex[row][V2[second]] = value;
    }
}
/** Load both lexicon tables.
 *  The files read are
 *    <bname><L1>-<L2>.<L1>-given-<L2>.lex.gz  (into L1_given_L2) and
 *    <bname><L1>-<L2>.<L2>-given-<L1>.lex.gz  (into L2_given_L1).
 *  Both file names are echoed to stdout before loading starts.
 *  @param bname base name (prefix) of the lexicon files
 *  @param L1    tag of the first language
 *  @param L2    tag of the second language
 *  @param V1    vocabulary of the first language
 *  @param V2    vocabulary of the second language
 */
template<typename TKN>
void
LexicalPhraseScorer1<TKN>::
open(string const& bname, string const& L1, string const& L2,
     TokenIndex & V1, TokenIndex & V2)
{
  string const stem = bname+L1+"-"+L2+".";
  string const file_1g2 = stem+L1+"-given-"+L2+".lex.gz";
  string const file_2g1 = stem+L2+"-given-"+L1+".lex.gz";
  cout << file_1g2 << endl;
  cout << file_2g1 << endl;
  load_lex(file_1g2,V1,V2,L1_given_L2);
  load_lex(file_2g1,V2,V1,L2_given_L1);
}
// Compute lexical scores for the phrase pair snt1[s1,e1) / snt2[s2,e2).
//
// /aln/ is a flat list of alignment links: aln[2k] is a position in snt1,
// aln[2k+1] the linked position in snt2. Links with an endpoint outside the
// phrase pair are ignored. For every word the looked-up probabilities of its
// links are averaged; a word without links is scored against token id 0.
// fwd_score / bwd_score receive the sums of the log values over snt1[s1,e1)
// and snt2[s2,e2), respectively.
//
// Changes w.r.t. the previous version:
//  - a trailing unpaired entry in /aln/ is ignored instead of reading past
//    the end of the vector;
//  - unaligned words are scored via permissive_lookup(), so an unknown word
//    id no longer indexes the table out of bounds and scoring no longer
//    inserts entries into the lexicon as a side effect.
//
// NOTE(review): for aligned words this overload accumulates L2_given_L1 into
// the snt1 side and L1_given_L2 into the snt2 side, whereas the
// aln_start/aln_end overload does the opposite. That asymmetry is kept as it
// was; confirm which one is intended.
template<typename TKN>
void
LexicalPhraseScorer1<TKN>::
score(TKN const* snt1, size_t const s1, size_t const e1,
      TKN const* snt2, size_t const s2, size_t const e2,
      vector<ushort> aln, float & fwd_score, float& bwd_score)
{
  vector<float> p1(e1,0), p2(e2,0); // summed probabilities per word
  vector<int>   c1(e1,0), c2(e2,0); // number of links per word
  for (size_t k = 0; k + 1 < aln.size(); k += 2)
    {
      size_t const i1 = aln[k];
      size_t const i2 = aln[k+1];
      if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
      p1[i1] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
      ++c1[i1];
      p2[i2] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
      ++c2[i2];
    }
  fwd_score = 0;
  for (size_t i = s1; i < e1; ++i)
    {
      if (c1[i] == 1) fwd_score += log(p1[i]);
      else if (c1[i]) fwd_score += log(p1[i])-log(c1[i]);
      else fwd_score += log(permissive_lookup(L1_given_L2, snt1[i].id(), 0));
    }
  bwd_score = 0;
  for (size_t i = s2; i < e2; ++i)
    {
      if (c2[i] == 1) bwd_score += log(p2[i]);
      else if (c2[i]) bwd_score += log(p2[i])-log(c2[i]);
      else bwd_score += log(permissive_lookup(L2_given_L1, snt2[i].id(), 0));
    }
}
// Return lex[s][t] if that entry exists; otherwise (row s is beyond the
// table, or the row has no entry for t) return 1.0.
template<typename TKN>
float
LexicalPhraseScorer1<TKN>::
permissive_lookup(vector<inner_map_t> const& lex,
                  id_type const s, id_type const t) const
{
  if (s < lex.size())
    {
      inner_map_t const& row = lex[s];
      inner_map_t::const_iterator hit = row.find(t);
      if (hit != row.end())
        return hit->second;
    }
  return 1.0;
}
// Compute lexical scores for the phrase pair snt1[s1,e1) / snt2[s2,e2).
//
// The alignment is decoded from the byte range [aln_start,aln_end) with
// binread(): each link is a (position in snt1, position in snt2) pair. Links
// with an endpoint outside the phrase pair are ignored. For every word the
// looked-up probabilities of its links are averaged; a word without links is
// scored against token id 0. fwd_score / bwd_score receive the sums of the
// log values over snt1[s1,e1) and snt2[s2,e2), respectively.
//
// Change w.r.t. the previous version: unaligned words are scored via
// permissive_lookup(), so an unknown word id no longer indexes the table out
// of bounds and scoring no longer inserts entries into the lexicon as a side
// effect.
//
// NOTE(review): for aligned words this overload accumulates L1_given_L2 into
// the snt1 side and L2_given_L1 into the snt2 side, whereas the
// vector<ushort> overload does the opposite. That asymmetry is kept as it
// was; confirm which one is intended.
template<typename TKN>
void
LexicalPhraseScorer1<TKN>::
score(TKN const* snt1, size_t const s1, size_t const e1,
      TKN const* snt2, size_t const s2, size_t const e2,
      char const* const aln_start, char const* const aln_end,
      float & fwd_score, float& bwd_score)
{
  vector<float> p1(e1,0), p2(e2,0); // summed probabilities per word
  vector<int>   c1(e1,0), c2(e2,0); // number of links per word
  size_t i1=0,i2=0;
  for (char const* x = aln_start; x < aln_end;)
    {
      x = binread(binread(x,i1),i2);
      if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
      p1[i1] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
      ++c1[i1];
      p2[i2] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
      ++c2[i2];
    }
  fwd_score = 0;
  for (size_t i = s1; i < e1; ++i)
    {
      if (c1[i] == 1) fwd_score += log(p1[i]);
      else if (c1[i]) fwd_score += log(p1[i])-log(c1[i]);
      else fwd_score += log(permissive_lookup(L1_given_L2, snt1[i].id(), 0));
    }
  bwd_score = 0;
  for (size_t i = s2; i < e2; ++i)
    {
      if (c2[i] == 1) bwd_score += log(p2[i]);
      else if (c2[i]) bwd_score += log(p2[i])-log(c2[i]);
      else bwd_score += log(permissive_lookup(L2_given_L1, snt2[i].id(), 0));
    }
}
}
#endif

View File

@ -0,0 +1,149 @@
// -*- c++ -*-
// lexical phrase scorer, version 1
// written by Ulrich Germann
#ifndef __ug_lexical_phrase_scorer_h
#define __ug_lexical_phrase_scorer_h
#include "moses/generic/file_io/ug_stream.h"
#include "tpt_tokenindex.h"
#include <string>
#include <boost/unordered_map.hpp>
#include "tpt_pickler.h"
#include "ug_mm_2d_table.h"
using namespace std;
namespace ugdiss
{
// Lexical phrase scorer backed by a memory-mapped co-occurrence count table
// (COOC); probabilities are computed on the fly as count / marginal.
template<typename TKN>
class
LexicalPhraseScorer2
{
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> table_t;
// COOC[s][t] is the count for the id pair (s,t); COOC.m1 / COOC.m2 are the
// row / column marginals
table_t COOC;
public:
// open the co-occurrence table stored in /fname/
void open(string const& fname);
// score the phrase pair snt1[s1,e1) / snt2[s2,e2); /aln/ holds the word
// alignment as a flat sequence of (position in snt1, position in snt2) pairs
template<typename someint>
void
score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
vector<someint> & aln, float & fwd_score, float& bwd_score) const;
// same, but the alignment pairs are decoded with binread() from the byte
// range [aln_start,aln_end)
void
score(TKN const* snt1, size_t const s1, size_t const e1,
TKN const* snt2, size_t const s2, size_t const e2,
char const* const aln_start, char const* const aln_end,
float & fwd_score, float& bwd_score) const;
// plup: permissive lookup
// plup_fwd returns COOC[s][t]/COOC.m1(s), plup_bwd returns
// COOC[s][t]/COOC.m2(t); both return 1.0 if either marginal is zero
float plup_fwd(id_type const s,id_type const t) const;
float plup_bwd(id_type const s,id_type const t) const;
// to be done:
// - on-the-fly smoothing ?
// - better (than permissive-lookup) treatment of unknown combinations
// permissive lookup is currently used for compatibility reasons
// - zens-ney smoothed scoring via noisy-or combination
};
// Open the co-occurrence table stored in /fname/ (delegates to the table).
template<typename TKN>
void
LexicalPhraseScorer2<TKN>::
open(string const& fname)
{
  this->COOC.open(fname);
}
// Compute lexical scores for the phrase pair snt1[s1,e1) / snt2[s2,e2).
//
// /aln/ is a flat list of alignment links: aln[2k] is a position in snt1,
// aln[2k+1] the linked position in snt2. Links with an endpoint outside the
// phrase pair are ignored. For every word the plup_fwd / plup_bwd values of
// its links are averaged; a word without links is scored against token id 0.
// fwd_score / bwd_score receive the sums of the log values over snt1[s1,e1)
// and snt2[s2,e2), respectively.
//
// Change w.r.t. the previous version: a trailing unpaired entry in /aln/ is
// ignored instead of reading past the end of the vector.
template<typename TKN>
template<typename someint>
void
LexicalPhraseScorer2<TKN>::
score(TKN const* snt1, size_t const s1, size_t const e1,
      TKN const* snt2, size_t const s2, size_t const e2,
      vector<someint> & aln, float & fwd_score, float& bwd_score) const
{
  vector<float> p1(e1,0), p2(e2,0); // summed probabilities per word
  vector<int>   c1(e1,0), c2(e2,0); // number of links per word
  for (size_t k = 0; k + 1 < aln.size(); k += 2)
    {
      size_t const i1 = aln[k];
      size_t const i2 = aln[k+1];
      if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
      p1[i1] += plup_fwd(snt1[i1].id(),snt2[i2].id());
      ++c1[i1];
      p2[i2] += plup_bwd(snt1[i1].id(),snt2[i2].id());
      ++c2[i2];
    }
  fwd_score = 0;
  for (size_t i = s1; i < e1; ++i)
    {
      if (c1[i] == 1) fwd_score += log(p1[i]);
      else if (c1[i]) fwd_score += log(p1[i])-log(c1[i]);
      else fwd_score += log(plup_fwd(snt1[i].id(),0));
    }
  bwd_score = 0;
  for (size_t i = s2; i < e2; ++i)
    {
      if (c2[i] == 1) bwd_score += log(p2[i]);
      else if (c2[i]) bwd_score += log(p2[i])-log(c2[i]);
      else bwd_score += log(plup_bwd(0,snt2[i].id()));
    }
}
// Permissive forward lookup: COOC[s][t] divided by the row marginal of s.
// Returns 1.0 when either marginal is zero (i.e. s or t is unknown to the
// table). In debug builds a zero joint count for known s and t asserts.
template<typename TKN>
float
LexicalPhraseScorer2<TKN>::
plup_fwd(id_type const s, id_type const t) const
{
  bool const unseen = !COOC.m1(s) || !COOC.m2(t);
  if (unseen) return 1.0;
  assert(COOC[s][t]);
  float const joint = float(COOC[s][t]);
  return joint/COOC.m1(s);
}
// Permissive backward lookup: COOC[s][t] divided by the column marginal of t.
// Returns 1.0 when either marginal is zero (i.e. s or t is unknown to the
// table). In debug builds a zero joint count for known s and t asserts.
template<typename TKN>
float
LexicalPhraseScorer2<TKN>::
plup_bwd(id_type const s, id_type const t) const
{
  bool const unseen = !COOC.m1(s) || !COOC.m2(t);
  if (unseen) return 1.0;
  assert(COOC[s][t]);
  float const joint = float(COOC[s][t]);
  return joint/COOC.m2(t);
}
// Compute lexical scores for the phrase pair snt1[s1,e1) / snt2[s2,e2).
// Alignment links are decoded with binread() from [aln_start,aln_end) as
// (position in snt1, position in snt2) pairs; links that leave the phrase
// pair are skipped. Per word, the plup values of its links are averaged;
// words without links are scored against token id 0. The log values are
// summed into fwd_score (snt1 side) and bwd_score (snt2 side).
template<typename TKN>
void
LexicalPhraseScorer2<TKN>::
score(TKN const* snt1, size_t const s1, size_t const e1,
      TKN const* snt2, size_t const s2, size_t const e2,
      char const* const aln_start, char const* const aln_end,
      float & fwd_score, float& bwd_score) const
{
  vector<float> sum1(e1,0), sum2(e2,0); // summed probabilities per word
  vector<int>   cnt1(e1,0), cnt2(e2,0); // number of links per word
  size_t a1 = 0, a2 = 0;
  char const* cursor = aln_start;
  while (cursor < aln_end)
    {
      cursor = binread(binread(cursor,a1),a2);
      bool const inside = (a1 >= s1 && a1 < e1 && a2 >= s2 && a2 < e2);
      if (!inside) continue;
      sum1[a1] += plup_fwd(snt1[a1].id(), snt2[a2].id());
      ++cnt1[a1];
      sum2[a2] += plup_bwd(snt1[a1].id(), snt2[a2].id());
      ++cnt2[a2];
    }

  fwd_score = 0;
  for (size_t i = s1; i < e1; ++i)
    {
      if (cnt1[i] == 0)
        fwd_score += log(plup_fwd(snt1[i].id(),0));
      else if (cnt1[i] == 1)
        fwd_score += log(sum1[i]);
      else
        fwd_score += log(sum1[i])-log(cnt1[i]);
    }

  bwd_score = 0;
  for (size_t i = s2; i < e2; ++i)
    {
      if (cnt2[i] == 0)
        bwd_score += log(plup_bwd(0,snt2[i].id()));
      else if (cnt2[i] == 1)
        bwd_score += log(sum2[i]);
      else
        bwd_score += log(sum2[i])-log(cnt2[i]);
    }
}
}
#endif

View File

@ -0,0 +1,28 @@
#include "ug_load_primer.h"
#include <boost/interprocess/mapped_region.hpp>
#include <boost/thread.hpp>
namespace Moses
{
// Remember the mapped file whose pages operator() will touch.
FastLoader::
FastLoader(boost::iostreams::mapped_file_source const& f)
  : file(f)
{
}
// Touch one byte in every page of the mapped file.
//
// The bytes are copied into a volatile sink: the previous version summed
// them into an otherwise unused local, which lets an optimizing compiler
// remove the reads (and with them the whole loop).
void
FastLoader::
operator()() const
{
  size_t const pagesize = boost::interprocess::mapped_region::get_page_size();
  char const* const start = file.data();
  size_t const nbytes = file.size();
  char volatile sink = 0;
  for (size_t off = 0; off < nbytes; off += pagesize)
    sink = start[off];
  (void) sink;
}
// Start a background thread that touches every page of /f/ and return
// immediately.
//
// The thread is detached explicitly: letting a joinable boost::thread go out
// of scope either detaches implicitly (deprecated) or calls std::terminate,
// depending on how Boost.Thread is configured.
//
// NOTE(review): the thread works on a FastLoader that stores a reference to
// /f/, so /f/ has to stay open and alive until the thread has finished;
// nothing here enforces that.
void prime(boost::iostreams::mapped_file_source const& f)
{
  boost::thread foo(FastLoader(f));
  foo.detach();
}
}

18
moses/mm/ug_load_primer.h Normal file
View File

@ -0,0 +1,18 @@
//-*- c++ -*-
#pragma once
#include <boost/iostreams/device/mapped_file.hpp>
//
namespace Moses
{
// Function object (used as a thread body by prime()) that walks over a
// memory-mapped file page by page.
class FastLoader
{
// NOTE(review): held by reference, not by value -- the referenced object
// must outlive every copy of this functor that is still running.
boost::iostreams::mapped_file_source const& file;
public:
FastLoader(boost::iostreams::mapped_file_source const& f);
// read one byte from each page of /file/
void operator()() const;
};
// run a FastLoader over /f/ in a separate thread
void prime(boost::iostreams::mapped_file_source const& f);
};

228
moses/mm/ug_mm_2d_table.h Normal file
View File

@ -0,0 +1,228 @@
// -*- c++ -*-
// (c) 2007-2012 Ulrich Germann
#ifndef __ug_mm_2d_table_h
#define __ug_mm_2d_table_h
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/shared_ptr.hpp>
#include <vector>
#include <map>
#include "tpt_typedefs.h"
#include "tpt_pickler.h"
#include "ug_typedefs.h"
namespace bio=boost::iostreams;
namespace ugdiss
{
using namespace std;
// Read access to a sparse two-dimensional table stored in a memory-mapped
// file (written by write_mm_2d_table()). Each row is a run of Cells sorted
// by column id; /index/ maps row ids to Cell offsets; M1 and M2 hold the row
// and column marginals.
//
// Change w.r.t. the previous version: the constructor now initializes all
// pointers to NULL and both dimensions to 0, so that m1() / m2() on a table
// that has not been opened return INIT(0) instead of reading through
// uninitialized members.
template<typename OFFSET, typename ID, typename VAL, typename INIT>
class
mm2dTable
{
public:
  // One table entry. Cells are written to / read from the file as raw bytes,
  // so this must stay a plain aggregate.
  struct Cell
  {
    ID  id;  // column id
    VAL val; // cell value

    // comparison against a bare column id (used for binary search in a row)
    bool
    operator<(ID const otherId) const
    {
      return id < otherId;
    }

    bool
    operator<(Cell const& other) const
    {
      return id < other.id;
    }

    struct SortDescendingByValue
    {
      bool operator()(Cell const& a, Cell const& b) const
      {
        return a.val > b.val;
      }
    };
  };

  // A view of one table row: the half-open Cell range [start,stop).
  struct Row
  {
    Cell const* start;
    Cell const* stop;
    VAL operator[](ID key) const; // value in column /key/, INIT(0) if absent
  };

  Cell* data;      // first Cell of the first row
  VAL *M1, *M2;    // row marginals, column marginals
  OFFSET * index;  // numRows+1 offsets (counted in Cells) into /data/
  ID numRows;
  ID numCols;
  boost::shared_ptr<bio::mapped_file> file;

  // row marginal of /key/, INIT(0) for unknown rows
  VAL m1(ID key) const
  {
    return (key < numRows) ? M1[key] : INIT(0);
  }

  // column marginal of /key/, INIT(0) for unknown columns
  VAL m2(ID key) const
  {
    return (key < numCols) ? M2[key] : INIT(0);
  }

  void open(string fname);
  void close(); // NOTE(review): no definition is visible in this header
  Row operator[](ID key) const;

  mm2dTable(string const fname="")
    : data(NULL), M1(NULL), M2(NULL), index(NULL), numRows(0), numCols(0)
  {
    if (!fname.empty()) open(fname);
  }

  ~mm2dTable() { file.reset(); }
};
// Return a view of row /key/. Rows beyond the table are returned as an empty
// range positioned at the end of the cell data.
//
// Fix: for key >= numRows the previous version read index[key+1], which lies
// beyond the numRows+1 entries of the index. The last valid entry,
// index[numRows], is used instead.
template<typename OFFSET, typename ID, typename VAL, typename INIT>
typename mm2dTable<OFFSET,ID,VAL,INIT>::Row
mm2dTable<OFFSET,ID,VAL,INIT>::
operator[](ID key) const
{
  Row ret;
  if (key < numRows)
    {
      ret.start = data+index[key];
      ret.stop  = data+index[key+1];
    }
  else
    ret.start = ret.stop = data+index[numRows];
  return ret;
}
// Binary-search this row for column /key/ and return its value; INIT(0) if
// the row is empty or has no cell for that column.
template<typename OFFSET, typename ID, typename VAL, typename INIT>
VAL
mm2dTable<OFFSET,ID,VAL,INIT>::
Row::
operator[](ID key) const
{
  if (start != stop)
    {
      Cell const* hit = lower_bound(start,stop,key);
      if (hit != stop && hit->id == key)
        return hit->val;
    }
  return INIT(0);
}
// Memory-map the table file /fname/ (read/write) and set up the pointers
// into it. File layout as read here:
//   filepos_type  offset of the index (from the start of the file)
//   id_type       number of rows
//   id_type       number of columns
//   Cell[]        cell data
//   OFFSET[numRows+1]  index           (at the recorded offset)
//   VAL[numRows]       row marginals    (directly after the index)
//   VAL[numCols]       column marginals
// Terminates the process with exit(1) if the file cannot be read or mapped.
//
// Changes w.r.t. the previous version:
//  - the "not accessible" message now closes the quote around the file name;
//  - a failed mapping exits instead of relying on assert(0), which is
//    compiled out under NDEBUG;
//  - the two dimension fields are read as id_type, matching the width the
//    cursor is advanced by.
template<typename OFFSET, typename ID, typename VAL, typename INIT>
void
mm2dTable<OFFSET,ID,VAL,INIT>::
open(string fname)
{
  if (access(fname.c_str(),R_OK))
    {
      cerr << "[" << __FILE__ << ":" << __LINE__ <<"] FATAL ERROR: "
           << "file '" << fname << "' is not accessible." << endl;
      exit(1);
    }
  file.reset(new bio::mapped_file());
  file->open(fname,ios::in|ios::out);
  if (!file->is_open())
    {
      cerr << "Error opening file " << fname << endl;
      exit(1);
    }
  char* p = file->data();
  filepos_type offset = *reinterpret_cast<filepos_type*>(p);
  index = reinterpret_cast<OFFSET*>(p+offset); p += sizeof(offset);
  numRows = *reinterpret_cast<id_type const*>(p); p += sizeof(id_type);
  numCols = *reinterpret_cast<id_type const*>(p); p += sizeof(id_type);
  data = reinterpret_cast<Cell*>(p);
  // the marginals follow the numRows+1 index entries
  M1 = reinterpret_cast<VAL*>(index+numRows+1);
  M2 = M1+numRows;
}
// Write the sparse table /T/ (one inner container of (column id, value)
// pairs per row) to /out/ in the format read by mm2dTable::open():
// header (index offset, #rows, #columns), cells row by row, index, row
// marginals, column marginals. Marginals are computed from /T/ when /m1/
// or /m2/ is NULL. /out/ must be seekable: the index offset in the header
// is filled in at the very end.
//
// Each inner container must iterate in ascending column-id order (checked
// by an assert).
//
// Fix: the marginal vectors are written only if they are non-empty; taking
// the address of element 0 of an empty vector is undefined behaviour (the
// column marginals are empty when every row of /T/ is empty).
//
// NOTE(review): the header records m1->size() as the number of rows while
// the index is built from T.size(); presumably callers that pass /m1/ must
// make the two agree -- confirm.
template<
  typename OFFSET, // integer type of file offsets
  typename ID,     // integer type of column ids
  typename VAL,    // type of cell values
  typename INIT,   // INIT(0) initializes default values
  typename ICONT   // inner container type
  >
void
write_mm_2d_table(ostream& out, vector<ICONT> const& T,
                  vector<VAL> const* m1 = NULL,
                  vector<VAL> const* m2 = NULL)
{
  assert(T.size());
  typedef typename ICONT::const_iterator iter;

  // compute marginals if necessary
  vector<VAL> m1x,m2x;
  if (!m1)
    {
      m1x.resize(T.size(),INIT(0));
      for (size_t r = 0; r < T.size(); ++r)
        for (iter c = T.at(r).begin(); c != T.at(r).end(); ++c)
          m1x[r] = m1x[r] + c->second;
      m1 = &m1x;
    }
  if (!m2)
    {
      for (size_t r = 0; r < T.size(); ++r)
        for (iter c = T.at(r).begin(); c != T.at(r).end(); ++c)
          {
            while (c->first >= m2x.size())
              m2x.push_back(INIT(0));
            m2x[c->first] = m2x[c->first] + c->second;
          }
      m2 = &m2x;
    }

  filepos_type idxOffset=0;
  numwrite(out,idxOffset); // place holder, we'll return here at the end
  numwrite(out,id_type(m1->size())); // number of rows
  numwrite(out,id_type(m2->size())); // number of columns

  // write actual table
  vector<OFFSET> index; // index[r]: number of cells written before row r
  size_t ctr =0;
  index.reserve(m1->size()+1);
  for (ID r = 0; r < ID(T.size()); ++r)
    {
      index.push_back(ctr);
      ID lastId = 0;
      if (T.at(r).size())
        lastId = T.at(r).begin()->first;
      for (iter c = T.at(r).begin(); c != T.at(r).end(); ++c)
        {
          ctr++;
          assert(c->first >= lastId);
          lastId = c->first;
          typename mm2dTable<OFFSET,ID,VAL,INIT>::Cell item;
          item.id  = c->first;
          item.val = c->second;
          out.write(reinterpret_cast<char const*>(&item),sizeof(item));
        }
    }
  index.push_back(ctr); // end marker: total number of cells
  idxOffset=out.tellp();

  // write index
  for (size_t i = 0; i < index.size(); ++i)
    {
      OFFSET o = index[i];
      out.write(reinterpret_cast<char*>(&o),sizeof(OFFSET));
    }

  // write marginals
  if (!m1->empty())
    out.write(reinterpret_cast<char const*>(&(*m1)[0]),m1->size()*sizeof(VAL));
  if (!m2->empty())
    out.write(reinterpret_cast<char const*>(&(*m2)[0]),m2->size()*sizeof(VAL));

  // go back and fill in the index offset
  out.seekp(0);
  numwrite(out,idxOffset);
}
}
#endif

263
moses/mm/ug_mm_tsa.h Normal file
View File

@ -0,0 +1,263 @@
// -*- c++ -*-
#ifndef _ug_mm_tsa_h
#define _ug_mm_tsa_h
// (c) 2007-2009 Ulrich Germann. All rights reserved.
#include <iostream>
#include <stdexcept>
#include <sstream>
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/dynamic_bitset.hpp>
#include "tpt_tightindex.h"
#include "tpt_tokenindex.h"
#include "tpt_pickler.h"
#include "ug_tsa_base.h"
namespace ugdiss
{
using namespace std;
namespace bio=boost::iostreams;
// Token sequence array (TSA) whose data lives in a memory-mapped file.
template<typename TOKEN>
class mmTSA : public TSA<TOKEN>
{
public:
typedef typename TSA<TOKEN>::tree_iterator tree_iterator;
friend class TSA_tree_iterator<TOKEN>;
private:
// the mapped file; startArray, endArray and index point into it after open()
bio::mapped_file_source file;
public: // temporarily for debugging
filepos_type const* index; // random access to top-level sufa ranges
private:
// move to roughly the fraction /ratio/ of [a,z) and back up to an entry start
char const* index_jump(char const* a, char const* z, float ratio) const;
// start / end of the array range for token id t (NULL if t is not indexed)
char const* getLowerBound(id_type t) const;
char const* getUpperBound(id_type t) const;
public:
mmTSA();
// construct and immediately open(fname,c)
mmTSA(string fname, Ttrack<TOKEN> const* c);
// map the file /fname/ and attach it to the corpus track /c/;
// throws std::runtime_error if the file does not exist
void open(string fname, Ttrack<TOKEN> const* c);
// NOTE(review): sntCnt is declared here but not defined in this header
count_type
sntCnt(char const* p, char const * const q) const;
// number of (sentence id, offset) entries stored in [p,q)
count_type
rawCnt(char const* p, char const * const q) const;
// raw: number of entries in [p,q); sids: number of distinct sentence ids
void
getCounts(char const* p, char const * const q,
count_type& sids, count_type& raw) const;
// the following four decode one number with tightread() and return the
// position after it
char const*
readSid(char const* p, char const* q, id_type& sid) const;
char const*
readSid(char const* p, char const* q, uint64_t& sid) const;
char const*
readOffset(char const* p, char const* q, uint16_t& offset) const;
char const*
readOffset(char const* p, char const* q, uint64_t& offset) const;
// NOTE(review): sanityCheck is declared here but not defined in this header
void sanityCheck() const;
};
// ======================================================================
/** Jump to (approximately) the fraction /ratio/ of the way through the
 *  tightly packed byte range [a,z) and back up from there to the start of
 *  an entry.
 *
 *  Assumes that keys are flagged with '1', values with '0' (the flag being
 *  the high bit of each byte). From the computed position the function
 *  first skips backwards over flagged bytes, then backwards over unflagged
 *  bytes, and finally steps forward again if it stopped on a flagged byte,
 *  so that the returned byte is always unflagged.
 *
 *  Fix: the flag is tested with a bit mask instead of `*m < 0`; the latter
 *  only works on platforms where plain char is signed.
 *
 *  NOTE(review): the offset is computed as an int from a float product, so
 *  for very large ranges it is imprecise (float mantissa) and may overflow.
 *
 *  @param a     start of the range
 *  @param z     end of the range
 *  @param ratio fraction of the range to jump to; must be in [0,1)
 *  @return pointer to an unflagged byte in [a,z)
 */
template<typename TOKEN>
char const*
mmTSA<TOKEN>::
index_jump(char const* a, char const* z, float ratio) const
{
  assert(ratio >= 0 && ratio < 1);
  char const* m = a+int(ratio*(z-a));
  if (m > a)
    {
      while (m > a &&  (*m & 0x80)) --m;
      while (m > a && !(*m & 0x80)) --m;
      if (*m & 0x80) ++m;
    }
  assert(!(*m & 0x80));
  return m;
}
// ======================================================================
// Create an empty, unopened array: no corpus, no data range, no index.
// Also sets the bit set caching threshold to 4096.
//
// Fix: /index/ is now initialized as well (it was left indeterminate), and
// the stray ';' after the function body is gone.
template<typename TOKEN>
mmTSA<TOKEN>::
mmTSA()
  : index(NULL)
{
  this->corpus     = NULL;
  this->startArray = NULL;
  this->endArray   = NULL;
  this->BitSetCachingThreshold=4096;
}
// ======================================================================
// Construct the array directly from the file /fname/ over corpus track /c/.
// NOTE(review): unlike the default constructor, this one does not set
// BitSetCachingThreshold -- confirm that the base class provides a value.
template<typename TOKEN>
mmTSA<TOKEN>::
mmTSA(string fname, Ttrack<TOKEN> const* c)
{
  this->open(fname,c);
}
// ======================================================================
// Map the file /fname/ and attach it to the corpus track /c/.
// The file starts with the offset of the index and the index size; the
// array data begins right after these two header fields and ends where the
// index begins. Corpus size and token count are taken from /c/.
// Throws std::runtime_error if /fname/ does not exist.
template<typename TOKEN>
void
mmTSA<TOKEN>::
open(string fname, Ttrack<TOKEN> const* c)
{
  this->bsc.reset(new BitSetCache<TSA<TOKEN> >(this));
  bool const missing = (access(fname.c_str(),F_OK) != 0);
  if (missing)
    {
      ostringstream msg;
      msg << "mmTSA<>::open: File '" << fname << "' does not exist.";
      throw std::runtime_error(msg.str().c_str());
    }
  assert(c);
  this->corpus = c;

  file.open(fname);
  Moses::prime(file); // start touching the mapped pages in the background

  char const* const base = file.data();
  filepos_type idxOffset;
  char const* cursor = numread(base,idxOffset);
  cursor = numread(cursor,this->indexSize);

  this->startArray = cursor;
  this->index      = reinterpret_cast<filepos_type const*>(base+idxOffset);
  this->endArray   = reinterpret_cast<char const*>(index);
  this->corpusSize = c->size();
  this->numTokens  = c->numTokens();
}
// ======================================================================
// Start of the array range for token /id/: startArray + index[id].
// Returns NULL for ids that are not covered by the index.
template<typename TOKEN>
char const*
mmTSA<TOKEN>::
getLowerBound(id_type id) const
{
  if (id < this->indexSize)
    return this->startArray + this->index[id];
  return NULL;
}
// ======================================================================
// End of the array range for token /id/: startArray + index[id+1].
// Returns NULL for ids that are not covered by the index.
template<typename TOKEN>
char const*
mmTSA<TOKEN>::
getUpperBound(id_type id) const
{
  if (id < this->indexSize)
    return this->startArray + this->index[id+1];
  return NULL;
}
// ======================================================================
// Decode a sentence id from [p,q) with tightread(); returns the position
// after the decoded number.
template<typename TOKEN>
char const*
mmTSA<TOKEN>::
readSid(char const* p, char const* q, id_type& sid) const
{
  char const* const next = tightread(p,q,sid);
  return next;
}
// ======================================================================
// 64-bit variant: decode a sentence id from [p,q) with tightread(); returns
// the position after the decoded number.
template<typename TOKEN>
char const*
mmTSA<TOKEN>::
readSid(char const* p, char const* q, uint64_t& sid) const
{
  char const* const next = tightread(p,q,sid);
  return next;
}
// ======================================================================
// Decode an in-sentence offset from [p,q) with tightread(); returns the
// position after the decoded number.
template<typename TOKEN>
inline
char const*
mmTSA<TOKEN>::
readOffset(char const* p, char const* q, uint16_t& offset) const
{
  char const* const next = tightread(p,q,offset);
  return next;
}
// ======================================================================
// 64-bit variant: decode an in-sentence offset from [p,q) with tightread();
// returns the position after the decoded number.
template<typename TOKEN>
inline
char const*
mmTSA<TOKEN>::
readOffset(char const* p, char const* q, uint64_t& offset) const
{
  char const* const next = tightread(p,q,offset);
  return next;
}
// ======================================================================
// Count the (sentence id, offset) entries stored in [p,q) by decoding them
// one after the other.
template<typename TOKEN>
count_type
mmTSA<TOKEN>::
rawCnt(char const* p, char const* const q) const
{
  id_type  sid;
  uint16_t off;
  size_t entries = 0;
  for (; p < q; ++entries)
    p = tightread(tightread(p,q,sid),q,off);
  return entries;
}
// ======================================================================
// Decode all (sentence id, offset) entries in [p,q).
// /raw/ receives the number of entries, /sids/ the number of distinct
// sentence ids among them (tracked in a bit set with one bit per sentence
// of the corpus).
template<typename TOKEN>
void
mmTSA<TOKEN>::
getCounts(char const* p, char const* const q,
          count_type& sids, count_type& raw) const
{
  raw = 0;
  id_type  sid;
  uint16_t off;
  boost::dynamic_bitset<uint64_t> seen(this->corpus->size());
  for (; p < q; raw++)
    {
      p = tightread(tightread(p,q,sid),q,off);
      seen.set(sid);
    }
  sids = seen.count();
}
// ======================================================================
} // end of namespace ugdiss
// #include "ug_mm_tsa_extra.h"
#endif

View File

@ -0,0 +1,53 @@
// -*- c++ -*-
// (c) 2007-2009 Ulrich Germann. All rights reserved.
#if 0
#ifndef __ug_mm_tsa_tree_iterator_h
#define __ug_mm_tsa_tree_iterator_h
// namespace ugdiss
// {
// template<typename TOKEN>
// class
// mmTSA<TOKEN>::
// tree_iterator : public TSA<TOKEN>::tree_iterator
// {
// public:
// tree_iterator(TSA<TOKEN> const* s);
// tree_iterator(TSA<TOKEN> const* s, Token const& t);
// tree_iterator(TSA<TOKEN> const* s, Token const* kstart, Token const* kend);
// bool down() { return TSA<TOKEN>::tree_iterator::down(); }
// bool over() { return TSA<TOKEN>::tree_iterator::over(); }
// };
// // ======================================================================
// // ======================================================================
// // ======================================================================
// template<typename TOKEN>
// mmTSA<TOKEN>::
// tree_iterator::
// tree_iterator(TSA<TOKEN> const* s)
// : TSA<TOKEN>::tree_iterator::tree_iterator(s)
// {};
// template<typename TOKEN>
// mmTSA<TOKEN>::
// tree_iterator::
// tree_iterator(TSA<TOKEN> const* s, Token const& t)
// : TSA<TOKEN>::tree_iterator::tree_iterator(s,t)
// {};
// template<typename TOKEN>
// mmTSA<TOKEN>::
// tree_iterator::
// tree_iterator(TSA<TOKEN> const* s, Token const* kstart, Token const* kend)
// : TSA<TOKEN>::tree_iterator::tree_iterator(s,kstart,kend)
// {};
// // ======================================================================
// // ======================================================================
// // ======================================================================
// }
#endif
#endif

250
moses/mm/ug_mm_ttrack.h Normal file
View File

@ -0,0 +1,250 @@
// -*- c++ -*-
// Memory-mapped corpus track. The corpus (each Token occupying a fixed number
// of bytes (must be compatible with the memory alignment in the OS) is stored
// as one huge array. The "index" maps from sentence IDs to positions within
// that array.
// (c) 2007-2010 Ulrich Germann. All rights reserved
#ifndef __ug_mm_ttrack
#define __ug_mm_ttrack
#include <sstream>
#include <string>
#include <stdexcept>
#include <boost/iostreams/device/mapped_file.hpp>
#include <boost/shared_ptr.hpp>
#include "tpt_typedefs.h"
#include "tpt_tokenindex.h"
#include "ug_ttrack_base.h"
#include "num_read_write.h"
#include "ug_load_primer.h"
namespace ugdiss
{
using namespace std;
namespace bio=boost::iostreams;
// Corpus track stored in a memory-mapped file: all tokens as one contiguous
// array plus an index from sentence ids to token positions.
template<typename TKN=id_type>
class mmTtrack : public Ttrack<TKN>
{
public:
typedef TKN Token;
private:
bio::mapped_file_source file;
Token const* data; // pointer to first word of first sentence
id_type const* index; /* pointer to index (change data type for corpora
* of more than four billion words)
*/
public:
// construct and immediately open /fname/
mmTtrack(string fname);
// construct an empty, unopened track
mmTtrack();
// return pointer to beginning of sentence
Token const* sntStart(size_t sid) const;
// return pointer to end of sentence
Token const* sntEnd(size_t sid) const;
// return size of corpus (in number of sentences)
size_t size() const;
// return size of corpus (in number of tokens)
size_t numTokens() const;
// open an mmTtrack file
void open(string fname);
// FUNCTIONS FOR BUILDING CORPUS TRACKS
// write a blank file header at the beginning of a new ttrack file
void write_blank_file_header(ostream& out) const;
// write the sentence index /idx/ and fill the file header
void write_index_and_finalize(ostream& out,
vector<id_type> const& idx,
count_type tokenCount) const;
// copy a contiguous sequence of sentences to another stream
// return the number of tokens copied
id_type copySentences(ostream& trg, id_type start, id_type stop) const;
/** find the sentence id of a given token */
id_type findSid(TKN const* t) const;
// find the sentence id of the token at position /tokenOffset/ in the corpus
id_type findSid(id_type tokenOffset) const;
/// re-assign ids based on the id maps in /f/
void remap(string const fname, vector<id_type const*> const & f) const;
};
/// Re-assign ids based on the id maps in /f/.
///
/// Maps the track file /fname/ read/write, skips its header (index offset,
/// sentence count, token count) and replaces every token in place by the
/// result of its remap(f) member. This object's own mapping is not touched.
///
/// Fixes w.r.t. the previous version:
///  - the tokens are addressed through a local pointer; the old code assigned
///    to the member /data/ (a pointer to const) from this const member
///    function and wrote through it, which cannot compile once the function
///    is instantiated;
///  - the header is parsed through a char const* cursor, as in open();
///  - the background page primer is no longer started on the function-local
///    mapping: it keeps a reference to the mapping and could still be running
///    after the mapping has been closed and destroyed here.
template<typename TKN>
void
mmTtrack<TKN>::
remap(string const fname, vector<id_type const*> const & f) const
{
  bio::mapped_file myfile(fname);
  assert(myfile.is_open());
  char* const base = myfile.data();

  // parse the file header to find the token array and its length
  filepos_type idxOffset;
  id_type numSent,numWords;
  char const* p = base;
  p = numread(p,idxOffset);
  p = numread(p,numSent);
  p = numread(p,numWords);

  // writable view of the token array, which starts right after the header
  TKN* tokens = reinterpret_cast<TKN*>(base + (p - base));
  for (size_t i = 0; i < numWords; ++i)
    tokens[i] = tokens[i].remap(f);
  myfile.close();
}
// Number of sentences in the corpus (the numSent field read by open()).
template<typename TKN>
size_t
mmTtrack<TKN>::
size() const
{
  size_t const sentences = this->numSent;
  return sentences;
}
// Number of tokens in the corpus (the numWords field read by open()).
template<typename TKN>
size_t
mmTtrack<TKN>::
numTokens() const
{
  size_t const tokens = this->numWords;
  return tokens;
}
// Return a pointer to the first token of sentence /sid/.
// An out-of-range /sid/ is reported on stderr and trips an assertion.
template<typename TKN>
TKN const*
mmTtrack<TKN>::
sntStart(size_t sid) const
{
  bool const beyond_corpus = !(sid < this->numSent);
  if (beyond_corpus)
    cerr << "Fatal error: requested sentence #"<<sid<<" is beyond corpus size ("
         << this->numSent <<")" << endl;
  assert(sid < this->numSent);
  id_type const firstToken = index[sid];
  return data+firstToken;
}
// Return a pointer one past the last token of sentence /sid/, i.e. the
// position recorded in the index for sentence sid+1.
template<typename TKN>
TKN const*
mmTtrack<TKN>::
sntEnd(size_t sid) const
{
  assert(sid < this->numSent);
  id_type const endToken = index[sid+1];
  return data+endToken;
}
// Create an empty, unopened track: no data, no index, zero sentences and
// zero tokens.
template<typename TKN>
mmTtrack<TKN>::
mmTtrack()
  : data(NULL), index(NULL)
{
  this->numSent  = 0;
  this->numWords = 0;
}
// Construct the track directly from the file /fname/.
template<typename TKN>
mmTtrack<TKN>::
mmTtrack(string fname)
{
  this->open(fname);
}
// Map the track file /fname/ and set up /data/ and /index/.
// The header holds, in this order: the offset of the sentence index, the
// number of sentences and the number of tokens; the token array follows
// directly after the header.
// Throws std::runtime_error if /fname/ does not exist.
template<typename TKN>
void
mmTtrack<TKN>::
open(string fname)
{
  bool const missing = (access(fname.c_str(),F_OK) != 0);
  if (missing)
    {
      ostringstream msg;
      msg << "mmTtrack<>::open: File '" << fname << "' does not exist.";
      throw std::runtime_error(msg.str().c_str());
    }
  file.open(fname);
  if (!file.is_open())
    {
      cerr << "Error opening file " << fname << endl;
      assert(0);
    }
  char const* const base = file.data();
  filepos_type idxOffset;
  char const* cursor = numread(base,idxOffset);
  cursor = numread(cursor,this->numSent);
  cursor = numread(cursor,this->numWords);
  data  = reinterpret_cast<Token const*>(cursor);
  index = reinterpret_cast<id_type const*>(base+idxOffset);
}
// Find the id of the sentence that contains the token /t/ points to:
// convert the pointer to a token position and search the sentence index for
// the last sentence starting at or before that position.
template<typename TKN>
id_type
mmTtrack<TKN>::
findSid(TKN const* t) const
{
  id_type const tokenPos = t-data;
  id_type const* const first = index;
  id_type const* const last  = first+this->numSent;
  id_type const* p = upper_bound(first,last,tokenPos);
  assert(p>index);
  return p-index-1;
}
// Find the id of the sentence that contains the token at corpus position
// /tokenPos/: search the sentence index for the last sentence starting at
// or before that position.
template<typename TKN>
id_type
mmTtrack<TKN>::
findSid(id_type tokenPos) const
{
  id_type const* const first = index;
  id_type const* const last  = first+this->numSent;
  id_type const* p = upper_bound(first,last,tokenPos);
  assert(p>index);
  return p-index-1;
}
// Write a zeroed file header to /out/: index start, index size and token
// count. The real values are filled in by write_index_and_finalize().
template<typename TKN>
void
mmTtrack<TKN>::
write_blank_file_header(ostream& out) const
{
  filepos_type const noIndexStart = 0;
  id_type const      zero         = 0;
  numwrite(out,noIndexStart); // place holder for index start
  numwrite(out,zero);         // place holder for index size
  numwrite(out,zero);         // place holder for token count
}
// Append the sentence index /idx/ at the current position of /out/, then go
// back to the start of the stream and fill in the header: position of the
// index, number of sentences (idx.size()-1, since /idx/ carries one extra
// end marker) and /tokenCount/.
//
// Fixes w.r.t. the previous version:
//  - the type of /tokenCount/ matches the in-class declaration (count_type);
//    it is written with the width of id_type, the width reserved by
//    write_blank_file_header();
//  - an empty /idx/ yields a sentence count of 0 instead of wrapping around
//    to the largest id_type value.
template<typename TKN>
void
mmTtrack<TKN>::
write_index_and_finalize(ostream& out,
                         vector<id_type>const& idx,
                         count_type tokenCount) const
{
  id_type const idxSize       = idx.size();
  id_type const sentenceCount = idx.empty() ? id_type(0) : id_type(idxSize-1);
  filepos_type idxStart = out.tellp();
  for (size_t i = 0; i < idx.size(); ++i)
    numwrite(out,idx[i]);
  out.seekp(0);
  numwrite(out,idxStart);
  numwrite(out,sentenceCount);
  numwrite(out,id_type(tokenCount));
}
// Copy the tokens of sentences [start,stop) as raw bytes to /trg/ and return
// the number of tokens copied. Nothing is written if the range holds no
// tokens.
template<typename TKN>
id_type
mmTtrack<TKN>::
copySentences(ostream& trg, id_type start, id_type stop) const
{
  assert(stop > start);
  TKN const* const first = sntStart(start);
  TKN const* const last  = sntEnd(stop-1);
  size_t const nbytes = (last-first)*sizeof(TKN);
  if (nbytes == 0)
    return 0;
  trg.write(reinterpret_cast<char const*>(first),nbytes);
  return last-first;
}
}
#endif

450
moses/mm/ug_mmbitext.cc Normal file
View File

@ -0,0 +1,450 @@
// #include "ug_mmbitext.h"
// #include <algorithm>
// namespace Moses
// {
// using namespace ugdiss;
// using namespace std;
// mmbitext::
// pstats::
// pstats()
// : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0)
// {}
// void
// mmbitext::
// pstats::
// register_worker()
// {
// this->lock.lock();
// ++this->in_progress;
// this->lock.unlock();
// }
// void
// pstats::
// release()
// {
// this->lock.lock();
// if (this->in_progress-- == 1) // last one - >we're done
// this->ready.notify_all();
// this->lock.unlock();
// }
// void
// mmbitext::
// open(string const base, string const L1, string L2)
// {
// T1.open(base+L1+".mct");
// T2.open(base+L2+".mct");
// Tx.open(base+L1+"-"+L2+".mam");
// V1.open(base+L1+".tdx"); V1.iniReverseIndex();
// V2.open(base+L2+".tdx"); V2.iniReverseIndex();
// I1.open(base+L1+".sfa",&T1);
// I2.open(base+L2+".sfa",&T2);
// // lexscorer.open(base+L1+"-"+L2+".lex");
// assert(T1.size() == T2.size());
// }
// mmbitext::
// mmbitext()
// : ag(NULL)
// {
// }
// bool
// mmbitext::
// find_trg_phr_bounds(size_t const sid, size_t const start, size_t const stop,
// size_t & s1, size_t & s2, size_t & e1, size_t & e2,
// vector<uchar>* core_alignment, bool const flip) const
// {
// // if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
// // a word on the core_alignment:
// // since fringe words ([s1,...,s2),[e1,..,e2) if s1 < s2, or e1 < e2, respectively)
// // are be definition unaligned, we store only the core alignment in *core_alignment
// // it is up to the calling function to shift alignment points over for start positions
// // of extracted phrases that start with a fringe word
// char const* p = Tx.sntStart(sid);
// char const* x = Tx.sntEnd(sid);
// bitvector forbidden((flip ? T1 : T2).sntLen(sid));
// size_t src,trg;
// size_t lft = forbidden.size();
// size_t rgt = 0;
// vector<vector<ushort> > aln(T1.sntLen(sid));
// while (p < x)
// {
// if (flip) { p = binread(p,trg); assert(p<x); p = binread(p,src); }
// else { p = binread(p,src); assert(p<x); p = binread(p,trg); }
// if (src < start || src >= stop)
// forbidden.set(trg);
// else
// {
// lft = min(lft,trg);
// rgt = max(rgt,trg);
// if (core_alignment)
// {
// if (flip) aln[trg].push_back(src);
// else aln[src].push_back(trg);
// }
// }
// }
// #if 0
// cout << setw(5) << mctr << " " << setw(3) << xctr << " ";
// for (size_t i = 0; i < forbidden.size(); ++i)
// {
// if (i == lft) cout << '(';
// cout << (forbidden[i] ? 'x' : '-');
// if (i == rgt) cout << ')';
// }
// cout << endl;
// #endif
// for (size_t i = lft; i <= rgt; ++i)
// if (forbidden[i])
// return false;
// s2 = lft; for (s1 = s2; s1 && !forbidden[s1-1]; --s1);
// e1 = rgt+1; for (e2 = e1; e2 < forbidden.size() && !forbidden[e2]; ++e2);
// if (lft > rgt) return false;
// if (core_alignment)
// {
// core_alignment->clear();
// if (flip)
// {
// for (size_t i = lft; i <= rgt; ++i)
// {
// sort(aln[i].begin(),aln[i].end());
// BOOST_FOREACH(ushort x, aln[i])
// {
// core_alignment->push_back(i-lft);
// core_alignment->push_back(x-start);
// }
// }
// }
// else
// {
// for (size_t i = start; i < stop; ++i)
// {
// BOOST_FOREACH(ushort x, aln[i])
// {
// core_alignment->push_back(i-start);
// core_alignment->push_back(x-lft);
// }
// }
// }
// }
// return lft <= rgt;
// }
// void
// mmbitext::
// prep(iter const& phrase)
// {
// prep2(phrase);
// }
// sptr<mmbitext::pstats>
// mmbitext::
// prep2(iter const& phrase)
// {
// if (!ag)
// {
// ag = new agenda(*this);
// ag->add_workers(20);
// }
// typedef boost::unordered_map<uint64_t,sptr<pstats> > pcache_t;
// uint64_t pid = phrase.getPid();
// pcache_t & cache(phrase.root == &this->I1 ? cache1 : cache2);
// pcache_t::value_type entry(pid,sptr<pstats>());
// pair<pcache_t::iterator,bool> foo = cache.emplace(entry);
// if (foo.second) foo.first->second = ag->add_job(phrase, 1000);
// return foo.first->second;
// }
// sptr<mmbitext::pstats>
// mmbitext::
// lookup(iter const& phrase)
// {
// sptr<pstats> ret = prep2(phrase);
// boost::unique_lock<boost::mutex> lock(ret->lock);
// while (ret->in_progress)
// ret->ready.wait(lock);
// return ret;
// }
// void
// mmbitext::
// agenda::
// worker::
// operator()()
// {
// uint64_t sid=0, offset=0, len=0; // of the source phrase
// bool fwd=false; // source phrase is L1
// sptr<mmbitext::pstats> stats;
// size_t s1=0, s2=0, e1=0, e2=0;
// for (; ag.get_task(sid,offset,len,fwd,stats); )
// {
// if (!stats) break;
// vector<uchar> aln;
// if (!ag.bitext.find_trg_phr_bounds
// (sid, offset, offset+len, s1, s2, e1, e2, fwd ? &aln : NULL, !fwd))
// {
// stats->release();
// continue;
// }
// stats->lock.lock();
// stats->good += 1;
// stats->lock.unlock();
// for (size_t k = 0; k < aln.size(); k += 2)
// aln[k] += s2 - s1;
// Token const* o = (fwd ? ag.bitext.T2 : ag.bitext.T1).sntStart(sid);
// float sample_weight = 1./((s2-s1+1)*(e2-e1+1));
// for (size_t s = s1; s <= s2; ++s)
// {
// iter b(&(fwd ? ag.bitext.I2 : ag.bitext.I1));
// for (size_t i = s; i < e1; ++i)
// assert(b.extend(o[i].id()));
// for (size_t i = e1; i <= e2; ++i)
// {
// stats->add(b,sample_weight,aln);
// if (i < e2) assert(b.extend(o[i].id()));
// }
// if (fwd && s < s2)
// for (size_t k = 0; k < aln.size(); k += 2)
// --aln[k];
// }
// stats->release();
// }
// }
// void
// mmbitext::
// pstats::
// add(mmbitext::iter const& trg_phrase, float const w, vector<uchar> const& a)
// {
// this->lock.lock();
// jstats& entry = this->trg[trg_phrase.getPid()];
// this->lock.unlock();
// entry.add(w,a);
// }
// mmbitext::
// agenda::
// agenda(mmbitext const& thebitext)
// : shutdown(false), doomed(0), bitext(thebitext)
// {
// }
// mmbitext::
// agenda::
// ~agenda()
// {
// this->lock.lock();
// this->shutdown = true;
// this->ready.notify_all();
// this->lock.unlock();
// for (size_t i = 0; i < workers.size(); ++i)
// workers[i]->join();
// }
// mmbitext::
// ~mmbitext()
// {
// if (ag) delete ag;
// }
// sptr<mmbitext::pstats>
// mmbitext::
// agenda::
// add_job(mmbitext::iter const& phrase, size_t const max_samples)
// {
// static boost::posix_time::time_duration nodelay(0,0,0,0);
// job j;
// j.stats.reset(new mmbitext::pstats());
// j.stats->register_worker();
// j.stats->raw_cnt = phrase.approxOccurrenceCount();
// j.max_samples = max_samples;
// j.next = phrase.lower_bound(-1);
// j.stop = phrase.upper_bound(-1);
// j.len = phrase.size();
// j.ctr = 0;
// j.fwd = phrase.root == &bitext.I1;
// boost::unique_lock<boost::mutex> lk(this->lock);
// joblist.push_back(j);
// if (joblist.size() == 1)
// {
// for (size_t i = 0; i < workers.size(); ++i)
// {
// if (workers[i]->timed_join(nodelay))
// {
// workers[i] = sptr<boost::thread>(new boost::thread(worker(*this)));
// }
// }
// }
// return j.stats;
// }
// bool
// mmbitext::
// agenda::
// get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
// bool & fwd, sptr<mmbitext::pstats> & stats)
// {
// boost::unique_lock<boost::mutex> lock(this->lock);
// if (this->doomed || this->shutdown)
// {
// if (this->doomed) --this->doomed;
// return false;
// }
// // while (joblist.empty())
// // {
// // cerr << "no jobs" << endl;
// // this->ready.wait(lock);
// // if (this->doomed || this->shutdown)
// // {
// // if (this->doomed) --this->doomed;
// // return false;
// // }
// // }
// while (joblist.size())
// {
// if (joblist.front().step(sid,offset))
// {
// job const& j = joblist.front();
// len = j.len;
// fwd = j.fwd;
// stats = j.stats;
// stats->register_worker();
// return true;
// }
// joblist.front().stats->release();
// joblist.pop_front();
// }
// stats.reset();
// return true;
// }
// bool
// mmbitext::
// agenda::
// job::
// step(uint64_t & sid, uint64_t & offset)
// {
// while (next < stop && stats->good < max_samples)
// {
// next = tightread(tightread(next,stop,sid),stop,offset);
// {
// boost::lock_guard<boost::mutex> lock(stats->lock);
// if (stats->raw_cnt == ctr) ++stats->raw_cnt;
// size_t rnum = randInt(stats->raw_cnt - ctr++);
// // cout << stats->raw_cnt << " " << ctr-1 << " "
// // << rnum << " " << max_samples - stats->good << endl;
// if (rnum < max_samples - stats->good)
// {
// stats->sample_cnt++;
// return true;
// }
// }
// }
// return false;
// }
// void
// mmbitext::
// agenda::
// add_workers(int n)
// {
// static boost::posix_time::time_duration nodelay(0,0,0,0);
// boost::lock_guard<boost::mutex> lock(this->lock);
// // house keeping: remove all workers that have finished
// for (size_t i = 0; i < workers.size(); )
// {
// if (workers[i]->timed_join(nodelay))
// {
// if (i + 1 < workers.size())
// workers[i].swap(workers.back());
// workers.pop_back();
// }
// else ++i;
// }
// if (n < 0)
// {
// this->doomed -= n;
// }
// else
// {
// for (int i = 0; i < n; ++i)
// {
// sptr<boost::thread> w(new boost::thread(worker(*this)));
// workers.push_back(w);
// }
// }
// }
// mmbitext::
// jstats::
// jstats()
// {
// my_aln.reserve(1);
// }
// mmbitext::
// jstats::
// jstats(jstats const& other)
// {
// my_rcnt = other.rcnt();
// my_wcnt = other.wcnt();
// my_aln = other.aln();
// }
// void
// mmbitext::
// jstats::
// add(float w, vector<uchar> const& a)
// {
// boost::lock_guard<boost::mutex> lk(this->lock);
// my_rcnt += 1;
// my_wcnt += w;
// if (a.size())
// {
// size_t i = 0;
// while (i < my_aln.size() && my_aln[i].second != a) ++i;
// if (i == my_aln.size())
// my_aln.push_back(pair<size_t,vector<uchar> >(1,a));
// else
// my_aln[i].first++;
// if (my_aln[i].first > my_aln[i/2].first)
// push_heap(my_aln.begin(),my_aln.begin()+i+1);
// }
// }
// uint32_t
// mmbitext::
// jstats::
// rcnt() const
// { return my_rcnt; }
// float
// mmbitext::
// jstats::
// wcnt() const
// { return my_wcnt; }
// vector<pair<size_t, vector<uchar> > > const&
// mmbitext::
// jstats::
// aln() const
// { return my_aln; }
// }

191
moses/mm/ug_mmbitext.h Normal file
View File

@ -0,0 +1,191 @@
#ifndef __ug_mm_bitext_h
#define __ug_mm_bitext_h
// Memory-mapped, word-aligned bitext
// Written by Ulrich Germann
// Things we can do to speed things up:
// - set up threads at startup time that force the
// data into memory sequentially
//
// - use multiple agendas for better load balancing and to avoid
// competition for locks
#include <string>
#include <vector>
#include <cassert>
#include <iomanip>
#include <algorithm>
#include <boost/unordered_map.hpp>
#include <boost/foreach.hpp>
#include <boost/thread.hpp>
#include "moses/generic/sorting/VectorIndexSorter.h"
#include "moses/generic/sampling/Sampling.h"
#include "moses/generic/file_io/ug_stream.h"
#include "ug_typedefs.h"
#include "ug_mm_ttrack.h"
#include "ug_mm_tsa.h"
#include "tpt_tokenindex.h"
#include "ug_corpus_token.h"
#include "tpt_pickler.h"
using namespace ugdiss;
using namespace std;
namespace Moses {
// Token type used on both sides of the bitext: SimpleWordId wrapped in the
// L2R_Token template.
typedef L2R_Token<SimpleWordId> Token;
// Tree-style iterator over a memory-mapped token sequence array of Tokens.
typedef mmTSA<Token>::tree_iterator iter;
// Memory-mapped, word-aligned bitext.
// Bundles, for two languages, the token tracks (T1,T2), vocabularies
// (V1,V2) and suffix arrays (I1,I2), the word alignment track (Tx), and an
// agenda of worker threads that gather phrase statistics.
class mmbitext
{
public:
typedef mmTSA<Token>::tree_iterator iter;
class pstats; // one-sided phrase statistics
class jstats; // phrase pair ("joint") statistics
// Job queue plus thread pool (see the comment on /ag/ below).
class agenda
{
boost::mutex lock; // NOTE(review): presumably guards joblist, workers, doomed -- confirm in the .cc
boost::condition_variable ready;
class job;
class worker;
list<job> joblist; // pending jobs
vector<sptr<boost::thread> > workers; // the worker threads
bool shutdown; // NOTE(review): presumably tells workers to stop -- confirm in the .cc
size_t doomed; // NOTE(review): presumably number of workers asked to quit -- confirm
public:
mmbitext const& bitext; // the bitext this agenda works on
agenda(mmbitext const& bitext);
~agenda();
// NOTE(review): meaning of a negative /n/ is not visible here -- see the .cc
void add_workers(int n);
// Queue a job for /phrase/ with a cap of /max_samples/; returns the
// statistics object associated with the job.
sptr<pstats> add_job(mmbitext::iter const& phrase,
size_t const max_samples);
// Fetch the next unit of work; all five arguments are output parameters.
bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
bool & fwd, sptr<mmbitext::pstats> & stats);
};
// stores the list of unfinished jobs;
// maintains a pool of workers and assigns the jobs to them
agenda* ag;
mmTtrack<char> Tx; // word alignments
mmTtrack<Token> T1,T2; // token tracks
TokenIndex V1,V2; // vocabs
mmTSA<Token> I1,I2; // suffix arrays
/// given the source phrase sid[start:stop]
// find the possible start (s1 .. s2) and end (e1 .. e2)
// points of the target phrase; if non-NULL, store word
// alignments in *core_alignment. If /flip/, source phrase is
// L2.
bool
find_trg_phr_bounds
(size_t const sid, size_t const start, size_t const stop,
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
vector<uchar> * core_alignment, bool const flip) const;
// NOTE(review): presumably per-side caches of phrase statistics keyed by
// phrase id -- confirm against the .cc
boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
private:
sptr<pstats>
prep2(iter const& phrase);
public:
mmbitext();
~mmbitext();
// Open the bitext identified by /base/ and the two language tags.
void open(string const base, string const L1, string const L2);
sptr<pstats> lookup(iter const& phrase);
void prep(iter const& phrase);
};
// "joint" (i.e., phrase pair) statistics
// Accumulates an unweighted count, a weighted count and a list of
// (count, alignment) pairs. The object carries its own mutex and declares
// an explicit copy constructor.
class
mmbitext::
jstats
{
uint32_t my_rcnt; // unweighted count
float my_wcnt; // weighted count
vector<pair<size_t, vector<uchar> > > my_aln; // (count, alignment) pairs
boost::mutex lock;
public:
jstats();
jstats(jstats const& other);
uint32_t rcnt() const; // accessor for the unweighted count
float wcnt() const; // accessor for the weighted count
vector<pair<size_t, vector<uchar> > > const & aln() const;
// Record one observation with weight /w/ and alignment /a/.
void add(float w, vector<uchar> const& a);
};
// struct
// mmbitext:
// phrasepair
// {
// Token const* t;
// size_t len;
// size_t cnt;
// float fwd, bwd;
// map<uint32_t,uint32_t> aln;
// string toString(TokenIndex const& V) const;
// bool operator<(phrase const& other) const;
// bool operator>(phrase const& other) const;
// phrase(pair<pair<Token const*, size_t>,jstats> const & foo);
// };
// One-sided phrase statistics: counters describing the sampling of a
// phrase's occurrences plus a map from uint64_t keys to joint statistics
// (jstats).
struct
mmbitext::
pstats
{
boost::mutex lock; // for parallel gathering of stats
boost::condition_variable ready; // consumers can wait for this data structure to be ready.
size_t raw_cnt; // (approximate) raw occurrence count
size_t sample_cnt; // number of instances selected during sampling
size_t good; // number of selected instances with valid word alignments
size_t sum_pairs;
// size_t snt_cnt;
// size_t sample_snt;
size_t in_progress; // keeps track of how many threads are currently working on this
// NOTE(review): keys are presumably target phrase ids -- confirm in the .cc
boost::unordered_map<uint64_t, jstats> trg;
pstats();
// vector<phrase> nbest;
// void select_nbest(size_t const N=10);
void release();
void register_worker();
// Record one observation of /trg_phrase/ with weight /w/ and alignment /a/.
void add(mmbitext::iter const& trg_phrase, float const w, vector<uchar> const& a);
};
// Callable object bound to an agenda; its operator() is the body that a
// worker thread executes.
class
mmbitext::
agenda::
worker
{
agenda& ag; // the agenda this worker is bound to
public:
worker(agenda& a);
void operator()();
};
// One entry of the agenda's job list. There is no constructor: whoever
// creates a job must set every field before use.
class
mmbitext::
agenda::
job
{
public:
// NOTE(review): next/stop presumably delimit the not-yet-visited part of the
// phrase's range in the suffix array -- confirm in the .cc
char const* next;
char const* stop;
size_t max_samples; // cap on the number of samples for this job
size_t ctr;
size_t len;
bool fwd;
sptr<mmbitext::pstats> stats; // statistics object filled by this job
// Outputs a sentence id and an offset; the meaning of the boolean result
// is defined in the .cc.
bool step(uint64_t & sid, uint64_t & offset);
};
}
#endif

View File

@ -0,0 +1,15 @@
#include "ug_tsa_array_entry.h"
#include "ug_ttrack_position.h"
#include "moses/generic/sampling/Sampling.h"
// (c) 2007-2010 Ulrich Germann
namespace ugdiss
{
  namespace tsa
  {
    // Default entry: position (0,0), both index pointers NULL.
    ArrayEntry::ArrayEntry()
      : ttrack::Position(0,0)
      , pos(NULL)
      , next(NULL)
    {}

    // Entry whose /next/ pointer is set to p; position is (0,0) and
    // /pos/ is NULL.
    ArrayEntry::ArrayEntry(char const* p)
      : ttrack::Position(0,0)
      , pos(NULL)
      , next(p)
    {}
  }
}

View File

@ -0,0 +1,83 @@
// -*- c++ -*-
// (c) 2007-2010 Ulrich Germann
// implementation of stuff related to ArrayEntries
// this file should only be included via ug_tsa_base.h,
// never by itself
#ifndef __ug_tsa_array_entry_h
#define __ug_tsa_array_entry_h
#include "ug_ttrack_position.h"
namespace ugdiss
{
namespace tsa
{
// An entry in a token sequence array: a ttrack::Position extended by two
// pointers into the array's byte block.
class
ArrayEntry : public ttrack::Position
{
public:
char const* pos; // set to NULL by the non-template constructors
char const* next; // NULL by default, or the pointer given to the constructor
ArrayEntry();
ArrayEntry(char const* p);
// Construct by letting /S/ read the entry located at /p/.
template<typename TSA_TYPE>
ArrayEntry(TSA_TYPE const* S, char const* p);
};
// Construct an entry by reading it from the array /S/ at position /p/.
// Base class and pointer members are initialised explicitly, exactly as in
// the two non-template constructors, so the object never holds
// indeterminate values and no default constructor of ttrack::Position is
// required; S->readEntry() then fills in the actual data.
template<typename TSA_TYPE>
ArrayEntry::
ArrayEntry(TSA_TYPE const* S, char const* p)
  : ttrack::Position(0,0)
  , pos(NULL)
  , next(NULL)
{
  S->readEntry(p,*this);
}
// template<typename TSA_TYPE>
// class SamplingArrayEntryIterator
// : public tsa::ArrayEntry
// {
// size_t const N; // (approximate) total number of occurrences
// size_t const samplesize; // how many samples to chose from the range
// size_t const sampled; // how many occurrences we've looked at so far
// size_t const chosen; // how many we have chosen
// TSA_TYPE const* root; // the underlying TSA
// char const* stop; // end of the range
// public:
// SamplingArrayEntryIterator(TSA_TYPE::tree_iterator const& m, size_t const s);
// bool step(); // returns false when at end of range
// bool done(); //
// };
// template<typename TSA_TYPE>
// SamplingArrayEntryIterator::
// SamplingArrayEntryIterator(typename TSA_TYPE::tree_iterator const& m, size_t const s)
// : ArrayEntry<TSA_TYPE>(m.lower_bound(-1))
// , N(m.approxOccurrenceCount())
// , samplesize(min(s,N))
// , sampled(0)
// , chosen(0)
// , root(m.root)
// , stop(m.upper_bound(-1))
// { }
// template<typename TSA_TYPE>
// bool
// SamplingArrayEntryIterator::
// step()
// {
// while (chosen < samplesize && next < stop)
// {
// root->readEntry(next,*this);
// if (randInt(N - sampled++) < samplesize - chosen)
// {
// ++chosen;
// return true;
// }
// }
// return false;
// }
} // end of namespace tsa
} // end of namespace ugdiss
#endif

827
moses/mm/ug_tsa_base.h Normal file
View File

@ -0,0 +1,827 @@
// -*- c++ -*-
// Base class for Token Sequence Arrays
// (c) 2007-2010 Ulrich Germann. All rights reserved.
#ifndef _ug_tsa_base_h
#define _ug_tsa_base_h
#include <iostream>
#include <string>
#include <boost/iostreams/device/mapped_file.hpp>
#include "tpt_tokenindex.h"
#include "ug_ttrack_base.h"
#include "ug_corpus_token.h"
#include "ug_tsa_tree_iterator.h"
#include "ug_tsa_array_entry.h"
#include "ug_tsa_bitset_cache.h"
#include "ug_typedefs.h"
namespace ugdiss
{
using namespace std;
using namespace boost;
namespace bio=boost::iostreams;
// NULL-safe step to the successor of /x/: returns NULL for a NULL argument,
// otherwise x->next() cast to TKN const*.
template<typename TKN>
TKN const*
next(TKN const* x)
{
  if (!x)
    return NULL;
  return static_cast<TKN const*>(x->next());
}
/** Base class for [T]oken [S]equence [A]rrays, a generalization of
* suffix arrays.
*
* Token types (TKN) must provide a number of functions, see the
* class SimpleWordId (as a simple example of a "core token base
* class") and the template class L2R_Token (a class derived from
* its template parameter (e.g. SimpleWordId) that handles the
* ordering of sequences. Both are declared/defined in
* ug_corpus_token.{h|cc}
*/
template<typename TKN>
class TSA
{
public:
virtual ~TSA() {};
typedef TSA_tree_iterator<TKN> tree_iterator;
// allows iteration over the array as if it were a trie
typedef tsa::ArrayEntry ArrayEntry;
/* an entry in the array, for iteration over all occurrences of a
* particular sequence */
// typedef boost::dynamic_bitset<uint64_t> bitset;
typedef shared_ptr<bitvector> bitset_pointer;
typedef TKN Token;
typedef BitSetCache<TSA<TKN> > BSC_t;
/* to allow caching of bit vectors that are expensive to create on
* the fly */
friend class TSA_tree_iterator<TKN>;
protected:
Ttrack<TKN> const* corpus; // pointer to the underlying corpus
char const* startArray; // beginning ...
char const* endArray; // ... and end ...
// of memory block storing the actual TSA
size_t corpusSize;
/** size of the corpus (in number of sentences) of the corpus
* underlying the sequence array.
*
* ATTENTION: This number may differ from
* corpus->size(), namely when the
* suffix array is based on a subset
* of the sentences of /corpus/.
*/
id_type numTokens;
/** size of the corpus (in number of tokens) of the corpus underlying the
* sequence array.
*
* ATTENTION: This number may differ from corpus->numTokens(), namely when
* the suffix array is based on a subset of the sentences of
* /corpus/.
*/
id_type indexSize;
// (number of entries +1) in the index of root-level nodes
size_t BitSetCachingThreshold;
////////////////////////////////////////////////////////////////
// protected member functions:
/** @return an index position approximately /fraction/ between
* /startRange/ and /stopRange/.
*/
virtual
char const*
index_jump(char const* startRange,
char const* stopRange,
float fraction) const = 0;
/** return the index position of the first item that
* is equal to or includes [refStart,refStart+refLen) as a prefix
*/
char const*
find_start(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const;
/** return the index position of the first item that is greater than
* [refStart,refStart+refLen) and does not include it as a prefix
*/
char const*
find_end(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const;
/** return the index position of the first item that is longer than
* [refStart,refStart+refLen) and includes it as a prefix
*/
char const*
find_longer(char const* lo, char const* const upX,
TKN const* const refStart, int refLen,
size_t d) const;
/** Returns a char const* pointing to the position in the data block
* where the first item starting with token /id/ is located.
*/
virtual
char const*
getLowerBound(id_type id) const = 0;
/** Counterpart of getLowerBound(): end of the range of items starting
* with token /id/.
*/
virtual
char const*
getUpperBound(id_type id) const = 0;
public:
shared_ptr<BSC_t> bsc; // optional bit set cache; getBitSet() uses it when set
char const* arrayStart() const { return startArray; }
char const* arrayEnd() const { return endArray; }
/** @return a pointer to the beginning of the index entry range covering
* [keyStart,keyStop)
*/
char const*
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const;
char const*
lower_bound(TKN const* keyStart, TKN const* keyStop) const;
char const*
lower_bound(TKN const* keyStart, int keyLen) const;
/** @return a pointer to the end point of the index entry range covering
* [keyStart,keyStop)
*/
char const*
upper_bound(typename vector<TKN>::const_iterator const& keyStart,
typename vector<TKN>::const_iterator const& keyStop) const;
char const*
upper_bound(TKN const* keyStart, int keyLength) const;
/** dump all suffixes in order to /out/ */
void dump(ostream& out, TokenIndex const& T) const;
/** fill the dynamic bit set with true for all sentences that contain
* /phrase/.
* @return the raw number of occurrences.
*/
count_type
fillBitSet(vector<TKN> const& phrase, bdBitset& dest) const;
count_type
fillBitSet(TKN const* key, size_t keyLen, bdBitset& dest) const;
/** set the bit of every sentence id found in the index range
* [startRange,endRange)
* @return the number of index entries in the range
*/
count_type
setBits(char const* startRange, char const* endRange,
boost::dynamic_bitset<uint64_t>& bs) const;
/** for each index entry in [startRange,endRange), set the bit of the
* /len/-th token of the occurrence (a token-level bit vector)
*/
void
setTokenBits(char const* startRange, char const* endRange, size_t len,
bitvector& bs) const;
/** read the sentence ID into /sid/
* @return position of associated offset.
*
* The function provides an abstraction that uses the right
* interpretation of the position based on the subclass
* (memory-mapped or in-memory).
*/
virtual
char const*
readSid(char const* p, char const* q, id_type& sid) const = 0;
virtual
char const*
readSid(char const* p, char const* q, uint64_t& sid) const = 0;
/** read the offset part of the index entry into /offset/
* @return position of the next entry in the index.
*
* The function provides an abstraction that uses the right
* interpretation of the position based on the subclass
* (memory-mapped or in-memory).
*/
virtual
char const*
readOffset(char const* p, char const* q, uint16_t& offset) const = 0;
virtual
char const*
readOffset(char const* p, char const* q, uint64_t& offset) const = 0;
/** @return sentence count
*/
count_type
sntCnt(char const* p, char const* const q) const;
/** @return rawCnt() over the index range matching the given key */
count_type
rawCnt2(TKN const* keyStart, size_t keyLen) const;
/** @return raw occurrence count
*
* depending on the subclass, this is constant time (imTSA) or
* linear in the number of occurrences (mmTSA).
*/
virtual
count_type
rawCnt(char const* p, char const* const q) const = 0;
/** get both sentence and word counts.
*
* Avoids having to go over the byte range representing the range
* of suffixes in question twice when dealing with memory-mapped
* suffix arrays.
*/
virtual
void
getCounts(char const* p, char const* const q,
count_type& sids, count_type& raw) const = 0;
string
suffixAt(char const* p, TokenIndex const* V=NULL, size_t maxlen=0)
const;
string
suffixAt(ArrayEntry const& I, TokenIndex const* V=NULL, size_t maxlen=0)
const;
/** read the index entry at /p/ into /I/ (pos, sid, offset, next)
* @return /I/
*/
tsa::ArrayEntry& readEntry(char const* p, tsa::ArrayEntry& I) const;
/** return pointer to the end of the data block */
char const* dataEnd() const;
bool sanityCheck1() const;
/** Return an ID that represents a given phrase;
This should NEVER be 0!
Structure of a phrase ID:
leftmost 32 bits: sentence ID in the corpus
next 16 bits: offset from the start of the sentence
next 16 bits: length of the phrase
*/
uint64_t
getSequenceId(typename vector<TKN>::const_iterator const& pstart,
typename vector<TKN>::const_iterator const& pstop) const;
uint64_t
getSequenceId(TKN const* t, ushort plen) const;
/** Return the phrase represented by phrase ID /pid/ as a string,
* using /V/ to map token ids to strings */
string
getSequence(uint64_t pid, TokenIndex const& V) const;
/** Return the phrase represented by phrase ID /pid/ */
vector<TKN>
getSequence(uint64_t pid) const;
/** Return a pointer to the first token of the phrase with the given ID */
TKN const*
getSequenceStart(uint64_t) const;
/** Return the length stored in the given phrase ID */
ushort
getSequenceLength(uint64_t) const;
size_t
getCorpusSize() const;
Ttrack<TKN> const*
getCorpus() const;
/** Return the sentence-level bit vector for the given key, from the
* cache /bsc/ if one is set, freshly computed otherwise */
bitset_pointer
getBitSet(TKN const* startKey, size_t keyLen) const;
shared_ptr<bitvector>
findTree(TKN const* treeStart, TKN const* treeEnd,
bitvector const* filter) const;
/** mark token positions of all occurrences in the index range [lo,up)
* in /bitset/: only the first token of each occurrence if
* /markOnlyStartPosition/, all /len/ tokens otherwise
* @return number of index entries visited
*/
size_t markOccurrences(char const* lo, char const* up, size_t len,
bitvector& bitset,
bool markOnlyStartPosition) const;
bool
findBranches(TKN const* base, bitvector const& terminals,
vector<tree_iterator>& dest) const;
/** average number of bytes per token in the index block */
double aveIndexEntrySize() const
{
return (endArray-startArray)/double(numTokens);
}
public:
// virtual
/** Look up the sequence start[0..len) by extending a tree iterator one
* token at a time.
* @return the iterator, or an empty pointer if the sequence cannot be
* matched completely
*/
sptr<TSA_tree_iterator<TKN> >
find(TKN const* start, size_t len) const
{
typedef TSA_tree_iterator<TKN> iter;
sptr<iter> ret(new iter(this));
size_t i = 0;
while (i < len && ret->extend(start[i])) ++i;
if (i < len) ret.reset();
return ret;
}
};
// ======================================================================
// template<typename TOKEN>
// sptr<TSA_tree_iterator<TOKEN> >
// TSA<TOKEN>::
// find(TOKEN const* start, size_t len) const
// {
// typedef TSA_tree_iterator<TOKEN> iter;
// sptr<iter> ret(new iter(this));
// size_t i = 0;
// while (i < len && ret->extend(start[i])) ++i;
// if (i < len) ret.reset();
// return ret;
// }
// ---------------------------------------------------------------------------
/** fill the dynamic bitset with information as to which sentences
 * the phrase /key/ occurs in
 * @return number of total occurrences of the phrase in the corpus;
 *         0 for an empty key, in which case /bitset/ is left untouched
 */
template<typename TKN>
count_type
TSA<TKN>::
fillBitSet(vector<TKN> const& key,
           bitvector& bitset) const
{
  if (key.empty()) return 0;
  // Delegate to the pointer/length overload. The call used to be spelled
  // "fillBitset", which is not a member of this class, so this function
  // could not be instantiated.
  return fillBitSet(&(key[0]),key.size(),bitset);
}
// ---------------------------------------------------------------------------
/** fill the dynamic bitset with information as to which sentences
 * the phrase key[0..keyLen) occurs in
 *
 * /bitset/ is resized to corpus->size() and cleared first, so on return
 * it has one bit per sentence in any case.
 * @return number of total occurrences of the phrase in the corpus
 *         (0 if the phrase does not occur)
 */
template<typename TKN>
count_type
TSA<TKN>::
fillBitSet(TKN const* key, size_t keyLen,
           bitvector& bitset) const
{
  char const* lo = lower_bound(key,keyLen);
  char const* up = upper_bound(key,keyLen);
  bitset.resize(corpus->size());
  bitset.reset();
  // lower_bound() returns NULL when the phrase is not in the index;
  // setBits() would then start reading index entries at address NULL.
  if (!lo) return 0;
  return setBits(lo,up,bitset);
}
// ---------------------------------------------------------------------------
/** Walk the index entries in [startRange,endRange) and set, in /bs/, the
 * bit of every sentence id encountered.
 * @return the number of index entries visited
 */
template<typename TKN>
count_type
TSA<TKN>::
setBits(char const* startRange, char const* endRange,
        bitvector& bs) const
{
  id_type sentenceId;
  ushort  offset;               // read only to get past the entry
  count_type numEntries = 0;
  for (char const* cur = startRange; cur < endRange; ++numEntries)
    {
      cur = readSid(cur,endRange,sentenceId);
      cur = readOffset(cur,endRange,offset);
      bs.set(sentenceId);
    }
  return numEntries;
}
// ---------------------------------------------------------------------------
/** For every index entry from /startRange/ up to /endRange/, advance
 * len-1 tokens from the entry's token and set the bit of the token
 * reached (its distance from corpus->sntStart(0)) in /bs/.
 *
 * The first entry is always processed, i.e. the range must not be empty,
 * and iteration ends only when an entry ends exactly at /endRange/.
 */
template<typename TKN>
void
TSA<TKN>::
setTokenBits(char const* startRange, char const* endRange, size_t len,
             bitvector& bs) const
{
  ArrayEntry entry;
  entry.next = startRange;
  for (;;)
    {
      readEntry(entry.next,entry);
      Token const* tok = corpus->getToken(entry);
      Token const* sntStop = tok->stop(*corpus,entry.sid);
      // move to the last token of the occurrence
      for (size_t step = 1; step < len; ++step)
        {
          assert(tok != sntStop);
          tok = tok->next();
        }
      assert(tok != sntStop);
      bs.set(tok - corpus->sntStart(0));
      if (entry.next == endRange)
        break;
    }
}
// ---------------------------------------------------------------------------
/** Count the distinct sentence ids among the index entries in [p,q).
 * Uses a scratch bit vector with corpus->size() bits.
 */
template<typename TKN>
count_type
TSA<TKN>::
sntCnt(char const* p, char const* const q) const
{
  id_type  sentenceId;
  uint16_t offset;              // read only to get past the entry
  bitvector seen(corpus->size());
  for (char const* cur = p; cur < q; )
    {
      cur = readSid(cur,q,sentenceId);
      cur = readOffset(cur,q,offset);
      seen.set(sentenceId);
    }
  return seen.count();
}
//---------------------------------------------------------------------------
/** Binary search in [lo,upX) for the lower bound (first matching entry)
 * of the reference sequence [refStart,refStart+refLen).
 *
 * An entry is accepted if corpus->cmp() returns 0 or 1 for it
 * (presumably "equal" and "has the reference as a prefix", in line with
 * the declaration comment -- confirm against Ttrack::cmp).
 * @return the position of the first such entry, or NULL if there is none
 *         or the range is empty
 */
template<typename TKN>
char const*
TSA<TKN>::
find_start(char const* lo, char const* const upX,
           TKN const* const refStart, int refLen,
           size_t d) const
{
  if (lo >= upX) return NULL;
  char const* hi = upX;
  ArrayEntry probe;
  int verdict;
  do
    {
      readEntry(index_jump(lo,hi,.5),probe);
      verdict = corpus->cmp(probe,refStart,refLen,d);
      if (verdict < 0) lo = probe.next;  // entry sorts before the reference
      else             hi = probe.pos;
    }
  while (lo < hi);
  assert(lo==hi);
  // If the search ended inside the range, classify the entry it ended
  // on; otherwise /verdict/ is the one from the last probe.
  if (lo < upX)
    {
      readEntry(lo,probe);
      verdict = corpus->cmp(probe,refStart,refLen,d);
    }
  // return (x >= 0) ? lo : NULL;
  if (verdict == 0 || verdict == 1)
    return lo;
  return NULL;
}
//---------------------------------------------------------------------------
/** Search [lo,upX) for the upper bound (first entry beyond) of the range
 * matching the reference sequence [refStart,refStart+refLen).
 *
 * An entry lies beyond the range if corpus->cmp() returns 2 for it.
 * Probes are taken at fraction .1 of the remaining range rather than at
 * its midpoint.
 * @return the position of the first entry beyond the matching range,
 *         /upX/ if there is no such entry, NULL if the range is empty
 */
template<typename TKN>
char const*
TSA<TKN>::
find_end(char const* lo, char const* const upX,
         TKN const* const refStart, int refLen,
         size_t d) const
{
  if (lo >= upX) return NULL;
  char const* hi = upX;
  ArrayEntry probe;
  int verdict;
  // float ratio = .1;
  do
    {
      readEntry(index_jump(lo,hi,.1),probe);
      verdict = corpus->cmp(probe,refStart,refLen,d);
      if (verdict != 2) lo = probe.next;
      else              hi = probe.pos;
      // ratio = .5;
    }
  while (lo < hi);
  assert(lo==hi);
  // If the search ended inside the range, classify the entry it ended
  // on; otherwise /verdict/ is the one from the last probe.
  if (lo < upX)
    {
      readEntry(lo,probe);
      verdict = corpus->cmp(probe,refStart,refLen,d);
    }
  if (verdict == 2)
    return hi;
  return upX;
}
//---------------------------------------------------------------------------
/** return the first entry in [lo,upX) that has the prefix
 * [refStart,refStart+refLen) but continues on
 *
 * Such an entry is one for which corpus->cmp() returns 1.
 * @return its position, or NULL if there is none or the range is empty
 */
template<typename TKN>
char const*
TSA<TKN>::
find_longer(char const* lo, char const* const upX,
            TKN const* const refStart, int refLen,
            size_t d) const
{
  char const* up = upX;
  if (lo >= up) return NULL;
  int x;
  ArrayEntry I;
  while (lo < up)
    {
      readEntry(index_jump(lo,up,.5),I);
      x = corpus->cmp(I,refStart,refLen,d);
      if (x > 0) up = I.pos;
      else       lo = I.next;
    }
  assert(lo==up);
  if (lo < upX)
    {
      // Classify the entry the search ended on. At this point lo == up,
      // so [lo,up) is empty and there is nothing for index_jump() to jump
      // within; read the entry at /lo/ directly, as find_start() and
      // find_end() do.
      readEntry(lo,I);
      x = corpus->cmp(I,refStart,refLen,d);
    }
  return (x == 1) ? up : NULL;
}
//---------------------------------------------------------------------------
/** returns the start position in the byte array representing
 * the tightly packed sorted list of corpus positions for the
 * given search phrase [keyStart,keyStop)
 *
 * Only /keyStart/ is dereferenced, and only for a non-empty key:
 * /keyStop/ may be the end() iterator of the vector, which must not be
 * dereferenced.
 */
template<typename TKN>
char const*
TSA<TKN>::
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
            typename vector<TKN>::const_iterator const& keyStop) const
{
  int const keyLen = keyStop - keyStart;
  // the pointer is not looked at by the callee when keyLen == 0
  TKN const* const first = keyLen ? &(*keyStart) : NULL;
  return lower_bound(first,keyLen);
}
//---------------------------------------------------------------------------
/** Lower bound for the search phrase given as the pointer range
 * [keyStart,keyStop); forwards to the pointer/length overload.
 */
template<typename TKN>
char const*
TSA<TKN>::
lower_bound(TKN const* const keyStart,
            TKN const* const keyStop) const
{
  int const keyLen = keyStop-keyStart;
  return lower_bound(keyStart,keyLen);
}
/** Lower bound for the search phrase keyStart[0..keyLen).
 * An empty phrase matches everything: the start of the array is returned.
 * Otherwise the search is restricted to the range of entries that begin
 * with the phrase's first token.
 * @return start of the matching range, or NULL if there is no match
 */
template<typename TKN>
char const*
TSA<TKN>::
lower_bound(TKN const* const keyStart, int keyLen) const
{
  if (keyLen == 0)
    return startArray;
  char const* const rangeStart = getLowerBound(keyStart->id());
  char const* const rangeStop  = getUpperBound(keyStart->id());
  return find_start(rangeStart,rangeStop,keyStart,keyLen,0);
}
//---------------------------------------------------------------------------
/** returns the upper bound in the byte array representing
 * the tightly packed sorted list of corpus positions for the
 * given search phrase [keyStart,keyStop) (i.e., points just beyond the
 * range)
 *
 * The address of the first key token is taken from the iterator itself.
 * The earlier form, &((TKN)*keyStart), applied & to the temporary
 * produced by the cast, which is ill-formed, and also dereferenced
 * /keyStop/, which may be the end() iterator.
 */
template<typename TKN>
char const*
TSA<TKN>::
upper_bound(typename vector<TKN>::const_iterator const& keyStart,
            typename vector<TKN>::const_iterator const& keyStop) const
{
  int const keyLen = keyStop - keyStart;
  // the pointer is not looked at by the callee when keyLen == 0
  TKN const* const first = keyLen ? &(*keyStart) : NULL;
  return upper_bound(first,keyLen);
}
//---------------------------------------------------------------------------
/** Upper bound (position just beyond the matching range) for the search
 * phrase keyStart[0..keyLength).
 * An empty phrase matches everything: the end of the array is returned.
 * Otherwise the search is restricted to the range of entries that begin
 * with the phrase's first token.
 */
template<typename TKN>
char const*
TSA<TKN>::
upper_bound(TKN const* keyStart, int keyLength) const
{
  if (keyLength == 0)
    return endArray;
  char const* const rangeStart = getLowerBound(keyStart->id());
  char const* const rangeStop  = getUpperBound(keyStart->id());
  return find_end(rangeStart,rangeStop,keyStart,keyLength,0);
}
//---------------------------------------------------------------------------
/** Raw occurrence count of the phrase keyStart[0..keyLen).
 * @return rawCnt() over the matching index range; 0 if the phrase does
 *         not occur
 */
template<typename TKN>
count_type
TSA<TKN>::
rawCnt2(TKN const* keyStart, size_t keyLen) const
{
  char const* lo = lower_bound(keyStart,keyLen);
  // lower_bound() returns NULL when the phrase is not in the index;
  // [NULL,up) is not a valid index range to hand to rawCnt().
  if (!lo) return 0;
  char const* up = upper_bound(keyStart,keyLen);
  // cerr << up-lo << endl;
  return rawCnt(lo,up);
}
//---------------------------------------------------------------------------
/** Phrase ID for the phrase [pstart,pstop); forwards to the
 * pointer/length overload. /pstart/ is dereferenced, so it must point at
 * a token.
 */
template<typename TKN>
uint64_t
TSA<TKN>::
getSequenceId(typename vector<TKN>::const_iterator const& pstart,
              typename vector<TKN>::const_iterator const& pstop) const
{
  TKN const* const first = &(*pstart);
  ushort const plen = pstop-pstart;
  return getSequenceId(first,plen);
}
//---------------------------------------------------------------------------
/** Phrase ID for the phrase pstart[0..plen), derived from the first
 * index entry matching the phrase:
 *   bits 32 and up: sentence id of that entry
 *   bits 16..31:    offset of that entry
 *   bits 0..15:     plen
 * @return the ID, or 0 if the phrase is not in the index
 */
template<typename TKN>
uint64_t
TSA<TKN>::
getSequenceId(TKN const* pstart, ushort plen) const
{
  char const* const firstMatch = lower_bound(pstart,plen);
  if (!firstMatch) return 0; // not found!
  ArrayEntry entry;
  readEntry(firstMatch,entry);
  uint64_t const sidPart = uint64_t(entry.sid) << 32;
  uint64_t const offPart = uint64_t(entry.offset) << 16;
  return sidPart + offPart + plen;
}
//---------------------------------------------------------------------------
/** Decode phrase ID /pid/ (sentence id in bits 32 and up, offset in bits
 * 16..31, length in bits 0..15) and return a copy of the tokens of the
 * phrase, following each token's next() link.
 */
template<typename TKN>
vector<TKN>
TSA<TKN>::
getSequence(uint64_t pid) const
{
  size_t const plen   = pid & 0xFFFF;
  size_t const offset = (pid >> 16) & 0xFFFF;
  TKN const* cur = corpus->sntStart(pid >> 32)+offset;
  vector<TKN> ret(plen);
  for (typename vector<TKN>::iterator out = ret.begin();
       out != ret.end(); ++out)
    {
      assert(cur);
      *out = *cur;
      cur = cur->next();
    }
  return ret;
}
/** Return the phrase represented by phrase ID /pid/ as a string of
 * space-separated tokens, using /V/ to map token ids to strings.
 *
 * A phrase ID with length 0 yields the empty string. (The former loop
 * condition "--len > 0" on an unsigned length wrapped around for
 * length 0, and the first token was printed regardless.)
 */
template<typename TKN>
string
TSA<TKN>::
getSequence(uint64_t pid, TokenIndex const& V) const
{
  size_t const len = getSequenceLength(pid);
  if (len == 0) return string();
  ostringstream buf;
  TKN const* a = getSequenceStart(pid);
  buf << V[a->id()];
  for (size_t i = 1; i < len; ++i)
    {
      a = a->next();
      buf << " " << V[a->id()];
    }
  return buf.str();
}
//---------------------------------------------------------------------------
/** Pointer to the first token of the phrase with ID /pid/: the start of
 * sentence (pid >> 32) plus the offset stored in bits 16..31.
 */
template<typename TKN>
TKN const*
TSA<TKN>::
getSequenceStart(uint64_t pid) const
{
  size_t const offset = (pid >> 16) & 0xFFFF;
  TKN const* const sentence = corpus->sntStart(pid >> 32);
  return sentence+offset;
}
//---------------------------------------------------------------------------
/** Phrase length stored in the lowest 16 bits of phrase ID /pid/. */
template<typename TKN>
ushort
TSA<TKN>::
getSequenceLength(uint64_t pid) const
{
  return static_cast<ushort>(pid & 0xFFFF);
}
//---------------------------------------------------------------------------
/** Accessor for /corpusSize/. */
template<typename TKN>
size_t
TSA<TKN>::
getCorpusSize() const
{
  return this->corpusSize;
}
//---------------------------------------------------------------------------
/** Accessor for the pointer to the underlying corpus. */
template<typename TKN>
Ttrack<TKN> const*
TSA<TKN>::
getCorpus() const
{
  return this->corpus;
}
//---------------------------------------------------------------------------
/** Read the index entry located at /p/ into /I/: I.pos is set to p,
 * I.sid and I.offset to the values read, and I.next to the position
 * following the entry. Asserts that sid and offset are within the bounds
 * of the corpus.
 * @return /I/
 */
template<typename TKN>
tsa::ArrayEntry &
TSA<TKN>::
readEntry(char const* p, tsa::ArrayEntry& I) const
{
  I.pos = p;
  char const* const afterSid = readSid(p,endArray,I.sid);
  I.next = readOffset(afterSid,endArray,I.offset);
  assert(I.sid < corpus->size());
  assert(I.offset < corpus->sntLen(I.sid));
  return I;
}
//---------------------------------------------------------------------------
/// Sentence-level bit vector for the phrase startKey[0..keyLen): taken
/// from the bit set cache /bsc/ if one is set, otherwise freshly
/// allocated and filled via fillBitSet().
template<typename TKN>
typename TSA<TKN>::bitset_pointer
TSA<TKN>::
getBitSet(TKN const* startKey, size_t keyLen) const
{
  if (bsc != NULL)
    return bsc->get(startKey,keyLen);
  bitset_pointer fresh(new bitvector(corpus->size()));
  fillBitSet(startKey,keyLen,*fresh);
  return fresh;
}
//---------------------------------------------------------------------------
/** Mark token positions of the occurrences listed in the index range
 * [lo,up) in /bitset/. Positions are distances from corpus->sntStart(0).
 * With /markOnlyStartPosition/ only the first token of each occurrence
 * is marked; otherwise /len/ tokens are marked, following next() links.
 * @return number of index entries visited
 */
template<typename TKN>
size_t
TSA<TKN>::
markOccurrences(char const* lo, char const* up, size_t len,
                bitvector& bitset, bool markOnlyStartPosition) const
{
  id_type sentenceId;
  ushort  offset;
  count_type numEntries = 0;
  TKN const* const corpusStart = corpus->sntStart(0);
  for (char const* cur = lo; cur < up; ++numEntries)
    {
      cur = readSid(cur,up,sentenceId);
      cur = readOffset(cur,up,offset);
      TKN const* tok = corpus->sntStart(sentenceId)+offset;
      if (!markOnlyStartPosition)
        {
          for (size_t left = len; left > 0; --left, tok = tok->next())
            bitset.set(tok-corpusStart);
        }
      else
        bitset.set(tok-corpusStart);
    }
  return numEntries;
}
#if 1
/** For every bit i set in /terminals/, build a tree iterator in /dest/ by
 * extending it with the ids of the token chain starting at base+i (the
 * chain ends at a NULL next() link or at a token whose id is 0). On
 * success /dest/ is sorted with tree_iterator::SortByApproximateCount.
 * @return false as soon as one chain cannot be matched in the index
 *         (/dest/ is then left partially filled), true otherwise
 */
template<typename TKN>
bool
TSA<TKN>::
findBranches(TKN const* base, bitvector const& terminals,
             vector<tree_iterator>& dest) const
{
  dest.assign(terminals.count(),tree_iterator(this));
  size_t slot = 0;
  size_t bit  = terminals.find_first();
  while (bit < terminals.size())
    {
      TKN const* tok = base+bit;
      while (tok && tok->id())
        {
          if (!dest[slot].extend(tok->id()))
            return false;
          tok = tok->next();
        }
      bit = terminals.find_next(bit);
      ++slot;
    }
  typename tree_iterator::SortByApproximateCount sorter;
  sort(dest.begin(),dest.end(),sorter);
  return true;
}
#endif
}
#endif

View File

@ -0,0 +1,118 @@
// -*- c++ -*-
// (c) 2010 Ulrich Germann. All rights reserved.
#ifndef __ug_tsa_bitset_cache_h
#define __ug_tsa_bitset_cache_h
//#include "ug_tsa_base.h"
#include <map>
#include <boost/shared_ptr.hpp>
#include <boost/dynamic_bitset.hpp>
#include <stdint.h>
#include <iostream>
// A simple mechanism for caching bit vectors representing occurrences of token
// sequences in a corpus. Useful for very frequent items for which the bit
// vector is expensive to create on the fly. The variable threshold determines
// when bit vectors are cached and when they are created on the fly, using the
// size of the range of entries in the TSA's index in bytes to determine
// whether or not to store the respective bit vector in the cache.
namespace ugdiss
{
using namespace std;
template<typename TSA>
class
BitSetCache
{
public:
typedef boost::dynamic_bitset<uint64_t> BitSet;
typedef boost::shared_ptr<BitSet> bsptr;
typedef map<pair<char const*,ushort>,bsptr> myMap;
typedef myMap::iterator myMapIter;
private:
TSA const* tsa;
myMap cached1,cached2;
int threshold;
public:
BitSetCache() : tsa(NULL), threshold(0) {};
BitSetCache(TSA const* t, size_t th=4194304)
{
init(t,th);
};
void
init(TSA const* t, size_t th=4194304)
{
tsa = t;
threshold = th;
}
bsptr
get(typename TSA::Token const* keyStart, size_t keyLen)
{
bsptr ret;
char const* lo = tsa->lower_bound(keyStart,keyLen);
char const* up = tsa->upper_bound(keyStart,keyLen);
if (!lo) return ret;
if (up-lo > threshold)
{
pair<char const*,ushort> k(lo,keyLen);
myMapIter m = cached1.find(k);
if (m != cached1.end())
ret = m->second;
else
{
ret.reset(new BitSet(tsa->getCorpus()->size()));
cached1[k] = ret;
}
}
else if (ret == NULL)
ret.reset(new BitSet(tsa->getCorpus()->size()));
if (ret->count() == 0)
tsa->setBits(lo,up,*ret);
return ret;
}
// get bitvector with the path occurrences marked
bsptr
get2(typename TSA::Token const* keyStart, size_t keyLen, bool onlyEndpoint=true)
{
bsptr ret;
char const* lo = tsa->lower_bound(keyStart,keyLen);
char const* up = tsa->upper_bound(keyStart,keyLen);
if (!lo) return ret;
if (up-lo > threshold)
{
pair<char const*,ushort> k(lo,keyLen);
// cout << "bla " << keyStart->id() << " "
// << cached2.size() << " " << up-lo << " " << k.second << endl;
myMapIter m = cached2.find(k);
if (m != cached2.end())
ret = m->second;
else
{
ret.reset(new BitSet(tsa->getCorpus()->numTokens()));
cached2[k] = ret;
}
}
else if (ret == NULL)
ret.reset(new BitSet(tsa->getCorpus()->numTokens()));
if (ret->count() == 0)
{
if (onlyEndpoint)
tsa->setTokenBits(lo,up,keyLen,*ret);
else
tsa->markOccurrences(lo,up,keyLen,*ret,false);
}
return ret;
}
void clear()
{
cached1.clear();
cached2.clear();
}
};
}
#endif

View File

@ -0,0 +1,868 @@
// -*- c++ -*-
// (c) 2007 - 2010 Ulrich Germann. All rights reserved.
#ifndef __ug_tsa_tree_iterator_h
#define __ug_tsa_tree_iterator_h
#include "ug_tsa_array_entry.h"
#include "ug_typedefs.h"
#include "tpt_tokenindex.h"
#include <iostream>
// #include "ug_bv_iter.h"
namespace ugdiss
{
#ifndef _DISPLAY_CHAIN
#define _DISPLAY_CHAIN
// for debugging only
// Debugging aid: prints /label/, a colon, and then the lemma of every element
// in the chain that starts at /x/ and is linked via next(), on one line.
template<typename T>
void display(T const* x, string label)
{
  cout << label << ":";
  while (x)
    {
      cout << " " << x->lemma;
      x = next(x);
    }
  cout << endl;
}
#endif
template<typename T> class TSA;
// CLASS DEFINITION
// The TSA_tree_iterator allows traversal of a Token Sequence Array as if it was a trie.
// down(): go to first child
// over(): go to next sibling
// up(): go to parent
// extend(id): go to a specific child node
// all four functions return true if successful, false otherwise
// lower_bound() and upper_bound() give the range of entries in the array covered by the
// "virtual trie node".
template<typename TKN>
class
TSA_tree_iterator
{
protected:
// One entry per level of the current path: lower[i] and upper[i] delimit the
// range of the array covered by the first i+1 tokens of the path.
// Both vectors are empty while the iterator is at the (virtual) root.
vector<char const*> lower;
vector<char const*> upper;
// for debugging ...
void showBounds(ostream& out) const;
public:
typedef TKN Token;
virtual ~TSA_tree_iterator() {};
// the token sequence array that is being traversed (not owned)
TSA<Token> const* root;
// TO BE DONE: make the pointer private and add a const function to return the pointer
// TSA_tree_iterator(TSA_tree_iterator const& other);
TSA_tree_iterator(TSA<Token> const* s);
// TSA_tree_iterator(TSA<Token> const* s, Token const& t);
// TSA_tree_iterator(TSA<Token> const* s, Token const* kstart, Token const* kend);
// TSA_tree_iterator(TSA<Token> const* s,
// TokenIndex const& V,
// string const& key);
// Range bounds at level p; negative p counts back from the deepest level.
char const* lower_bound(int p) const;
char const* upper_bound(int p) const;
// current depth, i.e., number of tokens on the path
size_t size() const;
// Token const& wid(int p) const;
// pointer to the p-th token of the path, as found in the corpus
Token const* getToken(int p) const;
id_type getSid() const;
ushort getOffset(int p) const;
size_t sntCnt(int p=-1) const;
size_t rawCnt(int p=-1) const;
uint64_t getPid(int p=-1) const; // get phrase id
virtual bool extend(Token const& id);
virtual bool extend(id_type id);
virtual bool down();
virtual bool over();
virtual bool up();
string str(TokenIndex const* V=NULL, int start=0, int stop=0) const;
// checks if the sentence [start,stop) contains the given sequence.
bool match(Token const* start, Token const* stop) const;
// checks if the sentence /sid/ contains the given sequence.
bool match(id_type sid) const;
// fillBitSet: deprecated; use markSentences() instead
count_type
fillBitSet(boost::dynamic_bitset<uint64_t>& bitset) const;
count_type
markEndOfSequence(Token const* start, Token const* stop,
boost::dynamic_bitset<uint64_t>& dest) const;
count_type
markSequence(Token const* start, Token const* stop, bitvector& dest) const;
count_type
markSentences(boost::dynamic_bitset<uint64_t>& bitset) const;
count_type
markOccurrences(boost::dynamic_bitset<uint64_t>& bitset,
bool markOnlyStartPosition=false) const;
count_type
markOccurrences(vector<ushort>& dest) const;
uint64_t
getSequenceId() const;
// equivalent but more efficient than
// bitvector tmp; markSentences(tmp); foo &= tmp;
bitvector& filterSentences(bitvector& foo) const;
/// a special auxiliary function for finding trees
void
tfAndRoot(bitvector const& ref, // reference root positions
bitvector const& snt, // relevant sentences
bitvector& dest) const;
// @return size in bytes of the array range at level p (negative p counts
// back from the deepest level); 0 if the iterator is at the root.
size_t arrayByteSpanSize(int p = -1) const
{
if (lower.size()==0) return 0; // or endArray-startArray???
if (p < 0) p = lower.size()+p;
assert(p >=0 && p < int(lower.size()));
return lower.size() ? upper[p]-lower[p] : 0;
}
// Ordering functor: iterators at the root (size()==0) sort before all
// others; the rest are ordered by the byte size of their deepest range.
struct SortByApproximateCount
{
bool operator()(TSA_tree_iterator const& a,
TSA_tree_iterator const& b) const
{
if (a.size()==0) return b.size() ? true : false;
if (b.size()==0) return false;
return a.arrayByteSpanSize() < b.arrayByteSpanSize();
}
};
// Estimate of the number of entries in the range at level p: the byte size
// of the range divided by the array's average index entry size.
double approxOccurrenceCount(int p=-1) const
{
return arrayByteSpanSize(p)/root->aveIndexEntrySize();
}
// Extend the path token by token, following t->next(), until /stop/ is
// reached or an extension fails. @return the resulting depth.
size_t grow(Token const* t, Token const* stop)
{
while ((t != stop) && extend(*t)) t = t->next();
return this->size();
}
// Extend the path with snt[x] for every bit x set in /cov/, in increasing
// order of x, until an extension fails. @return the resulting depth.
size_t grow(Token const* snt, bitvector const& cov)
{
size_t x = cov.find_first();
while (x < cov.size() && extend(snt[x]))
x = cov.find_next(x);
return this->size();
}
sptr<vector<typename ttrack::Position> >
randomSample(int level, size_t N) const;
};
//---------------------------------------------------------------------------
// DOWN
//---------------------------------------------------------------------------
// Move to the first child of the current node, i.e. make the path one token
// longer. @return true on success, false if there is nothing to descend to.
// Note: the template parameter is called TSA_TYPE here but it is the token
// type (TKN in the class declaration).
template<typename TSA_TYPE>
bool
TSA_tree_iterator<TSA_TYPE>::
down()
{
assert(root);
if (lower.size() == 0)
{
// At the root: the first child starts at the very beginning of the array.
char const* lo = root->arrayStart();
assert(lo < root->arrayEnd());
if (lo == root->arrayEnd()) return false; // array is empty, can't go down
tsa::ArrayEntry A(root,lo);
assert(root->corpus->getToken(A));
assert(lo < root->getUpperBound(root->corpus->getToken(A)->id()));
lower.push_back(lo);
// The upper bound is determined by the first token of the new path.
Token const* foo = this->getToken(0);
upper.push_back(root->upper_bound(foo,lower.size()));
return lower.size();
}
else
{
char const* lo = lower.back();
tsa::ArrayEntry A(root,lo);
Token const* a = root->corpus->getToken(A); assert(a);
// z: the token that follows the current path (size() steps after a)
// in the sentence of the first entry of the current range.
Token const* z = next(a);
for (size_t i = 1; i < size(); ++i) z = next(z);
if (z < root->corpus->sntStart(A.sid) || z >= root->corpus->sntEnd(A.sid))
{
// The first entry ends with the current path and cannot be extended.
// NOTE(review): find_longer presumably returns the first entry in
// [lo,up) whose sequence continues beyond the current path, or NULL
// if there is none -- confirm against TSA::find_longer.
char const* up = upper.back();
lo = root->find_longer(lo,up,a,lower.size(),0);
if (!lo) return false;
root->readEntry(lo,A);
a = root->corpus->getToken(A); assert(a);
z = next(a);
assert(z >= root->corpus->sntStart(A.sid) && z < root->corpus->sntEnd(A.sid));
}
lower.push_back(lo);
// lower.size() has already grown by one here, so the end of the child's
// range is searched for with the extended key length.
char const* up = root->getUpperBound(a->id());
char const* u = root->find_end(lo,up,a,lower.size(),0);
assert(u);
upper.push_back(u);
return true;
}
}
// ---------------------------------------------------------------------------
// OVER
//---------------------------------------------------------------------------
// Move to the next sibling of the current node, i.e. replace the last token
// of the path by the next one found in the array.
// @return true on success, false if there is no further sibling (or the
// iterator is at the root).
template<typename Token>
bool
TSA_tree_iterator<Token>::
over()
{
if (lower.size() == 0)
return false;
if (lower.size() == 1)
{
// Top level: siblings are looked up via the per-word-id bounds of the index.
Token const* t = this->getToken(0);
id_type wid = t->id();
char const* hi = root->getUpperBound(wid);
if (upper[0] < hi)
{
// There is more material below the upper bound for this word id:
// the sibling starts where the current range ends.
lower[0] = upper[0];
Token const* foo = this->getToken(0);
upper.back() = root->upper_bound(foo,lower.size());
}
else
{
// Scan the following word ids for one with a non-empty range.
for (++wid; wid < root->indexSize; ++wid)
{
char const* lo = root->getLowerBound(wid);
if (lo == root->endArray) return false;
char const* hi = root->getUpperBound(wid);
if (!hi) return false;
if (lo == hi) continue;
assert(lo);
lower[0] = lo;
Token const* foo = this->getToken(0);
upper.back() = root->upper_bound(foo,lower.size());
break;
}
}
// false only if the scan above ran off the end of the index
return wid < root->indexSize;
}
else
{
if (upper.back() == root->arrayEnd())
return false;
// Compare the entry right after the current range (U) with the first
// entry of the current range (L) on the first lower.size()-1 tokens,
// i.e., on the parent's path.
tsa::ArrayEntry L(root,lower.back());
tsa::ArrayEntry U(root,upper.back());
// display(root->corpus->getToken(L),"L1");
// display(root->corpus->getToken(U),"U1");
int x = root->corpus->cmp(U,L,lower.size()-1);
// cerr << "x=" << x << endl;
// Only a result of 1 is accepted as "U is a sibling".
// NOTE(review): presumably 1 means that U matches the parent's path and
// continues beyond it -- confirm against the corpus' cmp().
if (x != 1)
return false;
lower.back() = upper.back();
// display(root->corpus->getToken(U),"L2");
Token const* foo = this->getToken(0);
// display(foo,"F!");
upper.back() = root->upper_bound(foo,lower.size());
return true;
}
}
// ---------------------------------------------------------------------------
// UP
//---------------------------------------------------------------------------
// Move to the parent node by dropping the deepest level.
// @return false if the iterator is already at the root, true otherwise.
template<typename Token>
bool
TSA_tree_iterator<Token>::
up()
{
  if (lower.empty())
    return false;
  lower.pop_back();
  upper.pop_back();
  return true;
}
// ---------------------------------------------------------------------------
// CONSTRUCTORS
//----------------------------------------------------------------------------
// Creates an iterator positioned at the root of the array /s/.
// The pointer is stored as is; the bound vectors start out empty.
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s)
  : root(s)
{
}
// ---------------------------------------------------------------------------
#if 0
// Disabled code (excluded from compilation by the enclosing #if 0):
// additional constructors that position the iterator on a given key.
// Construct from a whitespace-separated string of words, mapped to ids via V.
// The iterator is reset to the root if the key cannot be matched in full.
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s,
TokenIndex const& V,
string const& key)
: root(s)
{
istringstream buf(key); string w;
while (buf >> w)
{
if (this->extend(V[w]))
continue;
else
{
lower.clear();
upper.clear();
break;
}
}
};
// ---------------------------------------------------------------------------
// Copy constructor.
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA_tree_iterator<Token> const& other)
: root(other.root)
{
lower = other.lower;
upper = other.upper;
};
// ---------------------------------------------------------------------------
// Construct an iterator for the single token /t/, using the index bounds for
// its id; stays at the root if no upper bound is available.
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s, Token const& t)
: root(s)
{
char const* up = root->getUpperBound(t.id());
if (!up) return;
lower.push_back(root->getLowerBound(t.id()));
upper.push_back(up);
};
// ---------------------------------------------------------------------------
// Construct from the token chain [kstart,kend); the iterator is reset to the
// root if the chain cannot be matched in full.
template<typename Token>
TSA_tree_iterator<Token>::
TSA_tree_iterator(TSA<Token> const* s, Token const* kstart, Token const* kend)
: root(s)
{
for (;kstart != kend; kstart = kstart->next())
if (!extend(*kstart))
break;
if (kstart != kend)
{
lower.clear();
upper.clear();
}
};
#endif
// ---------------------------------------------------------------------------
// EXTEND
// ---------------------------------------------------------------------------
// Convenience overload: builds a token from /id/ and tries to extend the
// path with it. @return true if the extension succeeded.
template<typename Token>
bool
TSA_tree_iterator<Token>::
extend(id_type const id)
{
  Token const t(id);
  return extend(t);
}
// Try to make the path one token longer by appending /t/.
// @return true on success; false if no entry in the current range continues
// with /t/, in which case the iterator is left unchanged.
template<typename Token>
bool
TSA_tree_iterator<Token>::
extend(Token const& t)
{
if (lower.size())
{
// Search within the current range for entries that have /t/ at
// position lower.size() (the position right after the current path).
char const* lo = lower.back();
char const* hi = upper.back();
lo = root->find_start(lo, hi, &t, 1, lower.size());
if (!lo) return false;
lower.push_back(lo);
// The new level has been pushed, so getToken(-1) is the token just
// added and lower.size()-1 is its position in the path.
hi = root->find_end(lo, hi, getToken(-1), 1, lower.size()-1);
upper.push_back(hi);
}
else
{
// At the root: start from the index bounds for the token's id.
char const* lo = root->getLowerBound(t.id());
char const* hi = root->getUpperBound(t.id());
if (lo==hi) return false;
lo = root->find_start(lo, hi, &t, 1, lower.size());
if (!lo) return false;
lower.push_back(lo);
#if 0
tsa::ArrayEntry I;
root->readEntry(lo,I);
cout << I.sid << " " << I.offset << endl;
cout << root->corpus->sntLen(I.sid) << endl;
#endif
hi = root->find_end(lo, hi, getToken(0), 1, 0);
upper.push_back(hi);
}
return true;
};
// ---------------------------------------------------------------------------
// @return the current depth of the iterator (0 at the root), which is the
// number of levels stored in the bound vectors.
template<typename Token>
size_t
TSA_tree_iterator<Token>::
size() const
{
  size_t const depth = lower.size();
  return depth;
}
// ---------------------------------------------------------------------------
// @return the sentence id read from the start of the current range; at the
// root, the whole array [startArray,endArray) is used as the range.
template<typename Token>
id_type
TSA_tree_iterator<Token>::
getSid() const
{
  char const* from = root->startArray;
  char const* to   = root->endArray;
  if (lower.size()) from = lower.back();
  if (upper.size()) to   = upper.back();
  id_type sid;
  root->readSid(from,to,sid);
  return sid;
}
// ---------------------------------------------------------------------------
// @return a 64-bit phrase id for the path prefix at level p (negative p
// counts back from the deepest level): the sentence id of the first entry of
// the range shifted left by 32 bits, plus its offset shifted left by 16
// bits, plus the prefix length (p+1).
template<typename Token>
uint64_t
TSA_tree_iterator<Token>::
getPid(int p) const
{
  if (p < 0) p += upper.size();
  char const* const lb = lower_bound(p);
  char const* const ub = upper_bound(p);
  uint64_t sid,off;
  // the offset is stored right after the sentence id
  char const* const afterSid = root->readSid(lb,ub,sid);
  root->readOffset(afterSid,ub,off);
  return (sid<<32) + (off<<16) + uint64_t(p+1);
}
// ---------------------------------------------------------------------------
// @return start of the array range at level p; negative p counts back from
// the deepest level (-1 is the current node).
template<typename Token>
char const*
TSA_tree_iterator<Token>::
lower_bound(int p) const
{
  int const level = (p < 0) ? int(p + lower.size()) : p;
  assert(level >= 0 && level < int(lower.size()));
  return lower[level];
}
// ---------------------------------------------------------------------------
// @return end of the array range at level p; negative p counts back from
// the deepest level (-1 is the current node).
template<typename Token>
char const*
TSA_tree_iterator<Token>::
upper_bound(int p) const
{
  int const level = (p < 0) ? int(p + upper.size()) : p;
  assert(level >= 0 && level < int(upper.size()));
  return upper[level];
}
// ---------------------------------------------------------------------------
/* @return a pointer to the position in the corpus where the p-th token of
 * the current path is read from; NULL if the iterator is at the root.
 * Negative p counts back from the end of the path (-1 is the last token).
 * The token is located by taking the first entry of the deepest range and
 * stepping forward p times with next().
 */
template<typename Token>
Token const*
TSA_tree_iterator<Token>::
getToken(int p) const
{
if (lower.size()==0) return NULL;
tsa::ArrayEntry A(root,lower.back());
Token const* t = root->corpus->getToken(A); assert(t);
// sentence boundaries are only needed for the assertion below
#ifndef NDEBUG
Token const* bos = root->corpus->sntStart(A.sid);
Token const* eos = root->corpus->sntEnd(A.sid);
#endif
if (p < 0) p += lower.size();
// cerr << p << ". " << t->id() << endl;
while (p-- > 0)
{
t = next(t);
// if (t) cerr << p << ". " << t->id() << endl;
assert(t >= bos && t < eos);
}
return t;
}
// ---------------------------------------------------------------------------
// @param p level of the path prefix to count for; negative values count back
//   from the deepest level (-1, the default, is the current node).
// @return what root->sntCnt() reports for the array range at level p; for an
//   iterator at the root, root->getCorpusSize().
template<typename Token>
size_t
TSA_tree_iterator<Token>::
sntCnt(int p) const
{
  // The root has no range to index into. This check has to come before the
  // level is normalized and asserted: with the default p=-1 the normalized
  // level is negative for an empty iterator.
  if (lower.size() == 0) return root->getCorpusSize();
  if (p < 0)
    p = lower.size()+p;
  assert(p >= 0 && p < int(lower.size()));
  return root->sntCnt(lower[p],upper[p]);
}
// ---------------------------------------------------------------------------
// @param p level of the path prefix to count for; negative values count back
//   from the deepest level (-1, the default, is the current node).
// @return what root->rawCnt() reports for the array range at level p; for an
//   iterator at the root, root->getCorpusSize().
template<typename Token>
size_t
TSA_tree_iterator<Token>::
rawCnt(int p) const
{
  // The root has no range to index into. This check has to come before the
  // level is normalized and asserted: with the default p=-1 the normalized
  // level is negative for an empty iterator.
  if (lower.size() == 0) return root->getCorpusSize();
  if (p < 0)
    p = lower.size()+p;
  assert(p >= 0 && p < int(lower.size()));
  return root->rawCnt(lower[p],upper[p]);
}
//---------------------------------------------------------------------------
// Deprecated alias for markSentences(); forwards the call unchanged.
template<typename Token>
count_type
TSA_tree_iterator<Token>::
fillBitSet(boost::dynamic_bitset<uint64_t>& bitset) const
{
  count_type const marked = markSentences(bitset);
  return marked;
}
//---------------------------------------------------------------------------
// Resizes /bitset/ to the number of sentences in the corpus, clears it, and
// sets the bit of every sentence id found in the current (deepest) range.
// @return the number of entries read from the range (0 at the root).
template<typename Token>
count_type
TSA_tree_iterator<Token>::
markSentences(boost::dynamic_bitset<uint64_t>& bitset) const
{
  assert(root && root->corpus);
  bitset.resize(root->corpus->size());
  bitset.reset();
  if (lower.size()==0) return 0;
  char const* const stop = upper.back();
  id_type sid;
  ushort off;
  count_type wcount=0;
  for (char const* p = lower.back(); p < stop; ++wcount)
    {
      // each entry consists of a sentence id followed by an offset
      p = root->readSid(p,stop,sid);
      p = root->readOffset(p,stop,off);
      bitset.set(sid);
    }
  return wcount;
}
//---------------------------------------------------------------------------
// Makes sure /bitset/ has one bit per corpus token, clears it, and lets the
// array mark the occurrences of the current path in it.
// @return the value returned by root->markOccurrences(); 0 at the root.
template<typename Token>
count_type
TSA_tree_iterator<Token>::
markOccurrences(boost::dynamic_bitset<uint64_t>& bitset, bool markOnlyStartPosition) const
{
  assert(root && root->corpus);
  if (bitset.size() != root->corpus->numTokens())
    bitset.resize(root->corpus->numTokens());
  bitset.reset();
  if (lower.empty())
    return 0;
  return root->markOccurrences(lower.back(),upper.back(),lower.size(),
                               bitset,markOnlyStartPosition);
}
//---------------------------------------------------------------------------
// For every entry in the current (deepest) range, increments the counter in
// /dest/ that belongs to the last token of that occurrence of the path.
// /dest/ must have one counter per corpus token; it is not cleared.
// @return the number of entries processed (0 at the root).
template<typename Token>
count_type
TSA_tree_iterator<Token>::
markOccurrences(vector<ushort>& dest) const
{
  assert(root && root->corpus);
  assert(dest.size() == root->corpus->numTokens());
  if (lower.size()==0) return 0;
  char const* const up = upper.back();
  id_type sid;
  ushort off;
  count_type wcount=0;
  Token const* crpStart = root->corpus->sntStart(0);
  for (char const* p = lower.back(); p < up; wcount++)
    {
      p = root->readSid(p,up,sid);
      p = root->readOffset(p,up,off);
      // walk from the first to the last token of this occurrence
      Token const* t = root->corpus->sntStart(sid)+off;
      size_t i = 1;
      while (i < lower.size())
        {
          ++i;
          t = t->next();
        }
      dest[t-crpStart]++;
    }
  return wcount;
}
//---------------------------------------------------------------------------
// mark all endpoints of instances of the path represented by this
// iterator in the sentence [start,stop)
// For every match, the bit at the position (relative to /start/) of the last
// matching token is set in /dest/; /dest/ is not cleared or resized here.
// @return the number of matches found
// NOTE(review): getToken(0) returns NULL for an iterator at the root and is
// dereferenced below without a check -- callers presumably never call this
// on an empty iterator; confirm.
template<typename Token>
count_type
TSA_tree_iterator<Token>::
markEndOfSequence(Token const* start, Token const* stop,
boost::dynamic_bitset<uint64_t>& dest) const
{
count_type matchCount=0;
Token const* a = getToken(0);
for (Token const* x = start; x < stop; ++x)
{
// cheap pre-check on the first token
if (*x != *a) continue;
Token const* y = x;
Token const* b = a;
size_t i;
// i counts the tokens matched so far; the loop stops as soon as all
// size() tokens have matched, before b and y are advanced again, so
// that y still points to the last token of the match.
for (i = 0; *b==*y && ++i < this->size();)
{
b = b->next();
y = y->next();
if (y < start || y >= stop) break;
}
if (i == this->size())
{
dest.set(y-start);
++matchCount;
}
}
return matchCount;
}
//---------------------------------------------------------------------------
// mark all occurrences of the sequence represented by this
// iterator in the sentence [start,stop)
// For every complete match, the bits at the positions (relative to /start/)
// of all tokens of the match are set in /dest/; /dest/ is not cleared or
// resized here. Candidates are verified in full before any bit is set, so
// partial matches leave no marks and the count does not depend on whatever
// token happens to follow a match.
// @return the number of complete matches found (0 for an iterator at the
//   root)
template<typename Token>
count_type
TSA_tree_iterator<Token>::
markSequence(Token const* start,
             Token const* stop,
             bitvector& dest) const
{
  count_type numMatches=0;
  size_t const len = this->size();
  if (len == 0) return 0; // no sequence to look for
  Token const* a = getToken(0);
  for (Token const* x = start; x < stop; ++x)
    {
      if (*x != *a) continue;
      // step 1: verify that the whole sequence matches at x
      Token const* y = x;
      Token const* b = a;
      size_t i;
      for (i = 1; i < len; ++i)
        {
          y = y->next();
          if (y < start || y >= stop) break;
          b = b->next();
          if (*b != *y) break;
        }
      if (i < len) continue;
      // step 2: mark every token of the match
      y = x;
      dest.set(y-start);
      for (i = 1; i < len; ++i)
        {
          y = y->next();
          dest.set(y-start);
        }
      ++numMatches;
    }
  return numMatches;
}
//---------------------------------------------------------------------------
// @return a 64-bit id for the current path: the sentence id of the first
//   entry of the deepest range shifted left by 32 bits, plus its offset
//   shifted left by 16 bits, plus the path length; 0 at the root.
template<typename Token>
uint64_t
TSA_tree_iterator<Token>::
getSequenceId() const
{
  if (this->size() == 0) return 0;
  char const* p = this->lower_bound(-1);
  typename Token::ArrayEntry I;
  root->readEntry(p,I);
  // All parts are widened to 64 bits before shifting. Shifting the offset as
  // a (promoted) int overflows for large offsets and the resulting negative
  // value would spill into the sentence id bits.
  return ((uint64_t(I.sid)<<32)
          + (uint64_t(I.offset)<<16)
          + uint64_t(this->size()));
}
// @param V     vocabulary for mapping token ids to strings; if NULL, the
//              numeric ids are printed instead
// @param start index of the first token to print; negative values count
//              back from the end of the path
// @param stop  index one past the last token to print; values <= 0 count
//              back from the end of the path (0, the default: up to the end)
// @return the tokens [start,stop) of the current path, separated by single
//   spaces; the empty string for an iterator at the root
template<typename Token>
string
TSA_tree_iterator<Token>::
str(TokenIndex const* V, int start, int stop) const
{
  if (this->size()==0) return "";
  if (start < 0) start = this->size()+start;
  if (stop <= 0) stop = this->size()+stop;
  assert(start>=0 && start < int(this->size()));
  assert(stop > 0 && stop <= int(this->size()));
  // begin at the token with index /start/, not at the head of the path
  Token const* x = this->getToken(start);
  ostringstream buf;
  for (int i = start; i < stop; ++i, x = x->next())
    {
      assert(x);
      buf << (i > start ? " " : "");
      if (V) buf << (*V)[x->id()];
      else buf << x->id();
    }
  return buf.str();
}
#if 0
// Disabled code (excluded from compilation by the enclosing #if 0):
// variant of str() that looks up the token strings in a Vocab object.
template<typename Token>
string
TSA_tree_iterator<Token>::
str(Vocab const& V, int start, int stop) const
{
if (this->size()==0) return "";
if (start < 0) start = this->size()+start;
if (stop <= 0) stop = this->size()+stop;
assert(start>=0 && start < int(this->size()));
assert(stop > 0 && stop <= int(this->size()));
Token const* x = this->getToken(0);
ostringstream buf;
for (int i = start; i < stop; ++i, x = x->next())
{
assert(x);
buf << (i > start ? " " : "");
buf << V[x->id()].str;
}
return buf.str();
}
#endif
/// @return true if the sentence [start,stop) contains the sequence
/// represented by this iterator, i.e., if the whole path can be matched
/// at some position x in [start,stop), following next() from there.
template<typename Token>
bool
TSA_tree_iterator<Token>::
match(Token const* start, Token const* stop) const
{
  Token const* const first = getToken(0);
  for (Token const* t = start; t < stop; ++t)
    {
      if (*t != *first) continue;
      Token const* key = first; // walks along the path
      Token const* cur = t;     // walks along the sentence
      size_t matched = 1;       // number of tokens matched so far
      while (matched < lower.size())
        {
          cur = cur->next();
          if (cur < start || cur >= stop) break;
          key = key->next();
          if (*key != *cur) break;
          ++matched;
        }
      if (matched == lower.size()) return true;
    }
  return false;
}
/// @return true if the sentence /sid/ contains the sequence; the sentence
/// boundaries are looked up in the corpus of the underlying array.
template<typename Token>
bool
TSA_tree_iterator<Token>::
match(id_type sid) const
{
  Token const* const bos = root->corpus->sntStart(sid);
  Token const* const eos = root->corpus->sntEnd(sid);
  return match(bos,eos);
}
/// a special auxiliary function for finding trees
// Walks over all entries of the current (deepest) range. For every entry in
// a sentence selected in /snt/, the corpus position of the last token of the
// occurrence is computed; if that position is set in /ref/, it is also set
// in /dest/.
// @param ref: reference root positions (one bit per corpus token)
// @param snt: relevant sentences (one bit per sentence)
// @param dest: bitvector to keep track of the exact root location
// NOTE(review): lower.back() is used unconditionally, so this must not be
// called on an iterator at the root -- confirm that callers guarantee this.
template<typename Token>
void
TSA_tree_iterator<Token>::
tfAndRoot(bitvector const& ref, // reference root positions
bitvector const& snt, // relevant sentences
bitvector& dest) const
{
tsa::ArrayEntry I(lower.back());
Token const* crpStart = root->corpus->sntStart(0);
do
{
// read the next entry; I.next then points to the entry after it
root->readEntry(I.next,I);
if (!snt.test(I.sid)) continue; // skip, no root there
// find my endpoint:
Token const* t = root->corpus->getToken(I)->next(lower.size()-1);
assert(t >= crpStart);
size_t p = t-crpStart;
if (ref.test(p)) // it's a valid root
dest.set(p);
} while (I.next != upper.back());
}
// Clears the bits of all sentences in /bv/ that do not contain the sequence
// represented by this iterator. Depending on a rough cost estimate, this is
// done either by marking all sentences of the current range in a temporary
// bit vector and AND-ing it with /bv/, or by checking each selected sentence
// individually with match().
// @param bv: bitvector with bits set for selected sentences
// @return: reference to bv
template<typename Token>
bitvector&
TSA_tree_iterator<Token>::
filterSentences(bitvector& bv) const
{
  // The averages are computed in floating point; integer division would
  // truncate them before they enter the cost comparison below.
  float aveSntLen = float(root->corpus->numTokens())/root->corpus->size();
  size_t ANDcost = bv.size()/8; // cost of dest&=ref;
  float aveEntrySize = (float(root->endArray-root->startArray)
                        /root->corpus->numTokens());
  if (arrayByteSpanSize()+ANDcost < aveEntrySize*aveSntLen*bv.count())
    {
      bitvector tmp(bv.size());
      markSentences(tmp);
      bv &= tmp;
    }
  else
    {
      for (size_t i = bv.find_first(); i < bv.size(); i = bv.find_next(i))
        if (!match(i)) bv.reset(i);
    }
  return bv;
}
// @return a pseudo-random integer in the range [0,N), derived from rand();
// always 0 for N == 0.
inline
size_t
randInt(size_t N)
{
  // rand()/(RAND_MAX+1.) lies in the half-open interval [0,1)
  double const fraction = rand()/(RAND_MAX+1.);
  return size_t(N*fraction);
}
/// randomly select up to N occurrences of the sequence
// @param level: level of the path prefix to sample from; negative values
//   count back from the deepest level
// @param N: maximum number of samples
// @return vector of the sampled corpus positions (sentence id and offset),
//   in the order in which they occur in the array
// The range is scanned once; each entry is accepted with a probability that
// depends on the number of samples still needed and an estimate of the
// number of entries that remain in the range.
// Note: lower.at()/upper.at() throw std::out_of_range if /level/ is not a
// valid level, e.g., for an iterator at the root.
template<typename Token>
sptr<vector<typename ttrack::Position> >
TSA_tree_iterator<Token>::
randomSample(int level, size_t N) const
{
if (level < 0) level += lower.size();
assert(level >=0);
sptr<vector<typename ttrack::Position> >
ret(new vector<typename ttrack::Position>(N));
size_t m=0; // number of samples selected so far
typename Token::ArrayEntry I(lower.at(level));
char const* stop = upper.at(level);
while (m < N && I.next < stop)
{
root->readEntry(I.next,I);
// t: expected number of remaining samples
double t = (stop - I.pos)/root->aveIndexEntrySize();
// r: random number in [0,t); accept if it is below the number of
// samples that are still needed
double r = t*rand()/(RAND_MAX+1.);
if (r < N-m)
{
ret->at(m).offset = I.offset;
ret->at(m++).sid = I.sid;
}
}
// drop the slots that were not filled
ret->resize(m);
return ret;
}
} // end of namespace ugdiss
#endif

View File

@ -0,0 +1,55 @@
// Memory-mapped corpus track
// (c) Ulrich Germann. All rights reserved
#include <sstream>
#include "ug_mm_ttrack.h"
#include "tpt_pickler.h"
namespace ugdiss
{
using namespace std;
#if 0
// Disabled code (excluded from compilation by the enclosing #if 0):
// identity mapping from a token to its id for Ttrack<id_type>.
template<>
id_type
Ttrack<id_type>::
toID(id_type const& t)
{
return t;
}
#endif
/** @return string representation of sentence /sid/: the strings that /T/
 *  provides for the token ids of the sentence, separated by single spaces
 *  (the empty string for an empty sentence) */
template<>
string
Ttrack<id_type>::
str(id_type sid, TokenIndex const& T) const
{
  // /sid/ is a sentence id, so it has to be checked against the number of
  // sentences, not against the number of tokens
  assert(sid < size());
  id_type const* stop = sntEnd(sid);
  id_type const* strt = sntStart(sid);
  ostringstream buf;
  if (strt < stop) buf << T[*strt];
  while (++strt < stop)
    buf << " " << T[*strt];
  return buf.str();
}
#if 0
// Disabled code (excluded from compilation by the enclosing #if 0):
// variant of str() that looks up the token strings in a Vocab object.
template<>
string
Ttrack<id_type>::
str(id_type sid, Vocab const& V) const
{
assert(sid < numTokens());
id_type const* stop = sntEnd(sid);
id_type const* strt = sntStart(sid);
ostringstream buf;
if (strt < stop) buf << V[*strt].str;
while (++strt < stop)
buf << " " << V[*strt].str;
return buf.str();
}
#endif
}

348
moses/mm/ug_ttrack_base.h Normal file
View File

@ -0,0 +1,348 @@
// -*- c++ -*-
// Base class for corpus tracks. mmTtrack (memory-mapped Ttrack) and imTtrack (in-memory Ttrack)
// are derived from this class.
// This code is part of a refactorization of the earlier Ttrack class as a template class for
// tokens of arbitrary fixed-length size.
// (c) 2007-2009 Ulrich Germann. All rights reserved.
#ifndef __ug_ttrack_base
#define __ug_ttrack_base
#include <string>
#include <vector>
#include <boost/dynamic_bitset.hpp>
#include "ug_ttrack_position.h"
#include "tpt_typedefs.h"
#include "tpt_tokenindex.h"
// #include "ug_vocab.h"
namespace ugdiss
{
using namespace std;
typedef boost::dynamic_bitset<uint64_t> bdBitset;
// Abstract interface to a corpus track: a sequence of sentences, each of
// which is a contiguous array of tokens of type TKN. Derived classes provide
// the storage and implement sntStart(), sntEnd(), size(), numTokens() and
// findSid().
template<typename TKN=id_type>
class Ttrack
{
protected:
id_type numSent;
id_type numWords;
public:
virtual ~Ttrack() {};
typedef typename ttrack::Position Position;
typedef TKN Token;
/** @return a pointer to beginning of sentence /sid/ */
virtual
TKN const*
sntStart(size_t sid) const = 0;
/** @return end point of sentence /sid/ */
virtual
TKN const*
sntEnd(size_t sid) const = 0;
/** @return pointer to the token at position /p/, or NULL if the offset
* in /p/ is not within the sentence */
TKN const*
getToken(Position const& p) const;
/** same as getToken(), but with the result reinterpreted as a pointer to T */
template<typename T>
T const*
getTokenAs(Position const& p) const
{ return reinterpret_cast<T const*>(getToken(p)); }
/** same as sntStart(), but with the result reinterpreted as a pointer to T */
template<typename T>
T const*
sntStartAs(id_type sid) const
{ return reinterpret_cast<T const*>(sntStart(sid)); }
/** same as sntEnd(), but with the result reinterpreted as a pointer to T */
template<typename T>
T const*
sntEndAs(id_type sid) const
{ return reinterpret_cast<T const*>(sntEnd(sid)); }
/** @return length of sentence /sid/ */
size_t sntLen(size_t sid) const { return sntEnd(sid) - sntStart(sid); }
/** @return position of the first token of sentence /sid/, counted in
* tokens from the start of sentence 0 */
size_t
startPos(id_type sid) const { return sntStart(sid)-sntStart(0); }
/** @return position of the end of sentence /sid/, counted in tokens from
* the start of sentence 0 */
size_t
endPos(id_type sid) const { return sntEnd(sid)-sntStart(0); }
/** Don't use this unless you want a copy of the sentence */
vector<TKN>
operator[](id_type sid) const
{
return vector<TKN>(sntStart(sid),sntEnd(sid));
}
/** @return size of corpus in number of sentences */
virtual size_t size() const = 0;
/** @return size of corpus in number of words/tokens */
virtual size_t numTokens() const = 0;
/** @return string representation of sentence /sid/
* Currently only defined for Ttrack<id_type> */
string str(id_type sid, TokenIndex const& T) const;
// /** @return string representation of sentence /sid/
// * Currently only defined for Ttrack<id_type> */
// string str(id_type sid, Vocab const& V) const;
/** counts the tokens in the corpus; used for example in the construction of
* token sequence arrays */
count_type count_tokens(vector<count_type>& cnt, bdBitset const& filter,
int lengthCutoff=0, ostream* log=NULL) const;
// static id_type toID(TKN const& t);
/** compares the token sequences that start at positions /A/ and /B/ */
int cmp(Position const& A, Position const& B, int keyLength) const;
/** compares the token sequence that starts /depth/ tokens after position
* /A/ with the key that starts at /keyStart/ */
int cmp(Position const& A, TKN const* keyStart, int keyLength=-1,
int depth=0) const;
virtual id_type findSid(TKN const* t) const = 0; // find the sentence id of a given token
// virtual id_type findSid(id_type TokenOffset) const = 0; // find the sentence id of a given token
// the following three functions are currently not used by any program ... (deprecate?)
TKN const*
find_next_within_sentence(TKN const* startKey,
int keyLength,
Position startHere) const;
Position
find_first(TKN const* startKey, int keyLength,
bdBitset const* filter=NULL) const;
Position
find_next(TKN const* startKey, int keyLength, Position startAfter,
bdBitset const* filter=NULL) const;
/** @return position of token /t/, counted in tokens from the start of
* sentence 0 */
virtual size_t offset(TKN const* t) const { return t-sntStart(0); }
};
// ---------------------------------------------------------------------------
// @return pointer to the token at offset p.offset in sentence p.sid, or NULL
// if that offset is at or beyond the end of the sentence.
template<typename TKN>
TKN const*
Ttrack<TKN>::
getToken(Position const& p) const
{
  TKN const* const tok = sntStart(p.sid)+p.offset;
  if (tok < sntEnd(p.sid))
    return tok;
  return NULL;
}
// ---------------------------------------------------------------------------
// Counts how often each token id occurs in the sentences selected by
// /filter/.
// @param cnt          receives the counts, indexed by token id; the vector
//                     is cleared first and grown as needed
// @param filter       one bit per sentence; only sentences whose bit is set
//                     are counted
// @param lengthCutoff if non-zero, sentences with /lengthCutoff/ or more
//                     tokens are skipped
// @param log          if non-NULL, a warning is written to this stream for
//                     each skipped sentence
// @return the total number of tokens counted
template<typename TKN>
count_type
Ttrack<TKN>::
count_tokens(vector<count_type>& cnt, bdBitset const& filter,
             int lengthCutoff, ostream* log) const
{
  cnt.clear();
  cnt.reserve(500000);
  count_type totalCount=0;
  int64_t expectedTotal=numTokens();
  for (size_t sid = filter.find_first();
       sid < filter.size();
       sid = filter.find_next(sid))
    {
      TKN const* k = sntStart(sid);
      TKN const* const stop = sntEnd(sid);
      if (lengthCutoff && stop-k >= lengthCutoff)
        {
          // report the actual length and the cutoff in effect
          if (log)
            *log << "WARNING: skipping sentence #" << sid
                 << " with " << (stop-k) << " tokens (length cutoff: "
                 << lengthCutoff << ")" << endl;
          expectedTotal -= stop-k;
        }
      else
        {
          totalCount += stop-k;
          for (; k < stop; ++k)
            {
              // cout << sid << " " << stop-k << " " << k->lemma << " " << k->id() << " " << sizeof(*k) << endl;
              id_type wid = k->id();
              // make room for ids that have not been seen so far
              if (wid >= cnt.size()) cnt.resize(size_t(wid)+1, 0);
              cnt[wid]++;
            }
        }
    }
  // Sanity check; only meaningful if every sentence was selected.
  if (this->size() == filter.count())
    {
      if (totalCount != expectedTotal)
        cerr << "OOPS: expected " << expectedTotal
             << " tokens but counted " << totalCount << endl;
      assert(totalCount == expectedTotal);
    }
  return totalCount;
}
// Compares the token sequence that starts at position /A/ with the one that
// starts at position /B/, looking at no more than /keyLength/ tokens and not
// beyond the end of either sentence.
// @return
//  -1 if a token of A is smaller than the corresponding token of B, or if
//     A's sentence ends (or A is not a valid position) before a decision
//     is reached
//   2 if a token of A is greater than the corresponding token of B, or if
//     keyLength is 0
//   0 if all compared tokens are equal and A's sentence ends where the
//     comparison stops
//   1 if all compared tokens are equal and A's sentence continues beyond
//     the point where the comparison stops
// The comparison stops after keyLength tokens or at the end of B's
// sentence, whichever comes first. A negative keyLength never reaches 0, so
// that only the sentence boundaries limit the comparison.
template<typename TKN>
int
Ttrack<TKN>::
cmp(Position const& A, Position const& B, int keyLength) const
{
if (keyLength==0) return 2;
assert(A.sid < this->size());
assert(B.sid < this->size());
TKN const* a = getToken(A);
TKN const* bosA = sntStart(A.sid);
TKN const* eosA = sntEnd(A.sid);
TKN const* b = getToken(B);
TKN const* bosB = sntStart(B.sid);
TKN const* eosB = sntEnd(B.sid);
int ret=-1;
#if 0
cerr << "A: "; for (TKN const* x = a; x; x = next(x)) cerr << x->lemma << " "; cerr << endl;
cerr << "B: "; for (TKN const* x = b; x; x = next(x)) cerr << x->lemma << " "; cerr << endl;
#endif
while (a >= bosA && a < eosA)
{
// cerr << keyLength << "a. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << endl;
if (*a < *b) { break; } // return -1;
if (*a > *b) { ret = 2; break; } // return 2;
a = next(a);
b = next(b);
// cerr << keyLength << "b. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << endl;
// B is exhausted: the result depends on whether A is exhausted as well
if (--keyLength==0 || b < bosB || b >= eosB)
{
ret = (a < bosA || a >= eosA) ? 0 : 1;
break;
}
}
// cerr << "RETURNING " << ret << endl;
return ret;
}
// Compares the token sequence that starts /depth/ tokens after position /A/
// with the key of /keyLength/ tokens that starts at /key/ (both are
// traversed via next()).
// @return
//  -1 if the sequence at A is smaller than the key or ends before the key
//     does (this includes the case that fewer than /depth/ tokens follow A)
//   2 if the sequence at A is greater than the key, or if keyLength is 0
//     or key is NULL
//   0 if the key matches and the sequence at A ends right after the match
//   1 if the key matches and the sequence at A continues after the match
// A negative keyLength never reaches 0, so the comparison then runs until a
// difference is found or the sequence at A ends.
template<typename TKN>
int
Ttrack<TKN>::
cmp(Position const& A, TKN const* key, int keyLength, int depth) const
{
if (keyLength==0 || !key) return 2;
assert(A.sid < this->size());
TKN const* x = getToken(A);
// the token type determines where the sequence that starts at x ends
TKN const* stopx = x->stop(*this,A.sid);
// skip the first /depth/ tokens
for (int i = 0; i < depth; ++i)
{
x = x->next();
if (x == stopx) return -1;
// assert(x != stopx);
}
while (x != stopx)
{
if (*x < *key) return -1;
if (*x > *key) return 2;
key = key->next();
x = x->next();
if (--keyLength==0) // || !key)
return (x == stopx) ? 0 : 1;
assert(key);
}
return -1;
}
// Searches the sentence startHere.sid, beginning at offset startHere.offset,
// for the first position at which the key of /keyLength/ tokens starting at
// /startKey/ matches (cmp() returning 0 or 1).
// @return pointer to the first token of the match, or NULL if the end of
// the sentence is reached without a match
template<typename TKN>
TKN const*
Ttrack<TKN>::
find_next_within_sentence(TKN const* startKey, int keyLength,
Position startHere) const
{
// getToken() returns NULL once the offset runs past the end of the sentence
for (TKN const* t = getToken(startHere); t; t = getToken(startHere))
{
#if 0
int foo = cmp(startHere,startKey,1);
if (foo == 0 || foo ==1)
{
TKN const* k = startKey->next();
TKN const* t2 = t->next();
if (t2)
{
cout << t2->lemma << "." << int(t2->minpos) << " "
<< k->lemma << "." << int(k->minpos) << " "
<< t2->cmp(*k) << endl;
}
}
#endif
int x = cmp(startHere,startKey,keyLength,0);
if (x == 0 || x == 1) return t;
startHere.offset++;
}
return NULL;
}
// Finds the first occurrence of the key of /keyLength/ tokens starting at
// /startKey/ in the corpus. If /filter/ is given, only sentences whose bit
// is set in it are searched; otherwise all sentences are searched in order.
// @return position of the first match, or Position(size(),0) if there is
// none
template<typename TKN>
typename Ttrack<TKN>::Position
Ttrack<TKN>::
find_first(TKN const* startKey, int keyLength, bdBitset const* filter) const
{
  if (filter)
    {
      size_t sid = filter->find_first();
      while (sid < filter->size())
        {
          TKN const* hit
            = find_next_within_sentence(startKey,keyLength,Position(sid,0));
          if (hit) return Position(sid,hit-sntStart(sid));
          sid = filter->find_next(sid);
        }
    }
  else
    {
      size_t sid = 0;
      while (sid < this->size())
        {
          TKN const* hit
            = find_next_within_sentence(startKey,keyLength,Position(sid,0));
          if (hit) return Position(sid,hit-sntStart(sid));
          ++sid;
        }
    }
  return Position(this->size(),0);
}
// Finds the next occurrence of the key of /keyLength/ tokens starting at
// /startKey/ after position /startAfter/: first in the remainder of the
// same sentence, then in the following sentences (restricted to those
// selected in /filter/, if given).
// @return position of the match, or Position(size(),0) if there is none
template<typename TKN>
typename Ttrack<TKN>::Position
Ttrack<TKN>::
find_next(TKN const* startKey, int keyLength, Position startAfter, bdBitset const* filter) const
{
id_type sid = startAfter.sid;
// resume the search one token after the given position
startAfter.offset++;
if (filter) assert(filter->test(sid));
TKN const* x = find_next_within_sentence(startKey,keyLength,startAfter);
if (x) return Position(sid,x -sntStart(sid));
if (filter)
{
for (sid = filter->find_next(sid); sid < filter->size(); sid = filter->find_next(sid))
{
x = find_next_within_sentence(startKey,keyLength,Position(sid,0));
if (x) break;
}
}
else
{
for (++sid; sid < this->size(); sid++)
{
x = find_next_within_sentence(startKey,keyLength,Position(sid,0));
if (x) break;
}
}
if (x)
return Position(sid,x-sntStart(sid));
else
return Position(this->size(),0);
}
}
#endif

View File

@ -0,0 +1,9 @@
#include "ug_ttrack_position.h"
namespace ugdiss
{
namespace ttrack
{
// Default position: sentence 0, offset 0.
Position::Position()
  : sid(0), offset(0)
{
}

// Position at offset /_off/ in sentence /_sid/.
Position::Position(id_type _sid, ushort _off)
  : sid(_sid), offset(_off)
{
}
}
}

View File

@ -0,0 +1,89 @@
// -*- c++ -*-
#ifndef __ug_ttrack_position_h
#define __ug_ttrack_position_h
#include <cassert>
#include "ug_typedefs.h"
// A token position in a Ttrack, with a LESS functor for comparing token
// positions in whatever sorting order the underlying token type implies.
//
// (c) 2007-2010 Ulrich Germann. All rights reserved.
namespace ugdiss
{
namespace ttrack
{
/** Represents a position in a corpus (sentence Id + offset from beginning
* of sentence) */
class
Position
{
public:
id_type sid; // sentence id
ushort offset; // offset from the beginning of the sentence
Position(); // sets sid and offset to 0
Position(id_type _sid, ushort _off);
// functor for ordering positions by the token sequences that start there
template<typename TTRACK_TYPE> class LESS; // probably abandoned
}; // end of declaration of Position
#if 1
// Ordering functor for positions in the corpus track /crp/: position A is
// "less" than position B if the token sequence that starts at A (followed
// via next() to the end of its sentence) sorts before the one that starts at
// B. If one sequence is a proper prefix of the other, the shorter one sorts
// first. Both positions must refer to existing tokens.
template<typename TTRACK_TYPE>
class
Position::
LESS
{
TTRACK_TYPE const* c; // the corpus track the positions refer to (not owned)
public:
typedef typename TTRACK_TYPE::Token Token;
LESS(TTRACK_TYPE const* crp) : c(crp) {};
bool operator()(Position const& A, Position const& B) const
{
Token const* a = c->getToken(A); assert(a);
Token const* b = c->getToken(B); assert(b);
// a position is never less than itself
if (a == b) return false;
Token const* bosA = c->sntStart(A.sid);
Token const* eosA = c->sntEnd(A.sid);
Token const* bosB = c->sntStart(B.sid);
Token const* eosB = c->sntEnd(B.sid);
#if 0
Token const* z = a;
cout << "A: " << z->id();
for (z = next(z); z >= bosA && z < eosA; z = next(z))
cout << "-" << z->id();
cout << endl;
z = b;
cout << "B: " << z->id();
for (z = next(z); z >= bosB && z < eosB; z = next(z))
cout << "-" << z->id();
cout << endl;
#endif
// skip the common prefix
while (*a == *b)
{
a = next(a);
b = next(b);
// A is exhausted: A is less only if B still continues
if (a < bosA || a >= eosA)
return (b >= bosB && b < eosB);
// B is exhausted but A is not: A is the longer sequence
if (b < bosB || b >= eosB)
return false;
}
// first difference decides
int x = a->cmp(*b);
// cout << " " << (x < 0 ? "YES" : "NO") << endl;
assert (x != 0);
return x < 0;
}
}; // end of definition of LESS
#endif
} // end of namespace ttrack
} // end of namespace ugdiss
#endif

36
moses/mm/ug_typedefs.h Normal file
View File

@ -0,0 +1,36 @@
// -*- c++ -*-
// typedefs for Uli Germann's stuff
#ifndef __ug_typedefs_h
#define __ug_typedefs_h
#include <boost/dynamic_bitset.hpp>
#include <boost/shared_ptr.hpp>
#include <boost/scoped_ptr.hpp>
#include <vector>
#include <stdint.h>
#include "tpt_typedefs.h"
namespace ugdiss
{
  using namespace std;

  // Dynamically sized bit vector stored in 64-bit blocks.
  typedef boost::dynamic_bitset<uint64_t> bitvector;

  // Nested-vector tables of float, 2 to 4 levels deep.
  typedef std::vector<std::vector<float> > flt_2d_table;
  typedef std::vector<flt_2d_table>        flt_3d_table;
  typedef std::vector<flt_3d_table>        flt_4d_table;

  // Nested-vector tables of ushort, 2 to 4 levels deep.
  typedef std::vector<std::vector<ushort> > ushort_2d_table;
  typedef std::vector<ushort_2d_table>      ushort_3d_table;
  typedef std::vector<ushort_3d_table>      ushort_4d_table;

  // Nested-vector tables of short, 2 to 4 levels deep.
  typedef std::vector<std::vector<short> > short_2d_table;
  typedef std::vector<short_2d_table>      short_3d_table;
  typedef std::vector<short_3d_table>      short_4d_table;

  // Nested-vector tables of int, 2 to 4 levels deep.
  typedef std::vector<std::vector<int> > int_2d_table;
  typedef std::vector<int_2d_table>      int_3d_table;
  typedef std::vector<int_3d_table>      int_4d_table;
}
// Shorthand macros. These are preprocessor text substitutions, so they take
// effect in every file that includes this header, regardless of namespace.
// sptr<T> expands to boost::shared_ptr<T>.
#define sptr boost::shared_ptr
// scoptr<T> expands to boost::scoped_ptr<T>.
#define scoptr boost::scoped_ptr
// rcast<T>(x) expands to reinterpret_cast<T>(x).
#define rcast reinterpret_cast
#endif