mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 12:52:29 +03:00
Initial check-in.
This commit is contained in:
parent
db849ce2f5
commit
e3ba4a9890
156
moses/TranslationModel/mmsapt.cpp
Normal file
156
moses/TranslationModel/mmsapt.cpp
Normal file
@ -0,0 +1,156 @@
|
||||
#include "mmsapt.h"
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/tokenizer.hpp>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
using namespace bitext;
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
|
||||
void
|
||||
parseLine(string const& line, map<string,string> & params)
|
||||
{
|
||||
char_separator<char> sep("; ");
|
||||
tokenizer<char_separator<char> > tokens(line,sep);
|
||||
BOOST_FOREACH(string const& t,tokens)
|
||||
{
|
||||
size_t i = t.find_first_not_of(" =");
|
||||
size_t j = t.find_first_of(" =",i+1);
|
||||
size_t k = t.find_first_not_of(" =",j+1);
|
||||
assert(i != string::npos);
|
||||
assert(k != string::npos);
|
||||
params[t.substr(i,j)] = t.substr(k);
|
||||
}
|
||||
}
|
||||
|
||||
// Construct with an explicit description string.  Both arguments go to the
// PhraseDictionary base; the configuration line is then handed to init(),
// which fills in this object's own settings.
Mmsapt::Mmsapt(string const& description, string const& line)
  : PhraseDictionary(description, line)
{
  init(line);
}
|
||||
|
||||
// Construct from the configuration line alone; the base class is given the
// fixed description "Mmsapt".  init() then reads the settings from the line.
Mmsapt::Mmsapt(string const& line)
  : PhraseDictionary("Mmsapt", line)
{
  init(line);
}
|
||||
|
||||
void
|
||||
Mmsapt::
|
||||
init(string const& line)
|
||||
{
|
||||
map<string,string> param;
|
||||
parseLine(line,param);
|
||||
bname = param["base"];
|
||||
L1 = param["L1"];
|
||||
L2 = param["L2"];
|
||||
assert(bname.size());
|
||||
assert(L1.size());
|
||||
assert(L2.size());
|
||||
map<string,string>::const_iterator m;
|
||||
m = param.find("smooth");
|
||||
lbop_parameter = m != param.end() ? atof(m->second.c_str()) : .05;
|
||||
m = param.find("max-samples");
|
||||
default_sample_size = m != param.end() ? atoi(m->second.c_str()) : 1000;
|
||||
this->m_numScoreComponents = atoi(param["num-features"].c_str());
|
||||
// num_features = 0;
|
||||
m = param.find("ifactor");
|
||||
input_factor = m != param.end() ? atoi(m->second.c_str()) : 0;
|
||||
}
|
||||
|
||||
void
|
||||
Mmsapt::
|
||||
Load()
|
||||
{
|
||||
bt.open(bname, L1, L2);
|
||||
size_t num_feats;
|
||||
num_feats = calc_pfwd.init(0,lbop_parameter);
|
||||
num_feats = calc_pbwd.init(num_feats,lbop_parameter);
|
||||
num_feats = calc_lex.init(num_feats, bname + L1 + "-" + L2 + ".lex");
|
||||
num_feats = apply_pp.init(num_feats);
|
||||
assert (num_feats == this->m_numScoreComponents);
|
||||
// cerr << "MMSAPT provides " << num_feats << " features at "
|
||||
// << __FILE__ << ":" << __LINE__ << endl;
|
||||
}
|
||||
|
||||
|
||||
// this is not the most efficient way of phrase lookup!
|
||||
// Collect the target-side candidates for the source phrase src.
//
// The words of src (factor input_factor) are mapped to IDs through bt.V1 and
// used to extend a tree iterator over bt.I1 one word at a time.  If the
// iterator cannot be extended over the whole phrase, NULL is returned.
// Otherwise the statistics for the matched phrase are fetched (under the
// mutex), each entry of their trg table is scored by the four built-in
// scorers and turned into a TargetPhrase, and the resulting collection is
// passed through NthElement(m_tableLimit) before being returned.
// NOTE(review): the collection is allocated with new; who releases it is not
// visible from this function.
TargetPhraseCollection const*
Mmsapt::
GetTargetPhraseCollectionLEGACY(const Phrase& src) const
{
  size_t const src_len = src.GetSize();

  // walk the index along the source phrase; stop at the first failure
  TSA<Token>::tree_iterator node(bt.I1.get());
  size_t pos = 0;
  while (pos < src_len)
    {
      Factor const* factor = src.GetFactor(pos, input_factor);
      id_type const word_id = (*bt.V1)[factor->ToString()];
      // cout << (*bt.V1)[word_id] << " ";
      if (!node.extend(word_id)) break;
      ++pos;
    }
#if 0
  cout << endl;
  Token const* sphrase = node.getToken(0);
  for (size_t i = 0; i < node.size(); ++i)
    cout << (*bt.V1)[sphrase[i].id()] << " ";
  cout << endl;
#endif

  if (node.size() < src_len) return NULL;

  sptr<pstats> stats;
  {
    boost::lock_guard<boost::mutex> guard(this->lock);
    stats = bt.lookup(node);
  }

  PhrasePair pp;
  pp.init(node.getPid(), *stats, this->m_numScoreComponents);
  TargetPhraseCollection* ret = new TargetPhraseCollection();

  vector<FactorType> ofact(1,0);
  typedef boost::unordered_map<uint64_t,jstats>::const_iterator trg_iter;
  for (trg_iter cand = stats->trg.begin(); cand != stats->trg.end(); ++cand)
    {
      // score this candidate
      pp.update(cand->first, cand->second);
      calc_pfwd(bt,pp);
      calc_pbwd(bt,pp);
      calc_lex (bt,pp);
      apply_pp (bt,pp);

      // the key is unpacked into sentence id, offset and length, which
      // select the target words inside bt.T2
      uint32_t sid,off,len;
      parse_pid(cand->first, sid, off, len);
      size_t const stop = off + len;
      Token const* snt = bt.T2->sntStart(sid);

      TargetPhrase* tp = new TargetPhrase();
      for (size_t k = off; k < stop; ++k)
        {
          StringPiece wrd = (*bt.V2)[snt[k].id()];
          Word w;
          w.CreateFromString(Output,ofact,wrd,false);
          tp->AddWord(w);
        }
      tp->GetScoreBreakdown().Assign(this,pp.fvals);
      tp->Evaluate(src);
      ret->Add(tp);
    }
  ret->NthElement(m_tableLimit);
#if 0
  sort(ret->begin(), ret->end(), CompareTargetPhrase());
  cout << "SOURCE PHRASE: " << src << endl;
  size_t i = 0;
  for (TargetPhraseCollection::iterator r = ret->begin(); r != ret->end(); ++r)
    {
      cout << ++i << " " << **r << endl;
    }
#endif
  return ret;
}
|
||||
|
||||
// Chart-based rule lookup is not provided by this phrase table: every call
// throws a C string (const char*) carrying the message below and never
// returns.  Both arguments are ignored.
ChartRuleLookupManager*
Mmsapt::CreateRuleLookupManager(const ChartParser &,
                                const ChartCellCollectionBase &)
{
  throw "CreateRuleLookupManager is currently not supported in Moses!";
}
|
||||
|
||||
}
|
73
moses/TranslationModel/mmsapt.h
Normal file
73
moses/TranslationModel/mmsapt.h
Normal file
@ -0,0 +1,73 @@
|
||||
// -*- c++ -*-
|
||||
// Sampling phrase table implementation based on memory-mapped suffix arrays.
|
||||
// Design and code by Ulrich Germann.
|
||||
#pragma once
|
||||
|
||||
#include <boost/thread.hpp>
|
||||
|
||||
#include "moses/generic/sorting/VectorIndexSorter.h"
|
||||
#include "moses/generic/sampling/Sampling.h"
|
||||
#include "moses/generic/file_io/ug_stream.h"
|
||||
|
||||
#include "moses/mm/ug_mm_ttrack.h"
|
||||
#include "moses/mm/ug_mm_tsa.h"
|
||||
#include "moses/mm/tpt_tokenindex.h"
|
||||
#include "moses/mm/ug_corpus_token.h"
|
||||
#include "moses/mm/ug_typedefs.h"
|
||||
#include "moses/mm/tpt_pickler.h"
|
||||
#include "moses/mm/ug_bitext.h"
|
||||
#include "moses/mm/ug_lexical_phrase_scorer2.h"
|
||||
|
||||
#include "moses/InputFileStream.h"
|
||||
#include "moses/FactorTypeSet.h"
|
||||
#include "moses/TargetPhrase.h"
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include "moses/TargetPhraseCollection.h"
|
||||
#include <map>
|
||||
|
||||
#include "PhraseDictionary.h"
|
||||
|
||||
using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
using namespace bitext;
|
||||
class Mmsapt : public PhraseDictionary
|
||||
{
|
||||
|
||||
typedef L2R_Token<SimpleWordId> Token;
|
||||
typedef mmBitext<Token> mmbitext;
|
||||
mmbitext bt;
|
||||
|
||||
// string description;
|
||||
string bname;
|
||||
string L1;
|
||||
string L2;
|
||||
float lbop_parameter;
|
||||
size_t default_sample_size;
|
||||
// size_t num_features;
|
||||
size_t input_factor;
|
||||
size_t output_factor; // we can actually return entire Tokens!
|
||||
// built-in feature functions
|
||||
PScorePfwd<Token> calc_pfwd;
|
||||
PScorePbwd<Token> calc_pbwd;
|
||||
PScoreLex<Token> calc_lex; // this one I'd like to see as an external ff eventually
|
||||
PScorePP<Token> apply_pp; // apply phrase penalty
|
||||
void init(string const& line);
|
||||
mutable boost::mutex lock;
|
||||
public:
|
||||
Mmsapt(string const& description, string const& line);
|
||||
Mmsapt(string const& line);
|
||||
void
|
||||
Load();
|
||||
|
||||
TargetPhraseCollection const*
|
||||
GetTargetPhraseCollectionLEGACY(const Phrase& src) const;
|
||||
|
||||
//! Create a sentence-specific manager for SCFG rule lookup.
|
||||
ChartRuleLookupManager*
|
||||
CreateRuleLookupManager(const ChartParser &, const ChartCellCollectionBase &);
|
||||
|
||||
private:
|
||||
};
|
||||
} // end namespace
|
||||
|
57
moses/generic/file_io/ug_stream.cpp
Normal file
57
moses/generic/file_io/ug_stream.cpp
Normal file
@ -0,0 +1,57 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2006,2007,2008 Ulrich Germann
|
||||
// makes opening files a little more convenient
|
||||
|
||||
#include "ug_stream.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
using namespace boost::iostreams;
|
||||
|
||||
// Create a filtering_istream with new, set it up for fname through the
// two-argument overload, and return the pointer.  Nothing in this function
// releases the stream again.
filtering_istream*
open_input_stream(string fname)
{
  filtering_istream* const stream = new filtering_istream();
  open_input_stream(fname, *stream);
  return stream;
}
|
||||
|
||||
// Create a filtering_ostream with new, set it up for fname through the
// two-argument overload, and return the pointer.  Nothing in this function
// releases the stream again.
filtering_ostream*
open_output_stream(string fname)
{
  filtering_ostream* const stream = new filtering_ostream();
  open_output_stream(fname, *stream);
  return stream;
}
|
||||
|
||||
void
|
||||
open_input_stream(string fname, filtering_istream& in)
|
||||
{
|
||||
if (fname.size()>3 && fname.substr(fname.size()-3,3)==".gz")
|
||||
{
|
||||
in.push(gzip_decompressor());
|
||||
}
|
||||
else if (fname.size() > 4 && fname.substr(fname.size()-4,4)==".bz2")
|
||||
{
|
||||
in.push(bzip2_decompressor());
|
||||
}
|
||||
in.push(file_source(fname.c_str()));
|
||||
}
|
||||
|
||||
void
|
||||
open_output_stream(string fname, filtering_ostream& out)
|
||||
{
|
||||
if ((fname.size() > 3 && fname.substr(fname.size()-3,3)==".gz") ||
|
||||
(fname.size() > 4 && fname.substr(fname.size()-4,4)==".gz_"))
|
||||
{
|
||||
out.push(gzip_compressor());
|
||||
}
|
||||
else if ((fname.size() > 4 && fname.substr(fname.size()-4,4)==".bz2") ||
|
||||
(fname.size() > 5 && fname.substr(fname.size()-5,5)==".bz2_"))
|
||||
{
|
||||
out.push(bzip2_compressor());
|
||||
}
|
||||
out.push(file_sink(fname.c_str()));
|
||||
}
|
||||
}
|
37
moses/generic/file_io/ug_stream.h
Normal file
37
moses/generic/file_io/ug_stream.h
Normal file
@ -0,0 +1,37 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2006,2007,2008 Ulrich Germann
|
||||
// makes opening files a little more convenient
|
||||
|
||||
#ifndef __UG_STREAM_HH
|
||||
#define __UG_STREAM_HH
|
||||
#include <boost/iostreams/device/file.hpp>
|
||||
#include <boost/iostreams/categories.hpp> // input_filter_tag
|
||||
#include <boost/iostreams/operations.hpp> // get, WOULD_BLOCK
|
||||
#include <boost/iostreams/copy.hpp> // get, WOULD_BLOCK
|
||||
#include <boost/iostreams/filtering_stream.hpp>
|
||||
#include <boost/iostreams/filter/bzip2.hpp>
|
||||
#include <boost/iostreams/filter/gzip.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
using namespace boost::iostreams;
|
||||
|
||||
/** open input file that is possibly compressed
|
||||
* decompression filters are automatically added based on the file name
|
||||
* gzip for .gz; bzip2 for .bz2.
|
||||
*/
|
||||
filtering_istream* open_input_stream(string fname);
|
||||
void open_input_stream(string fname, filtering_istream& in);
|
||||
// filtering_streambuf<input>* open_input_stream(string fname);
|
||||
|
||||
filtering_ostream* open_output_stream(string fname);
|
||||
void open_output_stream(string fname, filtering_ostream& in);
|
||||
|
||||
|
||||
}
|
||||
#endif
|
46
moses/generic/program_options/ug_get_options.cpp
Normal file
46
moses/generic/program_options/ug_get_options.cpp
Normal file
@ -0,0 +1,46 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2009 Ulrich Germann
|
||||
// boilerplate code to declutter my usual interpret_args() routine
|
||||
#include "ug_get_options.h"
|
||||
#include <fstream>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
void
|
||||
get_options(int ac, char* av[], progopts& o, posopts& a, optsmap& vm,
|
||||
char const* cfgFileParam)
|
||||
{
|
||||
// only get named parameters from command line
|
||||
po::store(po::command_line_parser(ac,av).options(o).run(),vm);
|
||||
|
||||
if (cfgFileParam && vm.count(cfgFileParam))
|
||||
{
|
||||
string cfgFile = vm[cfgFileParam].as<string>();
|
||||
if (!cfgFile.empty())
|
||||
{
|
||||
if (!access(cfgFile.c_str(),F_OK))
|
||||
{
|
||||
ifstream cfg(cfgFile.c_str());
|
||||
po::store(po::parse_config_file(cfg,o),vm);
|
||||
}
|
||||
else
|
||||
{
|
||||
cerr << "Error: cannot find config file '"
|
||||
<< cfgFile << "'!" << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// process positional args, ignoring those set in the config file
|
||||
if (a.max_total_count())
|
||||
po::store(po::command_line_parser(ac,av)
|
||||
.options(o).positional(a).run(),vm);
|
||||
po::notify(vm); // IMPORTANT
|
||||
}
|
||||
}
|
24
moses/generic/program_options/ug_get_options.h
Normal file
24
moses/generic/program_options/ug_get_options.h
Normal file
@ -0,0 +1,24 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2009 Ulrich Germann
|
||||
// boilerplate code to declutter my usual interpret_args() routine
|
||||
#ifndef __ug_get_options_h
|
||||
#define __ug_get_options_h
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
namespace po=boost::program_options;
|
||||
typedef po::options_description progopts;
|
||||
typedef po::positional_options_description posopts;
|
||||
typedef po::variables_map optsmap;
|
||||
|
||||
void
|
||||
get_options(int ac, char* av[],
|
||||
progopts & o,
|
||||
posopts & a,
|
||||
optsmap & vm,
|
||||
char const* cfgFileParam=NULL);
|
||||
|
||||
}
|
||||
#endif
|
41
moses/mm/Jamfile
Normal file
41
moses/mm/Jamfile
Normal file
@ -0,0 +1,41 @@
|
||||
exe mtt-build :
|
||||
mtt-build.cc
|
||||
$(TOP)/moses/generic//generic
|
||||
$(TOP)//boost_iostreams
|
||||
$(TOP)//boost_program_options
|
||||
$(TOP)/moses/mm//mm
|
||||
$(TOP)/util//kenutil
|
||||
;
|
||||
|
||||
exe mtt-dump :
|
||||
mtt-dump.cc
|
||||
$(TOP)/moses/generic//generic
|
||||
$(TOP)//boost_iostreams
|
||||
$(TOP)//boost_program_options
|
||||
$(TOP)/moses/mm//mm
|
||||
$(TOP)/util//kenutil
|
||||
;
|
||||
|
||||
exe symal2mam :
|
||||
symal2mam.cc
|
||||
$(TOP)/moses/generic//generic
|
||||
$(TOP)//boost_iostreams
|
||||
$(TOP)//boost_program_options
|
||||
$(TOP)/moses/mm//mm
|
||||
$(TOP)/util//kenutil
|
||||
;
|
||||
|
||||
exe custom-pt :
|
||||
custom-pt.cc
|
||||
$(TOP)/moses/generic//generic
|
||||
$(TOP)//boost_iostreams
|
||||
$(TOP)//boost_program_options
|
||||
$(TOP)/moses/mm//mm
|
||||
$(TOP)/util//kenutil
|
||||
;
|
||||
|
||||
|
||||
install $(PREFIX)/bin : mtt-build mtt-dump symal2mam custom-pt ;
|
||||
|
||||
fakelib mm : [ glob ug_*.cc tpt_*.cc ] ;
|
||||
|
100
moses/mm/Makefile
Normal file
100
moses/mm/Makefile
Normal file
@ -0,0 +1,100 @@
|
||||
# Some systems apparently distinguish between shell
|
||||
# variables and environment variables. The latter are
|
||||
# visible to the make utility, the former apparently not,
|
||||
# so we need to set them if they are not defined yet
|
||||
|
||||
# ===============================================================================
|
||||
# COMPILATION PREFERENCES
|
||||
# ===============================================================================
|
||||
# CCACHE: if set to ccache, use ccache to speed up compilation
|
||||
# OPTI: optimization level
|
||||
# PROF: profiler switches
|
||||
|
||||
CCACHE = ccache
|
||||
OPTI = 3
|
||||
EXE_TAG = exe
|
||||
PROF =
|
||||
# PROF = -g -pg
|
||||
|
||||
# ===============================================================================
|
||||
|
||||
SHELL = bash
|
||||
MAKEFLAGS += --warn-undefined-variables
|
||||
.DEFAULT_GOAL = all
|
||||
.SUFFIXES:
|
||||
|
||||
# ===============================================================================
|
||||
# COMPILATION 'LOCALIZATION'
|
||||
HOST ?= $(shell hostname)
|
||||
HOSTTYPE ?= $(shell uname -m)
|
||||
|
||||
MOSES_ROOT = ${HOME}/code/moses/master/mosesdecoder
|
||||
WDIR = build/${HOSTTYPE}/${OPTI}
|
||||
VPATH = ${MOSES_ROOT}/
|
||||
CXXFLAGS = ${PROF} -ggdb -Wall -O${OPTI} ${INCLUDES}
|
||||
CXXFLAGS += -DMAX_NUM_FACTORS=4
|
||||
CXXFLAGS += -DKENLM_MAX_ORDER=5
|
||||
modirs := $(addprefix -I,$(shell find ${MOSES_ROOT}/moses ${MOSES_ROOT}/contrib -type d))
|
||||
CXXFLAGS += -I${MOSES_ROOT}
|
||||
INCLUDES =
|
||||
BZLIB =
|
||||
BOOSTLIBTAG =
|
||||
|
||||
REQLIBS = m z pthread lzma ${BZLIB} \
|
||||
boost_thread${BOOSTLIBTAG} \
|
||||
boost_iostreams${BOOSTLIBTAG} \
|
||||
boost_program_options${BOOSTLIBTAG} \
|
||||
boost_system${BOOSTLIBTAG} \
|
||||
boost_filesystem${BOOSTLIBTAG}
|
||||
|
||||
# icuuc icuio icui18n \
|
||||
|
||||
LIBS = $(addprefix -l, ${REQLIBS} moses)
|
||||
LIBDIRS = -L${MOSES_ROOT}/lib
|
||||
BINDIR = bin
|
||||
ifeq "$(OPTI)" "0"
|
||||
BINPREF = debug.
|
||||
else
|
||||
BINPREF =
|
||||
endif
|
||||
|
||||
|
||||
OBJ2 :=
|
||||
|
||||
define compile
|
||||
|
||||
DEP += ${WDIR}/$(basename $(notdir $1)).d
|
||||
${WDIR}/$(basename $(notdir $1)).o : $1 $(wildcard $(basename $1).h)
|
||||
@echo -e "COMPILING $1"
|
||||
@mkdir -p $$(@D)
|
||||
${CXX} ${CXXFLAGS} -MD -MP -c $$(abspath $$<) -o $$@
|
||||
|
||||
endef
|
||||
|
||||
programs = mtt-build mtt-dump symal2mam custom-pt mmlex-build
|
||||
|
||||
all: $(addprefix ${BINDIR}/${BINPREF}, $(programs))
|
||||
@echo $^
|
||||
clean:
|
||||
rm -f ${WDIR}/*.o ${WDIR}/*.d
|
||||
|
||||
custom-pt: ${BINDIR}/${BINPREF}custom-pt
|
||||
echo $^
|
||||
|
||||
INMOGEN = $(wildcard ${MOSES_ROOT}/moses/generic/*/*.cpp)
|
||||
OBJ = $(patsubst %.cc,%.o,$(wildcard $(patsubst %.h,%.cc,$(wildcard *.h))))
|
||||
OBJ += $(patsubst %.cpp,%.o,${INMOGEN})
|
||||
EXE = $(patsubst %.cc,%.o,$(filter-out $(patsubst %.h,%.cc,$(wildcard *.h)),$(wildcard *.cc)))
|
||||
|
||||
$(foreach cpp,${INMOGEN},$(eval $(call compile,${cpp})))
|
||||
$(foreach cpp,$(wildcard *.cc),$(eval $(call compile,${cpp})))
|
||||
$(addprefix ${BINDIR}/${BINPREF}, $(programs)): $(addprefix ${WDIR}/,$(notdir ${OBJ}))
|
||||
$(addprefix ${BINDIR}/${BINPREF}, $(programs)): ${MOSES_ROOT}/lib/libmoses.a
|
||||
${BINDIR}/${BINPREF}%: ${WDIR}/%.o
|
||||
echo PREREQS: $<
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ ${LIBDIRS} ${LIBS}
|
||||
|
||||
.SECONDARY:
|
||||
|
||||
-include $(DEP)
|
||||
|
316
moses/mm/build/x86_64/0/tpt_pickler.d
Normal file
316
moses/mm/build/x86_64/0/tpt_pickler.d
Normal file
@ -0,0 +1,316 @@
|
||||
build/x86_64/0/tpt_pickler.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.cc \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.h \
|
||||
/usr/include/c++/4.5/iostream \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
|
||||
/usr/include/bits/wordsize.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
|
||||
/usr/include/features.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
|
||||
/usr/include/c++/4.5/ostream /usr/include/c++/4.5/ios \
|
||||
/usr/include/c++/4.5/iosfwd /usr/include/c++/4.5/bits/stringfwd.h \
|
||||
/usr/include/c++/4.5/bits/postypes.h /usr/include/c++/4.5/cwchar \
|
||||
/usr/include/c++/4.5/cstddef \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
|
||||
/usr/include/wchar.h /usr/include/stdio.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
|
||||
/usr/include/bits/wchar.h /usr/include/xlocale.h \
|
||||
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/char_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h \
|
||||
/usr/include/c++/4.5/bits/functexcept.h \
|
||||
/usr/include/c++/4.5/exception_defines.h \
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h \
|
||||
/usr/include/c++/4.5/ext/type_traits.h \
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
|
||||
/usr/include/c++/4.5/bits/concept_check.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h \
|
||||
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/localefwd.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
|
||||
/usr/include/c++/4.5/clocale /usr/include/locale.h \
|
||||
/usr/include/bits/locale.h /usr/include/c++/4.5/cctype \
|
||||
/usr/include/ctype.h /usr/include/bits/types.h \
|
||||
/usr/include/bits/typesizes.h /usr/include/endian.h \
|
||||
/usr/include/bits/endian.h /usr/include/bits/byteswap.h \
|
||||
/usr/include/c++/4.5/bits/ios_base.h \
|
||||
/usr/include/c++/4.5/ext/atomicity.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
|
||||
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
|
||||
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
|
||||
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
|
||||
/usr/include/bits/setjmp.h /usr/include/unistd.h \
|
||||
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
|
||||
/usr/include/bits/confname.h /usr/include/getopt.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
|
||||
/usr/include/c++/4.5/bits/locale_classes.h /usr/include/c++/4.5/string \
|
||||
/usr/include/c++/4.5/bits/allocator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
|
||||
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h \
|
||||
/usr/include/c++/4.5/cxxabi-forced.h \
|
||||
/usr/include/c++/4.5/bits/stl_function.h \
|
||||
/usr/include/c++/4.5/backward/binders.h \
|
||||
/usr/include/c++/4.5/bits/basic_string.h \
|
||||
/usr/include/c++/4.5/initializer_list \
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc \
|
||||
/usr/include/c++/4.5/streambuf /usr/include/c++/4.5/bits/streambuf.tcc \
|
||||
/usr/include/c++/4.5/bits/basic_ios.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
|
||||
/usr/include/wctype.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc \
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc \
|
||||
/usr/include/c++/4.5/bits/ostream.tcc /usr/include/c++/4.5/istream \
|
||||
/usr/include/c++/4.5/bits/istream.tcc /usr/include/c++/4.5/vector \
|
||||
/usr/include/c++/4.5/bits/stl_construct.h \
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h \
|
||||
/usr/include/c++/4.5/bits/stl_vector.h \
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h \
|
||||
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/map \
|
||||
/usr/include/c++/4.5/bits/stl_tree.h /usr/include/c++/4.5/bits/stl_map.h \
|
||||
/usr/include/c++/4.5/bits/stl_multimap.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
|
||||
/usr/include/stdint.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/num_read_write.h \
|
||||
/usr/include/byteswap.h /usr/include/c++/4.5/cassert \
|
||||
/usr/include/assert.h /usr/include/sys/stat.h /usr/include/bits/stat.h
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.h:
|
||||
|
||||
/usr/include/c++/4.5/iostream:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
|
||||
|
||||
/usr/include/bits/wordsize.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
|
||||
|
||||
/usr/include/features.h:
|
||||
|
||||
/usr/include/sys/cdefs.h:
|
||||
|
||||
/usr/include/gnu/stubs.h:
|
||||
|
||||
/usr/include/gnu/stubs-64.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/ostream:
|
||||
|
||||
/usr/include/c++/4.5/ios:
|
||||
|
||||
/usr/include/c++/4.5/iosfwd:
|
||||
|
||||
/usr/include/c++/4.5/bits/stringfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/postypes.h:
|
||||
|
||||
/usr/include/c++/4.5/cwchar:
|
||||
|
||||
/usr/include/c++/4.5/cstddef:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
|
||||
|
||||
/usr/include/wchar.h:
|
||||
|
||||
/usr/include/stdio.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
|
||||
|
||||
/usr/include/bits/wchar.h:
|
||||
|
||||
/usr/include/xlocale.h:
|
||||
|
||||
/usr/include/c++/4.5/exception:
|
||||
|
||||
/usr/include/c++/4.5/bits/char_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/functexcept.h:
|
||||
|
||||
/usr/include/c++/4.5/exception_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_pair.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/move.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/concept_check.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/debug/debug.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/localefwd.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
|
||||
|
||||
/usr/include/c++/4.5/clocale:
|
||||
|
||||
/usr/include/locale.h:
|
||||
|
||||
/usr/include/bits/locale.h:
|
||||
|
||||
/usr/include/c++/4.5/cctype:
|
||||
|
||||
/usr/include/ctype.h:
|
||||
|
||||
/usr/include/bits/types.h:
|
||||
|
||||
/usr/include/bits/typesizes.h:
|
||||
|
||||
/usr/include/endian.h:
|
||||
|
||||
/usr/include/bits/endian.h:
|
||||
|
||||
/usr/include/bits/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ios_base.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/atomicity.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
|
||||
|
||||
/usr/include/pthread.h:
|
||||
|
||||
/usr/include/sched.h:
|
||||
|
||||
/usr/include/time.h:
|
||||
|
||||
/usr/include/bits/sched.h:
|
||||
|
||||
/usr/include/bits/time.h:
|
||||
|
||||
/usr/include/signal.h:
|
||||
|
||||
/usr/include/bits/sigset.h:
|
||||
|
||||
/usr/include/bits/pthreadtypes.h:
|
||||
|
||||
/usr/include/bits/setjmp.h:
|
||||
|
||||
/usr/include/unistd.h:
|
||||
|
||||
/usr/include/bits/posix_opt.h:
|
||||
|
||||
/usr/include/bits/environments.h:
|
||||
|
||||
/usr/include/bits/confname.h:
|
||||
|
||||
/usr/include/getopt.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.h:
|
||||
|
||||
/usr/include/c++/4.5/string:
|
||||
|
||||
/usr/include/c++/4.5/bits/allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/new_allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/new:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h:
|
||||
|
||||
/usr/include/c++/4.5/cxxabi-forced.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_function.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/binders.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.h:
|
||||
|
||||
/usr/include/c++/4.5/initializer_list:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc:
|
||||
|
||||
/usr/include/c++/4.5/streambuf:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.h:
|
||||
|
||||
/usr/include/c++/4.5/cwctype:
|
||||
|
||||
/usr/include/wctype.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/istream:
|
||||
|
||||
/usr/include/c++/4.5/bits/istream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/vector:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_construct.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_vector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/vector.tcc:
|
||||
|
||||
/usr/include/c++/4.5/map:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_tree.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_map.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_multimap.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
|
||||
|
||||
/usr/include/stdint.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/num_read_write.h:
|
||||
|
||||
/usr/include/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/cassert:
|
||||
|
||||
/usr/include/assert.h:
|
||||
|
||||
/usr/include/sys/stat.h:
|
||||
|
||||
/usr/include/bits/stat.h:
|
294
moses/mm/build/x86_64/0/tpt_tightindex.d
Normal file
294
moses/mm/build/x86_64/0/tpt_tightindex.d
Normal file
@ -0,0 +1,294 @@
|
||||
build/x86_64/0/tpt_tightindex.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.cc \
|
||||
/usr/include/c++/4.5/iostream \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
|
||||
/usr/include/bits/wordsize.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
|
||||
/usr/include/features.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
|
||||
/usr/include/c++/4.5/ostream /usr/include/c++/4.5/ios \
|
||||
/usr/include/c++/4.5/iosfwd /usr/include/c++/4.5/bits/stringfwd.h \
|
||||
/usr/include/c++/4.5/bits/postypes.h /usr/include/c++/4.5/cwchar \
|
||||
/usr/include/c++/4.5/cstddef \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
|
||||
/usr/include/wchar.h /usr/include/stdio.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
|
||||
/usr/include/bits/wchar.h /usr/include/xlocale.h \
|
||||
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/char_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h \
|
||||
/usr/include/c++/4.5/bits/functexcept.h \
|
||||
/usr/include/c++/4.5/exception_defines.h \
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h \
|
||||
/usr/include/c++/4.5/ext/type_traits.h \
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
|
||||
/usr/include/c++/4.5/bits/concept_check.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h \
|
||||
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/localefwd.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
|
||||
/usr/include/c++/4.5/clocale /usr/include/locale.h \
|
||||
/usr/include/bits/locale.h /usr/include/c++/4.5/cctype \
|
||||
/usr/include/ctype.h /usr/include/bits/types.h \
|
||||
/usr/include/bits/typesizes.h /usr/include/endian.h \
|
||||
/usr/include/bits/endian.h /usr/include/bits/byteswap.h \
|
||||
/usr/include/c++/4.5/bits/ios_base.h \
|
||||
/usr/include/c++/4.5/ext/atomicity.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
|
||||
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
|
||||
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
|
||||
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
|
||||
/usr/include/bits/setjmp.h /usr/include/unistd.h \
|
||||
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
|
||||
/usr/include/bits/confname.h /usr/include/getopt.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
|
||||
/usr/include/c++/4.5/bits/locale_classes.h /usr/include/c++/4.5/string \
|
||||
/usr/include/c++/4.5/bits/allocator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
|
||||
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h \
|
||||
/usr/include/c++/4.5/cxxabi-forced.h \
|
||||
/usr/include/c++/4.5/bits/stl_function.h \
|
||||
/usr/include/c++/4.5/backward/binders.h \
|
||||
/usr/include/c++/4.5/bits/basic_string.h \
|
||||
/usr/include/c++/4.5/initializer_list \
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc \
|
||||
/usr/include/c++/4.5/streambuf /usr/include/c++/4.5/bits/streambuf.tcc \
|
||||
/usr/include/c++/4.5/bits/basic_ios.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
|
||||
/usr/include/wctype.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc \
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc \
|
||||
/usr/include/c++/4.5/bits/ostream.tcc /usr/include/c++/4.5/istream \
|
||||
/usr/include/c++/4.5/bits/istream.tcc /usr/include/assert.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.h \
|
||||
/usr/include/c++/4.5/map /usr/include/c++/4.5/bits/stl_tree.h \
|
||||
/usr/include/c++/4.5/bits/stl_map.h \
|
||||
/usr/include/c++/4.5/bits/stl_multimap.h /usr/include/c++/4.5/sstream \
|
||||
/usr/include/c++/4.5/bits/sstream.tcc \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
|
||||
/usr/include/stdint.h /usr/include/c++/4.5/cassert
|
||||
|
||||
/usr/include/c++/4.5/iostream:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
|
||||
|
||||
/usr/include/bits/wordsize.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
|
||||
|
||||
/usr/include/features.h:
|
||||
|
||||
/usr/include/sys/cdefs.h:
|
||||
|
||||
/usr/include/gnu/stubs.h:
|
||||
|
||||
/usr/include/gnu/stubs-64.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/ostream:
|
||||
|
||||
/usr/include/c++/4.5/ios:
|
||||
|
||||
/usr/include/c++/4.5/iosfwd:
|
||||
|
||||
/usr/include/c++/4.5/bits/stringfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/postypes.h:
|
||||
|
||||
/usr/include/c++/4.5/cwchar:
|
||||
|
||||
/usr/include/c++/4.5/cstddef:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
|
||||
|
||||
/usr/include/wchar.h:
|
||||
|
||||
/usr/include/stdio.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
|
||||
|
||||
/usr/include/bits/wchar.h:
|
||||
|
||||
/usr/include/xlocale.h:
|
||||
|
||||
/usr/include/c++/4.5/exception:
|
||||
|
||||
/usr/include/c++/4.5/bits/char_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/functexcept.h:
|
||||
|
||||
/usr/include/c++/4.5/exception_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_pair.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/move.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/concept_check.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/debug/debug.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/localefwd.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
|
||||
|
||||
/usr/include/c++/4.5/clocale:
|
||||
|
||||
/usr/include/locale.h:
|
||||
|
||||
/usr/include/bits/locale.h:
|
||||
|
||||
/usr/include/c++/4.5/cctype:
|
||||
|
||||
/usr/include/ctype.h:
|
||||
|
||||
/usr/include/bits/types.h:
|
||||
|
||||
/usr/include/bits/typesizes.h:
|
||||
|
||||
/usr/include/endian.h:
|
||||
|
||||
/usr/include/bits/endian.h:
|
||||
|
||||
/usr/include/bits/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ios_base.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/atomicity.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
|
||||
|
||||
/usr/include/pthread.h:
|
||||
|
||||
/usr/include/sched.h:
|
||||
|
||||
/usr/include/time.h:
|
||||
|
||||
/usr/include/bits/sched.h:
|
||||
|
||||
/usr/include/bits/time.h:
|
||||
|
||||
/usr/include/signal.h:
|
||||
|
||||
/usr/include/bits/sigset.h:
|
||||
|
||||
/usr/include/bits/pthreadtypes.h:
|
||||
|
||||
/usr/include/bits/setjmp.h:
|
||||
|
||||
/usr/include/unistd.h:
|
||||
|
||||
/usr/include/bits/posix_opt.h:
|
||||
|
||||
/usr/include/bits/environments.h:
|
||||
|
||||
/usr/include/bits/confname.h:
|
||||
|
||||
/usr/include/getopt.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.h:
|
||||
|
||||
/usr/include/c++/4.5/string:
|
||||
|
||||
/usr/include/c++/4.5/bits/allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/new_allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/new:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h:
|
||||
|
||||
/usr/include/c++/4.5/cxxabi-forced.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_function.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/binders.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.h:
|
||||
|
||||
/usr/include/c++/4.5/initializer_list:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc:
|
||||
|
||||
/usr/include/c++/4.5/streambuf:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.h:
|
||||
|
||||
/usr/include/c++/4.5/cwctype:
|
||||
|
||||
/usr/include/wctype.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/istream:
|
||||
|
||||
/usr/include/c++/4.5/bits/istream.tcc:
|
||||
|
||||
/usr/include/assert.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.h:
|
||||
|
||||
/usr/include/c++/4.5/map:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_tree.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_map.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_multimap.h:
|
||||
|
||||
/usr/include/c++/4.5/sstream:
|
||||
|
||||
/usr/include/c++/4.5/bits/sstream.tcc:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
|
||||
|
||||
/usr/include/stdint.h:
|
||||
|
||||
/usr/include/c++/4.5/cassert:
|
2389
moses/mm/build/x86_64/0/tpt_tokenindex.d
Normal file
2389
moses/mm/build/x86_64/0/tpt_tokenindex.d
Normal file
File diff suppressed because it is too large
Load Diff
2571
moses/mm/build/x86_64/0/ug_bitext.d
Normal file
2571
moses/mm/build/x86_64/0/ug_bitext.d
Normal file
File diff suppressed because it is too large
Load Diff
542
moses/mm/build/x86_64/0/ug_conll_record.d
Normal file
542
moses/mm/build/x86_64/0/ug_conll_record.d
Normal file
@ -0,0 +1,542 @@
|
||||
build/x86_64/0/ug_conll_record.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.cc \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
|
||||
/usr/include/boost/dynamic_bitset.hpp \
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
|
||||
/usr/include/assert.h /usr/include/features.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
|
||||
/usr/include/gnu/stubs-64.h /usr/include/c++/4.5/string \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
|
||||
/usr/include/c++/4.5/bits/stringfwd.h \
|
||||
/usr/include/c++/4.5/bits/char_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
|
||||
/usr/include/c++/4.5/bits/functexcept.h \
|
||||
/usr/include/c++/4.5/exception_defines.h \
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h \
|
||||
/usr/include/c++/4.5/ext/type_traits.h \
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
|
||||
/usr/include/c++/4.5/bits/concept_check.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h \
|
||||
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
|
||||
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
|
||||
/usr/include/bits/wchar.h /usr/include/xlocale.h \
|
||||
/usr/include/c++/4.5/bits/allocator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
|
||||
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
|
||||
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
|
||||
/usr/include/c++/4.5/clocale /usr/include/locale.h \
|
||||
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
|
||||
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
|
||||
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
|
||||
/usr/include/endian.h /usr/include/bits/endian.h \
|
||||
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
|
||||
/usr/include/c++/4.5/cxxabi-forced.h \
|
||||
/usr/include/c++/4.5/bits/stl_function.h \
|
||||
/usr/include/c++/4.5/backward/binders.h \
|
||||
/usr/include/c++/4.5/bits/basic_string.h \
|
||||
/usr/include/c++/4.5/ext/atomicity.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
|
||||
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
|
||||
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
|
||||
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
|
||||
/usr/include/bits/setjmp.h /usr/include/unistd.h \
|
||||
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
|
||||
/usr/include/bits/confname.h /usr/include/getopt.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
|
||||
/usr/include/c++/4.5/initializer_list \
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc \
|
||||
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
|
||||
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
|
||||
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
|
||||
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
|
||||
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
|
||||
/usr/include/sys/select.h /usr/include/bits/select.h \
|
||||
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h \
|
||||
/usr/include/c++/4.5/bits/stl_heap.h \
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h \
|
||||
/usr/include/c++/4.5/bits/stl_construct.h \
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h \
|
||||
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h \
|
||||
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
|
||||
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
|
||||
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
|
||||
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
|
||||
/usr/include/bits/stdio_lim.h \
|
||||
/usr/include/boost/dynamic_bitset/config.hpp \
|
||||
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
|
||||
/usr/include/boost/config/select_compiler_config.hpp \
|
||||
/usr/include/boost/config/compiler/gcc.hpp \
|
||||
/usr/include/boost/config/select_stdlib_config.hpp \
|
||||
/usr/include/boost/config/no_tr1/utility.hpp \
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
|
||||
/usr/include/boost/config/select_platform_config.hpp \
|
||||
/usr/include/boost/config/platform/linux.hpp \
|
||||
/usr/include/boost/config/posix_features.hpp \
|
||||
/usr/include/boost/config/suffix.hpp \
|
||||
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
|
||||
/usr/include/c++/4.5/bits/locale_classes.h \
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
|
||||
/usr/include/wctype.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
|
||||
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc \
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
|
||||
/usr/include/c++/4.5/ctime \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
|
||||
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
|
||||
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
|
||||
/usr/include/c++/4.5/bits/basic_ios.h \
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
|
||||
/usr/include/c++/4.5/bits/ostream.tcc \
|
||||
/usr/include/c++/4.5/bits/istream.tcc \
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h \
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp \
|
||||
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h \
|
||||
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
|
||||
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
|
||||
/usr/include/boost/pending/integer_log2.hpp \
|
||||
/usr/include/boost/shared_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp \
|
||||
/usr/include/boost/config/no_tr1/memory.hpp \
|
||||
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
|
||||
/usr/include/boost/throw_exception.hpp \
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
|
||||
/usr/include/boost/exception/exception.hpp \
|
||||
/usr/include/boost/current_function.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
|
||||
/usr/include/c++/4.5/functional \
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
|
||||
/usr/include/boost/memory_order.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
|
||||
/usr/include/boost/scoped_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
|
||||
/usr/include/stdint.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/assert.h:
|
||||
|
||||
/usr/include/features.h:
|
||||
|
||||
/usr/include/sys/cdefs.h:
|
||||
|
||||
/usr/include/bits/wordsize.h:
|
||||
|
||||
/usr/include/gnu/stubs.h:
|
||||
|
||||
/usr/include/gnu/stubs-64.h:
|
||||
|
||||
/usr/include/c++/4.5/string:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stringfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/char_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h:
|
||||
|
||||
/usr/include/c++/4.5/cstddef:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/functexcept.h:
|
||||
|
||||
/usr/include/c++/4.5/exception_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_pair.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/move.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/concept_check.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/debug/debug.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/postypes.h:
|
||||
|
||||
/usr/include/c++/4.5/cwchar:
|
||||
|
||||
/usr/include/wchar.h:
|
||||
|
||||
/usr/include/stdio.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
|
||||
|
||||
/usr/include/bits/wchar.h:
|
||||
|
||||
/usr/include/xlocale.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/new_allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/new:
|
||||
|
||||
/usr/include/c++/4.5/exception:
|
||||
|
||||
/usr/include/c++/4.5/bits/localefwd.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
|
||||
|
||||
/usr/include/c++/4.5/clocale:
|
||||
|
||||
/usr/include/locale.h:
|
||||
|
||||
/usr/include/bits/locale.h:
|
||||
|
||||
/usr/include/c++/4.5/iosfwd:
|
||||
|
||||
/usr/include/c++/4.5/cctype:
|
||||
|
||||
/usr/include/ctype.h:
|
||||
|
||||
/usr/include/bits/types.h:
|
||||
|
||||
/usr/include/bits/typesizes.h:
|
||||
|
||||
/usr/include/endian.h:
|
||||
|
||||
/usr/include/bits/endian.h:
|
||||
|
||||
/usr/include/bits/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h:
|
||||
|
||||
/usr/include/c++/4.5/cxxabi-forced.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_function.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/binders.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/atomicity.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
|
||||
|
||||
/usr/include/pthread.h:
|
||||
|
||||
/usr/include/sched.h:
|
||||
|
||||
/usr/include/time.h:
|
||||
|
||||
/usr/include/bits/sched.h:
|
||||
|
||||
/usr/include/bits/time.h:
|
||||
|
||||
/usr/include/signal.h:
|
||||
|
||||
/usr/include/bits/sigset.h:
|
||||
|
||||
/usr/include/bits/pthreadtypes.h:
|
||||
|
||||
/usr/include/bits/setjmp.h:
|
||||
|
||||
/usr/include/unistd.h:
|
||||
|
||||
/usr/include/bits/posix_opt.h:
|
||||
|
||||
/usr/include/bits/environments.h:
|
||||
|
||||
/usr/include/bits/confname.h:
|
||||
|
||||
/usr/include/getopt.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
|
||||
|
||||
/usr/include/c++/4.5/initializer_list:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc:
|
||||
|
||||
/usr/include/c++/4.5/stdexcept:
|
||||
|
||||
/usr/include/c++/4.5/algorithm:
|
||||
|
||||
/usr/include/c++/4.5/utility:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_relops.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algo.h:
|
||||
|
||||
/usr/include/c++/4.5/cstdlib:
|
||||
|
||||
/usr/include/stdlib.h:
|
||||
|
||||
/usr/include/bits/waitflags.h:
|
||||
|
||||
/usr/include/bits/waitstatus.h:
|
||||
|
||||
/usr/include/sys/types.h:
|
||||
|
||||
/usr/include/sys/select.h:
|
||||
|
||||
/usr/include/bits/select.h:
|
||||
|
||||
/usr/include/sys/sysmacros.h:
|
||||
|
||||
/usr/include/alloca.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_heap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_construct.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h:
|
||||
|
||||
/usr/include/c++/4.5/vector:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_vector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/vector.tcc:
|
||||
|
||||
/usr/include/c++/4.5/climits:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
|
||||
|
||||
/usr/include/limits.h:
|
||||
|
||||
/usr/include/bits/posix1_lim.h:
|
||||
|
||||
/usr/include/bits/local_lim.h:
|
||||
|
||||
/usr/include/linux/limits.h:
|
||||
|
||||
/usr/include/bits/posix2_lim.h:
|
||||
|
||||
/usr/include/bits/xopen_lim.h:
|
||||
|
||||
/usr/include/bits/stdio_lim.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/config.hpp:
|
||||
|
||||
/usr/include/boost/config.hpp:
|
||||
|
||||
/usr/include/boost/config/user.hpp:
|
||||
|
||||
/usr/include/boost/config/select_compiler_config.hpp:
|
||||
|
||||
/usr/include/boost/config/compiler/gcc.hpp:
|
||||
|
||||
/usr/include/boost/config/select_stdlib_config.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/utility.hpp:
|
||||
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
|
||||
|
||||
/usr/include/boost/config/select_platform_config.hpp:
|
||||
|
||||
/usr/include/boost/config/platform/linux.hpp:
|
||||
|
||||
/usr/include/boost/config/posix_features.hpp:
|
||||
|
||||
/usr/include/boost/config/suffix.hpp:
|
||||
|
||||
/usr/include/boost/detail/workaround.hpp:
|
||||
|
||||
/usr/include/c++/4.5/locale:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.h:
|
||||
|
||||
/usr/include/c++/4.5/cwctype:
|
||||
|
||||
/usr/include/wctype.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ios_base.h:
|
||||
|
||||
/usr/include/c++/4.5/streambuf:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
|
||||
|
||||
/usr/include/c++/4.5/ctime:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
|
||||
|
||||
/usr/include/libintl.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/codecvt.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
|
||||
|
||||
/usr/include/c++/4.5/istream:
|
||||
|
||||
/usr/include/c++/4.5/ios:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc:
|
||||
|
||||
/usr/include/c++/4.5/ostream:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/istream.tcc:
|
||||
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp:
|
||||
|
||||
/usr/include/c++/4.5/memory:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h:
|
||||
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/detail/iterator.hpp:
|
||||
|
||||
/usr/include/c++/4.5/iterator:
|
||||
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h:
|
||||
|
||||
/usr/include/boost/static_assert.hpp:
|
||||
|
||||
/usr/include/boost/limits.hpp:
|
||||
|
||||
/usr/include/c++/4.5/limits:
|
||||
|
||||
/usr/include/boost/pending/lowest_bit.hpp:
|
||||
|
||||
/usr/include/boost/pending/integer_log2.hpp:
|
||||
|
||||
/usr/include/boost/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/memory.hpp:
|
||||
|
||||
/usr/include/boost/assert.hpp:
|
||||
|
||||
/usr/include/boost/checked_delete.hpp:
|
||||
|
||||
/usr/include/boost/throw_exception.hpp:
|
||||
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
|
||||
|
||||
/usr/include/boost/exception/exception.hpp:
|
||||
|
||||
/usr/include/boost/current_function.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
|
||||
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp:
|
||||
|
||||
/usr/include/c++/4.5/typeinfo:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
|
||||
|
||||
/usr/include/c++/4.5/functional:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
|
||||
|
||||
/usr/include/boost/memory_order.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
|
||||
|
||||
/usr/include/boost/scoped_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
|
||||
|
||||
/usr/include/stdint.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
|
2388
moses/mm/build/x86_64/0/ug_corpus_token.d
Normal file
2388
moses/mm/build/x86_64/0/ug_corpus_token.d
Normal file
File diff suppressed because it is too large
Load Diff
2399
moses/mm/build/x86_64/0/ug_deptree.d
Normal file
2399
moses/mm/build/x86_64/0/ug_deptree.d
Normal file
File diff suppressed because it is too large
Load Diff
1119
moses/mm/build/x86_64/0/ug_get_options.d
Normal file
1119
moses/mm/build/x86_64/0/ug_get_options.d
Normal file
File diff suppressed because it is too large
Load Diff
2
moses/mm/build/x86_64/0/ug_mmbitext.d
Normal file
2
moses/mm/build/x86_64/0/ug_mmbitext.d
Normal file
@ -0,0 +1,2 @@
|
||||
build/x86_64/0/ug_mmbitext.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_mmbitext.cc
|
1919
moses/mm/build/x86_64/0/ug_stream.d
Normal file
1919
moses/mm/build/x86_64/0/ug_stream.d
Normal file
File diff suppressed because it is too large
Load Diff
552
moses/mm/build/x86_64/0/ug_tsa_array_entry.d
Normal file
552
moses/mm/build/x86_64/0/ug_tsa_array_entry.d
Normal file
@ -0,0 +1,552 @@
|
||||
build/x86_64/0/ug_tsa_array_entry.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.cc \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h \
|
||||
/usr/include/c++/4.5/cassert /usr/include/assert.h \
|
||||
/usr/include/features.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
|
||||
/usr/include/gnu/stubs-64.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
|
||||
/usr/include/boost/dynamic_bitset.hpp \
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
|
||||
/usr/include/c++/4.5/string \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
|
||||
/usr/include/c++/4.5/bits/stringfwd.h \
|
||||
/usr/include/c++/4.5/bits/char_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
|
||||
/usr/include/c++/4.5/bits/functexcept.h \
|
||||
/usr/include/c++/4.5/exception_defines.h \
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h \
|
||||
/usr/include/c++/4.5/ext/type_traits.h \
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
|
||||
/usr/include/c++/4.5/bits/concept_check.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h \
|
||||
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
|
||||
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
|
||||
/usr/include/bits/wchar.h /usr/include/xlocale.h \
|
||||
/usr/include/c++/4.5/bits/allocator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
|
||||
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
|
||||
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
|
||||
/usr/include/c++/4.5/clocale /usr/include/locale.h \
|
||||
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
|
||||
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
|
||||
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
|
||||
/usr/include/endian.h /usr/include/bits/endian.h \
|
||||
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
|
||||
/usr/include/c++/4.5/cxxabi-forced.h \
|
||||
/usr/include/c++/4.5/bits/stl_function.h \
|
||||
/usr/include/c++/4.5/backward/binders.h \
|
||||
/usr/include/c++/4.5/bits/basic_string.h \
|
||||
/usr/include/c++/4.5/ext/atomicity.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
|
||||
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
|
||||
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
|
||||
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
|
||||
/usr/include/bits/setjmp.h /usr/include/unistd.h \
|
||||
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
|
||||
/usr/include/bits/confname.h /usr/include/getopt.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
|
||||
/usr/include/c++/4.5/initializer_list \
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc \
|
||||
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
|
||||
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
|
||||
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
|
||||
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
|
||||
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
|
||||
/usr/include/sys/select.h /usr/include/bits/select.h \
|
||||
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h \
|
||||
/usr/include/c++/4.5/bits/stl_heap.h \
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h \
|
||||
/usr/include/c++/4.5/bits/stl_construct.h \
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h \
|
||||
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h \
|
||||
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
|
||||
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
|
||||
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
|
||||
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
|
||||
/usr/include/bits/stdio_lim.h \
|
||||
/usr/include/boost/dynamic_bitset/config.hpp \
|
||||
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
|
||||
/usr/include/boost/config/select_compiler_config.hpp \
|
||||
/usr/include/boost/config/compiler/gcc.hpp \
|
||||
/usr/include/boost/config/select_stdlib_config.hpp \
|
||||
/usr/include/boost/config/no_tr1/utility.hpp \
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
|
||||
/usr/include/boost/config/select_platform_config.hpp \
|
||||
/usr/include/boost/config/platform/linux.hpp \
|
||||
/usr/include/boost/config/posix_features.hpp \
|
||||
/usr/include/boost/config/suffix.hpp \
|
||||
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
|
||||
/usr/include/c++/4.5/bits/locale_classes.h \
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
|
||||
/usr/include/wctype.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
|
||||
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc \
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
|
||||
/usr/include/c++/4.5/ctime \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
|
||||
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
|
||||
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
|
||||
/usr/include/c++/4.5/bits/basic_ios.h \
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
|
||||
/usr/include/c++/4.5/bits/ostream.tcc \
|
||||
/usr/include/c++/4.5/bits/istream.tcc \
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h \
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp \
|
||||
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h \
|
||||
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
|
||||
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
|
||||
/usr/include/boost/pending/integer_log2.hpp \
|
||||
/usr/include/boost/shared_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp \
|
||||
/usr/include/boost/config/no_tr1/memory.hpp \
|
||||
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
|
||||
/usr/include/boost/throw_exception.hpp \
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
|
||||
/usr/include/boost/exception/exception.hpp \
|
||||
/usr/include/boost/current_function.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
|
||||
/usr/include/c++/4.5/functional \
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
|
||||
/usr/include/boost/memory_order.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
|
||||
/usr/include/boost/scoped_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
|
||||
/usr/include/stdint.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
|
||||
/home/germann/code/moses/master/mosesdecoder/moses/generic/sampling/Sampling.h
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h:
|
||||
|
||||
/usr/include/c++/4.5/cassert:
|
||||
|
||||
/usr/include/assert.h:
|
||||
|
||||
/usr/include/features.h:
|
||||
|
||||
/usr/include/sys/cdefs.h:
|
||||
|
||||
/usr/include/bits/wordsize.h:
|
||||
|
||||
/usr/include/gnu/stubs.h:
|
||||
|
||||
/usr/include/gnu/stubs-64.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/c++/4.5/string:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stringfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/char_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h:
|
||||
|
||||
/usr/include/c++/4.5/cstddef:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/functexcept.h:
|
||||
|
||||
/usr/include/c++/4.5/exception_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_pair.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/move.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/concept_check.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/debug/debug.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/postypes.h:
|
||||
|
||||
/usr/include/c++/4.5/cwchar:
|
||||
|
||||
/usr/include/wchar.h:
|
||||
|
||||
/usr/include/stdio.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
|
||||
|
||||
/usr/include/bits/wchar.h:
|
||||
|
||||
/usr/include/xlocale.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/new_allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/new:
|
||||
|
||||
/usr/include/c++/4.5/exception:
|
||||
|
||||
/usr/include/c++/4.5/bits/localefwd.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
|
||||
|
||||
/usr/include/c++/4.5/clocale:
|
||||
|
||||
/usr/include/locale.h:
|
||||
|
||||
/usr/include/bits/locale.h:
|
||||
|
||||
/usr/include/c++/4.5/iosfwd:
|
||||
|
||||
/usr/include/c++/4.5/cctype:
|
||||
|
||||
/usr/include/ctype.h:
|
||||
|
||||
/usr/include/bits/types.h:
|
||||
|
||||
/usr/include/bits/typesizes.h:
|
||||
|
||||
/usr/include/endian.h:
|
||||
|
||||
/usr/include/bits/endian.h:
|
||||
|
||||
/usr/include/bits/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h:
|
||||
|
||||
/usr/include/c++/4.5/cxxabi-forced.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_function.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/binders.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/atomicity.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
|
||||
|
||||
/usr/include/pthread.h:
|
||||
|
||||
/usr/include/sched.h:
|
||||
|
||||
/usr/include/time.h:
|
||||
|
||||
/usr/include/bits/sched.h:
|
||||
|
||||
/usr/include/bits/time.h:
|
||||
|
||||
/usr/include/signal.h:
|
||||
|
||||
/usr/include/bits/sigset.h:
|
||||
|
||||
/usr/include/bits/pthreadtypes.h:
|
||||
|
||||
/usr/include/bits/setjmp.h:
|
||||
|
||||
/usr/include/unistd.h:
|
||||
|
||||
/usr/include/bits/posix_opt.h:
|
||||
|
||||
/usr/include/bits/environments.h:
|
||||
|
||||
/usr/include/bits/confname.h:
|
||||
|
||||
/usr/include/getopt.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
|
||||
|
||||
/usr/include/c++/4.5/initializer_list:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc:
|
||||
|
||||
/usr/include/c++/4.5/stdexcept:
|
||||
|
||||
/usr/include/c++/4.5/algorithm:
|
||||
|
||||
/usr/include/c++/4.5/utility:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_relops.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algo.h:
|
||||
|
||||
/usr/include/c++/4.5/cstdlib:
|
||||
|
||||
/usr/include/stdlib.h:
|
||||
|
||||
/usr/include/bits/waitflags.h:
|
||||
|
||||
/usr/include/bits/waitstatus.h:
|
||||
|
||||
/usr/include/sys/types.h:
|
||||
|
||||
/usr/include/sys/select.h:
|
||||
|
||||
/usr/include/bits/select.h:
|
||||
|
||||
/usr/include/sys/sysmacros.h:
|
||||
|
||||
/usr/include/alloca.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_heap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_construct.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h:
|
||||
|
||||
/usr/include/c++/4.5/vector:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_vector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/vector.tcc:
|
||||
|
||||
/usr/include/c++/4.5/climits:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
|
||||
|
||||
/usr/include/limits.h:
|
||||
|
||||
/usr/include/bits/posix1_lim.h:
|
||||
|
||||
/usr/include/bits/local_lim.h:
|
||||
|
||||
/usr/include/linux/limits.h:
|
||||
|
||||
/usr/include/bits/posix2_lim.h:
|
||||
|
||||
/usr/include/bits/xopen_lim.h:
|
||||
|
||||
/usr/include/bits/stdio_lim.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/config.hpp:
|
||||
|
||||
/usr/include/boost/config.hpp:
|
||||
|
||||
/usr/include/boost/config/user.hpp:
|
||||
|
||||
/usr/include/boost/config/select_compiler_config.hpp:
|
||||
|
||||
/usr/include/boost/config/compiler/gcc.hpp:
|
||||
|
||||
/usr/include/boost/config/select_stdlib_config.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/utility.hpp:
|
||||
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
|
||||
|
||||
/usr/include/boost/config/select_platform_config.hpp:
|
||||
|
||||
/usr/include/boost/config/platform/linux.hpp:
|
||||
|
||||
/usr/include/boost/config/posix_features.hpp:
|
||||
|
||||
/usr/include/boost/config/suffix.hpp:
|
||||
|
||||
/usr/include/boost/detail/workaround.hpp:
|
||||
|
||||
/usr/include/c++/4.5/locale:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.h:
|
||||
|
||||
/usr/include/c++/4.5/cwctype:
|
||||
|
||||
/usr/include/wctype.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ios_base.h:
|
||||
|
||||
/usr/include/c++/4.5/streambuf:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
|
||||
|
||||
/usr/include/c++/4.5/ctime:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
|
||||
|
||||
/usr/include/libintl.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/codecvt.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
|
||||
|
||||
/usr/include/c++/4.5/istream:
|
||||
|
||||
/usr/include/c++/4.5/ios:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc:
|
||||
|
||||
/usr/include/c++/4.5/ostream:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/istream.tcc:
|
||||
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp:
|
||||
|
||||
/usr/include/c++/4.5/memory:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h:
|
||||
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/detail/iterator.hpp:
|
||||
|
||||
/usr/include/c++/4.5/iterator:
|
||||
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h:
|
||||
|
||||
/usr/include/boost/static_assert.hpp:
|
||||
|
||||
/usr/include/boost/limits.hpp:
|
||||
|
||||
/usr/include/c++/4.5/limits:
|
||||
|
||||
/usr/include/boost/pending/lowest_bit.hpp:
|
||||
|
||||
/usr/include/boost/pending/integer_log2.hpp:
|
||||
|
||||
/usr/include/boost/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/memory.hpp:
|
||||
|
||||
/usr/include/boost/assert.hpp:
|
||||
|
||||
/usr/include/boost/checked_delete.hpp:
|
||||
|
||||
/usr/include/boost/throw_exception.hpp:
|
||||
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
|
||||
|
||||
/usr/include/boost/exception/exception.hpp:
|
||||
|
||||
/usr/include/boost/current_function.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
|
||||
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp:
|
||||
|
||||
/usr/include/c++/4.5/typeinfo:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
|
||||
|
||||
/usr/include/c++/4.5/functional:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
|
||||
|
||||
/usr/include/boost/memory_order.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
|
||||
|
||||
/usr/include/boost/scoped_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
|
||||
|
||||
/usr/include/stdint.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
|
||||
|
||||
/home/germann/code/moses/master/mosesdecoder/moses/generic/sampling/Sampling.h:
|
2402
moses/mm/build/x86_64/0/ug_ttrack_base.d
Normal file
2402
moses/mm/build/x86_64/0/ug_ttrack_base.d
Normal file
File diff suppressed because it is too large
Load Diff
546
moses/mm/build/x86_64/0/ug_ttrack_position.d
Normal file
546
moses/mm/build/x86_64/0/ug_ttrack_position.d
Normal file
@ -0,0 +1,546 @@
|
||||
build/x86_64/0/ug_ttrack_position.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.cc \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h \
|
||||
/usr/include/c++/4.5/cassert /usr/include/assert.h \
|
||||
/usr/include/features.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
|
||||
/usr/include/gnu/stubs-64.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
|
||||
/usr/include/boost/dynamic_bitset.hpp \
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
|
||||
/usr/include/c++/4.5/string \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
|
||||
/usr/include/c++/4.5/bits/stringfwd.h \
|
||||
/usr/include/c++/4.5/bits/char_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
|
||||
/usr/include/c++/4.5/bits/functexcept.h \
|
||||
/usr/include/c++/4.5/exception_defines.h \
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h \
|
||||
/usr/include/c++/4.5/ext/type_traits.h \
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
|
||||
/usr/include/c++/4.5/bits/concept_check.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h \
|
||||
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
|
||||
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
|
||||
/usr/include/bits/wchar.h /usr/include/xlocale.h \
|
||||
/usr/include/c++/4.5/bits/allocator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
|
||||
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
|
||||
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
|
||||
/usr/include/c++/4.5/clocale /usr/include/locale.h \
|
||||
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
|
||||
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
|
||||
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
|
||||
/usr/include/endian.h /usr/include/bits/endian.h \
|
||||
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
|
||||
/usr/include/c++/4.5/cxxabi-forced.h \
|
||||
/usr/include/c++/4.5/bits/stl_function.h \
|
||||
/usr/include/c++/4.5/backward/binders.h \
|
||||
/usr/include/c++/4.5/bits/basic_string.h \
|
||||
/usr/include/c++/4.5/ext/atomicity.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
|
||||
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
|
||||
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
|
||||
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
|
||||
/usr/include/bits/setjmp.h /usr/include/unistd.h \
|
||||
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
|
||||
/usr/include/bits/confname.h /usr/include/getopt.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
|
||||
/usr/include/c++/4.5/initializer_list \
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc \
|
||||
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
|
||||
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
|
||||
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
|
||||
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
|
||||
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
|
||||
/usr/include/sys/select.h /usr/include/bits/select.h \
|
||||
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h \
|
||||
/usr/include/c++/4.5/bits/stl_heap.h \
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h \
|
||||
/usr/include/c++/4.5/bits/stl_construct.h \
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h \
|
||||
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h \
|
||||
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
|
||||
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
|
||||
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
|
||||
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
|
||||
/usr/include/bits/stdio_lim.h \
|
||||
/usr/include/boost/dynamic_bitset/config.hpp \
|
||||
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
|
||||
/usr/include/boost/config/select_compiler_config.hpp \
|
||||
/usr/include/boost/config/compiler/gcc.hpp \
|
||||
/usr/include/boost/config/select_stdlib_config.hpp \
|
||||
/usr/include/boost/config/no_tr1/utility.hpp \
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
|
||||
/usr/include/boost/config/select_platform_config.hpp \
|
||||
/usr/include/boost/config/platform/linux.hpp \
|
||||
/usr/include/boost/config/posix_features.hpp \
|
||||
/usr/include/boost/config/suffix.hpp \
|
||||
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
|
||||
/usr/include/c++/4.5/bits/locale_classes.h \
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
|
||||
/usr/include/wctype.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
|
||||
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc \
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
|
||||
/usr/include/c++/4.5/ctime \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
|
||||
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
|
||||
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
|
||||
/usr/include/c++/4.5/bits/basic_ios.h \
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
|
||||
/usr/include/c++/4.5/bits/ostream.tcc \
|
||||
/usr/include/c++/4.5/bits/istream.tcc \
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h \
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp \
|
||||
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h \
|
||||
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
|
||||
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
|
||||
/usr/include/boost/pending/integer_log2.hpp \
|
||||
/usr/include/boost/shared_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp \
|
||||
/usr/include/boost/config/no_tr1/memory.hpp \
|
||||
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
|
||||
/usr/include/boost/throw_exception.hpp \
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
|
||||
/usr/include/boost/exception/exception.hpp \
|
||||
/usr/include/boost/current_function.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
|
||||
/usr/include/c++/4.5/functional \
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
|
||||
/usr/include/boost/memory_order.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
|
||||
/usr/include/boost/scoped_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
|
||||
/usr/include/stdint.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h:
|
||||
|
||||
/usr/include/c++/4.5/cassert:
|
||||
|
||||
/usr/include/assert.h:
|
||||
|
||||
/usr/include/features.h:
|
||||
|
||||
/usr/include/sys/cdefs.h:
|
||||
|
||||
/usr/include/bits/wordsize.h:
|
||||
|
||||
/usr/include/gnu/stubs.h:
|
||||
|
||||
/usr/include/gnu/stubs-64.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/c++/4.5/string:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stringfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/char_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h:
|
||||
|
||||
/usr/include/c++/4.5/cstddef:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/functexcept.h:
|
||||
|
||||
/usr/include/c++/4.5/exception_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_pair.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/move.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/concept_check.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/debug/debug.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/postypes.h:
|
||||
|
||||
/usr/include/c++/4.5/cwchar:
|
||||
|
||||
/usr/include/wchar.h:
|
||||
|
||||
/usr/include/stdio.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
|
||||
|
||||
/usr/include/bits/wchar.h:
|
||||
|
||||
/usr/include/xlocale.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/new_allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/new:
|
||||
|
||||
/usr/include/c++/4.5/exception:
|
||||
|
||||
/usr/include/c++/4.5/bits/localefwd.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
|
||||
|
||||
/usr/include/c++/4.5/clocale:
|
||||
|
||||
/usr/include/locale.h:
|
||||
|
||||
/usr/include/bits/locale.h:
|
||||
|
||||
/usr/include/c++/4.5/iosfwd:
|
||||
|
||||
/usr/include/c++/4.5/cctype:
|
||||
|
||||
/usr/include/ctype.h:
|
||||
|
||||
/usr/include/bits/types.h:
|
||||
|
||||
/usr/include/bits/typesizes.h:
|
||||
|
||||
/usr/include/endian.h:
|
||||
|
||||
/usr/include/bits/endian.h:
|
||||
|
||||
/usr/include/bits/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h:
|
||||
|
||||
/usr/include/c++/4.5/cxxabi-forced.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_function.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/binders.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/atomicity.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
|
||||
|
||||
/usr/include/pthread.h:
|
||||
|
||||
/usr/include/sched.h:
|
||||
|
||||
/usr/include/time.h:
|
||||
|
||||
/usr/include/bits/sched.h:
|
||||
|
||||
/usr/include/bits/time.h:
|
||||
|
||||
/usr/include/signal.h:
|
||||
|
||||
/usr/include/bits/sigset.h:
|
||||
|
||||
/usr/include/bits/pthreadtypes.h:
|
||||
|
||||
/usr/include/bits/setjmp.h:
|
||||
|
||||
/usr/include/unistd.h:
|
||||
|
||||
/usr/include/bits/posix_opt.h:
|
||||
|
||||
/usr/include/bits/environments.h:
|
||||
|
||||
/usr/include/bits/confname.h:
|
||||
|
||||
/usr/include/getopt.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
|
||||
|
||||
/usr/include/c++/4.5/initializer_list:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc:
|
||||
|
||||
/usr/include/c++/4.5/stdexcept:
|
||||
|
||||
/usr/include/c++/4.5/algorithm:
|
||||
|
||||
/usr/include/c++/4.5/utility:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_relops.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algo.h:
|
||||
|
||||
/usr/include/c++/4.5/cstdlib:
|
||||
|
||||
/usr/include/stdlib.h:
|
||||
|
||||
/usr/include/bits/waitflags.h:
|
||||
|
||||
/usr/include/bits/waitstatus.h:
|
||||
|
||||
/usr/include/sys/types.h:
|
||||
|
||||
/usr/include/sys/select.h:
|
||||
|
||||
/usr/include/bits/select.h:
|
||||
|
||||
/usr/include/sys/sysmacros.h:
|
||||
|
||||
/usr/include/alloca.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_heap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_construct.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h:
|
||||
|
||||
/usr/include/c++/4.5/vector:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_vector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/vector.tcc:
|
||||
|
||||
/usr/include/c++/4.5/climits:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
|
||||
|
||||
/usr/include/limits.h:
|
||||
|
||||
/usr/include/bits/posix1_lim.h:
|
||||
|
||||
/usr/include/bits/local_lim.h:
|
||||
|
||||
/usr/include/linux/limits.h:
|
||||
|
||||
/usr/include/bits/posix2_lim.h:
|
||||
|
||||
/usr/include/bits/xopen_lim.h:
|
||||
|
||||
/usr/include/bits/stdio_lim.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/config.hpp:
|
||||
|
||||
/usr/include/boost/config.hpp:
|
||||
|
||||
/usr/include/boost/config/user.hpp:
|
||||
|
||||
/usr/include/boost/config/select_compiler_config.hpp:
|
||||
|
||||
/usr/include/boost/config/compiler/gcc.hpp:
|
||||
|
||||
/usr/include/boost/config/select_stdlib_config.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/utility.hpp:
|
||||
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
|
||||
|
||||
/usr/include/boost/config/select_platform_config.hpp:
|
||||
|
||||
/usr/include/boost/config/platform/linux.hpp:
|
||||
|
||||
/usr/include/boost/config/posix_features.hpp:
|
||||
|
||||
/usr/include/boost/config/suffix.hpp:
|
||||
|
||||
/usr/include/boost/detail/workaround.hpp:
|
||||
|
||||
/usr/include/c++/4.5/locale:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.h:
|
||||
|
||||
/usr/include/c++/4.5/cwctype:
|
||||
|
||||
/usr/include/wctype.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ios_base.h:
|
||||
|
||||
/usr/include/c++/4.5/streambuf:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
|
||||
|
||||
/usr/include/c++/4.5/ctime:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
|
||||
|
||||
/usr/include/libintl.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/codecvt.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
|
||||
|
||||
/usr/include/c++/4.5/istream:
|
||||
|
||||
/usr/include/c++/4.5/ios:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc:
|
||||
|
||||
/usr/include/c++/4.5/ostream:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/istream.tcc:
|
||||
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp:
|
||||
|
||||
/usr/include/c++/4.5/memory:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h:
|
||||
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/detail/iterator.hpp:
|
||||
|
||||
/usr/include/c++/4.5/iterator:
|
||||
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h:
|
||||
|
||||
/usr/include/boost/static_assert.hpp:
|
||||
|
||||
/usr/include/boost/limits.hpp:
|
||||
|
||||
/usr/include/c++/4.5/limits:
|
||||
|
||||
/usr/include/boost/pending/lowest_bit.hpp:
|
||||
|
||||
/usr/include/boost/pending/integer_log2.hpp:
|
||||
|
||||
/usr/include/boost/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/memory.hpp:
|
||||
|
||||
/usr/include/boost/assert.hpp:
|
||||
|
||||
/usr/include/boost/checked_delete.hpp:
|
||||
|
||||
/usr/include/boost/throw_exception.hpp:
|
||||
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
|
||||
|
||||
/usr/include/boost/exception/exception.hpp:
|
||||
|
||||
/usr/include/boost/current_function.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
|
||||
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp:
|
||||
|
||||
/usr/include/c++/4.5/typeinfo:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
|
||||
|
||||
/usr/include/c++/4.5/functional:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
|
||||
|
||||
/usr/include/boost/memory_order.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
|
||||
|
||||
/usr/include/boost/scoped_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
|
||||
|
||||
/usr/include/stdint.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
|
3547
moses/mm/build/x86_64/3/custom-pt.d
Normal file
3547
moses/mm/build/x86_64/3/custom-pt.d
Normal file
File diff suppressed because it is too large
Load Diff
3458
moses/mm/build/x86_64/3/mmlex-build.d
Normal file
3458
moses/mm/build/x86_64/3/mmlex-build.d
Normal file
File diff suppressed because it is too large
Load Diff
2540
moses/mm/build/x86_64/3/mtt-build.d
Normal file
2540
moses/mm/build/x86_64/3/mtt-build.d
Normal file
File diff suppressed because it is too large
Load Diff
2492
moses/mm/build/x86_64/3/mtt-dump.d
Normal file
2492
moses/mm/build/x86_64/3/mtt-dump.d
Normal file
File diff suppressed because it is too large
Load Diff
2551
moses/mm/build/x86_64/3/symal2mam.d
Normal file
2551
moses/mm/build/x86_64/3/symal2mam.d
Normal file
File diff suppressed because it is too large
Load Diff
316
moses/mm/build/x86_64/3/tpt_pickler.d
Normal file
316
moses/mm/build/x86_64/3/tpt_pickler.d
Normal file
@ -0,0 +1,316 @@
|
||||
build/x86_64/3/tpt_pickler.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.cc \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.h \
|
||||
/usr/include/c++/4.5/iostream \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
|
||||
/usr/include/bits/wordsize.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
|
||||
/usr/include/features.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
|
||||
/usr/include/c++/4.5/ostream /usr/include/c++/4.5/ios \
|
||||
/usr/include/c++/4.5/iosfwd /usr/include/c++/4.5/bits/stringfwd.h \
|
||||
/usr/include/c++/4.5/bits/postypes.h /usr/include/c++/4.5/cwchar \
|
||||
/usr/include/c++/4.5/cstddef \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
|
||||
/usr/include/wchar.h /usr/include/stdio.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
|
||||
/usr/include/bits/wchar.h /usr/include/xlocale.h \
|
||||
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/char_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h \
|
||||
/usr/include/c++/4.5/bits/functexcept.h \
|
||||
/usr/include/c++/4.5/exception_defines.h \
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h \
|
||||
/usr/include/c++/4.5/ext/type_traits.h \
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
|
||||
/usr/include/c++/4.5/bits/concept_check.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h \
|
||||
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/localefwd.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
|
||||
/usr/include/c++/4.5/clocale /usr/include/locale.h \
|
||||
/usr/include/bits/locale.h /usr/include/c++/4.5/cctype \
|
||||
/usr/include/ctype.h /usr/include/bits/types.h \
|
||||
/usr/include/bits/typesizes.h /usr/include/endian.h \
|
||||
/usr/include/bits/endian.h /usr/include/bits/byteswap.h \
|
||||
/usr/include/c++/4.5/bits/ios_base.h \
|
||||
/usr/include/c++/4.5/ext/atomicity.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
|
||||
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
|
||||
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
|
||||
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
|
||||
/usr/include/bits/setjmp.h /usr/include/unistd.h \
|
||||
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
|
||||
/usr/include/bits/confname.h /usr/include/getopt.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
|
||||
/usr/include/c++/4.5/bits/locale_classes.h /usr/include/c++/4.5/string \
|
||||
/usr/include/c++/4.5/bits/allocator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
|
||||
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h \
|
||||
/usr/include/c++/4.5/cxxabi-forced.h \
|
||||
/usr/include/c++/4.5/bits/stl_function.h \
|
||||
/usr/include/c++/4.5/backward/binders.h \
|
||||
/usr/include/c++/4.5/bits/basic_string.h \
|
||||
/usr/include/c++/4.5/initializer_list \
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc \
|
||||
/usr/include/c++/4.5/streambuf /usr/include/c++/4.5/bits/streambuf.tcc \
|
||||
/usr/include/c++/4.5/bits/basic_ios.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
|
||||
/usr/include/wctype.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc \
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc \
|
||||
/usr/include/c++/4.5/bits/ostream.tcc /usr/include/c++/4.5/istream \
|
||||
/usr/include/c++/4.5/bits/istream.tcc /usr/include/c++/4.5/vector \
|
||||
/usr/include/c++/4.5/bits/stl_construct.h \
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h \
|
||||
/usr/include/c++/4.5/bits/stl_vector.h \
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h \
|
||||
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/map \
|
||||
/usr/include/c++/4.5/bits/stl_tree.h /usr/include/c++/4.5/bits/stl_map.h \
|
||||
/usr/include/c++/4.5/bits/stl_multimap.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
|
||||
/usr/include/stdint.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/num_read_write.h \
|
||||
/usr/include/byteswap.h /usr/include/c++/4.5/cassert \
|
||||
/usr/include/assert.h /usr/include/sys/stat.h /usr/include/bits/stat.h
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_pickler.h:
|
||||
|
||||
/usr/include/c++/4.5/iostream:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
|
||||
|
||||
/usr/include/bits/wordsize.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
|
||||
|
||||
/usr/include/features.h:
|
||||
|
||||
/usr/include/sys/cdefs.h:
|
||||
|
||||
/usr/include/gnu/stubs.h:
|
||||
|
||||
/usr/include/gnu/stubs-64.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/ostream:
|
||||
|
||||
/usr/include/c++/4.5/ios:
|
||||
|
||||
/usr/include/c++/4.5/iosfwd:
|
||||
|
||||
/usr/include/c++/4.5/bits/stringfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/postypes.h:
|
||||
|
||||
/usr/include/c++/4.5/cwchar:
|
||||
|
||||
/usr/include/c++/4.5/cstddef:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
|
||||
|
||||
/usr/include/wchar.h:
|
||||
|
||||
/usr/include/stdio.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
|
||||
|
||||
/usr/include/bits/wchar.h:
|
||||
|
||||
/usr/include/xlocale.h:
|
||||
|
||||
/usr/include/c++/4.5/exception:
|
||||
|
||||
/usr/include/c++/4.5/bits/char_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/functexcept.h:
|
||||
|
||||
/usr/include/c++/4.5/exception_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_pair.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/move.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/concept_check.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/debug/debug.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/localefwd.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
|
||||
|
||||
/usr/include/c++/4.5/clocale:
|
||||
|
||||
/usr/include/locale.h:
|
||||
|
||||
/usr/include/bits/locale.h:
|
||||
|
||||
/usr/include/c++/4.5/cctype:
|
||||
|
||||
/usr/include/ctype.h:
|
||||
|
||||
/usr/include/bits/types.h:
|
||||
|
||||
/usr/include/bits/typesizes.h:
|
||||
|
||||
/usr/include/endian.h:
|
||||
|
||||
/usr/include/bits/endian.h:
|
||||
|
||||
/usr/include/bits/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ios_base.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/atomicity.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
|
||||
|
||||
/usr/include/pthread.h:
|
||||
|
||||
/usr/include/sched.h:
|
||||
|
||||
/usr/include/time.h:
|
||||
|
||||
/usr/include/bits/sched.h:
|
||||
|
||||
/usr/include/bits/time.h:
|
||||
|
||||
/usr/include/signal.h:
|
||||
|
||||
/usr/include/bits/sigset.h:
|
||||
|
||||
/usr/include/bits/pthreadtypes.h:
|
||||
|
||||
/usr/include/bits/setjmp.h:
|
||||
|
||||
/usr/include/unistd.h:
|
||||
|
||||
/usr/include/bits/posix_opt.h:
|
||||
|
||||
/usr/include/bits/environments.h:
|
||||
|
||||
/usr/include/bits/confname.h:
|
||||
|
||||
/usr/include/getopt.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.h:
|
||||
|
||||
/usr/include/c++/4.5/string:
|
||||
|
||||
/usr/include/c++/4.5/bits/allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/new_allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/new:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h:
|
||||
|
||||
/usr/include/c++/4.5/cxxabi-forced.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_function.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/binders.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.h:
|
||||
|
||||
/usr/include/c++/4.5/initializer_list:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc:
|
||||
|
||||
/usr/include/c++/4.5/streambuf:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.h:
|
||||
|
||||
/usr/include/c++/4.5/cwctype:
|
||||
|
||||
/usr/include/wctype.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/istream:
|
||||
|
||||
/usr/include/c++/4.5/bits/istream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/vector:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_construct.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_vector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/vector.tcc:
|
||||
|
||||
/usr/include/c++/4.5/map:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_tree.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_map.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_multimap.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
|
||||
|
||||
/usr/include/stdint.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/num_read_write.h:
|
||||
|
||||
/usr/include/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/cassert:
|
||||
|
||||
/usr/include/assert.h:
|
||||
|
||||
/usr/include/sys/stat.h:
|
||||
|
||||
/usr/include/bits/stat.h:
|
294
moses/mm/build/x86_64/3/tpt_tightindex.d
Normal file
294
moses/mm/build/x86_64/3/tpt_tightindex.d
Normal file
@ -0,0 +1,294 @@
|
||||
build/x86_64/3/tpt_tightindex.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.cc \
|
||||
/usr/include/c++/4.5/iostream \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
|
||||
/usr/include/bits/wordsize.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
|
||||
/usr/include/features.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/gnu/stubs.h /usr/include/gnu/stubs-64.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
|
||||
/usr/include/c++/4.5/ostream /usr/include/c++/4.5/ios \
|
||||
/usr/include/c++/4.5/iosfwd /usr/include/c++/4.5/bits/stringfwd.h \
|
||||
/usr/include/c++/4.5/bits/postypes.h /usr/include/c++/4.5/cwchar \
|
||||
/usr/include/c++/4.5/cstddef \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
|
||||
/usr/include/wchar.h /usr/include/stdio.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
|
||||
/usr/include/bits/wchar.h /usr/include/xlocale.h \
|
||||
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/char_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h \
|
||||
/usr/include/c++/4.5/bits/functexcept.h \
|
||||
/usr/include/c++/4.5/exception_defines.h \
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h \
|
||||
/usr/include/c++/4.5/ext/type_traits.h \
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
|
||||
/usr/include/c++/4.5/bits/concept_check.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h \
|
||||
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/localefwd.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
|
||||
/usr/include/c++/4.5/clocale /usr/include/locale.h \
|
||||
/usr/include/bits/locale.h /usr/include/c++/4.5/cctype \
|
||||
/usr/include/ctype.h /usr/include/bits/types.h \
|
||||
/usr/include/bits/typesizes.h /usr/include/endian.h \
|
||||
/usr/include/bits/endian.h /usr/include/bits/byteswap.h \
|
||||
/usr/include/c++/4.5/bits/ios_base.h \
|
||||
/usr/include/c++/4.5/ext/atomicity.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
|
||||
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
|
||||
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
|
||||
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
|
||||
/usr/include/bits/setjmp.h /usr/include/unistd.h \
|
||||
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
|
||||
/usr/include/bits/confname.h /usr/include/getopt.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
|
||||
/usr/include/c++/4.5/bits/locale_classes.h /usr/include/c++/4.5/string \
|
||||
/usr/include/c++/4.5/bits/allocator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
|
||||
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h \
|
||||
/usr/include/c++/4.5/cxxabi-forced.h \
|
||||
/usr/include/c++/4.5/bits/stl_function.h \
|
||||
/usr/include/c++/4.5/backward/binders.h \
|
||||
/usr/include/c++/4.5/bits/basic_string.h \
|
||||
/usr/include/c++/4.5/initializer_list \
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc \
|
||||
/usr/include/c++/4.5/streambuf /usr/include/c++/4.5/bits/streambuf.tcc \
|
||||
/usr/include/c++/4.5/bits/basic_ios.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
|
||||
/usr/include/wctype.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc \
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc \
|
||||
/usr/include/c++/4.5/bits/ostream.tcc /usr/include/c++/4.5/istream \
|
||||
/usr/include/c++/4.5/bits/istream.tcc /usr/include/assert.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.h \
|
||||
/usr/include/c++/4.5/map /usr/include/c++/4.5/bits/stl_tree.h \
|
||||
/usr/include/c++/4.5/bits/stl_map.h \
|
||||
/usr/include/c++/4.5/bits/stl_multimap.h /usr/include/c++/4.5/sstream \
|
||||
/usr/include/c++/4.5/bits/sstream.tcc \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
|
||||
/usr/include/stdint.h /usr/include/c++/4.5/cassert
|
||||
|
||||
/usr/include/c++/4.5/iostream:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
|
||||
|
||||
/usr/include/bits/wordsize.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
|
||||
|
||||
/usr/include/features.h:
|
||||
|
||||
/usr/include/sys/cdefs.h:
|
||||
|
||||
/usr/include/gnu/stubs.h:
|
||||
|
||||
/usr/include/gnu/stubs-64.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/ostream:
|
||||
|
||||
/usr/include/c++/4.5/ios:
|
||||
|
||||
/usr/include/c++/4.5/iosfwd:
|
||||
|
||||
/usr/include/c++/4.5/bits/stringfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/postypes.h:
|
||||
|
||||
/usr/include/c++/4.5/cwchar:
|
||||
|
||||
/usr/include/c++/4.5/cstddef:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
|
||||
|
||||
/usr/include/wchar.h:
|
||||
|
||||
/usr/include/stdio.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
|
||||
|
||||
/usr/include/bits/wchar.h:
|
||||
|
||||
/usr/include/xlocale.h:
|
||||
|
||||
/usr/include/c++/4.5/exception:
|
||||
|
||||
/usr/include/c++/4.5/bits/char_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/functexcept.h:
|
||||
|
||||
/usr/include/c++/4.5/exception_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_pair.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/move.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/concept_check.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/debug/debug.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/localefwd.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
|
||||
|
||||
/usr/include/c++/4.5/clocale:
|
||||
|
||||
/usr/include/locale.h:
|
||||
|
||||
/usr/include/bits/locale.h:
|
||||
|
||||
/usr/include/c++/4.5/cctype:
|
||||
|
||||
/usr/include/ctype.h:
|
||||
|
||||
/usr/include/bits/types.h:
|
||||
|
||||
/usr/include/bits/typesizes.h:
|
||||
|
||||
/usr/include/endian.h:
|
||||
|
||||
/usr/include/bits/endian.h:
|
||||
|
||||
/usr/include/bits/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ios_base.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/atomicity.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
|
||||
|
||||
/usr/include/pthread.h:
|
||||
|
||||
/usr/include/sched.h:
|
||||
|
||||
/usr/include/time.h:
|
||||
|
||||
/usr/include/bits/sched.h:
|
||||
|
||||
/usr/include/bits/time.h:
|
||||
|
||||
/usr/include/signal.h:
|
||||
|
||||
/usr/include/bits/sigset.h:
|
||||
|
||||
/usr/include/bits/pthreadtypes.h:
|
||||
|
||||
/usr/include/bits/setjmp.h:
|
||||
|
||||
/usr/include/unistd.h:
|
||||
|
||||
/usr/include/bits/posix_opt.h:
|
||||
|
||||
/usr/include/bits/environments.h:
|
||||
|
||||
/usr/include/bits/confname.h:
|
||||
|
||||
/usr/include/getopt.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.h:
|
||||
|
||||
/usr/include/c++/4.5/string:
|
||||
|
||||
/usr/include/c++/4.5/bits/allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/new_allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/new:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h:
|
||||
|
||||
/usr/include/c++/4.5/cxxabi-forced.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_function.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/binders.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.h:
|
||||
|
||||
/usr/include/c++/4.5/initializer_list:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc:
|
||||
|
||||
/usr/include/c++/4.5/streambuf:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.h:
|
||||
|
||||
/usr/include/c++/4.5/cwctype:
|
||||
|
||||
/usr/include/wctype.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/istream:
|
||||
|
||||
/usr/include/c++/4.5/bits/istream.tcc:
|
||||
|
||||
/usr/include/assert.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_tightindex.h:
|
||||
|
||||
/usr/include/c++/4.5/map:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_tree.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_map.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_multimap.h:
|
||||
|
||||
/usr/include/c++/4.5/sstream:
|
||||
|
||||
/usr/include/c++/4.5/bits/sstream.tcc:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
|
||||
|
||||
/usr/include/stdint.h:
|
||||
|
||||
/usr/include/c++/4.5/cassert:
|
2393
moses/mm/build/x86_64/3/tpt_tokenindex.d
Normal file
2393
moses/mm/build/x86_64/3/tpt_tokenindex.d
Normal file
File diff suppressed because it is too large
Load Diff
3546
moses/mm/build/x86_64/3/ug_bitext.d
Normal file
3546
moses/mm/build/x86_64/3/ug_bitext.d
Normal file
File diff suppressed because it is too large
Load Diff
542
moses/mm/build/x86_64/3/ug_conll_record.d
Normal file
542
moses/mm/build/x86_64/3/ug_conll_record.d
Normal file
@ -0,0 +1,542 @@
|
||||
build/x86_64/3/ug_conll_record.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.cc \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
|
||||
/usr/include/boost/dynamic_bitset.hpp \
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
|
||||
/usr/include/assert.h /usr/include/features.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
|
||||
/usr/include/gnu/stubs-64.h /usr/include/c++/4.5/string \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
|
||||
/usr/include/c++/4.5/bits/stringfwd.h \
|
||||
/usr/include/c++/4.5/bits/char_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
|
||||
/usr/include/c++/4.5/bits/functexcept.h \
|
||||
/usr/include/c++/4.5/exception_defines.h \
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h \
|
||||
/usr/include/c++/4.5/ext/type_traits.h \
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
|
||||
/usr/include/c++/4.5/bits/concept_check.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h \
|
||||
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
|
||||
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
|
||||
/usr/include/bits/wchar.h /usr/include/xlocale.h \
|
||||
/usr/include/c++/4.5/bits/allocator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
|
||||
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
|
||||
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
|
||||
/usr/include/c++/4.5/clocale /usr/include/locale.h \
|
||||
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
|
||||
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
|
||||
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
|
||||
/usr/include/endian.h /usr/include/bits/endian.h \
|
||||
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
|
||||
/usr/include/c++/4.5/cxxabi-forced.h \
|
||||
/usr/include/c++/4.5/bits/stl_function.h \
|
||||
/usr/include/c++/4.5/backward/binders.h \
|
||||
/usr/include/c++/4.5/bits/basic_string.h \
|
||||
/usr/include/c++/4.5/ext/atomicity.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
|
||||
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
|
||||
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
|
||||
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
|
||||
/usr/include/bits/setjmp.h /usr/include/unistd.h \
|
||||
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
|
||||
/usr/include/bits/confname.h /usr/include/getopt.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
|
||||
/usr/include/c++/4.5/initializer_list \
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc \
|
||||
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
|
||||
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
|
||||
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
|
||||
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
|
||||
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
|
||||
/usr/include/sys/select.h /usr/include/bits/select.h \
|
||||
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h \
|
||||
/usr/include/c++/4.5/bits/stl_heap.h \
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h \
|
||||
/usr/include/c++/4.5/bits/stl_construct.h \
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h \
|
||||
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h \
|
||||
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
|
||||
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
|
||||
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
|
||||
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
|
||||
/usr/include/bits/stdio_lim.h \
|
||||
/usr/include/boost/dynamic_bitset/config.hpp \
|
||||
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
|
||||
/usr/include/boost/config/select_compiler_config.hpp \
|
||||
/usr/include/boost/config/compiler/gcc.hpp \
|
||||
/usr/include/boost/config/select_stdlib_config.hpp \
|
||||
/usr/include/boost/config/no_tr1/utility.hpp \
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
|
||||
/usr/include/boost/config/select_platform_config.hpp \
|
||||
/usr/include/boost/config/platform/linux.hpp \
|
||||
/usr/include/boost/config/posix_features.hpp \
|
||||
/usr/include/boost/config/suffix.hpp \
|
||||
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
|
||||
/usr/include/c++/4.5/bits/locale_classes.h \
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
|
||||
/usr/include/wctype.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
|
||||
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc \
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
|
||||
/usr/include/c++/4.5/ctime \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
|
||||
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
|
||||
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
|
||||
/usr/include/c++/4.5/bits/basic_ios.h \
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
|
||||
/usr/include/c++/4.5/bits/ostream.tcc \
|
||||
/usr/include/c++/4.5/bits/istream.tcc \
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h \
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp \
|
||||
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h \
|
||||
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
|
||||
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
|
||||
/usr/include/boost/pending/integer_log2.hpp \
|
||||
/usr/include/boost/shared_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp \
|
||||
/usr/include/boost/config/no_tr1/memory.hpp \
|
||||
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
|
||||
/usr/include/boost/throw_exception.hpp \
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
|
||||
/usr/include/boost/exception/exception.hpp \
|
||||
/usr/include/boost/current_function.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
|
||||
/usr/include/c++/4.5/functional \
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
|
||||
/usr/include/boost/memory_order.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
|
||||
/usr/include/boost/scoped_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
|
||||
/usr/include/stdint.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_conll_record.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/assert.h:
|
||||
|
||||
/usr/include/features.h:
|
||||
|
||||
/usr/include/sys/cdefs.h:
|
||||
|
||||
/usr/include/bits/wordsize.h:
|
||||
|
||||
/usr/include/gnu/stubs.h:
|
||||
|
||||
/usr/include/gnu/stubs-64.h:
|
||||
|
||||
/usr/include/c++/4.5/string:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stringfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/char_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h:
|
||||
|
||||
/usr/include/c++/4.5/cstddef:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/functexcept.h:
|
||||
|
||||
/usr/include/c++/4.5/exception_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_pair.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/move.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/concept_check.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/debug/debug.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/postypes.h:
|
||||
|
||||
/usr/include/c++/4.5/cwchar:
|
||||
|
||||
/usr/include/wchar.h:
|
||||
|
||||
/usr/include/stdio.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
|
||||
|
||||
/usr/include/bits/wchar.h:
|
||||
|
||||
/usr/include/xlocale.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/new_allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/new:
|
||||
|
||||
/usr/include/c++/4.5/exception:
|
||||
|
||||
/usr/include/c++/4.5/bits/localefwd.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
|
||||
|
||||
/usr/include/c++/4.5/clocale:
|
||||
|
||||
/usr/include/locale.h:
|
||||
|
||||
/usr/include/bits/locale.h:
|
||||
|
||||
/usr/include/c++/4.5/iosfwd:
|
||||
|
||||
/usr/include/c++/4.5/cctype:
|
||||
|
||||
/usr/include/ctype.h:
|
||||
|
||||
/usr/include/bits/types.h:
|
||||
|
||||
/usr/include/bits/typesizes.h:
|
||||
|
||||
/usr/include/endian.h:
|
||||
|
||||
/usr/include/bits/endian.h:
|
||||
|
||||
/usr/include/bits/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h:
|
||||
|
||||
/usr/include/c++/4.5/cxxabi-forced.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_function.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/binders.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/atomicity.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
|
||||
|
||||
/usr/include/pthread.h:
|
||||
|
||||
/usr/include/sched.h:
|
||||
|
||||
/usr/include/time.h:
|
||||
|
||||
/usr/include/bits/sched.h:
|
||||
|
||||
/usr/include/bits/time.h:
|
||||
|
||||
/usr/include/signal.h:
|
||||
|
||||
/usr/include/bits/sigset.h:
|
||||
|
||||
/usr/include/bits/pthreadtypes.h:
|
||||
|
||||
/usr/include/bits/setjmp.h:
|
||||
|
||||
/usr/include/unistd.h:
|
||||
|
||||
/usr/include/bits/posix_opt.h:
|
||||
|
||||
/usr/include/bits/environments.h:
|
||||
|
||||
/usr/include/bits/confname.h:
|
||||
|
||||
/usr/include/getopt.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
|
||||
|
||||
/usr/include/c++/4.5/initializer_list:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc:
|
||||
|
||||
/usr/include/c++/4.5/stdexcept:
|
||||
|
||||
/usr/include/c++/4.5/algorithm:
|
||||
|
||||
/usr/include/c++/4.5/utility:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_relops.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algo.h:
|
||||
|
||||
/usr/include/c++/4.5/cstdlib:
|
||||
|
||||
/usr/include/stdlib.h:
|
||||
|
||||
/usr/include/bits/waitflags.h:
|
||||
|
||||
/usr/include/bits/waitstatus.h:
|
||||
|
||||
/usr/include/sys/types.h:
|
||||
|
||||
/usr/include/sys/select.h:
|
||||
|
||||
/usr/include/bits/select.h:
|
||||
|
||||
/usr/include/sys/sysmacros.h:
|
||||
|
||||
/usr/include/alloca.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_heap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_construct.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h:
|
||||
|
||||
/usr/include/c++/4.5/vector:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_vector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/vector.tcc:
|
||||
|
||||
/usr/include/c++/4.5/climits:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
|
||||
|
||||
/usr/include/limits.h:
|
||||
|
||||
/usr/include/bits/posix1_lim.h:
|
||||
|
||||
/usr/include/bits/local_lim.h:
|
||||
|
||||
/usr/include/linux/limits.h:
|
||||
|
||||
/usr/include/bits/posix2_lim.h:
|
||||
|
||||
/usr/include/bits/xopen_lim.h:
|
||||
|
||||
/usr/include/bits/stdio_lim.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/config.hpp:
|
||||
|
||||
/usr/include/boost/config.hpp:
|
||||
|
||||
/usr/include/boost/config/user.hpp:
|
||||
|
||||
/usr/include/boost/config/select_compiler_config.hpp:
|
||||
|
||||
/usr/include/boost/config/compiler/gcc.hpp:
|
||||
|
||||
/usr/include/boost/config/select_stdlib_config.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/utility.hpp:
|
||||
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
|
||||
|
||||
/usr/include/boost/config/select_platform_config.hpp:
|
||||
|
||||
/usr/include/boost/config/platform/linux.hpp:
|
||||
|
||||
/usr/include/boost/config/posix_features.hpp:
|
||||
|
||||
/usr/include/boost/config/suffix.hpp:
|
||||
|
||||
/usr/include/boost/detail/workaround.hpp:
|
||||
|
||||
/usr/include/c++/4.5/locale:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.h:
|
||||
|
||||
/usr/include/c++/4.5/cwctype:
|
||||
|
||||
/usr/include/wctype.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ios_base.h:
|
||||
|
||||
/usr/include/c++/4.5/streambuf:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
|
||||
|
||||
/usr/include/c++/4.5/ctime:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
|
||||
|
||||
/usr/include/libintl.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/codecvt.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
|
||||
|
||||
/usr/include/c++/4.5/istream:
|
||||
|
||||
/usr/include/c++/4.5/ios:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc:
|
||||
|
||||
/usr/include/c++/4.5/ostream:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/istream.tcc:
|
||||
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp:
|
||||
|
||||
/usr/include/c++/4.5/memory:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h:
|
||||
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/detail/iterator.hpp:
|
||||
|
||||
/usr/include/c++/4.5/iterator:
|
||||
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h:
|
||||
|
||||
/usr/include/boost/static_assert.hpp:
|
||||
|
||||
/usr/include/boost/limits.hpp:
|
||||
|
||||
/usr/include/c++/4.5/limits:
|
||||
|
||||
/usr/include/boost/pending/lowest_bit.hpp:
|
||||
|
||||
/usr/include/boost/pending/integer_log2.hpp:
|
||||
|
||||
/usr/include/boost/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/memory.hpp:
|
||||
|
||||
/usr/include/boost/assert.hpp:
|
||||
|
||||
/usr/include/boost/checked_delete.hpp:
|
||||
|
||||
/usr/include/boost/throw_exception.hpp:
|
||||
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
|
||||
|
||||
/usr/include/boost/exception/exception.hpp:
|
||||
|
||||
/usr/include/boost/current_function.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
|
||||
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp:
|
||||
|
||||
/usr/include/c++/4.5/typeinfo:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
|
||||
|
||||
/usr/include/c++/4.5/functional:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
|
||||
|
||||
/usr/include/boost/memory_order.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
|
||||
|
||||
/usr/include/boost/scoped_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
|
||||
|
||||
/usr/include/stdint.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
|
2393
moses/mm/build/x86_64/3/ug_corpus_token.d
Normal file
2393
moses/mm/build/x86_64/3/ug_corpus_token.d
Normal file
File diff suppressed because it is too large
Load Diff
2403
moses/mm/build/x86_64/3/ug_deptree.d
Normal file
2403
moses/mm/build/x86_64/3/ug_deptree.d
Normal file
File diff suppressed because it is too large
Load Diff
1121
moses/mm/build/x86_64/3/ug_get_options.d
Normal file
1121
moses/mm/build/x86_64/3/ug_get_options.d
Normal file
File diff suppressed because it is too large
Load Diff
2
moses/mm/build/x86_64/3/ug_mmbitext.d
Normal file
2
moses/mm/build/x86_64/3/ug_mmbitext.d
Normal file
@ -0,0 +1,2 @@
|
||||
build/x86_64/3/ug_mmbitext.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_mmbitext.cc
|
1922
moses/mm/build/x86_64/3/ug_stream.d
Normal file
1922
moses/mm/build/x86_64/3/ug_stream.d
Normal file
File diff suppressed because it is too large
Load Diff
552
moses/mm/build/x86_64/3/ug_tsa_array_entry.d
Normal file
552
moses/mm/build/x86_64/3/ug_tsa_array_entry.d
Normal file
@ -0,0 +1,552 @@
|
||||
build/x86_64/3/ug_tsa_array_entry.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.cc \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h \
|
||||
/usr/include/c++/4.5/cassert /usr/include/assert.h \
|
||||
/usr/include/features.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
|
||||
/usr/include/gnu/stubs-64.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
|
||||
/usr/include/boost/dynamic_bitset.hpp \
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
|
||||
/usr/include/c++/4.5/string \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
|
||||
/usr/include/c++/4.5/bits/stringfwd.h \
|
||||
/usr/include/c++/4.5/bits/char_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
|
||||
/usr/include/c++/4.5/bits/functexcept.h \
|
||||
/usr/include/c++/4.5/exception_defines.h \
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h \
|
||||
/usr/include/c++/4.5/ext/type_traits.h \
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
|
||||
/usr/include/c++/4.5/bits/concept_check.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h \
|
||||
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
|
||||
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
|
||||
/usr/include/bits/wchar.h /usr/include/xlocale.h \
|
||||
/usr/include/c++/4.5/bits/allocator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
|
||||
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
|
||||
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
|
||||
/usr/include/c++/4.5/clocale /usr/include/locale.h \
|
||||
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
|
||||
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
|
||||
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
|
||||
/usr/include/endian.h /usr/include/bits/endian.h \
|
||||
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
|
||||
/usr/include/c++/4.5/cxxabi-forced.h \
|
||||
/usr/include/c++/4.5/bits/stl_function.h \
|
||||
/usr/include/c++/4.5/backward/binders.h \
|
||||
/usr/include/c++/4.5/bits/basic_string.h \
|
||||
/usr/include/c++/4.5/ext/atomicity.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
|
||||
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
|
||||
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
|
||||
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
|
||||
/usr/include/bits/setjmp.h /usr/include/unistd.h \
|
||||
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
|
||||
/usr/include/bits/confname.h /usr/include/getopt.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
|
||||
/usr/include/c++/4.5/initializer_list \
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc \
|
||||
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
|
||||
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
|
||||
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
|
||||
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
|
||||
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
|
||||
/usr/include/sys/select.h /usr/include/bits/select.h \
|
||||
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h \
|
||||
/usr/include/c++/4.5/bits/stl_heap.h \
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h \
|
||||
/usr/include/c++/4.5/bits/stl_construct.h \
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h \
|
||||
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h \
|
||||
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
|
||||
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
|
||||
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
|
||||
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
|
||||
/usr/include/bits/stdio_lim.h \
|
||||
/usr/include/boost/dynamic_bitset/config.hpp \
|
||||
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
|
||||
/usr/include/boost/config/select_compiler_config.hpp \
|
||||
/usr/include/boost/config/compiler/gcc.hpp \
|
||||
/usr/include/boost/config/select_stdlib_config.hpp \
|
||||
/usr/include/boost/config/no_tr1/utility.hpp \
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
|
||||
/usr/include/boost/config/select_platform_config.hpp \
|
||||
/usr/include/boost/config/platform/linux.hpp \
|
||||
/usr/include/boost/config/posix_features.hpp \
|
||||
/usr/include/boost/config/suffix.hpp \
|
||||
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
|
||||
/usr/include/c++/4.5/bits/locale_classes.h \
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
|
||||
/usr/include/wctype.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
|
||||
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc \
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
|
||||
/usr/include/c++/4.5/ctime \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
|
||||
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
|
||||
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
|
||||
/usr/include/c++/4.5/bits/basic_ios.h \
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
|
||||
/usr/include/c++/4.5/bits/ostream.tcc \
|
||||
/usr/include/c++/4.5/bits/istream.tcc \
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h \
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp \
|
||||
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h \
|
||||
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
|
||||
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
|
||||
/usr/include/boost/pending/integer_log2.hpp \
|
||||
/usr/include/boost/shared_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp \
|
||||
/usr/include/boost/config/no_tr1/memory.hpp \
|
||||
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
|
||||
/usr/include/boost/throw_exception.hpp \
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
|
||||
/usr/include/boost/exception/exception.hpp \
|
||||
/usr/include/boost/current_function.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
|
||||
/usr/include/c++/4.5/functional \
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
|
||||
/usr/include/boost/memory_order.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
|
||||
/usr/include/boost/scoped_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
|
||||
/usr/include/stdint.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h \
|
||||
/home/germann/code/moses/master/mosesdecoder/moses/generic/sampling/Sampling.h
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_tsa_array_entry.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h:
|
||||
|
||||
/usr/include/c++/4.5/cassert:
|
||||
|
||||
/usr/include/assert.h:
|
||||
|
||||
/usr/include/features.h:
|
||||
|
||||
/usr/include/sys/cdefs.h:
|
||||
|
||||
/usr/include/bits/wordsize.h:
|
||||
|
||||
/usr/include/gnu/stubs.h:
|
||||
|
||||
/usr/include/gnu/stubs-64.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/c++/4.5/string:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stringfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/char_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h:
|
||||
|
||||
/usr/include/c++/4.5/cstddef:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/functexcept.h:
|
||||
|
||||
/usr/include/c++/4.5/exception_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_pair.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/move.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/concept_check.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/debug/debug.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/postypes.h:
|
||||
|
||||
/usr/include/c++/4.5/cwchar:
|
||||
|
||||
/usr/include/wchar.h:
|
||||
|
||||
/usr/include/stdio.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
|
||||
|
||||
/usr/include/bits/wchar.h:
|
||||
|
||||
/usr/include/xlocale.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/new_allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/new:
|
||||
|
||||
/usr/include/c++/4.5/exception:
|
||||
|
||||
/usr/include/c++/4.5/bits/localefwd.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
|
||||
|
||||
/usr/include/c++/4.5/clocale:
|
||||
|
||||
/usr/include/locale.h:
|
||||
|
||||
/usr/include/bits/locale.h:
|
||||
|
||||
/usr/include/c++/4.5/iosfwd:
|
||||
|
||||
/usr/include/c++/4.5/cctype:
|
||||
|
||||
/usr/include/ctype.h:
|
||||
|
||||
/usr/include/bits/types.h:
|
||||
|
||||
/usr/include/bits/typesizes.h:
|
||||
|
||||
/usr/include/endian.h:
|
||||
|
||||
/usr/include/bits/endian.h:
|
||||
|
||||
/usr/include/bits/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h:
|
||||
|
||||
/usr/include/c++/4.5/cxxabi-forced.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_function.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/binders.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/atomicity.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
|
||||
|
||||
/usr/include/pthread.h:
|
||||
|
||||
/usr/include/sched.h:
|
||||
|
||||
/usr/include/time.h:
|
||||
|
||||
/usr/include/bits/sched.h:
|
||||
|
||||
/usr/include/bits/time.h:
|
||||
|
||||
/usr/include/signal.h:
|
||||
|
||||
/usr/include/bits/sigset.h:
|
||||
|
||||
/usr/include/bits/pthreadtypes.h:
|
||||
|
||||
/usr/include/bits/setjmp.h:
|
||||
|
||||
/usr/include/unistd.h:
|
||||
|
||||
/usr/include/bits/posix_opt.h:
|
||||
|
||||
/usr/include/bits/environments.h:
|
||||
|
||||
/usr/include/bits/confname.h:
|
||||
|
||||
/usr/include/getopt.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
|
||||
|
||||
/usr/include/c++/4.5/initializer_list:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc:
|
||||
|
||||
/usr/include/c++/4.5/stdexcept:
|
||||
|
||||
/usr/include/c++/4.5/algorithm:
|
||||
|
||||
/usr/include/c++/4.5/utility:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_relops.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algo.h:
|
||||
|
||||
/usr/include/c++/4.5/cstdlib:
|
||||
|
||||
/usr/include/stdlib.h:
|
||||
|
||||
/usr/include/bits/waitflags.h:
|
||||
|
||||
/usr/include/bits/waitstatus.h:
|
||||
|
||||
/usr/include/sys/types.h:
|
||||
|
||||
/usr/include/sys/select.h:
|
||||
|
||||
/usr/include/bits/select.h:
|
||||
|
||||
/usr/include/sys/sysmacros.h:
|
||||
|
||||
/usr/include/alloca.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_heap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_construct.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h:
|
||||
|
||||
/usr/include/c++/4.5/vector:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_vector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/vector.tcc:
|
||||
|
||||
/usr/include/c++/4.5/climits:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
|
||||
|
||||
/usr/include/limits.h:
|
||||
|
||||
/usr/include/bits/posix1_lim.h:
|
||||
|
||||
/usr/include/bits/local_lim.h:
|
||||
|
||||
/usr/include/linux/limits.h:
|
||||
|
||||
/usr/include/bits/posix2_lim.h:
|
||||
|
||||
/usr/include/bits/xopen_lim.h:
|
||||
|
||||
/usr/include/bits/stdio_lim.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/config.hpp:
|
||||
|
||||
/usr/include/boost/config.hpp:
|
||||
|
||||
/usr/include/boost/config/user.hpp:
|
||||
|
||||
/usr/include/boost/config/select_compiler_config.hpp:
|
||||
|
||||
/usr/include/boost/config/compiler/gcc.hpp:
|
||||
|
||||
/usr/include/boost/config/select_stdlib_config.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/utility.hpp:
|
||||
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
|
||||
|
||||
/usr/include/boost/config/select_platform_config.hpp:
|
||||
|
||||
/usr/include/boost/config/platform/linux.hpp:
|
||||
|
||||
/usr/include/boost/config/posix_features.hpp:
|
||||
|
||||
/usr/include/boost/config/suffix.hpp:
|
||||
|
||||
/usr/include/boost/detail/workaround.hpp:
|
||||
|
||||
/usr/include/c++/4.5/locale:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.h:
|
||||
|
||||
/usr/include/c++/4.5/cwctype:
|
||||
|
||||
/usr/include/wctype.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ios_base.h:
|
||||
|
||||
/usr/include/c++/4.5/streambuf:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
|
||||
|
||||
/usr/include/c++/4.5/ctime:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
|
||||
|
||||
/usr/include/libintl.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/codecvt.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
|
||||
|
||||
/usr/include/c++/4.5/istream:
|
||||
|
||||
/usr/include/c++/4.5/ios:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc:
|
||||
|
||||
/usr/include/c++/4.5/ostream:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/istream.tcc:
|
||||
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp:
|
||||
|
||||
/usr/include/c++/4.5/memory:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h:
|
||||
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/detail/iterator.hpp:
|
||||
|
||||
/usr/include/c++/4.5/iterator:
|
||||
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h:
|
||||
|
||||
/usr/include/boost/static_assert.hpp:
|
||||
|
||||
/usr/include/boost/limits.hpp:
|
||||
|
||||
/usr/include/c++/4.5/limits:
|
||||
|
||||
/usr/include/boost/pending/lowest_bit.hpp:
|
||||
|
||||
/usr/include/boost/pending/integer_log2.hpp:
|
||||
|
||||
/usr/include/boost/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/memory.hpp:
|
||||
|
||||
/usr/include/boost/assert.hpp:
|
||||
|
||||
/usr/include/boost/checked_delete.hpp:
|
||||
|
||||
/usr/include/boost/throw_exception.hpp:
|
||||
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
|
||||
|
||||
/usr/include/boost/exception/exception.hpp:
|
||||
|
||||
/usr/include/boost/current_function.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
|
||||
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp:
|
||||
|
||||
/usr/include/c++/4.5/typeinfo:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
|
||||
|
||||
/usr/include/c++/4.5/functional:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
|
||||
|
||||
/usr/include/boost/memory_order.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
|
||||
|
||||
/usr/include/boost/scoped_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
|
||||
|
||||
/usr/include/stdint.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
|
||||
|
||||
/home/germann/code/moses/master/mosesdecoder/moses/generic/sampling/Sampling.h:
|
2407
moses/mm/build/x86_64/3/ug_ttrack_base.d
Normal file
2407
moses/mm/build/x86_64/3/ug_ttrack_base.d
Normal file
File diff suppressed because it is too large
Load Diff
546
moses/mm/build/x86_64/3/ug_ttrack_position.d
Normal file
546
moses/mm/build/x86_64/3/ug_ttrack_position.d
Normal file
@ -0,0 +1,546 @@
|
||||
build/x86_64/3/ug_ttrack_position.o: \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.cc \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h \
|
||||
/usr/include/c++/4.5/cassert /usr/include/assert.h \
|
||||
/usr/include/features.h /usr/include/sys/cdefs.h \
|
||||
/usr/include/bits/wordsize.h /usr/include/gnu/stubs.h \
|
||||
/usr/include/gnu/stubs-64.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h \
|
||||
/usr/include/boost/dynamic_bitset.hpp \
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp \
|
||||
/usr/include/c++/4.5/string \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h \
|
||||
/usr/include/c++/4.5/bits/stringfwd.h \
|
||||
/usr/include/c++/4.5/bits/char_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h /usr/include/c++/4.5/cstddef \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h \
|
||||
/usr/include/c++/4.5/bits/functexcept.h \
|
||||
/usr/include/c++/4.5/exception_defines.h \
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h \
|
||||
/usr/include/c++/4.5/ext/type_traits.h \
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h \
|
||||
/usr/include/c++/4.5/bits/stl_pair.h /usr/include/c++/4.5/bits/move.h \
|
||||
/usr/include/c++/4.5/bits/concept_check.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h \
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h \
|
||||
/usr/include/c++/4.5/debug/debug.h /usr/include/c++/4.5/bits/postypes.h \
|
||||
/usr/include/c++/4.5/cwchar /usr/include/wchar.h /usr/include/stdio.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h \
|
||||
/usr/include/bits/wchar.h /usr/include/xlocale.h \
|
||||
/usr/include/c++/4.5/bits/allocator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h \
|
||||
/usr/include/c++/4.5/ext/new_allocator.h /usr/include/c++/4.5/new \
|
||||
/usr/include/c++/4.5/exception /usr/include/c++/4.5/bits/localefwd.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h \
|
||||
/usr/include/c++/4.5/clocale /usr/include/locale.h \
|
||||
/usr/include/bits/locale.h /usr/include/c++/4.5/iosfwd \
|
||||
/usr/include/c++/4.5/cctype /usr/include/ctype.h \
|
||||
/usr/include/bits/types.h /usr/include/bits/typesizes.h \
|
||||
/usr/include/endian.h /usr/include/bits/endian.h \
|
||||
/usr/include/bits/byteswap.h /usr/include/c++/4.5/bits/ostream_insert.h \
|
||||
/usr/include/c++/4.5/cxxabi-forced.h \
|
||||
/usr/include/c++/4.5/bits/stl_function.h \
|
||||
/usr/include/c++/4.5/backward/binders.h \
|
||||
/usr/include/c++/4.5/bits/basic_string.h \
|
||||
/usr/include/c++/4.5/ext/atomicity.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h \
|
||||
/usr/include/pthread.h /usr/include/sched.h /usr/include/time.h \
|
||||
/usr/include/bits/sched.h /usr/include/bits/time.h /usr/include/signal.h \
|
||||
/usr/include/bits/sigset.h /usr/include/bits/pthreadtypes.h \
|
||||
/usr/include/bits/setjmp.h /usr/include/unistd.h \
|
||||
/usr/include/bits/posix_opt.h /usr/include/bits/environments.h \
|
||||
/usr/include/bits/confname.h /usr/include/getopt.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h \
|
||||
/usr/include/c++/4.5/initializer_list \
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc \
|
||||
/usr/include/c++/4.5/stdexcept /usr/include/c++/4.5/algorithm \
|
||||
/usr/include/c++/4.5/utility /usr/include/c++/4.5/bits/stl_relops.h \
|
||||
/usr/include/c++/4.5/bits/stl_algo.h /usr/include/c++/4.5/cstdlib \
|
||||
/usr/include/stdlib.h /usr/include/bits/waitflags.h \
|
||||
/usr/include/bits/waitstatus.h /usr/include/sys/types.h \
|
||||
/usr/include/sys/select.h /usr/include/bits/select.h \
|
||||
/usr/include/sys/sysmacros.h /usr/include/alloca.h \
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h \
|
||||
/usr/include/c++/4.5/bits/stl_heap.h \
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h \
|
||||
/usr/include/c++/4.5/bits/stl_construct.h \
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h \
|
||||
/usr/include/c++/4.5/vector /usr/include/c++/4.5/bits/stl_vector.h \
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h \
|
||||
/usr/include/c++/4.5/bits/vector.tcc /usr/include/c++/4.5/climits \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h \
|
||||
/usr/include/limits.h /usr/include/bits/posix1_lim.h \
|
||||
/usr/include/bits/local_lim.h /usr/include/linux/limits.h \
|
||||
/usr/include/bits/posix2_lim.h /usr/include/bits/xopen_lim.h \
|
||||
/usr/include/bits/stdio_lim.h \
|
||||
/usr/include/boost/dynamic_bitset/config.hpp \
|
||||
/usr/include/boost/config.hpp /usr/include/boost/config/user.hpp \
|
||||
/usr/include/boost/config/select_compiler_config.hpp \
|
||||
/usr/include/boost/config/compiler/gcc.hpp \
|
||||
/usr/include/boost/config/select_stdlib_config.hpp \
|
||||
/usr/include/boost/config/no_tr1/utility.hpp \
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp \
|
||||
/usr/include/boost/config/select_platform_config.hpp \
|
||||
/usr/include/boost/config/platform/linux.hpp \
|
||||
/usr/include/boost/config/posix_features.hpp \
|
||||
/usr/include/boost/config/suffix.hpp \
|
||||
/usr/include/boost/detail/workaround.hpp /usr/include/c++/4.5/locale \
|
||||
/usr/include/c++/4.5/bits/locale_classes.h \
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets.h /usr/include/c++/4.5/cwctype \
|
||||
/usr/include/wctype.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h \
|
||||
/usr/include/c++/4.5/bits/ios_base.h /usr/include/c++/4.5/streambuf \
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc \
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h \
|
||||
/usr/include/c++/4.5/ctime \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h \
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h \
|
||||
/usr/include/libintl.h /usr/include/c++/4.5/bits/codecvt.h \
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc \
|
||||
/usr/include/c++/4.5/istream /usr/include/c++/4.5/ios \
|
||||
/usr/include/c++/4.5/bits/basic_ios.h \
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc /usr/include/c++/4.5/ostream \
|
||||
/usr/include/c++/4.5/bits/ostream.tcc \
|
||||
/usr/include/c++/4.5/bits/istream.tcc \
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp /usr/include/c++/4.5/memory \
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h \
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h \
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp \
|
||||
/usr/include/boost/detail/iterator.hpp /usr/include/c++/4.5/iterator \
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h \
|
||||
/usr/include/boost/static_assert.hpp /usr/include/boost/limits.hpp \
|
||||
/usr/include/c++/4.5/limits /usr/include/boost/pending/lowest_bit.hpp \
|
||||
/usr/include/boost/pending/integer_log2.hpp \
|
||||
/usr/include/boost/shared_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp \
|
||||
/usr/include/boost/config/no_tr1/memory.hpp \
|
||||
/usr/include/boost/assert.hpp /usr/include/boost/checked_delete.hpp \
|
||||
/usr/include/boost/throw_exception.hpp \
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp \
|
||||
/usr/include/boost/exception/exception.hpp \
|
||||
/usr/include/boost/current_function.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp \
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp /usr/include/c++/4.5/typeinfo \
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
|
||||
/usr/include/c++/4.5/functional \
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp \
|
||||
/usr/include/boost/memory_order.hpp \
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp \
|
||||
/usr/include/boost/scoped_ptr.hpp \
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp \
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h \
|
||||
/usr/include/stdint.h \
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_ttrack_position.h:
|
||||
|
||||
/usr/include/c++/4.5/cassert:
|
||||
|
||||
/usr/include/assert.h:
|
||||
|
||||
/usr/include/features.h:
|
||||
|
||||
/usr/include/sys/cdefs.h:
|
||||
|
||||
/usr/include/bits/wordsize.h:
|
||||
|
||||
/usr/include/gnu/stubs.h:
|
||||
|
||||
/usr/include/gnu/stubs-64.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/ug_typedefs.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/c++/4.5/string:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++config.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/os_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/cpu_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stringfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/char_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algobase.h:
|
||||
|
||||
/usr/include/c++/4.5/cstddef:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stddef.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/functexcept.h:
|
||||
|
||||
/usr/include/c++/4.5/exception_defines.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/cpp_type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/type_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/numeric_traits.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_pair.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/move.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/concept_check.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_types.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator_base_funcs.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/debug/debug.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/postypes.h:
|
||||
|
||||
/usr/include/c++/4.5/cwchar:
|
||||
|
||||
/usr/include/wchar.h:
|
||||
|
||||
/usr/include/stdio.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdarg.h:
|
||||
|
||||
/usr/include/bits/wchar.h:
|
||||
|
||||
/usr/include/xlocale.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/new_allocator.h:
|
||||
|
||||
/usr/include/c++/4.5/new:
|
||||
|
||||
/usr/include/c++/4.5/exception:
|
||||
|
||||
/usr/include/c++/4.5/bits/localefwd.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/c++locale.h:
|
||||
|
||||
/usr/include/c++/4.5/clocale:
|
||||
|
||||
/usr/include/locale.h:
|
||||
|
||||
/usr/include/bits/locale.h:
|
||||
|
||||
/usr/include/c++/4.5/iosfwd:
|
||||
|
||||
/usr/include/c++/4.5/cctype:
|
||||
|
||||
/usr/include/ctype.h:
|
||||
|
||||
/usr/include/bits/types.h:
|
||||
|
||||
/usr/include/bits/typesizes.h:
|
||||
|
||||
/usr/include/endian.h:
|
||||
|
||||
/usr/include/bits/endian.h:
|
||||
|
||||
/usr/include/bits/byteswap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream_insert.h:
|
||||
|
||||
/usr/include/c++/4.5/cxxabi-forced.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_function.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/binders.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.h:
|
||||
|
||||
/usr/include/c++/4.5/ext/atomicity.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/gthr-default.h:
|
||||
|
||||
/usr/include/pthread.h:
|
||||
|
||||
/usr/include/sched.h:
|
||||
|
||||
/usr/include/time.h:
|
||||
|
||||
/usr/include/bits/sched.h:
|
||||
|
||||
/usr/include/bits/time.h:
|
||||
|
||||
/usr/include/signal.h:
|
||||
|
||||
/usr/include/bits/sigset.h:
|
||||
|
||||
/usr/include/bits/pthreadtypes.h:
|
||||
|
||||
/usr/include/bits/setjmp.h:
|
||||
|
||||
/usr/include/unistd.h:
|
||||
|
||||
/usr/include/bits/posix_opt.h:
|
||||
|
||||
/usr/include/bits/environments.h:
|
||||
|
||||
/usr/include/bits/confname.h:
|
||||
|
||||
/usr/include/getopt.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/atomic_word.h:
|
||||
|
||||
/usr/include/c++/4.5/initializer_list:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_string.tcc:
|
||||
|
||||
/usr/include/c++/4.5/stdexcept:
|
||||
|
||||
/usr/include/c++/4.5/algorithm:
|
||||
|
||||
/usr/include/c++/4.5/utility:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_relops.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_algo.h:
|
||||
|
||||
/usr/include/c++/4.5/cstdlib:
|
||||
|
||||
/usr/include/stdlib.h:
|
||||
|
||||
/usr/include/bits/waitflags.h:
|
||||
|
||||
/usr/include/bits/waitstatus.h:
|
||||
|
||||
/usr/include/sys/types.h:
|
||||
|
||||
/usr/include/sys/select.h:
|
||||
|
||||
/usr/include/bits/select.h:
|
||||
|
||||
/usr/include/sys/sysmacros.h:
|
||||
|
||||
/usr/include/alloca.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/algorithmfwd.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_heap.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_tempbuf.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_construct.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_uninitialized.h:
|
||||
|
||||
/usr/include/c++/4.5/vector:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_vector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_bvector.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/vector.tcc:
|
||||
|
||||
/usr/include/c++/4.5/climits:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/limits.h:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include-fixed/syslimits.h:
|
||||
|
||||
/usr/include/limits.h:
|
||||
|
||||
/usr/include/bits/posix1_lim.h:
|
||||
|
||||
/usr/include/bits/local_lim.h:
|
||||
|
||||
/usr/include/linux/limits.h:
|
||||
|
||||
/usr/include/bits/posix2_lim.h:
|
||||
|
||||
/usr/include/bits/xopen_lim.h:
|
||||
|
||||
/usr/include/bits/stdio_lim.h:
|
||||
|
||||
/usr/include/boost/dynamic_bitset/config.hpp:
|
||||
|
||||
/usr/include/boost/config.hpp:
|
||||
|
||||
/usr/include/boost/config/user.hpp:
|
||||
|
||||
/usr/include/boost/config/select_compiler_config.hpp:
|
||||
|
||||
/usr/include/boost/config/compiler/gcc.hpp:
|
||||
|
||||
/usr/include/boost/config/select_stdlib_config.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/utility.hpp:
|
||||
|
||||
/usr/include/boost/config/stdlib/libstdcpp3.hpp:
|
||||
|
||||
/usr/include/boost/config/select_platform_config.hpp:
|
||||
|
||||
/usr/include/boost/config/platform/linux.hpp:
|
||||
|
||||
/usr/include/boost/config/posix_features.hpp:
|
||||
|
||||
/usr/include/boost/config/suffix.hpp:
|
||||
|
||||
/usr/include/boost/detail/workaround.hpp:
|
||||
|
||||
/usr/include/c++/4.5/locale:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_classes.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.h:
|
||||
|
||||
/usr/include/c++/4.5/cwctype:
|
||||
|
||||
/usr/include/wctype.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_base.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/ios_base.h:
|
||||
|
||||
/usr/include/c++/4.5/streambuf:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/streambuf_iterator.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/ctype_inline.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.h:
|
||||
|
||||
/usr/include/c++/4.5/ctime:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/time_members.h:
|
||||
|
||||
/usr/include/c++/4.5/x86_64-suse-linux/bits/messages_members.h:
|
||||
|
||||
/usr/include/libintl.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/codecvt.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/locale_facets_nonio.tcc:
|
||||
|
||||
/usr/include/c++/4.5/istream:
|
||||
|
||||
/usr/include/c++/4.5/ios:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.h:
|
||||
|
||||
/usr/include/c++/4.5/bits/basic_ios.tcc:
|
||||
|
||||
/usr/include/c++/4.5/ostream:
|
||||
|
||||
/usr/include/c++/4.5/bits/ostream.tcc:
|
||||
|
||||
/usr/include/c++/4.5/bits/istream.tcc:
|
||||
|
||||
/usr/include/boost/dynamic_bitset_fwd.hpp:
|
||||
|
||||
/usr/include/c++/4.5/memory:
|
||||
|
||||
/usr/include/c++/4.5/bits/stl_raw_storage_iter.h:
|
||||
|
||||
/usr/include/c++/4.5/backward/auto_ptr.h:
|
||||
|
||||
/usr/include/boost/detail/dynamic_bitset.hpp:
|
||||
|
||||
/usr/include/boost/detail/iterator.hpp:
|
||||
|
||||
/usr/include/c++/4.5/iterator:
|
||||
|
||||
/usr/include/c++/4.5/bits/stream_iterator.h:
|
||||
|
||||
/usr/include/boost/static_assert.hpp:
|
||||
|
||||
/usr/include/boost/limits.hpp:
|
||||
|
||||
/usr/include/c++/4.5/limits:
|
||||
|
||||
/usr/include/boost/pending/lowest_bit.hpp:
|
||||
|
||||
/usr/include/boost/pending/integer_log2.hpp:
|
||||
|
||||
/usr/include/boost/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/shared_ptr.hpp:
|
||||
|
||||
/usr/include/boost/config/no_tr1/memory.hpp:
|
||||
|
||||
/usr/include/boost/assert.hpp:
|
||||
|
||||
/usr/include/boost/checked_delete.hpp:
|
||||
|
||||
/usr/include/boost/throw_exception.hpp:
|
||||
|
||||
/usr/include/boost/exception/detail/attribute_noreturn.hpp:
|
||||
|
||||
/usr/include/boost/exception/exception.hpp:
|
||||
|
||||
/usr/include/boost/current_function.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/shared_count.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/bad_weak_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_has_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp:
|
||||
|
||||
/usr/include/boost/detail/sp_typeinfo.hpp:
|
||||
|
||||
/usr/include/c++/4.5/typeinfo:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_counted_impl.hpp:
|
||||
|
||||
/usr/include/c++/4.5/functional:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/sp_convertible.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_pool.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/spinlock_sync.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/yield_k.hpp:
|
||||
|
||||
/usr/include/boost/memory_order.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/detail/operator_bool.hpp:
|
||||
|
||||
/usr/include/boost/scoped_ptr.hpp:
|
||||
|
||||
/usr/include/boost/smart_ptr/scoped_ptr.hpp:
|
||||
|
||||
/usr/lib64/gcc/x86_64-suse-linux/4.5/include/stdint.h:
|
||||
|
||||
/usr/include/stdint.h:
|
||||
|
||||
/mnt/thor7/germann/code/moses/master/mosesdecoder/moses/mm/tpt_typedefs.h:
|
187
moses/mm/custom-pt.cc
Normal file
187
moses/mm/custom-pt.cc
Normal file
@ -0,0 +1,187 @@
|
||||
// build a phrase table for the given input
|
||||
// #include "ug_lexical_phrase_scorer2.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <iomanip>
|
||||
#include <algorithm>
|
||||
|
||||
#include "moses/generic/sorting/VectorIndexSorter.h"
|
||||
#include "moses/generic/sampling/Sampling.h"
|
||||
#include "moses/generic/file_io/ug_stream.h"
|
||||
|
||||
#include <boost/math/distributions/binomial.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "ug_mm_tsa.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_corpus_token.h"
|
||||
#include "ug_typedefs.h"
|
||||
#include "tpt_pickler.h"
|
||||
#include "ug_bitext.h"
|
||||
#include "ug_lexical_phrase_scorer2.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace ugdiss;
|
||||
using namespace Moses;
|
||||
using namespace Moses::bitext;
|
||||
|
||||
// NOTE(review): not referenced in the visible part of this file; purpose
// unconfirmed.
#define CACHING_THRESHOLD 1000
|
||||
// Shorthand for Boost.Math's binomial_distribution<>::find_lower_bound_on_p.
#define lbop boost::math::binomial_distribution<>::find_lower_bound_on_p
|
||||
// Global counters, zero-initialised.
// NOTE(review): not touched in the visible part of this file; meaning unconfirmed.
size_t mctr=0,xctr=0;
|
||||
|
||||
// Token type used for the corpus tracks and suffix arrays in this program.
typedef L2R_Token<SimpleWordId> Token;
|
||||
// Bitext specialised for the token type above.
typedef mmBitext<Token> mmbitext;
|
||||
// The single bitext instance used by this program; opened in main() and
// passed to the scoring functors below.
mmbitext bt;
|
||||
|
||||
|
||||
// NOTE(review): presumably a smoothing parameter for the lower-bound
// estimates; main() passes the literal .05 to the scorers' init() instead,
// so confirm whether this variable is still used anywhere.
float lbsmooth = .005;
|
||||
|
||||
|
||||
// Feature-scoring functors. main() calls init() on each of them in this
// order, and nbest_phrasepairs() applies them in this order to every
// candidate phrase pair.
PScorePfwd<Token> calc_pfwd;
|
||||
PScorePbwd<Token> calc_pbwd;
|
||||
PScoreLex<Token> calc_lex;
|
||||
PScoreWP<Token> apply_wp;
|
||||
// Weights handed to PhrasePair::eval(); sized and filled in main().
vector<float> fweights;
|
||||
|
||||
void
|
||||
nbest_phrasepairs(uint64_t const pid1,
|
||||
pstats const& ps,
|
||||
vector<PhrasePair> & nbest)
|
||||
{
|
||||
boost::unordered_map<uint64_t,jstats>::const_iterator m;
|
||||
vector<size_t> idx(nbest.size());
|
||||
size_t i=0;
|
||||
for (m = ps.trg.begin();
|
||||
m != ps.trg.end() && i < nbest.size();
|
||||
++m)
|
||||
{
|
||||
// cout << m->second.rcnt() << " " << ps.good << endl;
|
||||
if ((m->second.rcnt() < 3) && (m->second.rcnt() * 100 < ps.good))
|
||||
continue;
|
||||
nbest[i].init(pid1,ps,5);
|
||||
nbest[i].update(m->first,m->second);
|
||||
calc_pfwd(bt, nbest[i]);
|
||||
calc_pbwd(bt, nbest[i]);
|
||||
calc_lex(bt, nbest[i]);
|
||||
apply_wp(bt, nbest[i]);
|
||||
nbest[i].eval(fweights);
|
||||
idx[i] = i;
|
||||
++i;
|
||||
}
|
||||
// cout << i << " " << nbest.size() << endl;
|
||||
if (i < nbest.size())
|
||||
{
|
||||
// cout << "Resizing from " << nbest.size() << " to " << i << endl;
|
||||
nbest.resize(i);
|
||||
idx.resize(i);
|
||||
}
|
||||
VectorIndexSorter<PhrasePair> sorter(nbest,greater<PhrasePair>());
|
||||
if (m != ps.trg.end())
|
||||
{
|
||||
make_heap(idx.begin(),idx.end(),sorter);
|
||||
PhrasePair cand;
|
||||
cand.init(pid1,ps,5);
|
||||
for (; m != ps.trg.end(); ++m)
|
||||
{
|
||||
if ((m->second.rcnt() < 3) && (m->second.rcnt() * 100 < ps.good))
|
||||
continue;
|
||||
cand.update(m->first,m->second);
|
||||
calc_pfwd(bt, cand);
|
||||
calc_pbwd(bt, cand);
|
||||
calc_lex(bt, cand);
|
||||
apply_wp(bt, cand);
|
||||
cand.eval(fweights);
|
||||
if (cand < nbest[idx[0]]) continue;
|
||||
pop_heap(idx.begin(),idx.end(),sorter);
|
||||
nbest[idx.back()] = cand;
|
||||
push_heap(idx.begin(),idx.end(),sorter);
|
||||
}
|
||||
}
|
||||
sort(nbest.begin(),nbest.end(),greater<PhrasePair>());
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
// assert(argc == 4);
|
||||
#if 0
|
||||
string base = argv[1];
|
||||
string L1 = argv[2];
|
||||
string L2 = argv[3];
|
||||
size_t max_samples = argc > 4 ? atoi(argv[4]) : 0;
|
||||
#else
|
||||
string base = "/fs/syn5/germann/exp/sapt/crp/trn/mm/";
|
||||
string L1 = "de";
|
||||
string L2 = "en";
|
||||
size_t max_samples = argc > 1 ? atoi(argv[1]) : 1000;
|
||||
#endif
|
||||
char c = *base.rbegin();
|
||||
if (c != '/' && c != '.')
|
||||
base += ".";
|
||||
|
||||
fweights.resize(5,.25);
|
||||
fweights[0] = 1;
|
||||
bt.open(base,L1,L2);
|
||||
bt.setDefaultSampleSize(max_samples);
|
||||
|
||||
size_t i;
|
||||
i = calc_pfwd.init(0,.05);
|
||||
i = calc_pbwd.init(i,.05);
|
||||
i = calc_lex.init(i,base+L1+"-"+L2+".lex");
|
||||
i = apply_wp.init(i);
|
||||
|
||||
string line;
|
||||
while (getline(cin,line))
|
||||
{
|
||||
vector<id_type> snt;
|
||||
bt.V1->fillIdSeq(line,snt);
|
||||
for (size_t i = 0; i < snt.size(); ++i)
|
||||
{
|
||||
TSA<Token>::tree_iterator m(bt.I1.get());
|
||||
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
|
||||
bt.prep(m);
|
||||
}
|
||||
// continue;
|
||||
for (size_t i = 0; i < snt.size(); ++i)
|
||||
{
|
||||
TSA<Token>::tree_iterator m(bt.I1.get());
|
||||
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
|
||||
{
|
||||
uint64_t spid = m.getPid();
|
||||
sptr<pstats> s = bt.lookup(m);
|
||||
for (size_t j = i; j <= k; ++j)
|
||||
cout << (*bt.V1)[snt[j]] << " ";
|
||||
cout << s->good << "/"
|
||||
<< s->sample_cnt << "/"
|
||||
<< s->raw_cnt << endl;
|
||||
// vector<PhrasePair> nbest(min(s->trg.size(),size_t(20)));
|
||||
vector<PhrasePair> nbest(s->trg.size());
|
||||
nbest_phrasepairs(spid, *s, nbest);
|
||||
BOOST_FOREACH(PhrasePair const& pp, nbest)
|
||||
{
|
||||
uint32_t sid,off,len;
|
||||
parse_pid(pp.p2,sid,off,len);
|
||||
uint32_t stop = off + len;
|
||||
// cout << sid << " " << off << " " << len << endl;
|
||||
Token const* o = bt.T2->sntStart(sid);
|
||||
cout << " " << setw(6) << pp.score << " ";
|
||||
for (uint32_t i = off; i < stop; ++i)
|
||||
cout << (*bt.V2)[o[i].id()] << " ";
|
||||
cout << pp.joint << "/"
|
||||
<< pp.raw1 << "/"
|
||||
<< pp.raw2 << " |";
|
||||
BOOST_FOREACH(float f, pp.fvals)
|
||||
cout << " " << f;
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
exit(0);
|
||||
}
|
||||
|
192
moses/mm/mmlex-build.cc
Normal file
192
moses/mm/mmlex-build.cc
Normal file
@ -0,0 +1,192 @@
|
||||
// -*- c++ -*-
|
||||
// Program to extract word cooccurrence counts from a memory-mapped word-aligned bitext
|
||||
// stores the counts lexicon in the format for mm2dTable<uint32_t> (ug_mm_2d_table.h)
|
||||
// (c) 2010-2012 Ulrich Germann
|
||||
|
||||
#include <queue>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <iterator>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/math/distributions/binomial.hpp>
|
||||
|
||||
#include "moses/generic/program_options/ug_get_options.h"
|
||||
// #include "ug_translation_finder.h"
|
||||
// #include "ug_sorters.h"
|
||||
// #include "ug_corpus_sampling.h"
|
||||
#include "ug_mm_2d_table.h"
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "ug_corpus_token.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace ugdiss;
|
||||
using namespace boost::math;
|
||||
|
||||
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> LEX_t;
|
||||
typedef SimpleWordId Token;
|
||||
|
||||
vector<uint32_t> m1; // marginals L1
|
||||
vector<uint32_t> m2; // marginals L2
|
||||
|
||||
id_type first_rare_id=500;
|
||||
vector<vector<uint32_t> > JFREQ; // joint count table for frequent L1 words
|
||||
vector<map<id_type,uint32_t> > JRARE; // joint count table for rare L1 words
|
||||
|
||||
mmTtrack<Token> T1,T2;
|
||||
mmTtrack<char> Tx;
|
||||
TokenIndex V1,V2;
|
||||
|
||||
string bname,cfgFile,L1,L2,oname;
|
||||
|
||||
// DECLARATIONS
|
||||
void interpret_args(int ac, char* av[]);
|
||||
|
||||
void
|
||||
processSentence(id_type sid)
|
||||
{
|
||||
Token const* s1 = T1.sntStart(sid);
|
||||
Token const* s2 = T2.sntStart(sid);
|
||||
char const* p = Tx.sntStart(sid);
|
||||
char const* q = Tx.sntEnd(sid);
|
||||
ushort r,c;
|
||||
bitvector check1(T1.sntLen(sid)), check2(T2.sntLen(sid));
|
||||
check1.set();
|
||||
check2.set();
|
||||
|
||||
// count links
|
||||
while (p < q)
|
||||
{
|
||||
p = binread(p,r);
|
||||
p = binread(p,c);
|
||||
check1.reset(r);
|
||||
check2.reset(c);
|
||||
id_type id1 = (s1+r)->id();
|
||||
if (id1 < first_rare_id) JFREQ[id1][(s2+c)->id()]++;
|
||||
else JRARE[id1][(s2+c)->id()]++;
|
||||
}
|
||||
|
||||
// count unaliged words
|
||||
for (size_t i = check1.find_first(); i < check1.size(); i = check1.find_next(i))
|
||||
{
|
||||
id_type id1 = (s1+i)->id();
|
||||
if (id1 < first_rare_id) JFREQ[id1][0]++;
|
||||
else JRARE[id1][0]++;
|
||||
}
|
||||
for (size_t i = check2.find_first(); i < check2.size(); i = check2.find_next(i))
|
||||
JFREQ[0][(s2+i)->id()]++;
|
||||
}
|
||||
|
||||
void
|
||||
makeTable(string ofname)
|
||||
{
|
||||
ofstream out(ofname.c_str());
|
||||
filepos_type idxOffset=0;
|
||||
m1.resize(max(first_rare_id,V1.getNumTokens()),0);
|
||||
m2.resize(V2.getNumTokens(),0);
|
||||
JFREQ.resize(first_rare_id,vector<uint32_t>(m2.size(),0));
|
||||
JRARE.resize(m1.size());
|
||||
for (size_t sid = 0; sid < T1.size(); ++sid)
|
||||
processSentence(sid);
|
||||
|
||||
vector<id_type> index(V1.getNumTokens()+1,0);
|
||||
numwrite(out,idxOffset); // blank for the time being
|
||||
numwrite(out,id_type(m1.size()));
|
||||
numwrite(out,id_type(m2.size()));
|
||||
|
||||
id_type cellCount=0;
|
||||
id_type stop = min(first_rare_id,id_type(m1.size()));
|
||||
for (id_type id1 = 0; id1 < stop; ++id1)
|
||||
{
|
||||
index[id1] = cellCount;
|
||||
vector<uint32_t> const& v = JFREQ[id1];
|
||||
for (id_type id2 = 0; id2 < id_type(v.size()); ++id2)
|
||||
{
|
||||
if (!v[id2]) continue;
|
||||
cellCount++;
|
||||
numwrite(out,id2);
|
||||
out.write(reinterpret_cast<char const*>(&v[id2]),sizeof(uint32_t));
|
||||
m1[id1] += v[id2];
|
||||
m2[id2] += v[id2];
|
||||
}
|
||||
}
|
||||
for (id_type id1 = stop; id1 < id_type(m1.size()); ++id1)
|
||||
{
|
||||
index[id1] = cellCount;
|
||||
map<id_type,uint32_t> const& M = JRARE[id1];
|
||||
for (map<id_type,uint32_t>::const_iterator m = M.begin(); m != M.end(); ++m)
|
||||
{
|
||||
if (m->second == 0) continue;
|
||||
cellCount++;
|
||||
numwrite(out,m->first);
|
||||
out.write(reinterpret_cast<char const*>(&m->second),sizeof(float));
|
||||
m1[id1] += m->second;
|
||||
m2[m->first] += m->second;
|
||||
}
|
||||
}
|
||||
index[m1.size()] = cellCount;
|
||||
idxOffset = out.tellp();
|
||||
for (size_t i = 0; i < index.size(); ++i)
|
||||
numwrite(out,index[i]);
|
||||
out.write(reinterpret_cast<char const*>(&m1[0]),m1.size()*sizeof(float));
|
||||
out.write(reinterpret_cast<char const*>(&m2[0]),m2.size()*sizeof(float));
|
||||
|
||||
// re-write the file header
|
||||
out.seekp(0);
|
||||
numwrite(out,idxOffset);
|
||||
out.close();
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char* argv[])
|
||||
{
|
||||
interpret_args(argc,argv);
|
||||
char c = *bname.rbegin();
|
||||
if (c != '/' && c != '.') bname += '.';
|
||||
T1.open(bname+L1+".mct");
|
||||
T2.open(bname+L2+".mct");
|
||||
Tx.open(bname+L1+"-"+L2+".mam");
|
||||
V1.open(bname+L1+".tdx");
|
||||
V2.open(bname+L2+".tdx");
|
||||
makeTable(oname);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void
|
||||
interpret_args(int ac, char* av[])
|
||||
{
|
||||
namespace po=boost::program_options;
|
||||
po::variables_map vm;
|
||||
po::options_description o("Options");
|
||||
po::options_description h("Hidden Options");
|
||||
po::positional_options_description a;
|
||||
|
||||
o.add_options()
|
||||
("help,h", "print this message")
|
||||
("cfg,f", po::value<string>(&cfgFile),"config file")
|
||||
("oname,o", po::value<string>(&oname),"output file name")
|
||||
;
|
||||
|
||||
h.add_options()
|
||||
("bname", po::value<string>(&bname), "base name")
|
||||
("L1", po::value<string>(&L1),"L1 tag")
|
||||
("L2", po::value<string>(&L2),"L2 tag")
|
||||
;
|
||||
a.add("bname",1);
|
||||
a.add("L1",1);
|
||||
a.add("L2",1);
|
||||
get_options(ac,av,h.add(o),a,vm,"cfg");
|
||||
|
||||
if (vm.count("help") || bname.empty() || oname.empty())
|
||||
{
|
||||
cout << "usage:\n\t" << av[0] << " <basename> <L1 tag> <L2 tag> -o <output file>\n" << endl;
|
||||
cout << o << endl;
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
|
||||
|
494
moses/mm/mtt-build.cc
Normal file
494
moses/mm/mtt-build.cc
Normal file
@ -0,0 +1,494 @@
|
||||
// -*- c++ -*-
|
||||
// Converts a corpus in text format (plain text, one sentence per line) or
|
||||
// conll format or treetagger output format (which one is automatically
|
||||
// recognized based on the number of fields per line) into memory-mapped
|
||||
// format. (c) 2007-2013 Ulrich Germann
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/program_options/options_description.hpp>
|
||||
#include <boost/program_options/parsers.hpp>
|
||||
#include <boost/program_options/variables_map.hpp>
|
||||
#include <boost/iostreams/device/mapped_file.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <iomanip>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
|
||||
#include "ug_conll_record.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "tpt_pickler.h"
|
||||
#include "ug_deptree.h"
|
||||
#include "moses/generic/sorting/VectorIndexSorter.h"
|
||||
#include "ug_im_tsa.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace ugdiss;
|
||||
using namespace Moses;
|
||||
namespace po=boost::program_options;
|
||||
|
||||
int with_pfas;
|
||||
int with_dcas;
|
||||
int with_sfas;
|
||||
|
||||
bool incremental = false; // build / grow vocabs automatically
|
||||
bool is_conll = false; // text or conll format?
|
||||
bool quiet = false; // no progress reporting
|
||||
|
||||
string vocabBase; // base name for existing vocabs that should be used
|
||||
string baseName; // base name for all files
|
||||
string tmpFile, mttFile; /* name of temporary / actual track file
|
||||
* (.mtt for Conll format, .mct for plain text)
|
||||
*/
|
||||
string UNK;
|
||||
|
||||
TokenIndex SF; // surface form
|
||||
TokenIndex LM; // lemma
|
||||
TokenIndex PS; // part of speech
|
||||
TokenIndex DT; // dependency type
|
||||
|
||||
void interpret_args(int ac, char* av[]);
|
||||
|
||||
// Returns p when it is below limit, and 1 otherwise.
inline uchar rangeCheck(int p, int limit)
{
  if (p < limit) return p;
  return 1;
}
|
||||
|
||||
// Looks up w in T and returns its id. When the lookup yields id 1 for a
// word other than the UNK label, a warning is printed and assert(0) fires.
id_type
get_id(TokenIndex const& T, string const& w)
{
  id_type const id = T[w];
  bool const unexpectedUnk = (id == 1) && (w != UNK);
  if (!unexpectedUnk) return id;

  cerr << "Warning! Unkown vocabulary item '" << w << "', but "
       << "incremental mode (-i) is not set." << endl;
  assert(0);
  return id;
}
|
||||
|
||||
void
|
||||
open_vocab(TokenIndex& T, string fname)
|
||||
{
|
||||
if (!access(fname.c_str(), F_OK))
|
||||
{
|
||||
T.open(fname,UNK);
|
||||
assert(T[UNK] == 1);
|
||||
}
|
||||
else T.setUnkLabel(UNK);
|
||||
if (incremental) T.setDynamic(true);
|
||||
assert(T["NULL"] == 0);
|
||||
assert(T[UNK] == 1);
|
||||
}
|
||||
|
||||
void
|
||||
ini_cnt_vec(TokenIndex const& T, vector<pair<string,size_t> > & v)
|
||||
{
|
||||
v.resize(T.totalVocabSize());
|
||||
for (size_t i = 0; i < T.totalVocabSize(); ++i)
|
||||
{
|
||||
v[i].first = T[i];
|
||||
v[i].second = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
write_tokenindex(string fname, TokenIndex& T, vector<id_type> const& n2o)
|
||||
{
|
||||
if (!quiet) cerr << "Writing " << fname << endl;
|
||||
vector<id_type> o2n(n2o.size());
|
||||
for (id_type i = 0; i < n2o.size(); ++i) o2n[n2o[i]] = i;
|
||||
vector<pair<string,uint32_t> > v(n2o.size());
|
||||
for (id_type i = 0; i < n2o.size(); ++i)
|
||||
{
|
||||
v[i].first = T[n2o[i]];
|
||||
v[i].second = i;
|
||||
}
|
||||
T.close();
|
||||
sort(v.begin(),v.end());
|
||||
write_tokenindex_to_disk(v, fname, UNK);
|
||||
}
|
||||
|
||||
void init(int argc, char* argv[])
|
||||
{
|
||||
interpret_args(argc,argv);
|
||||
if (is_conll)
|
||||
{
|
||||
open_vocab(SF, vocabBase+".tdx.sfo"); // surface form
|
||||
open_vocab(LM, vocabBase+".tdx.lem"); // lemma
|
||||
open_vocab(PS, vocabBase+".tdx.pos"); // part-of-speech
|
||||
open_vocab(DT, vocabBase+".tdx.drl"); // dependency type
|
||||
}
|
||||
else open_vocab(SF, vocabBase+".tdx"); // surface form
|
||||
}
|
||||
|
||||
// Fills rec from the whitespace-separated fields w of one input line.
// Exactly 3 fields are read as treetagger output (form, tag, lemma); 8 or
// more fields are read as CoNLL columns. For any other field count rec is
// left untouched.
void fill_rec(Conll_Record& rec, vector<string> const& w)
{
  size_t const numFields = w.size();

  if (numFields == 3) // treetagger output
    {
      rec.sform = get_id(SF, w[0]);
      // Fall back to the surface form when the tagger reports no lemma.
      if (w[2] == "<UNKNOWN>") rec.lemma = get_id(LM, w[0]);
      else rec.lemma = get_id(LM, w[2]);
      rec.majpos = rangeCheck(get_id(PS, w[1]), 256);
      rec.minpos = rangeCheck(get_id(PS, w[1]), 256);
      rec.dtype = 0;
      rec.parent = -1;
      return;
    }

  if (numFields >= 8) // CONLL format
    {
      int const id = atoi(w[0].c_str());
      int const gov = atoi(w[6].c_str());
      rec.sform = get_id(SF, w[1]);
      rec.lemma = get_id(LM, w[2]);
      rec.majpos = rangeCheck(get_id(PS, w[3]), 256);
      rec.minpos = rangeCheck(get_id(PS, w[4]), 256);
      rec.dtype = get_id(DT, w[7]);
      // The parent is stored as an offset relative to this token; a
      // governor of 0 is stored as offset 0.
      if (gov) rec.parent = gov - id;
      else rec.parent = 0;
    }
}
|
||||
|
||||
// Progress display on stderr: every 100000 sentences a new line with the
// running count (in thousands) is started, and a dot is printed for every
// other multiple of 10000.
void log_progress(size_t ctr)
{
  if (ctr % 100000 == 0)
    {
      // No leading line break before the very first status line.
      if (ctr != 0) std::cerr << std::endl;
      std::cerr << std::setw(12) << ctr / 1000 << "K sentences processed ";
      return;
    }
  if (ctr % 10000 == 0)
    std::cerr << ".";
}
|
||||
|
||||
|
||||
size_t
|
||||
process_plain_input(ostream& out, vector<id_type> & s_index)
|
||||
{
|
||||
id_type totalWords = 0;
|
||||
string line,w;
|
||||
while (getline(cin,line))
|
||||
{
|
||||
istringstream buf(line);
|
||||
if (!quiet) log_progress(s_index.size());
|
||||
s_index.push_back(totalWords);
|
||||
while (buf>>w)
|
||||
{
|
||||
numwrite(out,get_id(SF,w));
|
||||
++totalWords;
|
||||
}
|
||||
}
|
||||
s_index.push_back(totalWords);
|
||||
return totalWords;
|
||||
}
|
||||
|
||||
size_t
|
||||
process_tagged_input(ostream& out,
|
||||
vector<id_type> & s_index,
|
||||
vector<id_type> & p_index)
|
||||
{
|
||||
string line;
|
||||
Conll_Record rec;
|
||||
bool new_sent = true;
|
||||
bool new_par = true;
|
||||
id_type totalWords = 0;
|
||||
|
||||
while (getline(cin,line))
|
||||
{
|
||||
vector<string> w; string f; istringstream buf(line);
|
||||
while (buf>>f) w.push_back(f);
|
||||
|
||||
if (w.size() == 0 || (w[0].size() >= 4 && w[0].substr(0,4) == "SID="))
|
||||
new_sent = true;
|
||||
|
||||
else if (w.size() == 1 && w[0] == "<P>")
|
||||
new_par = new_sent = true;
|
||||
|
||||
if (w.size() < 3) continue;
|
||||
if (!quiet && new_sent) log_progress(s_index.size());
|
||||
if (new_sent) { s_index.push_back(totalWords); new_sent = false; }
|
||||
if (new_par) { p_index.push_back(totalWords); new_par = false; }
|
||||
fill_rec(rec,w);
|
||||
out.write(reinterpret_cast<char const*>(&rec),sizeof(rec));
|
||||
++totalWords;
|
||||
}
|
||||
s_index.push_back(totalWords);
|
||||
return totalWords;
|
||||
}
|
||||
|
||||
size_t
|
||||
numberize()
|
||||
{
|
||||
ofstream out(tmpFile.c_str());
|
||||
filepos_type startIdx=0;
|
||||
id_type idxSize=0,totalWords=0;
|
||||
numwrite(out,startIdx); // place holder, to be filled at the end
|
||||
numwrite(out,idxSize); // place holder, to be filled at the end
|
||||
numwrite(out,totalWords); // place holder, to be filled at the end
|
||||
|
||||
vector<id_type> s_index, p_index;
|
||||
|
||||
if(is_conll)
|
||||
totalWords = process_tagged_input(out,s_index,p_index);
|
||||
else
|
||||
totalWords = process_plain_input(out,s_index);
|
||||
|
||||
vector<id_type> const* index = &s_index;
|
||||
if (p_index.size() && p_index.back())
|
||||
{
|
||||
p_index.push_back(totalWords);
|
||||
index = &p_index;
|
||||
}
|
||||
|
||||
if (!quiet)
|
||||
cerr << endl << "Writing index ... (" << index->size() << " chunks) ";
|
||||
|
||||
startIdx = out.tellp();
|
||||
for (size_t i = 0; i < index->size(); i++) numwrite(out,(*index)[i]);
|
||||
out.seekp(0);
|
||||
idxSize = index->size();
|
||||
numwrite(out, startIdx);
|
||||
numwrite(out, idxSize - 1);
|
||||
numwrite(out, totalWords);
|
||||
out.close();
|
||||
if (!quiet) cerr << "done" << endl;
|
||||
return totalWords;
|
||||
}
|
||||
|
||||
vector<id_type> smap,lmap,pmap,dmap;
|
||||
|
||||
void
|
||||
invert(vector<id_type> const& from, vector<id_type> & to)
|
||||
{
|
||||
to.resize(from.size());
|
||||
for (size_t i = 0 ; i < to.size(); ++i)
|
||||
to[from[i]] = i;
|
||||
}
|
||||
|
||||
// sorts new items based on occurrence counts but won't reassign
|
||||
// existing token ids
|
||||
void
|
||||
conservative_sort(TokenIndex const & V,
|
||||
vector<size_t> const & cnt,
|
||||
vector<id_type> & xmap)
|
||||
{
|
||||
xmap.resize(V.totalVocabSize());
|
||||
for (size_t i = 0; i < xmap.size(); ++i) xmap[i] = i;
|
||||
VectorIndexSorter<size_t,greater<size_t>, id_type> sorter(cnt);
|
||||
sort(xmap.begin()+max(id_type(2),V.knownVocabSize()), xmap.end(), sorter);
|
||||
}
|
||||
|
||||
// reassign token ids in the corpus track based on the id map created by
|
||||
// conservative_sort
|
||||
void remap()
|
||||
{
|
||||
if (!quiet) cerr << "Remapping ids ... ";
|
||||
filepos_type idxOffset;
|
||||
id_type totalWords, idxSize;
|
||||
boost::iostreams::mapped_file mtt(tmpFile);
|
||||
char const* p = mtt.data();
|
||||
p = numread(p,idxOffset);
|
||||
p = numread(p,idxSize);
|
||||
p = numread(p,totalWords);
|
||||
if (is_conll)
|
||||
{
|
||||
vector<size_t> sf(SF.totalVocabSize(), 0);
|
||||
vector<size_t> lm(LM.totalVocabSize(), 0);
|
||||
vector<size_t> ps(PS.totalVocabSize(), 0);
|
||||
vector<size_t> dt(DT.totalVocabSize(), 0);
|
||||
Conll_Record* w = reinterpret_cast<Conll_Record*>(const_cast<char*>(p));
|
||||
for (size_t i = 0; i < totalWords; ++i)
|
||||
{
|
||||
++sf.at(w[i].sform);
|
||||
++lm.at(w[i].lemma);
|
||||
++ps.at(w[i].majpos);
|
||||
++ps.at(w[i].minpos);
|
||||
++dt.at(w[i].dtype);
|
||||
}
|
||||
conservative_sort(SF,sf,smap);
|
||||
conservative_sort(LM,lm,lmap);
|
||||
conservative_sort(PS,ps,pmap);
|
||||
conservative_sort(DT,dt,dmap);
|
||||
vector<id_type> smap_i(smap.size()); invert(smap,smap_i);
|
||||
vector<id_type> lmap_i(lmap.size()); invert(lmap,lmap_i);
|
||||
vector<id_type> pmap_i(pmap.size()); invert(pmap,pmap_i);
|
||||
vector<id_type> dmap_i(dmap.size()); invert(dmap,dmap_i);
|
||||
for (size_t i = 0; i < totalWords; ++i)
|
||||
{
|
||||
w[i].sform = smap_i[w[i].sform];
|
||||
w[i].lemma = lmap_i[w[i].lemma];
|
||||
w[i].majpos = pmap_i[w[i].majpos];
|
||||
w[i].minpos = pmap_i[w[i].minpos];
|
||||
w[i].dtype = dmap_i[w[i].dtype];
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
vector<size_t> sf(SF.totalVocabSize(), 0);
|
||||
id_type* w = reinterpret_cast<id_type*>(const_cast<char*>(p));
|
||||
for (size_t i = 0; i < totalWords; ++i) ++sf.at(w[i]);
|
||||
conservative_sort(SF,sf,smap);
|
||||
vector<id_type> smap_i(smap.size()); invert(smap,smap_i);
|
||||
for (size_t i = 0; i < totalWords; ++i) w[i] = smap_i[w[i]];
|
||||
}
|
||||
mtt.close();
|
||||
if (!quiet) cerr << "done." << endl;
|
||||
}
|
||||
|
||||
void save_vocabs()
|
||||
{
|
||||
string vbase = baseName;
|
||||
if (is_conll)
|
||||
{
|
||||
if (SF.totalVocabSize() > SF.knownVocabSize())
|
||||
write_tokenindex(vbase+".tdx.sfo",SF,smap);
|
||||
if (LM.totalVocabSize() > LM.knownVocabSize())
|
||||
write_tokenindex(vbase+".tdx.lem",LM,lmap);
|
||||
if (PS.totalVocabSize() > PS.knownVocabSize())
|
||||
write_tokenindex(vbase+".tdx.pos",PS,pmap);
|
||||
if (DT.totalVocabSize() > DT.knownVocabSize())
|
||||
write_tokenindex(vbase+".tdx.drl",DT,dmap);
|
||||
}
|
||||
else if (SF.totalVocabSize() > SF.knownVocabSize())
|
||||
write_tokenindex(vbase+".tdx",SF,smap);
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
size_t
|
||||
build_mmTSA(string infile, string outfile)
|
||||
{
|
||||
size_t mypid = fork();
|
||||
if(mypid) return mypid;
|
||||
mmTtrack<Token> T(infile);
|
||||
bdBitset filter;
|
||||
filter.resize(T.size(),true);
|
||||
imTSA<Token> S(&T,filter,(quiet?NULL:&cerr));
|
||||
S.save_as_mm_tsa(outfile);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
bool
|
||||
build_plaintext_tsas()
|
||||
{
|
||||
typedef L2R_Token<SimpleWordId> L2R;
|
||||
typedef R2L_Token<SimpleWordId> R2L;
|
||||
size_t c = with_sfas + with_pfas;
|
||||
if (with_sfas) build_mmTSA<L2R>(tmpFile, baseName + ".sfa");
|
||||
if (with_pfas) build_mmTSA<R2L>(tmpFile, baseName + ".pfa");
|
||||
while (c--) wait(NULL);
|
||||
return true;
|
||||
}
|
||||
|
||||
void build_conll_tsas()
|
||||
{
|
||||
string bn = baseName;
|
||||
string mtt = tmpFile;
|
||||
size_t c = 3 * (with_sfas + with_pfas + with_dcas);
|
||||
if (with_sfas)
|
||||
{
|
||||
build_mmTSA<L2R_Token<Conll_Sform> >(mtt,bn+".sfa-sform");
|
||||
build_mmTSA<L2R_Token<Conll_Lemma> >(mtt,bn+".sfa-lemma");
|
||||
build_mmTSA<L2R_Token<Conll_MinPos> >(mtt,bn+".sfa-minpos");
|
||||
}
|
||||
|
||||
if (with_pfas)
|
||||
{
|
||||
build_mmTSA<R2L_Token<Conll_Sform> >(mtt,bn+".pfa-sform");
|
||||
build_mmTSA<R2L_Token<Conll_Lemma> >(mtt,bn+".pfa-lemma");
|
||||
build_mmTSA<R2L_Token<Conll_MinPos> >(mtt,bn+".pfa-minpos");
|
||||
}
|
||||
|
||||
if (with_dcas)
|
||||
{
|
||||
build_mmTSA<ConllBottomUpToken<Conll_Sform> >(mtt,bn+".dca-sform");
|
||||
build_mmTSA<ConllBottomUpToken<Conll_Lemma> >(mtt,bn+".dca-lemma");
|
||||
build_mmTSA<ConllBottomUpToken<Conll_MinPos> >(mtt,bn+".dca-minpos");
|
||||
}
|
||||
while (c--) wait(NULL);
|
||||
}
|
||||
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
init(argc,argv);
|
||||
numberize();
|
||||
if (SF.totalVocabSize() > SF.knownVocabSize() ||
|
||||
LM.totalVocabSize() > LM.knownVocabSize() ||
|
||||
PS.totalVocabSize() > PS.knownVocabSize() ||
|
||||
DT.totalVocabSize() > DT.knownVocabSize())
|
||||
{
|
||||
remap();
|
||||
save_vocabs();
|
||||
}
|
||||
if (is_conll) build_conll_tsas();
|
||||
else build_plaintext_tsas();
|
||||
if (!quiet) cerr << endl;
|
||||
rename(tmpFile.c_str(),mttFile.c_str());
|
||||
}
|
||||
|
||||
void
|
||||
interpret_args(int ac, char* av[])
|
||||
{
|
||||
po::variables_map vm;
|
||||
po::options_description o("Options");
|
||||
o.add_options()
|
||||
|
||||
("help,h", "print this message")
|
||||
|
||||
("quiet,q", po::bool_switch(&quiet),
|
||||
"don't print progress information")
|
||||
|
||||
("incremental,i", po::bool_switch(&incremental),
|
||||
"incremental mode; rewrites vocab files!")
|
||||
|
||||
("vocab-base,v", po::value<string>(&vocabBase),
|
||||
"base name of various vocabularies")
|
||||
|
||||
("output,o", po::value<string>(&baseName),
|
||||
"base file name of the resulting file(s)")
|
||||
|
||||
("sfa,s", po::value<int>(&with_sfas)->default_value(1),
|
||||
"also build suffix arrays")
|
||||
|
||||
("pfa,p", po::value<int>(&with_pfas)
|
||||
->default_value(0)->implicit_value(1),
|
||||
"also build prefix arrays")
|
||||
|
||||
("dca,d", po::value<int>(&with_dcas)
|
||||
->default_value(0)->implicit_value(1),
|
||||
"also build dependency chain arrays")
|
||||
|
||||
("conll,c", po::bool_switch(&is_conll),
|
||||
"corpus is in CoNLL format (default: plain text)")
|
||||
|
||||
("unk,u", po::value<string>(&UNK)->default_value("UNK"),
|
||||
"label for unknown tokens")
|
||||
|
||||
// ("map,m", po::value<string>(&vmap),
|
||||
// "map words to word classes for indexing")
|
||||
|
||||
;
|
||||
|
||||
po::options_description h("Hidden Options");
|
||||
h.add_options()
|
||||
;
|
||||
h.add(o);
|
||||
po::positional_options_description a;
|
||||
a.add("output",1);
|
||||
|
||||
po::store(po::command_line_parser(ac,av)
|
||||
.options(h)
|
||||
.positional(a)
|
||||
.run(),vm);
|
||||
po::notify(vm);
|
||||
if (vm.count("help") || !vm.count("output"))
|
||||
{
|
||||
cout << "\nusage:\n\t cat <corpus> | " << av[0]
|
||||
<< " [options] <output .mtt file>" << endl;
|
||||
cout << o << endl;
|
||||
exit(0);
|
||||
}
|
||||
mttFile = baseName + (is_conll ? ".mtt" : ".mct");
|
||||
tmpFile = mttFile + "_";
|
||||
}
|
156
moses/mm/mtt-dump.cc
Normal file
156
moses/mm/mtt-dump.cc
Normal file
@ -0,0 +1,156 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2008-2010 Ulrich Germann
|
||||
#include <boost/program_options.hpp>
|
||||
#include <iomanip>
|
||||
|
||||
#include "tpt_typedefs.h"
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_deptree.h"
|
||||
#include "ug_corpus_token.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace ugdiss;
|
||||
namespace po = boost::program_options;
|
||||
|
||||
string bname,mtt,mct;
|
||||
vector<string> range;
|
||||
|
||||
typedef L2R_Token<Conll_Sform> Token;
|
||||
|
||||
TokenIndex SF,LM,PS,DT;
|
||||
mmTtrack<Token> MTT;
|
||||
mmTtrack<SimpleWordId> MCT;
|
||||
bool sform;
|
||||
bool have_mtt, have_mct;
|
||||
bool with_sids;
|
||||
|
||||
void
|
||||
interpret_args(int ac, char* av[])
|
||||
{
|
||||
po::variables_map vm;
|
||||
po::options_description o("Options");
|
||||
o.add_options()
|
||||
("help,h", "print this message")
|
||||
("numbers,n", po::bool_switch(&with_sids), "print sentence ids as first token")
|
||||
("sform,s", po::bool_switch(&sform), "sform only")
|
||||
;
|
||||
|
||||
po::options_description h("Hidden Options");
|
||||
h.add_options()
|
||||
("bname", po::value<string>(&bname), "base name")
|
||||
("range", po::value<vector<string> >(&range), "range")
|
||||
;
|
||||
po::positional_options_description a;
|
||||
a.add("bname",1);
|
||||
a.add("range",-1);
|
||||
|
||||
po::store(po::command_line_parser(ac,av)
|
||||
.options(h.add(o))
|
||||
.positional(a)
|
||||
.run(),vm);
|
||||
po::notify(vm); // IMPORTANT
|
||||
if (vm.count("help") || bname.empty())
|
||||
{
|
||||
cout << "usage:\n\t"
|
||||
<< av[0] << " track name [<range>]\n"
|
||||
<< endl;
|
||||
cout << o << endl;
|
||||
exit(0);
|
||||
}
|
||||
mtt = bname+".mtt";
|
||||
mct = bname+".mct";
|
||||
}
|
||||
|
||||
void
|
||||
printRangeMTT(size_t start, size_t stop)
|
||||
{
|
||||
for (;start < stop; start++)
|
||||
{
|
||||
size_t i = 0;
|
||||
Token const* t = MTT.sntStart(start);
|
||||
Token const* e = MTT.sntEnd(start);
|
||||
if (with_sids) cout << start << " ";
|
||||
for (;t < e; ++t)
|
||||
{
|
||||
#if 0
|
||||
uchar const* x = reinterpret_cast<uchar const*>(t);
|
||||
cout << *reinterpret_cast<id_type const*>(x) << " ";
|
||||
cout << *reinterpret_cast<id_type const*>(x+4) << " ";
|
||||
cout << int(*(x+8)) << " ";
|
||||
cout << int(*(x+9)) << " ";
|
||||
cout << *reinterpret_cast<short const*>(x+10) << endl;
|
||||
#endif
|
||||
if (!sform)
|
||||
{
|
||||
cout << setw(2) << right << ++i << " ";
|
||||
cout << setw(30) << right << SF[t->sform] << " ";
|
||||
cout << setw(4) << right << PS[t->majpos] << " ";
|
||||
cout << setw(4) << right << PS[t->minpos] << " ";
|
||||
cout << setw(30) << left << LM[t->lemma] << " ";
|
||||
cout << i+t->parent << " ";
|
||||
cout << DT[t->dtype] << endl;
|
||||
}
|
||||
else cout << SF[t->id()] << " ";
|
||||
}
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
printRangeMCT(size_t start, size_t stop)
|
||||
{
|
||||
for (;start < stop; start++)
|
||||
{
|
||||
SimpleWordId const* t = MCT.sntStart(start);
|
||||
SimpleWordId const* e = MCT.sntEnd(start);
|
||||
if (with_sids) cout << start << " ";
|
||||
while (t < e) cout << SF[(t++)->id()] << " ";
|
||||
cout << endl;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char*argv[])
|
||||
{
|
||||
interpret_args(argc,argv);
|
||||
have_mtt = !access(mtt.c_str(),F_OK);
|
||||
have_mct = !have_mtt && !access(mct.c_str(),F_OK);
|
||||
if (!have_mtt && !have_mct)
|
||||
{
|
||||
cerr << "FATAL ERROR: neither " << mtt << " nor " << mct << " exit." << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (have_mtt)
|
||||
{
|
||||
SF.open(bname+".tdx.sfo"); SF.iniReverseIndex();
|
||||
LM.open(bname+".tdx.lem"); LM.iniReverseIndex();
|
||||
PS.open(bname+".tdx.pos"); PS.iniReverseIndex();
|
||||
DT.open(bname+".tdx.drl"); DT.iniReverseIndex();
|
||||
MTT.open(mtt);
|
||||
}
|
||||
else
|
||||
{
|
||||
sform = true;
|
||||
SF.open(bname+".tdx"); SF.iniReverseIndex();
|
||||
MCT.open(mct);
|
||||
}
|
||||
|
||||
if (!range.size())
|
||||
have_mtt ? printRangeMTT(0, MTT.size()) : printRangeMCT(0, MCT.size());
|
||||
else
|
||||
{
|
||||
for (size_t i = 0; i < range.size(); i++)
|
||||
{
|
||||
istringstream buf(range[i]);
|
||||
size_t first,last; uchar c;
|
||||
buf>>first;
|
||||
if (buf.peek() == '-') buf>>c>>last;
|
||||
else last = first;
|
||||
if (have_mtt && last < MTT.size())
|
||||
printRangeMTT(first,last+1);
|
||||
else if (last < MCT.size())
|
||||
printRangeMCT(first,last+1);
|
||||
}
|
||||
}
|
||||
}
|
77
moses/mm/mtt.count.cc
Normal file
77
moses/mm/mtt.count.cc
Normal file
@ -0,0 +1,77 @@
|
||||
// build a phrase table for the given input
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "ug_mm_tsa.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_corpus_token.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <iomanip>
|
||||
#include "ug_typedefs.h"
|
||||
#include "tpt_pickler.h"
|
||||
#include "moses/generic/sorting/VectorIndexSorter.h"
|
||||
#include "moses/generic/sampling/Sampling.h"
|
||||
#include "moses/generic/file_io/ug_stream.h"
|
||||
#include <algorithm>
|
||||
#include "moses/generic/program_options/ug_get_options.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace ugdiss;
|
||||
using namespace Moses;
|
||||
typedef L2R_Token<SimpleWordId> Token;
|
||||
typedef mmTSA<Token>::tree_iterator iter;
|
||||
typedef boost::unordered_map<pair<size_t,size_t>,size_t> phrase_counter_t;
|
||||
|
||||
#define CACHING_THRESHOLD 1000
|
||||
|
||||
mmTtrack<Token> T; // token tracks
|
||||
TokenIndex V; // vocabs
|
||||
mmTSA<Token> I; // suffix arrays
|
||||
|
||||
void interpret_args(int ac, char* av[]);
|
||||
string bname;
|
||||
bool echo;
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
interpret_args(argc,argv);
|
||||
|
||||
T.open(bname+".mct");
|
||||
V.open(bname+".tdx"); V.iniReverseIndex();
|
||||
I.open(bname+".sfa",&T);
|
||||
string line;
|
||||
while (getline(cin,line))
|
||||
{
|
||||
vector<id_type> phr;
|
||||
V.fillIdSeq(line,phr);
|
||||
TSA<Token>::tree_iterator m(&I);
|
||||
size_t i = 0;
|
||||
while (i < phr.size() && m.extend(phr[i])) ++i;
|
||||
if (echo) cout << line << ": ";
|
||||
if (i < phr.size()) cout << 0 << endl;
|
||||
else cout << m.rawCnt() << endl;
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
|
||||
void
|
||||
interpret_args(int ac, char* av[])
|
||||
{
|
||||
namespace po=boost::program_options;
|
||||
po::variables_map vm;
|
||||
po::options_description o("Options");
|
||||
po::options_description h("Hidden Options");
|
||||
po::positional_options_description a;
|
||||
|
||||
o.add_options()
|
||||
("help,h", "print this message")
|
||||
("echo,e", po::bool_switch(&echo), "repeat lookup phrases")
|
||||
;
|
||||
|
||||
h.add_options()
|
||||
("bname", po::value<string>(&bname), "base name")
|
||||
;
|
||||
a.add("bname",1);
|
||||
get_options(ac,av,h.add(o),a,vm);
|
||||
}
|
66
moses/mm/num_read_write.h
Normal file
66
moses/mm/num_read_write.h
Normal file
@ -0,0 +1,66 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2006,2007,2008 Ulrich Germann
|
||||
#ifndef __num_read_write_hh
|
||||
#define __num_read_write_hh
|
||||
#include <stdint.h>
#include <cstring>
#include <iostream>
#include <endian.h>
#include <byteswap.h>
#include "tpt_typedefs.h"
|
||||
|
||||
namespace ugdiss {
|
||||
|
||||
// Writes x to out as sizeof(uintNumber) raw bytes. On big-endian hosts
// 2-, 4- and 8-byte values are byte-swapped first, so the bytes written are
// the same as on a little-endian host.
template<typename uintNumber>
void
numwrite(std::ostream& out, uintNumber const& x)
{
  uintNumber bytes = x;
#if __BYTE_ORDER == __BIG_ENDIAN
  if (sizeof(uintNumber) == 2) bytes = bswap_16(x);
  else if (sizeof(uintNumber) == 4) bytes = bswap_32(x);
  else if (sizeof(uintNumber) == 8) bytes = bswap_64(x);
#endif
  out.write(reinterpret_cast<char const*>(&bytes),sizeof(uintNumber));
}
|
||||
|
||||
/// Read sizeof(uintNumber) bytes from /in/ into /x/, interpreting them as a
/// little-endian unsigned integer.  On big-endian hosts 2-, 4- and 8-byte
/// values are byte-swapped after reading.  Read errors are reported through
/// the stream state only.
template<typename uintNumber>
void
numread(std::istream& in, uintNumber& x)
{
  char* const raw = reinterpret_cast<char*>(&x);
  in.read(raw,sizeof(x));
#if __BYTE_ORDER == __BIG_ENDIAN
  if      (sizeof(uintNumber) == 2) x = bswap_16(x);
  else if (sizeof(uintNumber) == 4) x = bswap_32(x);
  else if (sizeof(uintNumber) == 8) x = bswap_64(x);
#endif
}
|
||||
|
||||
/// Read a little-endian unsigned integer from the memory at /src/.
/// @param src  start of the encoded value; need NOT be aligned for uintNumber
/// @param x    receives the decoded value
/// @return     pointer to the first byte after the value
template<typename uintNumber>
char const*
numread(char const* src, uintNumber& x)
{
  // Copy byte by byte instead of dereferencing a reinterpret_cast'ed
  // pointer: /src/ usually points into a tightly packed (memory-mapped)
  // buffer, where a direct uintNumber load is an unaligned access and
  // violates the aliasing rules.
  char* const dst = reinterpret_cast<char*>(&x);
  for (unsigned int i = 0; i < sizeof(uintNumber); ++i)
    dst[i] = src[i];
  // ATTENTION: THIS NEEDS TO BE VERIFIED FOR BIG-ENDIAN MACHINES!!!
#if __BYTE_ORDER == __BIG_ENDIAN
  switch (sizeof(uintNumber))
    {
    case 2: x = bswap_16(x); break;
    case 4: x = bswap_32(x); break;
    case 8: x = bswap_64(x); break;
    default: break;
    }
#endif
  return src+sizeof(uintNumber);
}
|
||||
} // end of namespace ugdiss
|
||||
#endif
|
166
moses/mm/obsolete/ug_bitext_base.h
Normal file
166
moses/mm/obsolete/ug_bitext_base.h
Normal file
@ -0,0 +1,166 @@
|
||||
#ifndef __ug_bitext_base_h
|
||||
#define __ug_bitext_base_h
|
||||
// Abstract word-aligned bitext class
|
||||
// Written by Ulrich Germann
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <iomanip>
|
||||
#include <algorithm>
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/thread.hpp>
|
||||
|
||||
#include "moses/generic/sorting/VectorIndexSorter.h"
|
||||
#include "moses/generic/sampling/Sampling.h"
|
||||
#include "moses/generic/file_io/ug_stream.h"
|
||||
|
||||
#include "ug_typedefs.h"
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "ug_mm_tsa.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_corpus_token.h"
|
||||
#include "tpt_pickler.h"
|
||||
|
||||
using namespace ugdiss;
|
||||
using namespace std;
|
||||
namespace Moses {
|
||||
|
||||
typedef L2R_Token<SimpleWordId> Token;
|
||||
typedef mmTSA<Token>::tree_iterator iter;
|
||||
|
||||
class bitext_base
|
||||
{
|
||||
public:
|
||||
typedef mmTSA<Token>::tree_iterator iter;
|
||||
class pstats; // one-sided phrase statistics
|
||||
class jstats; // phrase pair ("joint") statistics
|
||||
class agenda
|
||||
{
|
||||
boost::mutex lock;
|
||||
boost::condition_variable ready;
|
||||
class job;
|
||||
class worker;
|
||||
list<job> joblist;
|
||||
vector<sptr<boost::thread> > workers;
|
||||
bool shutdown;
|
||||
size_t doomed;
|
||||
public:
|
||||
bitext_base const& bitext;
|
||||
agenda(bitext_base const& bitext);
|
||||
~agenda();
|
||||
void add_workers(int n);
|
||||
sptr<pstats> add_job(mmbitext::iter const& phrase,
|
||||
size_t const max_samples);
|
||||
bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
|
||||
bool & fwd, sptr<bitext_base::pstats> & stats);
|
||||
};
|
||||
|
||||
// stores the list of unfinished jobs;
|
||||
// maintains a pool of workers and assigns the jobs to them
|
||||
|
||||
agenda* ag;
|
||||
mmTtrack<char> Tx; // word alignments
|
||||
mmTtrack<Token> T1,T2; // token tracks
|
||||
TokenIndex V1,V2; // vocabs
|
||||
mmTSA<Token> I1,I2; // suffix arrays
|
||||
|
||||
/// given the source phrase sid[start:stop]
|
||||
// find the possible start (s1 .. s2) and end (e1 .. e2)
|
||||
// points of the target phrase; if non-NULL, store word
|
||||
// alignments in *core_alignment. If /flip/, source phrase is
|
||||
// L2.
|
||||
bool
|
||||
find_trg_phr_bounds
|
||||
(size_t const sid, size_t const start, size_t const stop,
|
||||
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
|
||||
vector<uchar> * core_alignment, bool const flip) const;
|
||||
|
||||
boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
|
||||
private:
|
||||
sptr<pstats>
|
||||
prep2(iter const& phrase);
|
||||
public:
|
||||
mmbitext();
|
||||
~mmbitext();
|
||||
|
||||
void open(string const base, string const L1, string const L2);
|
||||
|
||||
sptr<pstats> lookup(iter const& phrase);
|
||||
void prep(iter const& phrase);
|
||||
};
|
||||
|
||||
// "joint" (i.e., phrase pair) statistics
|
||||
class
|
||||
mmbitext::
|
||||
jstats
|
||||
{
|
||||
uint32_t my_rcnt; // unweighted count
|
||||
float my_wcnt; // weighted count
|
||||
vector<pair<size_t, vector<uchar> > > my_aln;
|
||||
boost::mutex lock;
|
||||
public:
|
||||
jstats();
|
||||
jstats(jstats const& other);
|
||||
uint32_t rcnt() const;
|
||||
float wcnt() const;
|
||||
vector<pair<size_t, vector<uchar> > > const & aln() const;
|
||||
void add(float w, vector<uchar> const& a);
|
||||
};
|
||||
|
||||
struct
|
||||
mmbitext::
|
||||
pstats
|
||||
{
|
||||
boost::mutex lock; // for parallel gathering of stats
|
||||
boost::condition_variable ready; // consumers can wait for this data structure to be ready.
|
||||
|
||||
size_t raw_cnt; // (approximate) raw occurrence count
|
||||
size_t sample_cnt; // number of instances selected during sampling
|
||||
size_t good; // number of selected instances with valid word alignments
|
||||
size_t sum_pairs;
|
||||
// size_t snt_cnt;
|
||||
// size_t sample_snt;
|
||||
size_t in_progress; // keeps track of how many threads are currently working on this
|
||||
boost::unordered_map<uint64_t, jstats> trg;
|
||||
pstats();
|
||||
// vector<phrase> nbest;
|
||||
// void select_nbest(size_t const N=10);
|
||||
void release();
|
||||
void register_worker();
|
||||
void add(mmbitext::iter const& trg_phrase, float const w, vector<uchar> const& a);
|
||||
};
|
||||
|
||||
class
|
||||
mmbitext::
|
||||
agenda::
|
||||
worker
|
||||
{
|
||||
agenda& ag;
|
||||
public:
|
||||
worker(agenda& a);
|
||||
void operator()();
|
||||
|
||||
};
|
||||
|
||||
class
|
||||
mmbitext::
|
||||
agenda::
|
||||
job
|
||||
{
|
||||
public:
|
||||
char const* next;
|
||||
char const* stop;
|
||||
size_t max_samples;
|
||||
size_t ctr;
|
||||
size_t len;
|
||||
bool fwd;
|
||||
sptr<mmbitext::pstats> stats;
|
||||
bool step(uint64_t & sid, uint64_t & offset);
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
291
moses/mm/symal2mam.cc
Normal file
291
moses/mm/symal2mam.cc
Normal file
@ -0,0 +1,291 @@
|
||||
// -*- c++ -*-
|
||||
// program to convert GIZA-style alignments into memory-mapped format
|
||||
// (c) 2010 Ulrich Germann
|
||||
|
||||
// Reads from stdin a file with alternating lines: sentence lengths and symal output.
|
||||
// We need the sentence lengths for sanity checks, because GIZA alignment might skip
|
||||
// sentences. If --skip, we skip such sentence pairs, otherwise, we leave the word
|
||||
// alignment matrix blank.
|
||||
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "ug_deptree.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "tpt_pickler.h"
|
||||
#include "moses/generic/program_options/ug_get_options.h"
|
||||
#include "moses/generic/file_io/ug_stream.h"
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
|
||||
#include "util/exception.hh"
|
||||
#include "util/check.hh"
|
||||
|
||||
// NOTE TO SELF:
|
||||
/* Program to filter out sentences that GIZA will skip or truncate,
|
||||
* i.e. sentences longer than 100 words or sentence pairs with a length
|
||||
*/
|
||||
|
||||
using namespace std;
|
||||
using namespace ugdiss;
|
||||
|
||||
ofstream t1out,t2out,mam;
|
||||
int len1=0,len2=0;
|
||||
size_t lineCtr=0,sid=0;
|
||||
bool conll=false;
|
||||
bool skip=false;
|
||||
bool debug=false;
|
||||
TokenIndex V1;
|
||||
|
||||
string mtt1name,mtt2name,o1name,o2name,mamname,cfgFile;
|
||||
string dataFormat,A3filename;
|
||||
void
|
||||
interpret_args(int ac, char* av[])
|
||||
{
|
||||
namespace po=boost::program_options;
|
||||
po::variables_map vm;
|
||||
po::options_description o("Options");
|
||||
po::options_description h("Hidden Options");
|
||||
po::positional_options_description a;
|
||||
|
||||
o.add_options()
|
||||
("help,h", "print this message")
|
||||
("cfg,f", po::value<string>(&cfgFile),"config file")
|
||||
("a3", po::value<string>(&A3filename), "name of A3 file (for sanity checks)")
|
||||
("o1", po::value<string>(&o1name), "name of output file for track 1")
|
||||
("o2", po::value<string>(&o2name), "name of output file for track 2")
|
||||
("skip", "skip sentence pairs without word alignment (requires --o1 and --o2)")
|
||||
("debug,d", "debug mode")
|
||||
("t1", po::value<string>(&mtt1name), "file name of L1 mapped token track")
|
||||
("t2", po::value<string>(&mtt2name), "file name of L2 mapped token track")
|
||||
("format,F", po::value<string>(&dataFormat)->default_value("plain"), "data format (plain or conll)")
|
||||
;
|
||||
|
||||
h.add_options()
|
||||
("mamname", po::value<string>(&mamname), "name of output file for mam")
|
||||
;
|
||||
a.add("mamname",1);
|
||||
|
||||
get_options(ac,av,h.add(o),a,vm,"cfg");
|
||||
|
||||
skip = vm.count("skip");
|
||||
debug = vm.count("debug");
|
||||
if (vm.count("help") || mamname.empty())
|
||||
{
|
||||
cout << "usage:\n"
|
||||
<< "\t\n"
|
||||
<< "\t ... | " << av[0]
|
||||
<< " <.mam file> \n" << endl;
|
||||
cout << o << endl;
|
||||
cout << "If an A3 file is given (as produced by (m)giza), symal2mam performs\n"
|
||||
<< "a sanity check to make sure that sentence lengths match." << endl;
|
||||
exit(0);
|
||||
}
|
||||
conll = dataFormat == "conll";
|
||||
if (!conll and dataFormat != "plain")
|
||||
{
|
||||
cerr << "format must be 'conll' or 'plain'" << endl;
|
||||
exit(1);
|
||||
}
|
||||
if (skip && (o1name.empty() || o2name.empty()))
|
||||
{
|
||||
cerr << "--skip requires --o1 and --o2" << endl;
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the raw bytes of sentence /sid/ of track /T/, i.e. the range
// [T.sntStart(sid), T.sntEnd(sid)), verbatim to /dest/.
template<typename track_t>
void
copySentence(track_t const& T, size_t sid, std::ostream& dest)
{
  char const* const first = reinterpret_cast<char const*>(T.sntStart(sid));
  char const* const last  = reinterpret_cast<char const*>(T.sntEnd(sid));
  dest.write(first,last-first);
}
|
||||
|
||||
size_t
|
||||
procSymalLine(string const& line, ostream& out)
|
||||
{
|
||||
ushort a,b; char dash;
|
||||
istringstream buf(line);
|
||||
while (buf>>a>>dash>>b)
|
||||
{
|
||||
if (debug && ((len1 && a >= len1) || (len2 && b >= len2)))
|
||||
{
|
||||
cerr << a << "-" << b << " " << len1 << "/" << len2 << endl;
|
||||
}
|
||||
assert(len1 == 0 || a<len1);
|
||||
assert(len2 == 0 || b<len2);
|
||||
binwrite(out,a);
|
||||
binwrite(out,b);
|
||||
}
|
||||
return out.tellp();
|
||||
}
|
||||
|
||||
// Finish a .mam file: append the index (each entry stored relative to the
// end of the file header), then go back to the start of the file and fill
// in the header (index start, number of index entries minus one, token
// count) and close the stream.
void finiMAM(ofstream& out, vector<id_type>& idx, id_type numTok)
{
  // size of the header written by initialize(): one filepos + two ids
  id_type offset = sizeof(filepos_type)+2*sizeof(id_type);
  filepos_type idxStart = out.tellp();

  for (size_t k = 0; k < idx.size(); ++k)
    numwrite(out,idx[k]-offset);

  out.seekp(0);
  numwrite(out,idxStart);
  numwrite(out,id_type(idx.size()-1));
  numwrite(out,numTok);
  out.close();
}
|
||||
|
||||
void
|
||||
finalize(ofstream& out, vector<id_type> const& idx, id_type tokenCount)
|
||||
{
|
||||
id_type idxSize = idx.size();
|
||||
filepos_type idxStart = out.tellp();
|
||||
for (size_t i = 0; i < idx.size(); ++i)
|
||||
numwrite(out,idx[i]);
|
||||
out.seekp(0);
|
||||
numwrite(out,idxStart);
|
||||
numwrite(out,idxSize-1);
|
||||
numwrite(out,tokenCount);
|
||||
out.close();
|
||||
}
|
||||
|
||||
// Read the next record from a GIZA A3 file and extract the sentence
// lengths from its header line.
//   in      : the A3 stream
//   check1  : receives the number following "source length "
//   check2  : receives the number following "target length "
// Returns true right away (touching nothing) if no A3 file was specified.
// Returns false if the header line lacks one of the expected markers
// (this includes end of input); otherwise skips the two remaining lines
// of the record and returns true.
//
// Fix: the original added 14 to the result of find() before testing it, so
// string::npos wrapped around to 13 and a line without "source length "
// was not rejected unless it was shorter than 14 characters.
bool getCheckValues(istream& in, int& check1, int& check2)
{
  if (A3filename.empty()) return true;
  string line;
  getline(in,line);

  size_t const srcTag = line.find("source length ");
  if (srcTag == string::npos) return false;
  size_t p1 = srcTag + 14; // 14 == strlen("source length ")
  if (p1 >= line.size()) return false;

  size_t p2 = line.find("target length ",p1);
  if (p2 >= line.size()) return false; // also covers npos
  check1 = atoi(line.substr(p1,p2-p1).c_str());

  p1 = p2+14; // 14 == strlen("target length ")
  p2 = line.find("alignment ",p1);
  if (p2 >= line.size()) return false;
  check2 = atoi(line.substr(p1,p2-p1).c_str());

  // skip the remaining two lines of this A3 record
  getline(in,line);
  getline(in,line);
  return true;
}
|
||||
|
||||
void
|
||||
go()
|
||||
{
|
||||
size_t ctr=0;
|
||||
vector<id_type> idxm;
|
||||
idxm.reserve(10000000);
|
||||
idxm.push_back(mam.tellp());
|
||||
string line;
|
||||
while(getline(cin,line))
|
||||
{
|
||||
idxm.push_back(procSymalLine(line,mam));
|
||||
if (debug && ++ctr%100000==0)
|
||||
cerr << ctr/1000 << "K lines processed" << endl;
|
||||
}
|
||||
finiMAM(mam,idxm,0);
|
||||
cout << idxm.size() << endl;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
go(string t1name, string t2name, string A3filename)
|
||||
{
|
||||
typedef mmTtrack<TKN> track_t;
|
||||
track_t T1(t1name),T2(t2name);
|
||||
filtering_istream A3file; open_input_stream(A3filename,A3file);
|
||||
|
||||
string line; int check1=-1,check2=-1;
|
||||
vector<id_type> idx1(1,0),idx2(1,0),idxm(1,mam.tellp());
|
||||
size_t tokenCount1=0,tokenCount2=0;
|
||||
size_t skipCtr=0,lineCtr=0;
|
||||
if (!getCheckValues(A3file,check1,check2))
|
||||
UTIL_THROW(util::Exception, "Mismatch in input files!");
|
||||
|
||||
for (sid = 0; sid < T1.size(); ++sid)
|
||||
{
|
||||
len1 = T1.sntLen(sid);
|
||||
len2 = T2.sntLen(sid);
|
||||
if (debug)
|
||||
cerr << "[" << lineCtr << "] "
|
||||
<< len1 << " (" << check1 << ") / "
|
||||
<< len2 << " (" << check2 << ")" << endl;
|
||||
if ((check1 >=0 && check1!=len1) ||
|
||||
(check2 >=0 && check2!=len2))
|
||||
{
|
||||
if (skip)
|
||||
{
|
||||
cerr << "[" << ++skipCtr << "] skipping "
|
||||
<< check1 << "/" << check2 << " vs. "
|
||||
<< len1 << "/" << len2
|
||||
<< " at line " << lineCtr << endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
idxm.push_back(mam.tellp());
|
||||
}
|
||||
if (len1 > 100 || len2 > 100)
|
||||
{
|
||||
getline(cin,line);
|
||||
getCheckValues(A3file,check1,check2);
|
||||
lineCtr++;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
if (skip)
|
||||
{
|
||||
idx1.push_back(tokenCount1 += len1);
|
||||
copySentence(T1,sid,t1out);
|
||||
idx2.push_back(tokenCount2 += len2);
|
||||
copySentence(T2,sid,t2out);
|
||||
}
|
||||
|
||||
if (!getline(cin,line))
|
||||
UTIL_THROW(util::Exception, "Too few lines in symal input!");
|
||||
|
||||
lineCtr++;
|
||||
idxm.push_back(procSymalLine(line,mam));
|
||||
if (debug) cerr << "[" << lineCtr << "] "
|
||||
<< check1 << " (" << len1 <<") "
|
||||
<< check2 << " (" << len2 <<") "
|
||||
<< line << endl;
|
||||
getCheckValues(A3file,check1,check2);
|
||||
}
|
||||
if (skip)
|
||||
{
|
||||
finalize(t1out,idx1,tokenCount1);
|
||||
finalize(t2out,idx2,tokenCount2);
|
||||
}
|
||||
finiMAM(mam,idxm,0);
|
||||
cout << idxm.size() << endl;
|
||||
}
|
||||
|
||||
void
|
||||
initialize(ofstream& out, string const& fname)
|
||||
{
|
||||
out.open(fname.c_str());
|
||||
numwrite(out,filepos_type(0)); // place holder for index start
|
||||
numwrite(out,id_type(0)); // place holder for index size
|
||||
numwrite(out,id_type(0)); // place holder for token count
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
interpret_args(argc,argv);
|
||||
if (skip)
|
||||
{
|
||||
initialize(t1out,o1name);
|
||||
initialize(t2out,o2name);
|
||||
}
|
||||
initialize(mam,mamname);
|
||||
if (A3filename.size() == 0)
|
||||
go();
|
||||
else if (conll)
|
||||
go<Conll_Record>(mtt1name,mtt2name,A3filename);
|
||||
else
|
||||
go<id_type>(mtt1name,mtt2name,A3filename);
|
||||
}
|
405
moses/mm/tpt_pickler.cc
Normal file
405
moses/mm/tpt_pickler.cc
Normal file
@ -0,0 +1,405 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2006,2007,2008 Ulrich Germann
|
||||
|
||||
#include "tpt_pickler.h"
|
||||
#include <sys/stat.h>
|
||||
#include <cassert>
|
||||
|
||||
#ifdef CYGWIN
|
||||
#define stat64 stat
|
||||
#endif
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
/// @return the size of file /fname/ in bytes, or 0 if the file cannot be
/// stat'ed (e.g. it does not exist).
/// Fix: the result of stat64() used to be ignored, so a failed call
/// returned whatever happened to be in the uninitialized stat buffer.
uint64_t
getFileSize(const std::string& fname)
{
  struct stat64 info;
  if (stat64(fname.c_str(),&info) != 0)
    return 0;
  return info.st_size;
}
|
||||
|
||||
/// Write /data/ in a variable-length encoding: 7 payload bits per byte,
/// least significant group first; the high bit is set on the last byte
/// only ('stop' bit).
template <typename T>
void
binwrite_unsigned_integer(std::ostream& out, T data)
{
  for (; data >= 128; data = data >> 7)
    out.put(data%128);
  char const last = data;
  out.put(last|char(-128)); // set the 'stop' bit
}
|
||||
|
||||
/// Read an unsigned integer written by binwrite_unsigned_integer():
/// 7 payload bits per byte, least significant group first, high bit set on
/// the last byte.  Clears the stream's error flags before reading.
///
/// Changes with respect to the unrolled original:
///  - the stop bit is tested explicitly instead of via `c < 0`, which only
///    works where plain char is signed;
///  - reading stops as soon as the stream fails (the original went on to
///    use an uninitialized / stale byte);
///  - no shift ever reaches the bit width of T (the original shifted by up
///    to 63 regardless of T).
/// On failure /data/ holds what was decoded so far and the stream's
/// failbit is set.
template<typename T>
void
binread_unsigned_integer(std::istream& in, T& data)
{
  in.clear();
  data = 0;
  unsigned int const numBits = sizeof(T)*8;
  for (unsigned int shift = 0; shift < numBits; shift += 7)
    {
      char c = 0;
      if (!in.get(c)) return;
      data += T(c&127) << shift;
      if (static_cast<unsigned char>(c) & 0x80) return; // 'stop' bit
    }
}
|
||||
|
||||
void
|
||||
binwrite(std::ostream& out, unsigned char data)
|
||||
{
|
||||
binwrite_unsigned_integer(out, data);
|
||||
}
|
||||
|
||||
void
|
||||
binwrite(std::ostream& out, unsigned short data)
|
||||
{
|
||||
binwrite_unsigned_integer(out, data);
|
||||
}
|
||||
|
||||
void
|
||||
binwrite(std::ostream& out, unsigned long data)
|
||||
{
|
||||
binwrite_unsigned_integer(out, data);
|
||||
}
|
||||
|
||||
void
|
||||
binwrite(std::ostream& out, unsigned long long data)
|
||||
{
|
||||
binwrite_unsigned_integer(out, data);
|
||||
}
|
||||
|
||||
#if __WORDSIZE == 64
|
||||
void
|
||||
binwrite(std::ostream& out, unsigned int data)
|
||||
{
|
||||
binwrite_unsigned_integer(out, data);
|
||||
}
|
||||
#else
|
||||
void
|
||||
binwrite(std::ostream& out, size_t data)
|
||||
{
|
||||
binwrite_unsigned_integer(out, data);
|
||||
}
|
||||
#endif
|
||||
|
||||
/// Read a variable-length encoded unsigned short (at most 3 bytes: 7
/// payload bits per byte, least significant group first, high bit marks
/// the last byte).  Clears the stream's error flags before reading.
/// Fixes: the stop bit is tested explicitly (the original `c < 0` test
/// requires plain char to be signed) and reading stops on stream failure
/// instead of using an uninitialized byte.
void
binread(std::istream& in, unsigned short& data)
{
  assert(sizeof(data)==2);
  in.clear();
  data = 0;
  for (int shift = 0; shift <= 14; shift += 7)
    {
      char c = 0;
      if (!in.get(c)) return;
      data += static_cast<unsigned short>(c&127) << shift;
      if (static_cast<unsigned char>(c) & 0x80) return; // 'stop' bit
    }
}
|
||||
|
||||
/// Read a variable-length encoded unsigned int (at most 5 bytes: 7 payload
/// bits per byte, least significant group first, high bit marks the last
/// byte).  Clears the stream's error flags before reading.
/// Fixes: the stop bit is tested explicitly (the original `c < 0` test
/// requires plain char to be signed) and reading stops on stream failure
/// instead of using an uninitialized byte.
void
binread(std::istream& in, unsigned int& data)
{
  assert(sizeof(data) == 4);
  in.clear();
  data = 0;
  for (int shift = 0; shift <= 28; shift += 7)
    {
      char c = 0;
      if (!in.get(c)) return;
      data += static_cast<unsigned int>(c&127) << shift;
      if (static_cast<unsigned char>(c) & 0x80) return; // 'stop' bit
    }
}
|
||||
|
||||
/// Read a variable-length encoded unsigned long (7 payload bits per byte,
/// least significant group first, high bit marks the last byte); at most
/// 5 bytes where unsigned long has 32 bits, 10 bytes where it has 64.
/// Unlike the unsigned short / unsigned int overloads this one does not
/// clear the stream's error flags first (as in the original).
/// Fixes: the stop bit is tested explicitly (the original `c < 0` test
/// requires plain char to be signed) and reading stops on stream failure
/// instead of using an uninitialized byte.
void
binread(std::istream& in, unsigned long& data)
{
  assert(sizeof(unsigned long)==4 || sizeof(unsigned long)==8);
  int const lastShift = (sizeof(unsigned long) == 4) ? 28 : 63;
  data = 0;
  for (int shift = 0; shift <= lastShift; shift += 7)
    {
      char c = 0;
      if (!in.get(c)) return;
      data += static_cast<unsigned long long>(c&127) << shift;
      if (static_cast<unsigned char>(c) & 0x80) return; // 'stop' bit
    }
}
|
||||
|
||||
/// Read a variable-length encoded unsigned long long (at most 10 bytes:
/// 7 payload bits per byte, least significant group first, high bit marks
/// the last byte).  Does not clear the stream's error flags first (as in
/// the original).
/// Fixes: the stop bit is tested explicitly (the original `c < 0` test
/// requires plain char to be signed) and reading stops on stream failure
/// instead of using an uninitialized byte.
void
binread(std::istream& in, unsigned long long& data)
{
  assert(sizeof(unsigned long long)==8);
  data = 0;
  for (int shift = 0; shift <= 63; shift += 7)
    {
      char c = 0;
      if (!in.get(c)) return;
      data += static_cast<unsigned long long>(c&127) << shift;
      if (static_cast<unsigned char>(c) & 0x80) return; // 'stop' bit
    }
}
|
||||
|
||||
// writing and reading strings ...
|
||||
void
|
||||
binwrite(std::ostream& out, std::string const& s)
|
||||
{
|
||||
size_t len = s.size();
|
||||
ugdiss::binwrite(out,len);
|
||||
out.write(s.c_str(),len);
|
||||
}
|
||||
|
||||
void
|
||||
binread(std::istream& in, std::string& s)
|
||||
{
|
||||
size_t len;
|
||||
ugdiss::binread(in,len);
|
||||
if (!in) return;
|
||||
char buf[len+1];
|
||||
in.read(buf,len);
|
||||
buf[len] = 0;
|
||||
s = buf;
|
||||
}
|
||||
|
||||
/// Write the raw bytes of /x/ in host byte order.
void
binwrite(std::ostream& out, float x)
{
  // IMPORTANT: this is not robust against the big/little endian
  // issue.
  char* const raw = reinterpret_cast<char*>(&x);
  out.write(raw,sizeof(x));
}
|
||||
|
||||
/// Read sizeof(float) raw bytes into /x/ (host byte order).
void
binread(std::istream& in, float& x)
{
  // IMPORTANT: this is not robust against the big/little endian
  // issue.
  char* const raw = reinterpret_cast<char*>(&x);
  in.read(raw,sizeof(float));
}
|
||||
|
||||
|
||||
template<>
|
||||
char const*
|
||||
binread<uint16_t>(char const* p, uint16_t& buf)
|
||||
{
|
||||
static char mask = 127;
|
||||
buf = (*p)&mask;
|
||||
if (*p++ < 0) return p;
|
||||
buf += uint16_t((*p)&mask)<<7;
|
||||
if (*p++ < 0) return p;
|
||||
buf += uint16_t((*p)&mask)<<14;
|
||||
#ifndef NDEBUG
|
||||
assert(*p++ < 0);
|
||||
#else
|
||||
++p;
|
||||
#endif
|
||||
return p;
|
||||
}
|
||||
|
||||
template<>
|
||||
char const*
|
||||
binread<uint32_t>(char const* p, uint32_t& buf)
|
||||
{
|
||||
static char mask = 127;
|
||||
|
||||
if (*p < 0)
|
||||
{
|
||||
buf = (*p)&mask;
|
||||
return ++p;
|
||||
}
|
||||
buf = *p;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += uint32_t((*p)&mask)<<7;
|
||||
return ++p;
|
||||
}
|
||||
buf += uint32_t(*p)<<7;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += uint32_t((*p)&mask)<<14;
|
||||
return ++p;
|
||||
}
|
||||
buf += uint32_t(*p)<<14;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += uint32_t((*p)&mask)<<21;
|
||||
return ++p;
|
||||
}
|
||||
buf += uint32_t(*p)<<21;
|
||||
#ifndef NDEBUG
|
||||
assert(*(++p) < 0);
|
||||
#else
|
||||
++p;
|
||||
#endif
|
||||
buf += uint32_t((*p)&mask)<<28;
|
||||
return ++p;
|
||||
}
|
||||
|
||||
template<>
|
||||
char const*
|
||||
binread<filepos_type>(char const* p, filepos_type& buf)
|
||||
{
|
||||
static char mask = 127;
|
||||
|
||||
if (*p < 0)
|
||||
{
|
||||
buf = (*p)&mask;
|
||||
return ++p;
|
||||
}
|
||||
buf = *p;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += filepos_type((*p)&mask)<<7;
|
||||
return ++p;
|
||||
}
|
||||
buf += filepos_type(*p)<<7;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += filepos_type((*p)&mask)<<14;
|
||||
return ++p;
|
||||
}
|
||||
buf += filepos_type(*p)<<14;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += filepos_type((*p)&mask)<<21;
|
||||
return ++p;
|
||||
}
|
||||
buf += filepos_type(*p)<<21;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += filepos_type((*p)&mask)<<28;
|
||||
return ++p;
|
||||
}
|
||||
buf += filepos_type(*p)<<28;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += filepos_type((*p)&mask)<<35;
|
||||
return ++p;
|
||||
}
|
||||
buf += filepos_type(*p)<<35;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += filepos_type((*p)&mask)<<42;
|
||||
return ++p;
|
||||
}
|
||||
buf += filepos_type(*p)<<42;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += filepos_type((*p)&mask)<<49;
|
||||
return ++p;
|
||||
}
|
||||
buf += filepos_type(*p)<<49;
|
||||
if (*(++p) < 0)
|
||||
{
|
||||
buf += filepos_type((*p)&mask)<<56;
|
||||
return ++p;
|
||||
}
|
||||
buf += filepos_type(*p)<<56;
|
||||
#ifndef NDEBUG
|
||||
assert(*(++p) < 0);
|
||||
#else
|
||||
++p;
|
||||
#endif
|
||||
buf += filepos_type((*p)&mask)<<63;
|
||||
return ++p;
|
||||
}
|
||||
|
||||
template<>
|
||||
char const*
|
||||
binread<float>(char const* p, float& buf)
|
||||
{
|
||||
buf = *reinterpret_cast<float const*>(p);
|
||||
return p+sizeof(float);
|
||||
}
|
||||
|
||||
} // end namespace ugdiss
|
207
moses/mm/tpt_pickler.h
Normal file
207
moses/mm/tpt_pickler.h
Normal file
@ -0,0 +1,207 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2006,2007,2008 Ulrich Germann
|
||||
#ifndef __Pickler
|
||||
#define __Pickler
|
||||
|
||||
#include<iostream>
|
||||
#include<string>
|
||||
#include<vector>
|
||||
#include<map>
|
||||
#include "tpt_typedefs.h"
|
||||
#include "num_read_write.h"
|
||||
#include <cassert>
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
/// Utility method placed here for lack of a better place
|
||||
/// @return the size of file fname.
|
||||
uint64_t getFileSize(const std::string& fname);
|
||||
|
||||
/**
|
||||
* The following functions write and read data in a compact binary
|
||||
* representation. Write and read errors can be checked directly
|
||||
* on the ostream object after the function call, so no return value is
|
||||
* necessary.*/
|
||||
void binwrite(std::ostream& out, char data);
|
||||
void binwrite(std::ostream& out, unsigned char data);
|
||||
void binwrite(std::ostream& out, unsigned short data);
|
||||
void binwrite(std::ostream& out, unsigned int data);
|
||||
void binwrite(std::ostream& out, unsigned long data);
|
||||
void binwrite(std::ostream& out, size_t data);
|
||||
void binwrite(std::ostream& out, unsigned long long data);
|
||||
void binwrite(std::ostream& out, std::string const& data);
|
||||
void binwrite(std::ostream& out, float data);
|
||||
|
||||
void binread(std::istream& in, char &data);
|
||||
void binread(std::istream& in, unsigned char &data);
|
||||
void binread(std::istream& in, unsigned short &data);
|
||||
void binread(std::istream& in, unsigned int &data);
|
||||
void binread(std::istream& in, unsigned long &data);
|
||||
void binread(std::istream& in, size_t &data);
|
||||
void binread(std::istream& in, unsigned long long &data);
|
||||
void binread(std::istream& in, std::string &data);
|
||||
void binread(std::istream& in, float &data);
|
||||
|
||||
std::ostream& write(std::ostream& out, char x);
|
||||
std::ostream& write(std::ostream& out, unsigned char x);
|
||||
std::ostream& write(std::ostream& out, short x);
|
||||
std::ostream& write(std::ostream& out, unsigned short x);
|
||||
std::ostream& write(std::ostream& out, long x);
|
||||
std::ostream& write(std::ostream& out, size_t x);
|
||||
std::ostream& write(std::ostream& out, float x);
|
||||
|
||||
std::istream& read(std::istream& in, char& x);
|
||||
std::istream& read(std::istream& in, unsigned char& x);
|
||||
std::istream& read(std::istream& in, short& x);
|
||||
std::istream& read(std::istream& in, unsigned short& x);
|
||||
std::istream& read(std::istream& in, long& x);
|
||||
std::istream& read(std::istream& in, size_t& x);
|
||||
std::istream& read(std::istream& in, float& x);
|
||||
|
||||
template<typename WHATEVER>
|
||||
char const*
|
||||
binread(char const* p, WHATEVER* buf);
|
||||
|
||||
template<typename numtype>
|
||||
char const*
|
||||
binread(char const* p, numtype& buf);
|
||||
|
||||
template<typename K, typename V>
|
||||
void binwrite(std::ostream& out, std::pair<K,V> const& data);
|
||||
|
||||
template<typename K, typename V>
|
||||
void binread(std::istream& in, std::pair<K,V>& data);
|
||||
|
||||
template<typename K, typename V>
|
||||
char const* binread(char const* p, std::pair<K,V>& data);
|
||||
|
||||
template<typename V>
|
||||
char const* binread(char const* p, std::vector<V>& v);
|
||||
|
||||
|
||||
/// Decode a pair from memory: first /data.first/, then /data.second/.
/// @return pointer to the first byte after the pair
template<typename K, typename V>
char const* binread(char const* p, std::pair<K,V>& data)
{
#ifdef VERIFY_TIGHT_PACKING
  assert(p);
#endif
  char const* const afterFirst = binread(p,data.first);
  return binread(afterFirst,data.second);
}
|
||||
|
||||
template<typename V>
|
||||
char const* binread(char const* p, std::vector<V>& v)
|
||||
{
|
||||
size_t vsize;
|
||||
#ifdef VERIFY_TIGHT_PACKING
|
||||
assert(p);
|
||||
#endif
|
||||
p = binread(p,vsize);
|
||||
v.resize(vsize);
|
||||
for (size_t i = 0; i < vsize; ++i)
|
||||
p = binread(p,v[i]);
|
||||
return p;
|
||||
}
|
||||
|
||||
/// Convenience wrapper: read a T from /in/ via read(in, T&) and return it
/// by value.
template<typename T>
T read(std::istream& in)
{
  T value;
  read(in,value);
  return value;
}
|
||||
|
||||
/// Convenience wrapper: read a T from /in/ via binread(in, T&) and return
/// it by value.
template<typename T>
T binread(std::istream& in)
{
  T value;
  binread(in,value);
  return value;
}
|
||||
|
||||
|
||||
/// Write a vector: the element count followed by each element in order.
template<typename T>
void
binwrite(std::ostream& out, std::vector<T> const& data)
{
  binwrite(out,data.size());
  typedef typename std::vector<T>::const_iterator iter_t;
  for (iter_t e = data.begin(); e != data.end(); ++e)
    binwrite(out,*e);
}
|
||||
|
||||
template<typename T>
|
||||
void
|
||||
binread(std::istream& in, std::vector<T>& data)
|
||||
{
|
||||
size_t s;
|
||||
binread(in,s);
|
||||
data.resize(s);
|
||||
for (size_t i = 0; i < s; i++)
|
||||
{ binread(in,data[i]); }
|
||||
}
|
||||
|
||||
template<typename K, typename V>
|
||||
void
|
||||
binread(std::istream& in, std::map<K,V>& data)
|
||||
{
|
||||
size_t s; K k; V v;
|
||||
binread(in,s);
|
||||
data.clear();
|
||||
// I have no idea why this is necessary, but it is, even when
|
||||
// /data/ is supposed to be empty
|
||||
for (size_t i = 0; i < s; i++)
|
||||
{
|
||||
binread(in,k);
|
||||
binread(in,v);
|
||||
data[k] = v;
|
||||
// cerr << "* " << i << " " << k << " " << v << endl;
|
||||
}
|
||||
}
|
||||
|
||||
/// Write a map: the number of entries followed by each key and its value,
/// in the map's iteration order.
template<typename K, typename V>
void
binwrite(std::ostream& out, std::map<K,V> const& data)
{
  binwrite(out,data.size());
  typename std::map<K,V>::const_iterator entry = data.begin();
  while (entry != data.end())
    {
      binwrite(out,entry->first);
      binwrite(out,entry->second);
      ++entry;
    }
}
|
||||
|
||||
/// Write a pair: first /data.first/, then /data.second/.
template<typename K, typename V>
void
binwrite(std::ostream& out, std::pair<K,V> const& data)
{
  K const& first  = data.first;
  V const& second = data.second;
  binwrite(out,first);
  binwrite(out,second);
}
|
||||
|
||||
/// Read a pair: first /data.first/, then /data.second/.
template<typename K, typename V>
void
binread(std::istream& in, std::pair<K,V>& data)
{
  K& first  = data.first;
  V& second = data.second;
  binread(in,first);
  binread(in,second);
}
|
||||
|
||||
|
||||
/// Pointer flavour of binread(): decodes into the object /buf/ points to by
/// forwarding to the reference overload.
/// @return pointer to the first byte after the decoded value
template<typename WHATEVER>
char const*
binread(char const* p, WHATEVER* buf)
{
#ifdef VERIFY_TIGHT_PACKING
  assert(p);
#endif
  WHATEVER& target = *buf;
  return binread(p,target);
}
|
||||
|
||||
template<typename numtype>
|
||||
char const*
|
||||
binread(char const* p, numtype& buf);
|
||||
|
||||
} // end namespace ugdiss
|
||||
#endif
|
633
moses/mm/tpt_tightindex.cc
Normal file
633
moses/mm/tpt_tightindex.cc
Normal file
@ -0,0 +1,633 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2007,2008 Ulrich Germann
|
||||
|
||||
/* Functions for writing indices tightly (use only the bytes you need).
|
||||
* The first bit indicates whether a byte belongs to a key or a value.
|
||||
* The remaining 7 bits are part of the respective integer value.
|
||||
* (c) 2007 Ulrich Germann
|
||||
*/
|
||||
//
|
||||
// ugTightIndex.cc
|
||||
//
|
||||
// Made by Ulrich Germann
|
||||
// Login <germann@germann-laptop>
|
||||
//
|
||||
// Started on Tue Jul 17 15:09:33 2007 Ulrich Germann
|
||||
// Started on Tue Jul 17 15:09:33 2007 Ulrich Germann
|
||||
//
|
||||
|
||||
#include <iostream>
|
||||
#include <assert.h>
|
||||
#include "tpt_tightindex.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
// std::string bitpattern(unsigned int s)
|
||||
// {
|
||||
// std::ostringstream out;
|
||||
// size_t bit=1;
|
||||
// for (size_t i = 31; i > 0; i--)
|
||||
// out << (s&(bit<<i) ? 1 : 0);
|
||||
// out << ((s&1) ? 1 : 0) ;
|
||||
// return out.str();
|
||||
// }
|
||||
|
||||
// std::string bitpattern(char c)
|
||||
// {
|
||||
// std::ostringstream out;
|
||||
// out << ((c&-128) ? 1 : 0);
|
||||
// // out << ".";
|
||||
// out << ((c&64) ? 1 : 0);
|
||||
// out << ((c&32) ? 1 : 0);
|
||||
// out << ((c&16) ? 1 : 0);
|
||||
// out << ((c&8) ? 1 : 0);
|
||||
// out << ((c&4) ? 1 : 0);
|
||||
// out << ((c&2) ? 1 : 0);
|
||||
// out << ((c&1) ? 1 : 0);
|
||||
// return out.str();
|
||||
// }
|
||||
|
||||
// std::string bitpattern(unsigned char c)
|
||||
// {
|
||||
// std::ostringstream out;
|
||||
// out << ((c&128) ? 1 : 0);
|
||||
// out << ((c&64) ? 1 : 0);
|
||||
// out << ((c&32) ? 1 : 0);
|
||||
// out << ((c&16) ? 1 : 0);
|
||||
// out << ((c&8) ? 1 : 0);
|
||||
// out << ((c&4) ? 1 : 0);
|
||||
// out << ((c&2) ? 1 : 0);
|
||||
// out << ((c&1) ? 1 : 0);
|
||||
// return out.str();
|
||||
// }
|
||||
|
||||
// #define LOG_WRITE_ACTIVITY
|
||||
|
||||
// write a key or value into a tight index
|
||||
// flag indicates whether it's a key or a value
|
||||
/** Write /data/ to /out/ in 'tight' format: 7 payload bits per byte,
 *  least-significant group first, using only as many bytes as needed.
 *  The top bit of every byte written is set iff /flag/ is true.
 *  @param out  the output stream
 *  @param data the number to write
 *  @param flag value of the marker (top) bit of each byte
 */
void tightwrite(std::ostream& out, uint64_t data, bool flag)
{
  // assert(sizeof(size_t)==4);
#ifdef LOG_WRITE_ACTIVITY
  size_t bytes_written=1;
  std::cerr << "starting at file position " << out.tellp()
            << ": tightwrite " << data;
  if (flag)
    std::cerr << " with flag 1 ";
  else
    std::cerr << " with flag 0 ";
#endif
  // marker bit that is OR-ed into every byte we emit
  unsigned char const marker = flag ? 0x80 : 0x00;
  for (; data >= 128; data >>= 7)
    {
      out.put(static_cast<char>((data & 0x7F) | marker));
#ifdef LOG_WRITE_ACTIVITY
      bytes_written++;
#endif
    }
  // final (most significant) 7-bit group
  out.put(static_cast<char>((data & 0x7F) | marker));
#ifdef LOG_WRITE_ACTIVITY
  std::cerr << " in " << bytes_written << " bytes" << std::endl;
#endif
}
|
||||
|
||||
// For the code below: does it make a difference if I hard-code the
|
||||
// unraveled loop or does code optimization by the compiler take care
|
||||
// of that?
|
||||
|
||||
#define DEBUG_TIGHTREAD 0
|
||||
|
||||
// read a key value from a tight index; filepos_type must be at least as
|
||||
// large as count_type
|
||||
/** Read one number from a 'tight' index in stream /in/.
 *  The first byte decides the kind of number: top bit set means every
 *  continuation byte must also have the top bit set, top bit clear means
 *  every continuation byte must have it clear.  Reading ends at the first
 *  byte of the other kind or at /stop/, whichever comes first.  Each byte
 *  contributes 7 bits, least-significant group first.
 *  @param in   the input stream, positioned at the first byte of the number;
 *              must have buffered input available (asserted)
 *  @param stop end of the readable range; a value of 0 means "whatever is
 *              currently available in the stream buffer".  In either case the
 *              range is clipped to what the stream buffer reports as available.
 *  @return the decoded number
 */
filepos_type
tightread(std::istream& in, std::ios::pos_type stop)
{
  // debug=true;
  // assert(sizeof(size_t) == 4);
  assert(in.rdbuf()->in_avail() > 0);
  filepos_type data = 0;
  short int bitshift = 7;
  // only used for the diagnostic below; streamoff avoids truncating
  // positions that do not fit into an int
  std::streamoff const pos = in.tellg();
#if DEBUG_TIGHTREAD
  if (debug)
    cerr << bitpattern(uint(in.peek())) << " " << in.peek()
         << " pos=" << in.tellg() << "\n";
#endif
  int buf = in.get();
  if (stop == std::ios::pos_type(0))
    stop = size_t(in.tellg())+in.rdbuf()->in_avail();
  else
    stop = std::min(size_t(stop),size_t(in.tellg())+in.rdbuf()->in_avail());
  if (buf < 0)
    std::cerr << "number read: " << buf << " " << pos << " "
              << in.tellg() << std::endl;
  assert (buf>=0);

  if (buf >= 128) // continuation bit is 1
    {
      data = buf-128; // unset the bit
      while (in.tellg() < stop && in.peek() >= 128)
        {
#if DEBUG_TIGHTREAD
          if (debug)
            cerr << bitpattern(uint(in.peek())) << " " << in.peek();
#endif
          // Shift in the accumulator's own type: shifting as size_t would
          // drop the high bits wherever size_t is narrower than filepos_type.
          data += filepos_type(in.get()-128)<<bitshift;
          bitshift += 7;
#if DEBUG_TIGHTREAD
          if (debug)
            cerr << " " << data << " pos=" << in.tellg() << std::endl;
#endif
        }
    }
  else
    {
      data = buf;
      while (in.tellg() < stop && in.peek() < 128)
        {
#if DEBUG_TIGHTREAD
          if (debug)
            cerr << bitpattern(uint(in.peek())) << " " << in.peek();

#endif
          // see above: shift as filepos_type, not size_t
          data += filepos_type(in.get())<<bitshift;
          bitshift += 7;
#if DEBUG_TIGHTREAD
          if (debug)
            cerr << " " << data << " pos=" << in.tellg() << "\n";
#endif
        }
    }
  return data;
}
|
||||
|
||||
#define DEBUG_TIGHTFIND 0
|
||||
#if DEBUG_TIGHTFIND
|
||||
bool debug=true;
|
||||
#endif
|
||||
bool
|
||||
tightfind_midpoint(std::istream& in, filepos_type start, filepos_type stop)
|
||||
{
|
||||
in.seekg((start+stop)/2);
|
||||
// Jump approximately to the middle. Since we might land in the
|
||||
// middle of a number, we need to find the start of the next
|
||||
// [index key/file offset] pair first. Bytes belonging to an index
|
||||
// key have the leftmost bit set to 0, bytes belonging to a file
|
||||
// offset have it set to 1
|
||||
|
||||
// if we landed in the middle of an index key, skip to the end of it
|
||||
while (static_cast<filepos_type>(in.tellg()) < stop && in.get() < 128)
|
||||
{
|
||||
#if DEBUG_TIGHTFIND
|
||||
if (debug)
|
||||
{
|
||||
in.unget();
|
||||
char c = in.get();
|
||||
std::cerr << in.tellg() << " skipped key byte " << c << std::endl;
|
||||
}
|
||||
#endif
|
||||
if (in.eof()) return false;
|
||||
}
|
||||
// Also skip the associated file offset:
|
||||
while (static_cast<filepos_type>(in.tellg()) < stop && in.peek() >= 128)
|
||||
{
|
||||
#if DEBUG_TIGHTFIND
|
||||
int r = in.get();
|
||||
if (debug)
|
||||
std::cerr << in.tellg() << " skipped value byte " << r
|
||||
<< " next is " << in.peek()
|
||||
<< std::endl;
|
||||
#else
|
||||
in.get();
|
||||
#endif
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/** Find the start of an index key near the middle of the memory range
 *  [start,stop) of a 'tight' index.
 *  Starting at the midpoint, walks backwards over value bytes (top bit set)
 *  and then over key bytes (top bit clear), never going below /start/.
 *  The top bit is tested on the unsigned byte value, so the result does not
 *  depend on whether plain char is signed on the target platform.
 *  @param start start of the range
 *  @param stop  non-inclusive end of the range
 *  @return pointer to the first byte of the key that was found
 */
char const*
tightfind_midpoint(char const* const start,
                   char const* const stop)
{
  char const* mp = start + (stop - start)/2;
  // back up over value bytes ...
  while (mp > start && (static_cast<unsigned char>(*mp) & 0x80)) mp--;
  // ... and then over the key bytes in front of them
  while (mp > start && !(static_cast<unsigned char>(*mp) & 0x80)) mp--;
  // If we stopped on a value byte, the key starts right after it.
  return (static_cast<unsigned char>(*mp) & 0x80) ? mp + 1 : mp;
}
|
||||
|
||||
bool
|
||||
linear_search(std::istream& in, filepos_type start, filepos_type stop,
|
||||
id_type key, unsigned char& flags)
|
||||
{ // performs a linear search in the range
|
||||
in.seekg(start);
|
||||
|
||||
#if DEBUG_TIGHTFIND
|
||||
if (debug) std::cerr << in.tellg() << " ";
|
||||
#endif
|
||||
|
||||
// ATTENTION! The bitshift operations below are important:
|
||||
// We use some of the bits in the key value to store additional
|
||||
// information about what and where node iformation is stored.
|
||||
|
||||
id_type foo;
|
||||
for(foo = tightread(in,stop);
|
||||
(foo>>FLAGBITS) < key;
|
||||
foo = tightread(in,stop))
|
||||
{
|
||||
// skip the value associated with key /foo/
|
||||
while (static_cast<filepos_type>(in.tellg()) < stop
|
||||
&& in.peek() >= 128) in.get();
|
||||
|
||||
#if DEBUG_TIGHTFIND
|
||||
if (debug)
|
||||
std::cerr << (foo>>FLAGBITS) << " [" << key << "] "
|
||||
<< in.tellg() << std::endl;
|
||||
#endif
|
||||
|
||||
if (in.tellg() == std::ios::pos_type(stop))
|
||||
return false; // not found
|
||||
}
|
||||
|
||||
#if DEBUG_TIGHTFIND
|
||||
if (debug && (foo>>FLAGBITS)==key)
|
||||
std::cerr << "found entry for " << key << std::endl;
|
||||
std::cerr << "current file position is " << in.tellg()
|
||||
<< " (value read: " << key << std::endl;
|
||||
#endif
|
||||
|
||||
assert(static_cast<filepos_type>(in.tellg()) < stop);
|
||||
if ((foo>>FLAGBITS)==key)
|
||||
{
|
||||
flags = (foo%256);
|
||||
flags &= FLAGMASK;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
bool
|
||||
tightfind(std::istream& in, filepos_type start, filepos_type stop,
|
||||
id_type key, unsigned char& flags)
|
||||
{
|
||||
// returns true if the value is found
|
||||
#if DEBUG_TIGHTFIND
|
||||
if (debug)
|
||||
std::cerr << "looking for " << key
|
||||
<< " in range [" << start << ":" << stop << "]" << std::endl;
|
||||
#endif
|
||||
if (start==stop) return false;
|
||||
assert(stop>start);
|
||||
if ((start+1)==stop) return false; // list is empty
|
||||
|
||||
unsigned int const granularity = sizeof(filepos_type)*5;
|
||||
// granularity: point where we should switch to linear search,
|
||||
// because otherwise we might skip over the entry we are looking for
|
||||
// because we land right in the middle of it.
|
||||
|
||||
if (stop > start + granularity)
|
||||
if (!tightfind_midpoint(in,start,stop))
|
||||
return false; // something went wrong (empty index)
|
||||
|
||||
if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop))
|
||||
{ // If the search range is very short, tightfind_midpoint might skip the
|
||||
// entry we are loking for. In this case, we can afford a linear
|
||||
// search
|
||||
return linear_search(in,start,stop,key,flags);
|
||||
}
|
||||
|
||||
// perform binary search
|
||||
filepos_type curpos = in.tellg();
|
||||
id_type foo = tightread(in,stop);
|
||||
id_type tmpid = foo>>FLAGBITS;
|
||||
if (tmpid == key)
|
||||
{
|
||||
flags = foo%256;
|
||||
flags &= FLAGMASK;
|
||||
#if DEBUG_TIGHTFIND
|
||||
if (debug) std::cerr << "found entry for " << key << std::endl;
|
||||
#endif
|
||||
return true; // done, found
|
||||
}
|
||||
else if (tmpid > key)
|
||||
{ // look in the lower half
|
||||
#if DEBUG_TIGHTFIND
|
||||
if (debug) std::cerr << foo << " > " << key << std::endl;
|
||||
#endif
|
||||
return tightfind(in,start,curpos,key,flags);
|
||||
}
|
||||
else
|
||||
{ // look in the upper half
|
||||
while (static_cast<filepos_type>(in.tellg()) < stop
|
||||
&& in.rdbuf()->in_avail() > 0 // is that still necessary???
|
||||
&& in.peek() >= 128)
|
||||
in.get(); // skip associated value
|
||||
if (in.rdbuf()->in_avail() == 0 || in.tellg() == std::ios::pos_type(stop))
|
||||
return false;
|
||||
#if DEBUG_TIGHTFIND
|
||||
if (debug) std::cerr << foo << " < " << key << std::endl;
|
||||
#endif
|
||||
return tightfind(in,in.tellg(),stop,key,flags);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
char const*
|
||||
tightfind(char const* const start,
|
||||
char const* const stop,
|
||||
id_type key,
|
||||
unsigned char& flags)
|
||||
{
|
||||
// returns true if the value is found
|
||||
|
||||
if (start==stop) return NULL;
|
||||
assert(stop>start);
|
||||
if ((start+1)==stop) return NULL; // list is empty
|
||||
char const* p = tightfind_midpoint(start,stop);
|
||||
// if ids can be larger than 67,108,864 on 32-bit machines
|
||||
// (i.e., 2**(28-flagbits)), dest must be declared as uint64_t
|
||||
size_t foo;
|
||||
char const* after = tightread(p,stop,foo);
|
||||
id_type tmpId = foo>>FLAGBITS;
|
||||
if (tmpId == key)
|
||||
{
|
||||
flags = foo%256;
|
||||
flags &= FLAGMASK;
|
||||
return after;
|
||||
}
|
||||
else if (tmpId > key)
|
||||
{ // look in the lower half
|
||||
return tightfind(start,p,key,flags);
|
||||
}
|
||||
else
|
||||
{ // look in the upper half
|
||||
while (*after<0 && ++after < stop);
|
||||
if (after == stop) return NULL;
|
||||
return tightfind(after,stop,key,flags);
|
||||
}
|
||||
}
|
||||
|
||||
char const*
|
||||
tightfind_noflags(char const* const start,
|
||||
char const* const stop,
|
||||
id_type key)
|
||||
{
|
||||
// returns true if the value is found
|
||||
|
||||
if (start==stop) return NULL;
|
||||
assert(stop>start);
|
||||
if ((start+1)==stop) return NULL; // list is empty
|
||||
char const* p = tightfind_midpoint(start,stop);
|
||||
// if ids can be larger than 67,108,864 on 32-bit machines
|
||||
// (i.e., 2**(28-flagbits)), dest must be declared as uint64_t
|
||||
size_t foo;
|
||||
char const* after = tightread(p,stop,foo);
|
||||
if (foo == key)
|
||||
return after;
|
||||
else if (foo > key)
|
||||
{ // look in the lower half
|
||||
return tightfind_noflags(start,p,key);
|
||||
}
|
||||
else
|
||||
{ // look in the upper half
|
||||
while (*after<0 && ++after < stop);
|
||||
if (after == stop) return NULL;
|
||||
return tightfind_noflags(after,stop,key);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
linear_search_noflags(std::istream& in, filepos_type start,
|
||||
filepos_type stop, id_type key)
|
||||
{ // performs a linear search in the range
|
||||
std::ios::pos_type mystop = stop;
|
||||
|
||||
in.seekg(start);
|
||||
id_type foo;
|
||||
for(foo = tightread(in,stop); foo < key; foo = tightread(in,stop))
|
||||
{
|
||||
// skip the value associated with key /foo/
|
||||
while (in.tellg() < mystop && in.peek() >= 128)
|
||||
in.get();
|
||||
if (in.tellg() == mystop)
|
||||
return false; // not found
|
||||
}
|
||||
assert(in.tellg() < mystop);
|
||||
return (foo==key);
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
tightfind_noflags(std::istream& in, filepos_type start,
|
||||
filepos_type stop, id_type key)
|
||||
{
|
||||
// returns true if the value is found
|
||||
if (start==stop) return false;
|
||||
assert(stop>start);
|
||||
if ((start+1)==stop) return false; // list is empty
|
||||
|
||||
// granularity: point where we should switch to linear search,
|
||||
// because otherwise we might skip over the entry we are looking for
|
||||
// because we land right in the middle of it.
|
||||
unsigned int const granularity = sizeof(filepos_type)*5;
|
||||
// UG: why 5? we should be able to get away with less!
|
||||
|
||||
if (stop > start + granularity)
|
||||
if (!tightfind_midpoint(in,start,stop))
|
||||
return false; // something went wrong (empty index)
|
||||
|
||||
// If the search range is very short, tightfind_midpoint might skip the
|
||||
// entry we are loking for. In this case, we can afford a linear
|
||||
// search
|
||||
if (stop <= start + granularity || in.tellg() == std::ios::pos_type(stop))
|
||||
return linear_search_noflags(in,start,stop,key);
|
||||
|
||||
// Otherwise, perform binary search
|
||||
filepos_type curpos = in.tellg();
|
||||
id_type foo = tightread(in,stop);
|
||||
if (foo == key)
|
||||
return true; // done, found
|
||||
|
||||
else if (foo > key) // search first half
|
||||
return tightfind_noflags(in,start,curpos,key);
|
||||
|
||||
else // search second half
|
||||
{
|
||||
std::ios::pos_type mystop = stop;
|
||||
while (in.tellg() < mystop
|
||||
&& in.rdbuf()->in_avail() > 0 // is that still necessary???
|
||||
&& in.peek() >= 128)
|
||||
in.get(); // skip associated value
|
||||
if (in.rdbuf()->in_avail() == 0 || in.tellg() == mystop)
|
||||
return false;
|
||||
return tightfind_noflags(in,in.tellg(),stop,key);
|
||||
}
|
||||
}
|
||||
|
||||
/** Same idea as tightwrite(), but with basic storage units of 2 bytes:
 *  /data/ is written in groups of 15 bits, least-significant group first;
 *  the top bit of every 16-bit unit is set iff /flag/ is true.  Units are
 *  written in the byte order of the host.
 *  Uses uint16_t so that the unit is exactly 2 bytes wide and setting the
 *  top bit does not rely on implementation-defined narrowing to a signed
 *  short.
 *  @param out  the output stream
 *  @param data the number to write
 *  @param flag value of the marker (top) bit of each unit
 */
void tightwrite2(std::ostream& out, size_t data, bool flag)
{
  uint16_t const marker = flag ? 0x8000 : 0x0000;
  while (data >= 32768) // = 2^15
    {
      uint16_t const unit = static_cast<uint16_t>((data & 0x7FFF) | marker);
      out.write(reinterpret_cast<char const*>(&unit),2);
      data >>= 15;
    }
  // final (most significant) 15-bit group
  uint16_t const last = static_cast<uint16_t>((data & 0x7FFF) | marker);
  out.write(reinterpret_cast<char const*>(&last),2);
}
|
||||
|
||||
/** Read a number of up to 64 bits from a 'tight' index in memory.
 *  The top bit of the first byte determines the kind of number; reading
 *  stops at the first byte whose top bit differs, at /stop/, or after 10
 *  bytes.  Each byte contributes 7 bits, least-significant group first.
 *  The top bit is tested on the unsigned byte value, so the result does not
 *  depend on whether plain char is signed on the target platform.
 *  @param start first byte of the number (must be readable)
 *  @param stop  non-inclusive end of read range
 *  @param dest  receives the decoded number
 *  @return first memory position after the number
 */
char const*
tightread8(char const* start,
           char const* stop,
           uint64_t& dest)
{
  unsigned char const first   = static_cast<unsigned char>(*start);
  unsigned char const flagBit = first & 0x80;
  dest = first & 0x7F;
  for (unsigned int shift = 7; shift <= 63; shift += 7)
    {
      if (++start == stop) return start;
      unsigned char const cur = static_cast<unsigned char>(*start);
      if ((cur & 0x80) != flagBit) return start; // next number begins here
      dest += uint64_t(cur & 0x7F) << shift;
    }
  // all 10 bytes were used; /start/ still points at the last one
  assert(start<stop);
  return ++start;
}
|
||||
|
||||
/** Read a number of up to 32 bits from a 'tight' index in memory.
 *  The top bit of the first byte determines the kind of number; reading
 *  stops at the first byte whose top bit differs, at /stop/, or after 5
 *  bytes.  Each byte contributes 7 bits, least-significant group first.
 *  The top bit is tested on the unsigned byte value, so the result does not
 *  depend on whether plain char is signed on the target platform.
 *  @param start first byte of the number (must be readable)
 *  @param stop  non-inclusive end of read range
 *  @param dest  receives the decoded number
 *  @return first memory position after the number
 */
char const*
tightread4(char const* start,
           char const* stop,
           uint32_t& dest)
{
  unsigned char const first   = static_cast<unsigned char>(*start);
  unsigned char const flagBit = first & 0x80;
  dest = first & 0x7F;
  for (unsigned int shift = 7; shift <= 28; shift += 7)
    {
      if (++start == stop) return start;
      unsigned char const cur = static_cast<unsigned char>(*start);
      if ((cur & 0x80) != flagBit) return start; // next number begins here
      dest += uint32_t(cur & 0x7F) << shift;
    }
  // all 5 bytes were used; /start/ still points at the last one
  assert(start<stop);
  return ++start;
}
|
||||
|
||||
/** Read a number of up to 16 bits from a 'tight' index in memory.
 *  The top bit of the first byte determines the kind of number; reading
 *  stops at the first byte whose top bit differs, at /stop/, or after 3
 *  bytes.  Each byte contributes 7 bits, least-significant group first;
 *  bits that do not fit into 16 bits are dropped.
 *  The top bit is tested on the unsigned byte value, so the result does not
 *  depend on whether plain char is signed on the target platform.
 *  @param start first byte of the number (must be readable)
 *  @param stop  non-inclusive end of read range
 *  @param dest  receives the decoded number
 *  @return first memory position after the number
 */
char const*
tightread2(char const* start,
           char const* stop,
           uint16_t& dest)
{
  unsigned char const first   = static_cast<unsigned char>(*start);
  unsigned char const flagBit = first & 0x80;
  dest = first & 0x7F;
  for (unsigned int shift = 7; shift <= 14; shift += 7)
    {
      if (++start == stop) return start;
      unsigned char const cur = static_cast<unsigned char>(*start);
      if ((cur & 0x80) != flagBit) return start; // next number begins here
      dest = static_cast<uint16_t>(dest + (uint32_t(cur & 0x7F) << shift));
    }
  // all 3 bytes were used; /start/ still points at the last one
  assert(start<stop);
  return ++start;
}
|
||||
} // end namespace ugdiss
|
177
moses/mm/tpt_tightindex.h
Normal file
177
moses/mm/tpt_tightindex.h
Normal file
@ -0,0 +1,177 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2007,2008 Ulrich Germann
|
||||
/* Functions for writing indices tightly (use only the bytes you need).
|
||||
* The first bit indicates whether a byte belongs to a key or a value.
|
||||
* The remaining 7 bits are part of the respective integer value.
|
||||
*/
|
||||
#ifndef __ugTightIndex
|
||||
#define __ugTightIndex
|
||||
#include <map>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include "tpt_typedefs.h"
|
||||
#include <cassert>
|
||||
// using namespace std;
|
||||
|
||||
#ifndef uchar
|
||||
#endif
|
||||
|
||||
#define FLAGBITS 2
|
||||
#define FLAGMASK (uchar(3))
|
||||
#define HAS_VALUE_MASK (uchar(2))
|
||||
#define HAS_CHILD_MASK (uchar(1))
|
||||
|
||||
|
||||
extern bool debug;
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
// void tightwritex(iostream& out, size_t data, bool flag);
|
||||
void
|
||||
tightwrite(std::ostream& out, uint64_t data, bool flag);
|
||||
|
||||
filepos_type
|
||||
tightread(std::istream& in, std::ios::pos_type stop);
|
||||
|
||||
bool
|
||||
tightfind(std::istream& in,
|
||||
filepos_type start,
|
||||
filepos_type stop,
|
||||
id_type key,
|
||||
unsigned char& flags);
|
||||
|
||||
bool
|
||||
tightfind_noflags(std::istream& in,
|
||||
filepos_type start,
|
||||
filepos_type stop,
|
||||
id_type key);
|
||||
|
||||
char const*
|
||||
tightfind(char const* const start,
|
||||
char const* const stop,
|
||||
id_type key,
|
||||
unsigned char& flags);
|
||||
|
||||
char const*
|
||||
tightfind_noflags(char const* const start,
|
||||
char const* const stop,
|
||||
id_type key);
|
||||
|
||||
|
||||
|
||||
/** move read header in istream /in/ to the first entry after the midpoint of
|
||||
 * file position range [start,stop) in a 'tight' index
|
||||
* @param in the data input stream
|
||||
* @param start start of the search range
|
||||
* @param stop end of the search range
|
||||
* @return true if no errors occurred
|
||||
*/
|
||||
bool
|
||||
tightfind_midpoint(std::istream& in, filepos_type start, filepos_type stop);
|
||||
|
||||
// the bitpattern functions below are for debugging
|
||||
// They return a string showing the bits of the argument value
|
||||
// std::string bitpattern(unsigned int s);
|
||||
// std::string bitpattern(unsigned char c);
|
||||
// std::string bitpattern(char c);
|
||||
|
||||
|
||||
/** read a number from a tight index directly from a memory location
|
||||
* @param start start of read range
|
||||
* @param stop non-inclusive end of read range
|
||||
* @param dest destination
|
||||
* @return first memory position after the number
|
||||
*/
|
||||
|
||||
char const*
|
||||
tightread2(char const* start, char const* stop, uint16_t& dest);
|
||||
|
||||
char const*
|
||||
tightread4(char const* start, char const* stop, uint32_t& dest);
|
||||
|
||||
char const*
|
||||
tightread8(char const* start, char const* stop, uint64_t& dest);
|
||||
|
||||
template<typename numType>
|
||||
char const*
|
||||
tightread(char const* start, char const* stop, numType& dest)
|
||||
{
|
||||
if (sizeof(numType)==2)
|
||||
return tightread2(start,stop,reinterpret_cast<uint16_t&>(dest));
|
||||
if (sizeof(numType)==4)
|
||||
return tightread4(start,stop,reinterpret_cast<uint32_t&>(dest));
|
||||
else if (sizeof(numType)==8)
|
||||
return tightread8(start,stop,reinterpret_cast<uint64_t&>(dest));
|
||||
assert(0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// char const*
|
||||
// tightread(char const* start, char const* stop, uint64_t& dest);
|
||||
|
||||
// char const*
|
||||
// tightread(char const* start, char const* stop, filepos_type& dest);
|
||||
|
||||
#if 0
|
||||
template<typename dtype>
|
||||
char const*
|
||||
tightread(char const* start,
|
||||
char const* stop,
|
||||
dtype& dest)
|
||||
{
|
||||
static char bitmask=127;
|
||||
dest = 0;
|
||||
if (*start < 0)
|
||||
{
|
||||
dest = (*start)&bitmask;
|
||||
if (++start==stop || *start >= 0) return start;
|
||||
dest += dtype((*start)&bitmask)<<7;
|
||||
if (++start==stop || *start >= 0) return start;
|
||||
dest += dtype((*start)&bitmask)<<14;
|
||||
if (++start==stop || *start >= 0) return start;
|
||||
dest += dtype((*start)&bitmask)<<21;
|
||||
if (++start==stop || *start >= 0) return start;
|
||||
dest += dtype((*start)&bitmask)<<28;
|
||||
if (++start==stop || *start >= 0) return start;
|
||||
assert(sizeof(dtype) > 4);
|
||||
dest += dtype((*start)&bitmask)<<35;
|
||||
if (++start==stop || *start >= 0) return start;
|
||||
dest += dtype((*start)&bitmask)<<42;
|
||||
if (++start==stop || *start >= 0) return start;
|
||||
dest += dtype((*start)&bitmask)<<49;
|
||||
if (++start==stop || *start >= 0) return start;
|
||||
dest += dtype((*start)&bitmask)<<56;
|
||||
if (++start==stop || *start >= 0) return start;
|
||||
dest += dtype((*start)&bitmask)<<63;
|
||||
}
|
||||
else
|
||||
{
|
||||
dest = *start;
|
||||
if (++start==stop || *start < 0) return start;
|
||||
dest += dtype(*start)<<7;
|
||||
if (++start==stop || *start < 0) return start;
|
||||
dest += dtype(*start)<<14;
|
||||
if (++start==stop || *start < 0) return start;
|
||||
dest += dtype(*start)<<21;
|
||||
if (++start==stop || *start < 0) return start;
|
||||
dest += dtype(*start)<<28;
|
||||
if (++start==stop || *start < 0) return start;
|
||||
assert(sizeof(dtype) > 4);
|
||||
dest += dtype(*start)<<35;
|
||||
if (++start==stop || *start < 0) return start;
|
||||
dest += dtype(*start)<<42;
|
||||
if (++start==stop || *start < 0) return start;
|
||||
dest += dtype(*start)<<49;
|
||||
if (++start==stop || *start < 0) return start;
|
||||
dest += dtype(*start)<<56;
|
||||
if (++start==stop || *start < 0) return start;
|
||||
dest += dtype(*start)<<63;
|
||||
}
|
||||
assert(start<stop);
|
||||
return ++start;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
}
|
||||
#endif
|
386
moses/mm/tpt_tokenindex.cc
Normal file
386
moses/mm/tpt_tokenindex.cc
Normal file
@ -0,0 +1,386 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2007-2013 Ulrich Germann
|
||||
#include <sstream>
|
||||
#include <string.h>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <boost/pool/pool_alloc.hpp>
|
||||
|
||||
#include "tpt_tokenindex.h"
|
||||
|
||||
using namespace std;
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
// Construct an empty, non-dynamic token index that is not yet backed by a
// file.  /unkToken/ is the label returned for unknown ids.
// startIdx, endIdx and dynamic are set here as well, because operator[]
// reads them and would otherwise see indeterminate values if it is called
// before open().
TokenIndex::
TokenIndex(string unkToken)
  : ridx(0),unkLabel(unkToken),unkId(1),numTokens(0)
{
  startIdx = NULL;
  endIdx   = NULL;
  dynamic  = false;
  lock.reset(new boost::mutex());
}
|
||||
|
||||
#if 0
|
||||
TokenIndex::
|
||||
TokenIndex(string fname, string unkToken,bool dyna)
|
||||
: ridx(0),unkLabel(unkToken)
|
||||
{
|
||||
this->open(fname,unkToken,dyna);
|
||||
};
|
||||
#endif
|
||||
|
||||
void
|
||||
TokenIndex::
|
||||
open(string fname, string unkToken,bool dyna)
|
||||
{
|
||||
if (access(fname.c_str(),F_OK))
|
||||
{
|
||||
ostringstream msg;
|
||||
msg << "TokenIndex::open: File '" << fname << "' does not exist.";
|
||||
throw std::runtime_error(msg.str().c_str());
|
||||
}
|
||||
|
||||
file.open(fname);
|
||||
if (!file.is_open())
|
||||
{
|
||||
cerr << "Error opening file " << fname << endl;
|
||||
assert(0);
|
||||
}
|
||||
// cout << "file is open" << endl;
|
||||
|
||||
this->numTokens = *(reinterpret_cast<uint32_t const*>(file.data()));
|
||||
unkId = *(reinterpret_cast<id_type const*>(file.data()+4));
|
||||
|
||||
// cout << "tokenindex.open: unkId=" << unkId << endl;
|
||||
|
||||
startIdx = reinterpret_cast<Entry const*>(file.data()+4+sizeof(id_type));
|
||||
endIdx = startIdx + numTokens;
|
||||
comp.base = reinterpret_cast<char const*>(endIdx);
|
||||
if (!unkToken.empty())
|
||||
{
|
||||
Entry const* bla = lower_bound(startIdx,endIdx,unkToken.c_str(),comp);
|
||||
unkId = ((bla < endIdx && unkToken == comp.base+bla->offset)
|
||||
? bla->id
|
||||
: numTokens);
|
||||
}
|
||||
this->dynamic=dyna;
|
||||
if (dyna)
|
||||
{
|
||||
this->str2idExtra.reset(new map<string,id_type>());
|
||||
this->newWords.reset(new vector<string>());
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TokenIndex::
|
||||
close()
|
||||
{
|
||||
file.close();
|
||||
}
|
||||
|
||||
// Default constructor of the comparison functor; does nothing
// (in particular, /base/ is not set here).
TokenIndex::
CompFunc::
CompFunc()
{
}
|
||||
|
||||
bool
|
||||
TokenIndex::
|
||||
CompFunc::
|
||||
operator()(Entry const& A, char const* w)
|
||||
{
|
||||
return strcmp(base+A.offset,w) < 0;
|
||||
};
|
||||
|
||||
// Map the token string /p/ to its id.
// - Tokens found in the index get their stored id.
// - In non-dynamic mode, unknown tokens map to unkId.
// - In dynamic mode, unknown tokens are registered on the fly (under the
//   lock) and get the id numTokens + <number of tokens added so far>.
// For an empty, non-dynamic index the result is the boolean value of
// (p != "NULL" && unkId != 0), i.e. 0 or 1.
// NOTE(review): possibly `strcmp(p,"NULL") ? unkId : 0` was intended;
// behaviour is kept as is.
id_type
TokenIndex::
operator[](char const* p) const
{
  if (!dynamic && startIdx==endIdx)
    return strcmp(p,"NULL") && unkId;

  Entry const* hit = lower_bound(startIdx,endIdx,p,comp);
  bool const known = (hit != endIdx) && strcmp(comp.base+hit->offset,p) == 0;
  if (known)
    return hit->id;
  if (!dynamic)
    return unkId;

  boost::lock_guard<boost::mutex> guard(*this->lock);
  // Dynamic adding of unknown items (new as of 2011-01-30).
  // IMPORTANT: numTokens is currently not changed; it is the number of
  // PRE-EXISTING TOKENS, not including dynamically added items.
  typedef map<string,id_type> extra_map_t;
  extra_map_t::value_type candidate(p,str2idExtra->size()+numTokens);
  pair<extra_map_t::iterator,bool> res = str2idExtra->insert(candidate);
  if (res.second) // it actually is a new item
    newWords->push_back(res.first->first);
  return res.first->second;
}
|
||||
|
||||
// Map the token string /w/ to its id; forwards to the C-string overload.
id_type
TokenIndex::
operator[](string const& w) const
{
  char const* const cstr = w.c_str();
  return (*this)[cstr];
}
|
||||
|
||||
// Build and return a table that maps token ids to their strings.
// The table starts out with one NULL slot per index entry and grows
// whenever an entry's id lies beyond its current size; slots whose id does
// not occur in the index remain NULL.
vector<char const*>
TokenIndex::
reverseIndex() const
{
  size_t const entryCount = endIdx-startIdx;
  vector<char const*> table(entryCount, static_cast<char const*>(NULL));
  for (Entry const* e = startIdx; e != endIdx; ++e)
    {
      if (e->id >= table.size())
        table.resize(e->id+1);
      table[e->id] = comp.base+e->offset;
    }
  return table;
}
|
||||
|
||||
char const* const
|
||||
TokenIndex::
|
||||
operator[](id_type id) const
|
||||
{
|
||||
if (!ridx.size())
|
||||
{
|
||||
cerr << "FATAL ERROR: You need to call iniReverseIndex() "
|
||||
<< "on the TokenIndex class before using operator[](id_type id)."
|
||||
<< endl;
|
||||
assert(0);
|
||||
exit(1);
|
||||
}
|
||||
if (id < ridx.size())
|
||||
return ridx[id];
|
||||
boost::lock_guard<boost::mutex> lk(*this->lock);
|
||||
if (dynamic && id < ridx.size()+newWords->size())
|
||||
return (*newWords)[id-ridx.size()].c_str();
|
||||
return unkLabel.c_str();
|
||||
}
|
||||
|
||||
void
|
||||
TokenIndex::
|
||||
iniReverseIndex()
|
||||
{
|
||||
if (!ridx.size()) ridx = reverseIndex();
|
||||
}
|
||||
|
||||
|
||||
char const* const
|
||||
TokenIndex::
|
||||
operator[](id_type id)
|
||||
{
|
||||
if (!ridx.size()) ridx = reverseIndex();
|
||||
if (id < ridx.size())
|
||||
return ridx[id];
|
||||
boost::lock_guard<boost::mutex> lk(*this->lock);
|
||||
if (dynamic && id < ridx.size()+newWords->size())
|
||||
return (*newWords)[id-ridx.size()].c_str();
|
||||
return unkLabel.c_str();
|
||||
}
|
||||
|
||||
/// Render a sequence of token IDs as a space-separated string
/// (builds the reverse index first if necessary).
string
TokenIndex::
toString(vector<id_type> const& v)
{
  if (ridx.empty())
    ridx = reverseIndex();

  ostringstream out;
  for (vector<id_type>::const_iterator it = v.begin(); it != v.end(); ++it)
    {
      out << (it != v.begin() ? " " : "");
      out << (*this)[*it];
    }
  return out.str();
}
|
||||
|
||||
/// Const variant: render a sequence of token IDs as a space-separated
/// string. The reverse index must have been built beforehand.
string
TokenIndex::
toString(vector<id_type> const& v) const
{
  assert (ridx.size());

  ostringstream out;
  for (vector<id_type>::const_iterator it = v.begin(); it != v.end(); ++it)
    {
      out << (it != v.begin() ? " " : "");
      out << (*this)[*it];
    }
  return out.str();
}
|
||||
|
||||
/// Render the token IDs in [start,stop) as a space-separated string
/// (builds the reverse index first if necessary).
string
TokenIndex::
toString(id_type const* start, id_type const* const stop)
{
  if (ridx.empty())
    ridx = reverseIndex();

  ostringstream out;
  for (id_type const* cur = start; cur < stop; ++cur)
    {
      if (cur != start)
        out << " ";
      out << (*this)[*cur];
    }
  return out.str();
}
|
||||
|
||||
/// Const variant: render the token IDs in [start,stop) as a
/// space-separated string. The reverse index must exist already.
string
TokenIndex::
toString(id_type const* start, id_type const* const stop) const
{
  assert (ridx.size());

  ostringstream out;
  for (id_type const* cur = start; cur < stop; ++cur)
    {
      if (cur != start)
        out << " ";
      out << (*this)[*cur];
    }
  return out.str();
}
|
||||
|
||||
/// Split /line/ on whitespace and map every token to its ID.
vector<id_type>
TokenIndex::
toIdSeq(string const& line) const
{
  vector<id_type> ids;
  istringstream in(line);
  for (string token; in >> token; )
    ids.push_back((*this)[token]);
  return ids;
}
|
||||
|
||||
/// Return false if line contains unknown tokens, true otherwise
|
||||
bool
|
||||
TokenIndex::
|
||||
fillIdSeq(string const& line, vector<id_type> & v) const
|
||||
{
|
||||
bool allgood = true; string w;
|
||||
v.clear();
|
||||
for (istringstream buf(line); buf>>w;)
|
||||
{
|
||||
v.push_back((*this)[w]);
|
||||
allgood = allgood && v.back() > 1;
|
||||
}
|
||||
return allgood;
|
||||
}
|
||||
|
||||
/// @return the stored token count (numTokens); dynamically added
///         words are not included.
id_type
TokenIndex::
getNumTokens() const
{
  return this->numTokens;
}
|
||||
|
||||
/// @return the ID currently associated with the UNK label.
id_type
TokenIndex::
getUnkId() const
{
  return this->unkId;
}
|
||||
|
||||
char const* const
|
||||
TokenIndex::
|
||||
getUnkToken() const
|
||||
{
|
||||
return unkLabel.c_str();
|
||||
// return (*this)[unkId];
|
||||
}
|
||||
|
||||
/// Deprecated long name for ksize(): size of the fixed vocabulary.
id_type
TokenIndex::
knownVocabSize() const
{
  return this->numTokens;
}
|
||||
|
||||
/// @return size of the known (fixed) vocabulary, i.e. numTokens.
id_type
TokenIndex::
ksize() const
{
  return this->numTokens;
}
|
||||
|
||||
/// Deprecated long name for tsize(): fixed plus dynamically added words.
id_type
TokenIndex::
totalVocabSize() const
{
  return this->tsize();
}
|
||||
|
||||
/// @return numTokens plus the number of dynamically added words;
///         just numTokens when no dynamic word list has been allocated.
id_type
TokenIndex::
tsize() const
{
  if (!newWords)
    return numTokens;
  return numTokens+newWords->size();
}
|
||||
|
||||
void
|
||||
write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
|
||||
string const& ofile, string const& unkToken)
|
||||
{
|
||||
typedef pair<uint32_t,id_type> IndexEntry; // offset and id
|
||||
|
||||
// Write token strings to a buffer, keep track of offsets
|
||||
vector<IndexEntry> index(tok.size());
|
||||
ostringstream data;
|
||||
id_type unkId = tok.size();
|
||||
for (size_t i = 0; i < tok.size(); i++)
|
||||
{
|
||||
if (tok[i].first == unkToken)
|
||||
unkId = tok[i].second;
|
||||
index[i].first = data.tellp(); // offset of string
|
||||
index[i].second = tok[i].second; // respective ID
|
||||
data<<tok[i].first<<char(0); // write string to buffer
|
||||
}
|
||||
|
||||
// Now write the actual file
|
||||
ofstream out(ofile.c_str());
|
||||
uint32_t vsize = index.size(); // how many vocab items?
|
||||
out.write(reinterpret_cast<char*>(&vsize),4);
|
||||
out.write(reinterpret_cast<char*>(&unkId),sizeof(id_type));
|
||||
for (size_t i = 0; i < index.size(); i++)
|
||||
{
|
||||
out.write(reinterpret_cast<char*>(&index[i].first),4);
|
||||
out.write(reinterpret_cast<char*>(&index[i].second),sizeof(id_type));
|
||||
}
|
||||
out<<data.str();
|
||||
}
|
||||
|
||||
void
|
||||
TokenIndex::
|
||||
write(string fname)
|
||||
{
|
||||
typedef pair<string,uint32_t> Token; // token and id
|
||||
vector<Token> tok(totalVocabSize());
|
||||
for (id_type i = 0; i < tok.size(); ++i)
|
||||
tok[i] = Token((*this)[i],i);
|
||||
sort(tok.begin(),tok.end());
|
||||
write_tokenindex_to_disk(tok,fname,unkLabel);
|
||||
}
|
||||
|
||||
bool
|
||||
TokenIndex::
|
||||
isDynamic() const
|
||||
{
|
||||
return dynamic;
|
||||
}
|
||||
|
||||
bool
|
||||
TokenIndex::
|
||||
setDynamic(bool on)
|
||||
{
|
||||
bool ret = dynamic;
|
||||
if (on && this->str2idExtra == NULL)
|
||||
{
|
||||
this->str2idExtra.reset(new map<string,id_type>());
|
||||
this->newWords.reset(new vector<string>());
|
||||
}
|
||||
dynamic = on;
|
||||
if (on)
|
||||
{
|
||||
(*this)["NULL"];
|
||||
(*this)[unkLabel];
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
TokenIndex::
|
||||
setUnkLabel(string unk)
|
||||
{
|
||||
unkId = (*this)[unk];
|
||||
unkLabel = unk;
|
||||
}
|
||||
|
||||
}
|
168
moses/mm/tpt_tokenindex.h
Normal file
168
moses/mm/tpt_tokenindex.h
Normal file
@ -0,0 +1,168 @@
|
||||
// -*- c++ -*-
|
||||
// TO DO (12.01.2011):
|
||||
//
|
||||
// - Vocab items should be stored in order of ids, so that we can determine their length
|
||||
// by computing V[id+1] - V[id] instead of using strlen.
|
||||
//
|
||||
// (c) 2007,2008 Ulrich Germann
|
||||
|
||||
#ifndef __ugTokenIndex_hh
|
||||
#define __ugTokenIndex_hh
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <boost/iostreams/device/mapped_file.hpp>
|
||||
#include <boost/iostreams/stream.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
#include <boost/thread.hpp>
|
||||
#include "tpt_typedefs.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
using namespace std;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
class TokenIndex
|
||||
{
|
||||
/** Reverse index: maps from ID to char const* */
|
||||
vector<char const*> ridx;
|
||||
/** Label for the UNK token */
|
||||
string unkLabel;
|
||||
id_type unkId,numTokens;
|
||||
|
||||
/// New 2013-09-02: thread-safe
|
||||
boost::scoped_ptr<boost::mutex> lock;
|
||||
|
||||
// NEW 2011-01-30: dynamic adding of unknown items
|
||||
bool dynamic; // dynamically assign a new word id to unknown items?
|
||||
boost::shared_ptr<map<string,id_type> > str2idExtra;
|
||||
boost::shared_ptr<vector<string> > newWords;
|
||||
// The use of pointers to external items is a bit of a bad hack
|
||||
// in terms of the semantic of TokenIndex const: since external items
|
||||
// are changed, the TokenIndex instance remains unchanged and const works,
|
||||
// even though in reality the underlying object on the coceptual level
|
||||
// *IS* changed. This means that dynamic TokenIndex instances are not
|
||||
// thread-safe!
|
||||
|
||||
public:
|
||||
/** string->ID lookup works via binary search in a vector of Entry instances */
|
||||
class Entry
|
||||
{
|
||||
public:
|
||||
uint32_t offset;
|
||||
id_type id;
|
||||
};
|
||||
|
||||
/** Comparison function object used for Entry instances */
|
||||
class CompFunc
|
||||
{
|
||||
public:
|
||||
char const* base;
|
||||
CompFunc();
|
||||
bool operator()(Entry const& A, char const* w);
|
||||
};
|
||||
|
||||
bio::mapped_file_source file;
|
||||
Entry const* startIdx;
|
||||
Entry const* endIdx;
|
||||
CompFunc comp;
|
||||
TokenIndex(string unkToken="UNK");
|
||||
// TokenIndex(string fname,string unkToken="UNK",bool dyna=false);
|
||||
void open(string fname,string unkToken="UNK",bool dyna=false);
|
||||
void close();
|
||||
// id_type unkId,numTokens;
|
||||
id_type operator[](char const* w) const;
|
||||
id_type operator[](string const& w) const;
|
||||
char const* const operator[](id_type id) const;
|
||||
char const* const operator[](id_type id);
|
||||
vector<char const*> reverseIndex() const;
|
||||
|
||||
string toString(vector<id_type> const& v);
|
||||
string toString(vector<id_type> const& v) const;
|
||||
|
||||
string toString(id_type const* start, id_type const* const stop);
|
||||
string toString(id_type const* start, id_type const* const stop) const;
|
||||
|
||||
vector<id_type> toIdSeq(string const& line) const;
|
||||
|
||||
bool fillIdSeq(string const& line, vector<id_type> & v) const;
|
||||
|
||||
void iniReverseIndex();
|
||||
id_type getNumTokens() const;
|
||||
id_type getUnkId() const;
|
||||
|
||||
// the following two functions are deprecated; use ksize() and tsize() instead
|
||||
id_type knownVocabSize() const; // return size of known (fixed) vocabulary
|
||||
id_type totalVocabSize() const; // total of known and dynamically items
|
||||
|
||||
id_type ksize() const; // shorthand for knownVocabSize();
|
||||
id_type tsize() const; // shorthand for totalVocabSize();
|
||||
|
||||
|
||||
char const* const getUnkToken() const;
|
||||
|
||||
void write(string fname); // write TokenIndex to a new file
|
||||
bool isDynamic() const;
|
||||
bool setDynamic(bool onoff);
|
||||
|
||||
void setUnkLabel(string unk);
|
||||
};
|
||||
|
||||
void
|
||||
write_tokenindex_to_disk(vector<pair<string,uint32_t> > const& tok,
|
||||
string const& ofile, string const& unkToken);
|
||||
|
||||
/** Ordering for (word, count) pairs: higher counts first, equal counts
 *  in ascending word order; the unknown-word label always sorts last. */
class compWords
{
  std::string unk;
public:
  compWords(std::string _unk) : unk(_unk) {};

  bool
  operator()(std::pair<std::string,size_t> const& A,
             std::pair<std::string,size_t> const& B) const
  {
    // do we still need this special treatment of the UNK label?
    if (A.first == unk) return false;
    if (B.first == unk) return true;

    if (A.second != B.second)
      return A.second > B.second;
    return A.first < B.first;
  }
};
|
||||
|
||||
template<class MYMAP>
|
||||
void
|
||||
mkTokenIndex(string ofile,MYMAP const& M,string unkToken)
|
||||
{
|
||||
typedef pair<uint32_t,id_type> IndexEntry; // offset and id
|
||||
typedef pair<string,uint32_t> Token; // token and id
|
||||
|
||||
|
||||
// first, sort the word list in decreasing order of frequency, so that we
|
||||
// can assign IDs in an encoding-efficient manner (high frequency. low ID)
|
||||
vector<pair<string,size_t> > wcounts(M.size()); // for sorting by frequency
|
||||
typedef typename MYMAP::const_iterator myIter;
|
||||
size_t z=0;
|
||||
for (myIter m = M.begin(); m != M.end(); m++)
|
||||
{
|
||||
// cout << m->first << " " << m->second << endl;
|
||||
wcounts[z++] = pair<string,size_t>(m->first,m->second);
|
||||
}
|
||||
compWords compFunc(unkToken);
|
||||
sort(wcounts.begin(),wcounts.end(),compFunc);
|
||||
|
||||
// Assign IDs ...
|
||||
vector<Token> tok(wcounts.size());
|
||||
for (size_t i = 0; i < wcounts.size(); i++)
|
||||
tok[i] = Token(wcounts[i].first,i);
|
||||
// and re-sort in alphabetical order
|
||||
sort(tok.begin(),tok.end());
|
||||
write_tokenindex_to_disk(tok,ofile,unkToken);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
15
moses/mm/tpt_typedefs.h
Normal file
15
moses/mm/tpt_typedefs.h
Normal file
@ -0,0 +1,15 @@
|
||||
// -*- c++ -*-
|
||||
// Basic type definitions for code related to tightly packed tries
|
||||
// (c) 2006-2012 Ulrich Germann
|
||||
|
||||
#ifndef __tpt_typedefs_h
|
||||
#define __tpt_typedefs_h
|
||||
#include <stdint.h>
|
||||
// Fixed-width integer aliases shared by the tightly-packed-trie code.
namespace ugdiss
|
||||
{
|
||||
// 32-bit unsigned integer type for IDs
typedef uint32_t id_type;
|
||||
// 32-bit unsigned integer type (presumably occurrence counts -- TODO confirm)
typedef uint32_t count_type;
|
||||
// 64-bit unsigned integer type (presumably file offsets -- TODO confirm)
typedef uint64_t filepos_type;
|
||||
// shorthand for unsigned char
typedef unsigned char uchar;
|
||||
}
|
||||
#endif
|
176
moses/mm/ug_bitext.cc
Normal file
176
moses/mm/ug_bitext.cc
Normal file
@ -0,0 +1,176 @@
|
||||
//-*- c++-mode -*-
|
||||
|
||||
#include "ug_bitext.h"
|
||||
#include <algorithm>
|
||||
#include <boost/math/distributions/binomial.hpp>
|
||||
|
||||
using namespace ugdiss;
|
||||
using namespace std;
|
||||
namespace Moses
|
||||
{
|
||||
namespace bitext
|
||||
{
|
||||
// All counters start at zero; the target-phrase table starts out empty.
pstats::
pstats()
  : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0)
{ }
|
||||
|
||||
void
|
||||
pstats::
|
||||
register_worker()
|
||||
{
|
||||
this->lock.lock();
|
||||
++this->in_progress;
|
||||
this->lock.unlock();
|
||||
}
|
||||
|
||||
void
|
||||
pstats::
|
||||
release()
|
||||
{
|
||||
this->lock.lock();
|
||||
if (this->in_progress-- == 1) // last one - >we're done
|
||||
this->ready.notify_all();
|
||||
this->lock.unlock();
|
||||
}
|
||||
|
||||
void
|
||||
pstats::
|
||||
add(uint64_t pid, float const w,
|
||||
vector<uchar> const& a,
|
||||
uint32_t const cnt2)
|
||||
{
|
||||
this->lock.lock();
|
||||
jstats& entry = this->trg[pid];
|
||||
this->lock.unlock();
|
||||
entry.add(w,a,cnt2);
|
||||
if (this->good < entry.rcnt())
|
||||
{
|
||||
this->lock.lock();
|
||||
UTIL_THROW(util::Exception, "more joint counts than good counts!"
|
||||
<< entry.rcnt() << "/" << this->good);
|
||||
}
|
||||
}
|
||||
|
||||
// Zero all counters and pre-allocate room for one alignment.
jstats::
jstats()
  : my_rcnt(0)
  , my_wcnt(0)
  , my_cnt2(0)
{
  my_aln.reserve(1);
}
|
||||
|
||||
// Copy constructor: copies all counters and the alignment list.
// The mutex is not copied; the new object gets its own.
// (my_cnt2 was previously left uninitialised in copies.)
jstats::
jstats(jstats const& other)
  : my_rcnt(other.rcnt())
  , my_wcnt(other.wcnt())
  , my_cnt2(other.cnt2())
  , my_aln(other.aln())
{ }
|
||||
|
||||
void
|
||||
jstats::
|
||||
add(float w, vector<uchar> const& a, uint32_t const cnt2)
|
||||
{
|
||||
boost::lock_guard<boost::mutex> lk(this->lock);
|
||||
my_rcnt += 1;
|
||||
my_wcnt += w;
|
||||
my_cnt2 += cnt2;
|
||||
if (a.size())
|
||||
{
|
||||
size_t i = 0;
|
||||
while (i < my_aln.size() && my_aln[i].second != a) ++i;
|
||||
if (i == my_aln.size())
|
||||
my_aln.push_back(pair<size_t,vector<uchar> >(1,a));
|
||||
else
|
||||
my_aln[i].first++;
|
||||
if (my_aln[i].first > my_aln[i/2].first)
|
||||
push_heap(my_aln.begin(),my_aln.begin()+i+1);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t
|
||||
jstats::
|
||||
rcnt() const
|
||||
{ return my_rcnt; }
|
||||
|
||||
float
|
||||
jstats::
|
||||
wcnt() const
|
||||
{ return my_wcnt; }
|
||||
|
||||
uint32_t
|
||||
jstats::
|
||||
cnt2() const
|
||||
{ return my_cnt2; }
|
||||
|
||||
// list of (count, alignment) pairs collected so far
vector<pair<size_t, vector<uchar> > > const&
jstats::
aln() const
{
  return this->my_aln;
}
|
||||
|
||||
bool
|
||||
PhrasePair::
|
||||
operator<(PhrasePair const& other) const
|
||||
{
|
||||
return this->score < other.score;
|
||||
}
|
||||
|
||||
bool
|
||||
PhrasePair::
|
||||
operator>(PhrasePair const& other) const
|
||||
{
|
||||
return this->score > other.score;
|
||||
}
|
||||
|
||||
// Start from a well-defined state: all IDs, counters and the score are
// zero (they were previously left uninitialised, although /score/ is read
// by the comparison operators); fvals and aln start out empty.
PhrasePair::PhrasePair()
  : p1(0), p2(0)
  , raw1(0), raw2(0), sample1(0), sample2(0), good1(0), good2(0), joint(0)
  , mono(0), swap(0), left(0), right(0)
  , score(0)
{}
|
||||
|
||||
void
|
||||
PhrasePair::
|
||||
init(uint64_t const pid1, pstats const& ps, size_t const numfeats)
|
||||
{
|
||||
p1 = pid1;
|
||||
raw1 = ps.raw_cnt;
|
||||
sample1 = ps.sample_cnt;
|
||||
sample2 = 0;
|
||||
good1 = ps.good;
|
||||
good2 = 0;
|
||||
fvals.resize(numfeats);
|
||||
}
|
||||
|
||||
float
|
||||
lbop(size_t const tries, size_t const succ, float const confidence)
|
||||
{
|
||||
return
|
||||
boost::math::binomial_distribution<>::
|
||||
find_lower_bound_on_p(tries, succ, confidence);
|
||||
}
|
||||
|
||||
void
|
||||
PhrasePair::
|
||||
update(uint64_t const pid2, jstats const& js)
|
||||
{
|
||||
p2 = pid2;
|
||||
raw2 = js.cnt2();
|
||||
joint = js.rcnt();
|
||||
assert(js.aln().size());
|
||||
if (js.aln().size())
|
||||
aln = js.aln()[0].second;
|
||||
}
|
||||
|
||||
float
|
||||
PhrasePair::
|
||||
eval(vector<float> const& w)
|
||||
{
|
||||
assert(w.size() == this->fvals.size());
|
||||
this->score = 0;
|
||||
for (size_t i = 0; i < w.size(); ++i)
|
||||
this->score += w[i] * this->fvals[i];
|
||||
return this->score;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
992
moses/mm/ug_bitext.h
Normal file
992
moses/mm/ug_bitext.h
Normal file
@ -0,0 +1,992 @@
|
||||
//-*- c++ -*-
|
||||
|
||||
#ifndef __ug_bitext_h
|
||||
#define __ug_bitext_h
|
||||
// Implementations of word-aligned bitext.
|
||||
// Written by Ulrich Germann
|
||||
//
|
||||
// mmBitext: static, memory-mapped bitext
|
||||
// imBitext: dynamic, in-memory bitext
|
||||
//
|
||||
|
||||
// things we can do to speed up things:
|
||||
// - set up threads at startup time that force the
|
||||
// data in to memory sequentially
|
||||
//
|
||||
// - use multiple agendas for better load balancing and to avoid
|
||||
// competition for locks
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <iomanip>
|
||||
#include <algorithm>
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/thread.hpp>
|
||||
|
||||
#include "moses/generic/sorting/VectorIndexSorter.h"
|
||||
#include "moses/generic/sampling/Sampling.h"
|
||||
#include "moses/generic/file_io/ug_stream.h"
|
||||
#include "moses/Util.h"
|
||||
|
||||
#include "util/exception.hh"
|
||||
#include "util/check.hh"
|
||||
|
||||
#include "ug_typedefs.h"
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "ug_im_ttrack.h"
|
||||
#include "ug_mm_tsa.h"
|
||||
#include "ug_im_tsa.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_corpus_token.h"
|
||||
#include "tpt_pickler.h"
|
||||
#include "ug_lexical_phrase_scorer2.h"
|
||||
|
||||
using namespace ugdiss;
|
||||
using namespace std;
|
||||
namespace Moses {
|
||||
|
||||
namespace bitext
|
||||
{
|
||||
using namespace ugdiss;
|
||||
|
||||
template<typename TKN> class Bitext;
|
||||
|
||||
// Unpack a 64-bit phrase ID: bits 0-15 hold the length, bits 16-31 the
// offset, and the upper 32 bits the sentence ID.
template<typename SidT, typename OffT, typename LenT>
void
parse_pid(uint64_t const pid, SidT & sid,
          OffT & off, LenT& len)
{
  uint64_t const low16 = 0xFFFF;
  len = pid & low16;
  off = (pid >> 16) & low16;
  sid = pid >> 32;
}
|
||||
|
||||
float
|
||||
lbop(size_t const tries, size_t const succ,
|
||||
float const confidence);
|
||||
|
||||
// "joint" (i.e., phrase pair) statistics
|
||||
class
|
||||
jstats
|
||||
{
|
||||
boost::mutex lock;
|
||||
uint32_t my_rcnt; // unweighted count
|
||||
float my_wcnt; // weighted count
|
||||
uint32_t my_cnt2;
|
||||
vector<pair<size_t, vector<uchar> > > my_aln;
|
||||
public:
|
||||
jstats();
|
||||
jstats(jstats const& other);
|
||||
uint32_t rcnt() const;
|
||||
uint32_t cnt2() const; // raw target phrase occurrence count
|
||||
float wcnt() const;
|
||||
|
||||
vector<pair<size_t, vector<uchar> > > const & aln() const;
|
||||
void add(float w, vector<uchar> const& a, uint32_t const cnt2);
|
||||
};
|
||||
|
||||
struct
|
||||
pstats
|
||||
{
|
||||
boost::mutex lock; // for parallel gathering of stats
|
||||
boost::condition_variable ready; // consumers can wait for this data structure to be ready.
|
||||
|
||||
size_t raw_cnt; // (approximate) raw occurrence count
|
||||
size_t sample_cnt; // number of instances selected during sampling
|
||||
size_t good; // number of selected instances with valid word alignments
|
||||
size_t sum_pairs;
|
||||
size_t in_progress; // keeps track of how many threads are currently working on this
|
||||
typename boost::unordered_map<uint64_t, jstats> trg;
|
||||
pstats();
|
||||
void release();
|
||||
void register_worker();
|
||||
size_t count_workers() { return in_progress; }
|
||||
|
||||
void add(uint64_t const pid, float const w,
|
||||
vector<uchar> const& a, uint32_t const cnt2);
|
||||
};
|
||||
|
||||
class
|
||||
PhrasePair
|
||||
{
|
||||
public:
|
||||
uint64_t p1, p2;
|
||||
uint32_t raw1,raw2,sample1,sample2,good1,good2,joint;
|
||||
uint32_t mono,swap,left,right;
|
||||
vector<float> fvals;
|
||||
vector<uchar> aln;
|
||||
// float avlex12,avlex21; // average lexical probs (Moses std)
|
||||
// float znlex1,znlex2; // zens-ney lexical smoothing
|
||||
// float colex1,colex2; // based on raw lexical occurrences
|
||||
float score;
|
||||
PhrasePair();
|
||||
bool operator<(PhrasePair const& other) const;
|
||||
bool operator>(PhrasePair const& other) const;
|
||||
void init(uint64_t const pid1, pstats const& ps,
|
||||
size_t const numfeats);
|
||||
void update(uint64_t const pid2, jstats const& js);
|
||||
float eval(vector<float> const& w);
|
||||
};
|
||||
|
||||
template<typename Token>
|
||||
class
|
||||
PhraseScorer
|
||||
{
|
||||
protected:
|
||||
int index;
|
||||
int num_feats;
|
||||
public:
|
||||
virtual
|
||||
|
||||
void
|
||||
operator()(Bitext<Token> const& pt, PhrasePair& pp) const = 0;
|
||||
|
||||
int
|
||||
fcnt() const { return num_feats; }
|
||||
};
|
||||
|
||||
template<typename Token>
|
||||
class
|
||||
PScorePfwd : public PhraseScorer<Token>
|
||||
{
|
||||
float conf;
|
||||
public:
|
||||
PScorePfwd()
|
||||
{
|
||||
this->num_feats = 1;
|
||||
}
|
||||
|
||||
int
|
||||
init(int const i, float const c)
|
||||
{
|
||||
conf = c;
|
||||
this->index = i;
|
||||
return i + this->num_feats;
|
||||
}
|
||||
|
||||
void
|
||||
operator()(Bitext<Token> const& bt, PhrasePair& pp) const
|
||||
{
|
||||
if (pp.joint > pp.good1)
|
||||
{
|
||||
cerr << bt.toString(pp.p1,0) << " ::: " << bt.toString(pp.p2,1) << endl;
|
||||
cerr << pp.joint << "/" << pp.good1 << "/" << pp.raw2 << endl;
|
||||
}
|
||||
pp.fvals[this->index] = log(lbop(pp.good1, pp.joint, conf));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Token>
|
||||
class
|
||||
PScorePbwd : public PhraseScorer<Token>
|
||||
{
|
||||
float conf;
|
||||
public:
|
||||
PScorePbwd()
|
||||
{
|
||||
this->num_feats = 1;
|
||||
}
|
||||
|
||||
int
|
||||
init(int const i, float const c)
|
||||
{
|
||||
conf = c;
|
||||
this->index = i;
|
||||
return i + this->num_feats;
|
||||
}
|
||||
|
||||
void
|
||||
operator()(Bitext<Token> const& pt, PhrasePair& pp) const
|
||||
{
|
||||
pp.fvals[this->index] = log(lbop(max(pp.raw2,pp.joint), pp.joint, conf));
|
||||
}
|
||||
};
|
||||
|
||||
template<typename Token>
|
||||
class
|
||||
PScoreLex : public PhraseScorer<Token>
|
||||
{
|
||||
LexicalPhraseScorer2<Token> scorer;
|
||||
public:
|
||||
|
||||
PScoreLex() { this->num_feats = 2; }
|
||||
|
||||
int
|
||||
init(int const i, string const& fname)
|
||||
{
|
||||
scorer.open(fname);
|
||||
this->index = i;
|
||||
return i + this->num_feats;
|
||||
}
|
||||
|
||||
void
|
||||
operator()(Bitext<Token> const& bt, PhrasePair& pp) const
|
||||
{
|
||||
uint32_t sid1=0,sid2=0,off1=0,off2=0,len1=0,len2=0;
|
||||
parse_pid(pp.p1, sid1, off1, len1);
|
||||
parse_pid(pp.p2, sid2, off2, len2);
|
||||
|
||||
#if 0
|
||||
Token const* t1 = bt.T1->sntStart(sid1);
|
||||
for (size_t i = off1; i < off1 + len1; ++i)
|
||||
cout << (*bt.V1)[t1[i].id()] << " ";
|
||||
cout << __FILE__ << ":" << __LINE__ << endl;
|
||||
|
||||
Token const* t2 = bt.T2->sntStart(sid2);
|
||||
for (size_t i = off2; i < off2 + len2; ++i)
|
||||
cout << (*bt.V2)[t2[i].id()] << " ";
|
||||
cout << __FILE__ << ":" << __LINE__ << endl;
|
||||
|
||||
BOOST_FOREACH (int a, pp.aln)
|
||||
cout << a << " " ;
|
||||
cout << __FILE__ << ":" << __LINE__ << "\n" << endl;
|
||||
#endif
|
||||
scorer.score(bt.T1->sntStart(sid1)+off1,0,len1,
|
||||
bt.T2->sntStart(sid2)+off2,0,len2,
|
||||
pp.aln, pp.fvals[this->index],
|
||||
pp.fvals[this->index+1]);
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/// Word penalty
|
||||
template<typename Token>
|
||||
class
|
||||
PScoreWP : public PhraseScorer<Token>
|
||||
{
|
||||
public:
|
||||
|
||||
PScoreWP() { this->num_feats = 1; }
|
||||
|
||||
int
|
||||
init(int const i)
|
||||
{
|
||||
this->index = i;
|
||||
return i + this->num_feats;
|
||||
}
|
||||
|
||||
void
|
||||
operator()(Bitext<Token> const& bt, PhrasePair& pp) const
|
||||
{
|
||||
uint32_t sid2=0,off2=0,len2=0;
|
||||
parse_pid(pp.p2, sid2, off2, len2);
|
||||
pp.fvals[this->index] = len2;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
/// Phrase penalty
|
||||
template<typename Token>
|
||||
class
|
||||
PScorePP : public PhraseScorer<Token>
|
||||
{
|
||||
public:
|
||||
|
||||
PScorePP() { this->num_feats = 1; }
|
||||
|
||||
int
|
||||
init(int const i)
|
||||
{
|
||||
this->index = i;
|
||||
return i + this->num_feats;
|
||||
}
|
||||
|
||||
void
|
||||
operator()(Bitext<Token> const& bt, PhrasePair& pp) const
|
||||
{
|
||||
pp.fvals[this->index] = 1;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
template<typename TKN>
|
||||
class Bitext
|
||||
{
|
||||
mutable boost::mutex lock;
|
||||
public:
|
||||
typedef TKN Token;
|
||||
typedef typename TSA<Token>::tree_iterator iter;
|
||||
|
||||
class agenda;
|
||||
// stores the list of unfinished jobs;
|
||||
// maintains a pool of workers and assigns the jobs to them
|
||||
|
||||
// to be done: work with multiple agendas for faster lookup
|
||||
// (multiplex jobs); not sure if an agenda having more than
|
||||
// four or so workers is efficient, because workers get into
|
||||
// each other's way.
|
||||
mutable sptr<agenda> ag;
|
||||
|
||||
sptr<Ttrack<char> > const Tx; // word alignments
|
||||
sptr<Ttrack<Token> > const T1; // token track
|
||||
sptr<Ttrack<Token> > const T2; // token track
|
||||
sptr<TokenIndex> const V1; // vocab
|
||||
sptr<TokenIndex> const V2; // vocab
|
||||
sptr<TSA<Token> > const I1; // indices
|
||||
sptr<TSA<Token> > const I2; // indices
|
||||
|
||||
/// given the source phrase sid[start:stop]
|
||||
// find the possible start (s1 .. s2) and end (e1 .. e2)
|
||||
// points of the target phrase; if non-NULL, store word
|
||||
// alignments in *core_alignment. If /flip/, source phrase is
|
||||
// L2.
|
||||
bool
|
||||
find_trg_phr_bounds
|
||||
(size_t const sid, size_t const start, size_t const stop,
|
||||
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
|
||||
vector<uchar> * core_alignment, bool const flip) const;
|
||||
|
||||
mutable boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
|
||||
private:
|
||||
size_t default_sample_size;
|
||||
sptr<pstats>
|
||||
prep2(iter const& phrase, size_t const max_sample) const;
|
||||
public:
|
||||
Bitext(Ttrack<Token>* const t1,
|
||||
Ttrack<Token>* const t2,
|
||||
Ttrack<char>* const tx,
|
||||
TokenIndex* const v1,
|
||||
TokenIndex* const v2,
|
||||
TSA<Token>* const i1,
|
||||
TSA<Token>* const i2,
|
||||
size_t const max_sample=5000);
|
||||
|
||||
virtual void open(string const base, string const L1, string const L2) = 0;
|
||||
|
||||
sptr<pstats> lookup(iter const& phrase) const;
|
||||
sptr<pstats> lookup(iter const& phrase, size_t const max_sample) const;
|
||||
void prep(iter const& phrase) const;
|
||||
void setDefaultSampleSize(size_t const max_samples);
|
||||
size_t getDefaultSampleSize() const;
|
||||
|
||||
string toString(uint64_t pid, int isL2) const;
|
||||
};
|
||||
|
||||
template<typename Token>
|
||||
string
|
||||
Bitext<Token>::
|
||||
toString(uint64_t pid, int isL2) const
|
||||
{
|
||||
ostringstream buf;
|
||||
uint32_t sid,off,len; parse_pid(pid,sid,off,len);
|
||||
Token const* t = (isL2 ? T2 : T1)->sntStart(sid) + off;
|
||||
Token const* x = t + len;
|
||||
TokenIndex const& V = isL2 ? *V2 : *V1;
|
||||
while (t < x)
|
||||
{
|
||||
buf << V[t->id()];
|
||||
if (++t < x) buf << " ";
|
||||
}
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
|
||||
|
||||
template<typename Token>
|
||||
size_t
|
||||
Bitext<Token>::
|
||||
getDefaultSampleSize() const
|
||||
{
|
||||
return default_sample_size;
|
||||
}
|
||||
template<typename Token>
|
||||
void
|
||||
Bitext<Token>::
|
||||
setDefaultSampleSize(size_t const max_samples)
|
||||
{
|
||||
if (max_samples != default_sample_size)
|
||||
{
|
||||
cache1.clear();
|
||||
cache2.clear();
|
||||
default_sample_size = max_samples;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
Bitext<Token>::
|
||||
Bitext(Ttrack<Token>* const t1,
|
||||
Ttrack<Token>* const t2,
|
||||
Ttrack<char>* const tx,
|
||||
TokenIndex* const v1,
|
||||
TokenIndex* const v2,
|
||||
TSA<Token>* const i1,
|
||||
TSA<Token>* const i2,
|
||||
size_t const max_sample)
|
||||
: Tx(tx), T1(t1), T2(t2), V1(v1), V2(v2), I1(i1), I2(i2)
|
||||
, default_sample_size(max_sample)
|
||||
{ }
|
||||
|
||||
// agenda is a pool of jobs
|
||||
template<typename Token>
|
||||
class
|
||||
Bitext<Token>::
|
||||
agenda
|
||||
{
|
||||
boost::mutex lock;
|
||||
class job
|
||||
{
|
||||
boost::mutex lock;
|
||||
friend class agenda;
|
||||
public:
|
||||
size_t workers; // how many workers are working on this job?
|
||||
sptr<TSA<Token> const> root; // root of the underlying suffix array
|
||||
char const* next; // next position to read from
|
||||
char const* stop; // end of index range
|
||||
size_t max_samples; // how many samples to extract at most
|
||||
size_t ctr; /* # of phrase occurrences considered so far
|
||||
* # of samples chosen is stored in stats->good */
|
||||
size_t len; // phrase length
|
||||
bool fwd; // if true, source phrase is L1
|
||||
sptr<pstats> stats; // stores statistics collected during sampling
|
||||
bool step(uint64_t & sid, uint64_t & offset); // select another occurrence
|
||||
bool done() const;
|
||||
job(typename TSA<Token>::tree_iterator const& m,
|
||||
sptr<TSA<Token> > const& r, size_t maxsmpl, bool isfwd);
|
||||
};
|
||||
|
||||
class
|
||||
worker
|
||||
{
|
||||
agenda& ag;
|
||||
public:
|
||||
worker(agenda& a) : ag(a) {}
|
||||
void operator()();
|
||||
};
|
||||
|
||||
list<sptr<job> > joblist;
|
||||
vector<sptr<boost::thread> > workers;
|
||||
bool shutdown;
|
||||
size_t doomed;
|
||||
public:
|
||||
Bitext<Token> const& bt;
|
||||
agenda(Bitext<Token> const& bitext);
|
||||
~agenda();
|
||||
void add_workers(int n);
|
||||
|
||||
sptr<pstats>
|
||||
add_job(typename TSA<Token>::tree_iterator const& phrase,
|
||||
size_t const max_samples);
|
||||
sptr<job> get_job();
|
||||
};
|
||||
|
||||
template<typename Token>
|
||||
bool
|
||||
Bitext<Token>::
|
||||
agenda::
|
||||
job::
|
||||
step(uint64_t & sid, uint64_t & offset)
|
||||
{
|
||||
boost::lock_guard<boost::mutex> jguard(lock);
|
||||
if ((max_samples == 0) && (next < stop))
|
||||
{
|
||||
next = root->readSid(next,stop,sid);
|
||||
next = root->readOffset(next,stop,offset);
|
||||
boost::lock_guard<boost::mutex> sguard(stats->lock);
|
||||
if (stats->raw_cnt == ctr) ++stats->raw_cnt;
|
||||
stats->sample_cnt++;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
{
|
||||
while (next < stop && stats->good < max_samples)
|
||||
{
|
||||
next = root->readSid(next,stop,sid);
|
||||
next = root->readOffset(next,stop,offset);
|
||||
{
|
||||
boost::lock_guard<boost::mutex> sguard(stats->lock);
|
||||
if (stats->raw_cnt == ctr) ++stats->raw_cnt;
|
||||
size_t rnum = randInt(stats->raw_cnt - ctr++);
|
||||
if (rnum < max_samples - stats->good)
|
||||
{
|
||||
stats->sample_cnt++;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
void
|
||||
Bitext<Token>::
|
||||
agenda::
|
||||
add_workers(int n)
|
||||
{
|
||||
static boost::posix_time::time_duration nodelay(0,0,0,0);
|
||||
boost::lock_guard<boost::mutex> guard(this->lock);
|
||||
|
||||
int target = max(1, int(n + workers.size() - this->doomed));
|
||||
// house keeping: remove all workers that have finished
|
||||
for (size_t i = 0; i < workers.size(); )
|
||||
{
|
||||
if (workers[i]->timed_join(nodelay))
|
||||
{
|
||||
if (i + 1 < workers.size())
|
||||
workers[i].swap(workers.back());
|
||||
workers.pop_back();
|
||||
}
|
||||
else ++i;
|
||||
}
|
||||
// cerr << workers.size() << "/" << target << " active" << endl;
|
||||
if (int(workers.size()) > target)
|
||||
this->doomed = workers.size() - target;
|
||||
else
|
||||
while (int(workers.size()) < target)
|
||||
{
|
||||
sptr<boost::thread> w(new boost::thread(worker(*this)));
|
||||
workers.push_back(w);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
void
|
||||
Bitext<Token>::
|
||||
agenda::
|
||||
worker::
|
||||
operator()()
|
||||
{
|
||||
size_t s1=0, s2=0, e1=0, e2=0;
|
||||
uint64_t sid=0, offset=0; // of the source phrase
|
||||
while(sptr<job> j = ag.get_job())
|
||||
{
|
||||
j->stats->register_worker();
|
||||
vector<uchar> aln;
|
||||
while (j->step(sid,offset))
|
||||
{
|
||||
aln.clear();
|
||||
if (!ag.bt.find_trg_phr_bounds
|
||||
(sid, offset, offset + j->len, s1, s2, e1, e2,
|
||||
j->fwd?&aln:NULL, !j->fwd))
|
||||
continue;
|
||||
j->stats->lock.lock();
|
||||
j->stats->good += 1;
|
||||
j->stats->sum_pairs += (s2-s1+1)*(e2-e1+1);
|
||||
j->stats->lock.unlock();
|
||||
for (size_t k = j->fwd ? 1 : 0; k < aln.size(); k += 2)
|
||||
aln[k] += s2 - s1;
|
||||
Token const* o = (j->fwd ? ag.bt.T2 : ag.bt.T1)->sntStart(sid);
|
||||
float sample_weight = 1./((s2-s1+1)*(e2-e1+1));
|
||||
for (size_t s = s1; s <= s2; ++s)
|
||||
{
|
||||
sptr<iter> b = (j->fwd ? ag.bt.I2 : ag.bt.I1)->find(o+s,e1-s);
|
||||
if (!b || b->size() < e1 -s)
|
||||
UTIL_THROW(util::Exception, "target phrase not found");
|
||||
// assert(b);
|
||||
for (size_t i = e1; i <= e2; ++i)
|
||||
{
|
||||
|
||||
j->stats->add(b->getPid(),sample_weight,aln,b->approxOccurrenceCount());
|
||||
if (i < e2)
|
||||
{
|
||||
#ifndef NDEBUG
|
||||
bool ok = b->extend(o[i].id());
|
||||
assert(ok);
|
||||
#else
|
||||
b->extend(o[i].id());
|
||||
// cerr << "boo" << endl;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if (j->fwd && s < s2)
|
||||
for (size_t k = j->fwd ? 1 : 0; k < aln.size(); k += 2)
|
||||
--aln[k];
|
||||
}
|
||||
// j->stats->lock.unlock();
|
||||
}
|
||||
j->stats->release();
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
Bitext<Token>::
|
||||
agenda::
|
||||
job::
|
||||
job(typename TSA<Token>::tree_iterator const& m,
|
||||
sptr<TSA<Token> > const& r, size_t maxsmpl, bool isfwd)
|
||||
: workers(0)
|
||||
, root(r)
|
||||
, next(m.lower_bound(-1))
|
||||
, stop(m.upper_bound(-1))
|
||||
, max_samples(maxsmpl)
|
||||
, ctr(0)
|
||||
, len(m.size())
|
||||
, fwd(isfwd)
|
||||
{
|
||||
stats.reset(new pstats());
|
||||
stats->raw_cnt = m.approxOccurrenceCount();
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
sptr<pstats>
|
||||
Bitext<Token>::
|
||||
agenda::
|
||||
add_job(typename TSA<Token>::tree_iterator const& phrase,
|
||||
size_t const max_samples)
|
||||
{
|
||||
static boost::posix_time::time_duration nodelay(0,0,0,0);
|
||||
bool fwd = phrase.root == bt.I1.get();
|
||||
sptr<job> j(new job(phrase, fwd ? bt.I2 : bt.I1, max_samples, fwd));
|
||||
j->stats->register_worker();
|
||||
|
||||
boost::unique_lock<boost::mutex> lk(this->lock);
|
||||
joblist.push_back(j);
|
||||
if (joblist.size() == 1)
|
||||
{
|
||||
size_t i = 0;
|
||||
while (i < workers.size())
|
||||
{
|
||||
if (workers[i]->timed_join(nodelay))
|
||||
{
|
||||
if (doomed)
|
||||
{
|
||||
if (i+1 < workers.size())
|
||||
workers[i].swap(workers.back());
|
||||
workers.pop_back();
|
||||
--doomed;
|
||||
}
|
||||
else
|
||||
workers[i++] = sptr<boost::thread>(new boost::thread(worker(*this)));
|
||||
}
|
||||
else ++i;
|
||||
}
|
||||
}
|
||||
return j->stats;
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
sptr<typename Bitext<Token>::agenda::job>
|
||||
Bitext<Token>::
|
||||
agenda::
|
||||
get_job()
|
||||
{
|
||||
// cerr << workers.size() << " workers on record" << endl;
|
||||
sptr<job> ret;
|
||||
if (this->shutdown) return ret;
|
||||
// add_workers(0);
|
||||
boost::unique_lock<boost::mutex> lock(this->lock);
|
||||
if (this->doomed)
|
||||
{
|
||||
--this->doomed;
|
||||
return ret;
|
||||
}
|
||||
typename list<sptr<job> >::iterator j = joblist.begin();
|
||||
while (j != joblist.end())
|
||||
{
|
||||
if ((*j)->done())
|
||||
{
|
||||
(*j)->stats->release();
|
||||
joblist.erase(j++);
|
||||
}
|
||||
else if ((*j)->workers >= 4)
|
||||
{
|
||||
++j;
|
||||
}
|
||||
else break;
|
||||
}
|
||||
if (joblist.size())
|
||||
{
|
||||
ret = j == joblist.end() ? joblist.front() : *j;
|
||||
boost::lock_guard<boost::mutex> jguard(ret->lock);
|
||||
++ret->workers;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
template<typename TKN>
|
||||
class mmBitext : public Bitext<TKN>
|
||||
{
|
||||
public:
|
||||
void open(string const base, string const L1, string L2);
|
||||
mmBitext();
|
||||
};
|
||||
|
||||
template<typename TKN>
|
||||
mmBitext<TKN>::
|
||||
mmBitext()
|
||||
: Bitext<TKN>(new mmTtrack<TKN>(),
|
||||
new mmTtrack<TKN>(),
|
||||
new mmTtrack<char>(),
|
||||
new TokenIndex(),
|
||||
new TokenIndex(),
|
||||
new mmTSA<TKN>(),
|
||||
new mmTSA<TKN>())
|
||||
{};
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
mmBitext<TKN>::
|
||||
open(string const base, string const L1, string L2)
|
||||
{
|
||||
mmTtrack<TKN>& t1 = *reinterpret_cast<mmTtrack<TKN>*>(this->T1.get());
|
||||
mmTtrack<TKN>& t2 = *reinterpret_cast<mmTtrack<TKN>*>(this->T2.get());
|
||||
mmTtrack<char>& tx = *reinterpret_cast<mmTtrack<char>*>(this->Tx.get());
|
||||
t1.open(base+L1+".mct");
|
||||
t2.open(base+L2+".mct");
|
||||
tx.open(base+L1+"-"+L2+".mam");
|
||||
this->V1->open(base+L1+".tdx"); this->V1->iniReverseIndex();
|
||||
this->V2->open(base+L2+".tdx"); this->V2->iniReverseIndex();
|
||||
mmTSA<TKN>& i1 = *reinterpret_cast<mmTSA<TKN>*>(this->I1.get());
|
||||
mmTSA<TKN>& i2 = *reinterpret_cast<mmTSA<TKN>*>(this->I2.get());
|
||||
i1.open(base+L1+".sfa", this->T1.get());
|
||||
i2.open(base+L2+".sfa", this->T2.get());
|
||||
assert(this->T1->size() == this->T2->size());
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
class imBitext : public Bitext<TKN>
|
||||
{
|
||||
public:
|
||||
void open(string const base, string const L1, string L2);
|
||||
imBitext();
|
||||
};
|
||||
|
||||
template<typename TKN>
|
||||
imBitext<TKN>::
|
||||
imBitext()
|
||||
: Bitext<TKN>(new imTtrack<TKN>(),
|
||||
new imTtrack<TKN>(),
|
||||
new imTtrack<char>(),
|
||||
new TokenIndex(),
|
||||
new TokenIndex(),
|
||||
new imTSA<TKN>(),
|
||||
new imTSA<TKN>())
|
||||
{}
|
||||
|
||||
|
||||
// template<typename TKN>
|
||||
// void
|
||||
// imBitext<TKN>::
|
||||
// open(string const base, string const L1, string L2)
|
||||
// {
|
||||
// mmTtrack<TKN>& t1 = *reinterpret_cast<mmTtracuk<TKN>*>(this->T1.get());
|
||||
// mmTtrack<TKN>& t2 = *reinterpret_cast<mmTtrack<TKN>*>(this->T2.get());
|
||||
// mmTtrack<char>& tx = *reinterpret_cast<mmTtrack<char>*>(this->Tx.get());
|
||||
// t1.open(base+L1+".mct");
|
||||
// t2.open(base+L2+".mct");
|
||||
// tx.open(base+L1+"-"+L2+".mam");
|
||||
// cerr << "DADA" << endl;
|
||||
// this->V1->open(base+L1+".tdx"); this->V1->iniReverseIndex();
|
||||
// this->V2->open(base+L2+".tdx"); this->V2->iniReverseIndex();
|
||||
// mmTSA<TKN>& i1 = *reinterpret_cast<mmTSA<TKN>*>(this->I1.get());
|
||||
// mmTSA<TKN>& i2 = *reinterpret_cast<mmTSA<TKN>*>(this->I2.get());
|
||||
// i1.open(base+L1+".sfa", this->T1.get());
|
||||
// i2.open(base+L2+".sfa", this->T2.get());
|
||||
// assert(this->T1->size() == this->T2->size());
|
||||
// }
|
||||
|
||||
template<typename Token>
|
||||
bool
|
||||
Bitext<Token>::
|
||||
find_trg_phr_bounds(size_t const sid, size_t const start, size_t const stop,
|
||||
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
|
||||
vector<uchar>* core_alignment, bool const flip) const
|
||||
{
|
||||
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
|
||||
// a word on the core_alignment:
|
||||
// since fringe words ([s1,...,s2),[e1,..,e2) if s1 < s2, or e1 < e2, respectively)
|
||||
// are be definition unaligned, we store only the core alignment in *core_alignment
|
||||
// it is up to the calling function to shift alignment points over for start positions
|
||||
// of extracted phrases that start with a fringe word
|
||||
bitvector forbidden((flip ? T1 : T2)->sntLen(sid));
|
||||
size_t src,trg;
|
||||
size_t lft = forbidden.size();
|
||||
size_t rgt = 0;
|
||||
vector<vector<ushort> > aln((*T1).sntLen(sid));
|
||||
char const* p = Tx->sntStart(sid);
|
||||
char const* x = Tx->sntEnd(sid);
|
||||
|
||||
// cerr << "flip = " << flip << " " << __FILE__ << ":" << __LINE__ << endl;
|
||||
|
||||
while (p < x)
|
||||
{
|
||||
if (flip) { p = binread(p,trg); assert(p<x); p = binread(p,src); }
|
||||
else { p = binread(p,src); assert(p<x); p = binread(p,trg); }
|
||||
if (src < start || src >= stop)
|
||||
forbidden.set(trg);
|
||||
else
|
||||
{
|
||||
lft = min(lft,trg);
|
||||
rgt = max(rgt,trg);
|
||||
if (core_alignment)
|
||||
{
|
||||
if (flip) aln[trg].push_back(src);
|
||||
else aln[src].push_back(trg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = lft; i <= rgt; ++i)
|
||||
if (forbidden[i])
|
||||
return false;
|
||||
|
||||
s2 = lft; for (s1 = s2; s1 && !forbidden[s1-1]; --s1);
|
||||
e1 = rgt+1; for (e2 = e1; e2 < forbidden.size() && !forbidden[e2]; ++e2);
|
||||
|
||||
if (lft > rgt) return false;
|
||||
if (core_alignment)
|
||||
{
|
||||
core_alignment->clear();
|
||||
if (flip)
|
||||
{
|
||||
for (size_t i = lft; i <= rgt; ++i)
|
||||
{
|
||||
sort(aln[i].begin(),aln[i].end());
|
||||
BOOST_FOREACH(ushort x, aln[i])
|
||||
{
|
||||
core_alignment->push_back(i-lft);
|
||||
core_alignment->push_back(x-start);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = start; i < stop; ++i)
|
||||
{
|
||||
BOOST_FOREACH(ushort x, aln[i])
|
||||
{
|
||||
core_alignment->push_back(i-start);
|
||||
core_alignment->push_back(x-lft);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if 0
|
||||
// if (e1 - s1 > 3)
|
||||
{
|
||||
lock_guard<mutex> guard(this->lock);
|
||||
Token const* t1 = T1->sntStart(sid);
|
||||
Token const* t2 = T2->sntStart(sid);
|
||||
cout << "[" << start << ":" << stop << "] => ["
|
||||
<< s1 << ":" << s2 << ":"
|
||||
<< e1 << ":" << e2 << "]" << endl;
|
||||
for (size_t k = start; k < stop; ++k)
|
||||
cout << k-start << "." << (*V1)[t1[k].id()] << " ";
|
||||
cout << endl;
|
||||
for (size_t k = s1; k < e2;)
|
||||
{
|
||||
if (k == s2) cout << "[";
|
||||
cout << int(k)-int(s2) << "." << (*V2)[t2[k].id()];
|
||||
if (++k == e1) cout << "] ";
|
||||
else cout << " ";
|
||||
}
|
||||
cout << endl;
|
||||
for (size_t k = 0; k < core_alignment->size(); k += 2)
|
||||
cout << int((*core_alignment)[k]) << "-" << int((*core_alignment)[k+1]) << " ";
|
||||
cout << "\n" << __FILE__ << ":" << __LINE__ << endl;
|
||||
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return lft <= rgt;
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
void
|
||||
Bitext<Token>::
|
||||
prep(iter const& phrase) const
|
||||
{
|
||||
prep2(phrase, this->default_sample_size);
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
sptr<pstats>
|
||||
Bitext<Token>::
|
||||
prep2(iter const& phrase, size_t const max_sample) const
|
||||
{
|
||||
// boost::lock_guard<boost::mutex>(this->lock);
|
||||
if (!ag)
|
||||
{
|
||||
// boost::lock_guard<boost::mutex>(this->lock);
|
||||
if (!ag)
|
||||
{
|
||||
ag.reset(new agenda(*this));
|
||||
ag->add_workers(20);
|
||||
}
|
||||
}
|
||||
typedef boost::unordered_map<uint64_t,sptr<pstats> > pcache_t;
|
||||
sptr<pstats> ret;
|
||||
if (max_sample == this->default_sample_size)
|
||||
{
|
||||
uint64_t pid = phrase.getPid();
|
||||
pcache_t & cache(phrase.root == &(*this->I1) ? cache1 : cache2);
|
||||
pcache_t::value_type entry(pid,sptr<pstats>());
|
||||
pair<pcache_t::iterator,bool> foo;
|
||||
{
|
||||
// boost::lock_guard<boost::mutex>(this->lock);
|
||||
foo = cache.emplace(entry);
|
||||
}
|
||||
if (foo.second) foo.first->second = ag->add_job(phrase, max_sample);
|
||||
ret = foo.first->second;
|
||||
}
|
||||
else ret = ag->add_job(phrase, max_sample);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
sptr<pstats>
|
||||
Bitext<Token>::
|
||||
lookup(iter const& phrase) const
|
||||
{
|
||||
boost::lock_guard<boost::mutex>(this->lock);
|
||||
sptr<pstats> ret;
|
||||
ret = prep2(phrase, this->default_sample_size);
|
||||
assert(ret);
|
||||
boost::unique_lock<boost::mutex> lock(ret->lock);
|
||||
while (ret->in_progress)
|
||||
ret->ready.wait(lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
sptr<pstats>
|
||||
Bitext<Token>::
|
||||
lookup(iter const& phrase, size_t const max_sample) const
|
||||
{
|
||||
boost::lock_guard<boost::mutex>(this->lock);
|
||||
sptr<pstats> ret = prep2(phrase, max_sample);
|
||||
boost::unique_lock<boost::mutex> lock(ret->lock);
|
||||
while (ret->in_progress)
|
||||
ret->ready.wait(lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
Bitext<Token>::
|
||||
agenda::
|
||||
~agenda()
|
||||
{
|
||||
this->lock.lock();
|
||||
this->shutdown = true;
|
||||
this->lock.unlock();
|
||||
for (size_t i = 0; i < workers.size(); ++i)
|
||||
workers[i]->join();
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
Bitext<Token>::
|
||||
agenda::
|
||||
agenda(Bitext<Token> const& thebitext)
|
||||
: shutdown(false), doomed(0), bt(thebitext)
|
||||
{ }
|
||||
|
||||
template<typename Token>
|
||||
bool
|
||||
Bitext<Token>::
|
||||
agenda::
|
||||
job::
|
||||
done() const
|
||||
{
|
||||
return (max_samples && stats->good >= max_samples) || next == stop;
|
||||
}
|
||||
|
||||
} // end of namespace bitext
|
||||
} // end of namespace moses
|
||||
#endif
|
||||
|
57
moses/mm/ug_conll_bottom_up_token.h
Normal file
57
moses/mm/ug_conll_bottom_up_token.h
Normal file
@ -0,0 +1,57 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2007-2012 Ulrich Germann
|
||||
// Token class for dependency trees, where the linear order
|
||||
// of tokens is defined as going up a dependency chain
|
||||
// include guard (the #define must spell the macro exactly as the #ifndef)
#ifndef __ug_conll_bottom_up_token_h
#define __ug_conll_bottom_up_token_h
|
||||
#include "ug_typedefs.h"
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
template<typename T>
|
||||
class ConllBottomUpToken : public T
|
||||
{
|
||||
public:
|
||||
typedef T Token;
|
||||
ConllBottomUpToken() : T() {};
|
||||
ConllBottomUpToken(id_type id) : T(id) {};
|
||||
|
||||
ConllBottomUpToken const* next(int length=1) const;
|
||||
|
||||
template<typename TTRACK_TYPE>
|
||||
ConllBottomUpToken const* stop(TTRACK_TYPE const& C, id_type sid) const
|
||||
{
|
||||
return NULL;
|
||||
};
|
||||
|
||||
ConllBottomUpToken const*
|
||||
stop(ConllBottomUpToken const* seqStart,
|
||||
ConllBottomUpToken const* seqEnd) const
|
||||
{
|
||||
return NULL;
|
||||
};
|
||||
|
||||
bool operator<(T const& other) const { return this->cmp(other) < 0; }
|
||||
bool operator>(T const& other) const { return this->cmp(other) > 0; }
|
||||
bool operator==(T const& other) const { return this->cmp(other) == 0; }
|
||||
bool operator!=(T const& other) const { return this->cmp(other) != 0; }
|
||||
|
||||
bool reachable(T const* o)
|
||||
{
|
||||
for (T const* x = this; x; x = reinterpret_cast<T const*>(x->up()))
|
||||
if (x == o) return true;
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
ConllBottomUpToken<T> const*
|
||||
ConllBottomUpToken<T>::
|
||||
next(int length) const
|
||||
{
|
||||
return reinterpret_cast<ConllBottomUpToken<T> const*>(this->up(length));
|
||||
}
|
||||
|
||||
} // end of namespace ugdiss
|
||||
#endif
|
20
moses/mm/ug_conll_record.cc
Normal file
20
moses/mm/ug_conll_record.cc
Normal file
@ -0,0 +1,20 @@
|
||||
#include "ug_conll_record.h"
|
||||
namespace ugdiss
|
||||
{
|
||||
Conll_Record
|
||||
Conll_Record::
|
||||
remap(vector<id_type const*> const& m) const
|
||||
{
|
||||
Conll_Record ret;
|
||||
ret.sform = m.size() > 0 && m[0] ? m[0][this->sform] : this->sform;
|
||||
ret.lemma = m.size() > 1 && m[1] ? m[1][this->lemma] : this->lemma;
|
||||
ret.majpos = m.size() > 2 && m[2] ? m[2][this->majpos] : this->majpos;
|
||||
ret.minpos = m.size() > 2 && m[2] ? m[2][this->minpos] : this->minpos;
|
||||
ret.dtype = m.size() > 3 && m[3] ? m[3][this->dtype] : this->dtype;
|
||||
ret.info[0] = m.size() > 4 && m[4] ? m[4][this->info[0]] : this->info[0];
|
||||
ret.info[1] = m.size() > 5 && m[5] ? m[5][this->info[1]] : this->info[1];
|
||||
ret.info[2] = m.size() > 6 && m[6] ? m[6][this->info[2]] : this->info[2];
|
||||
ret.parent = this->parent;
|
||||
return ret;
|
||||
}
|
||||
}
|
70
moses/mm/ug_conll_record.h
Normal file
70
moses/mm/ug_conll_record.h
Normal file
@ -0,0 +1,70 @@
|
||||
#ifndef __ug_conll_record_h
|
||||
#define __ug_conll_record_h
|
||||
#include "ug_typedefs.h"
|
||||
// Base class for dependency tree corpora with POS and Lemma annotations
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
class
|
||||
Conll_Record
|
||||
{
|
||||
public:
|
||||
id_type sform; // surface form
|
||||
id_type lemma; // lemma
|
||||
uchar majpos; // major part of speech
|
||||
uchar minpos; // minor part of speech
|
||||
short parent; // id of parent
|
||||
uchar dtype; // dependency type
|
||||
uchar info[3]; /* additional information (depends on the part of speech)
|
||||
* a place holder for the time being, to ensure proper
|
||||
* alignment in memory */
|
||||
Conll_Record();
|
||||
Conll_Record const* up(int length=1) const;
|
||||
|
||||
Conll_Record& operator=(Conll_Record const& other);
|
||||
|
||||
bool isDescendentOf(Conll_Record const* other) const;
|
||||
|
||||
// virtual bool operator==(Conll_Record const& other) const;
|
||||
// virtual bool operator<(Conll_Record const& other) const;
|
||||
Conll_Record remap(vector<id_type const*> const& m) const;
|
||||
|
||||
#if 0
|
||||
/** constructor for conversion from CONLL-stype text format
|
||||
* @parameter SF Vocabulary for surface form
|
||||
* @parameter LM Vocabulary for lemma
|
||||
* @parameter PS Vocabulary for part-of-speech
|
||||
* @parameter DT Vocabulary for dependency type
|
||||
*/
|
||||
Conll_Record(string const& line,
|
||||
TokenIndex const& SF, TokenIndex const& LM,
|
||||
TokenIndex const& PS, TokenIndex const& DT);
|
||||
|
||||
/** store the record as-is to disk (for memory-mapped reading later) */
|
||||
void store(ostream& out);
|
||||
#endif
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
T const* as(Conll_Record const* p)
|
||||
{
|
||||
return reinterpret_cast<T const*>(p);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
T const* up(T const* p,int length=1)
|
||||
{
|
||||
return as<T>(p->up(length));
|
||||
}
|
||||
|
||||
// this is for contiguous word sequences extracted from longer sequences:
|
||||
// adjust parent pointers to 0 (no parent) if they point out of the
|
||||
// subsequence
|
||||
void
|
||||
fixParse(Conll_Record* start, Conll_Record* stop);
|
||||
|
||||
} // end of namespace ugdiss
|
||||
|
||||
#endif
|
44
moses/mm/ug_corpus_token.cc
Normal file
44
moses/mm/ug_corpus_token.cc
Normal file
@ -0,0 +1,44 @@
|
||||
#include "ug_corpus_token.h"
|
||||
// Simple wrapper around integer IDs for use with the Ctrack and TSA template classes.
|
||||
// (c) 2007-2009 Ulrich Germann
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
id_type const&
|
||||
SimpleWordId::
|
||||
id() const
|
||||
{
|
||||
return theID;
|
||||
}
|
||||
|
||||
int
|
||||
SimpleWordId::
|
||||
cmp(SimpleWordId const& other) const
|
||||
{
|
||||
return (theID < other.theID ? -1
|
||||
: theID == other.theID ? 0
|
||||
: 1);
|
||||
}
|
||||
|
||||
SimpleWordId::
|
||||
SimpleWordId(id_type const& id)
|
||||
{
|
||||
theID = id;
|
||||
}
|
||||
|
||||
bool
|
||||
SimpleWordId::
|
||||
operator==(SimpleWordId const& other) const
|
||||
{
|
||||
return theID == other.theID;
|
||||
}
|
||||
|
||||
id_type
|
||||
SimpleWordId::
|
||||
remap(vector<id_type const*> const& m) const
|
||||
{
|
||||
if (!m[0]) return theID;
|
||||
return m[0][theID];
|
||||
}
|
||||
|
||||
}
|
97
moses/mm/ug_corpus_token.h
Normal file
97
moses/mm/ug_corpus_token.h
Normal file
@ -0,0 +1,97 @@
|
||||
// -*- c++ -*-
|
||||
// This code is part of the re-factorization of the earlier non-template implementation of "corpus tracks"
|
||||
// and suffix and prefix arrays over them as template classes.
|
||||
// (c) 2007-2009 Ulrich Germann
|
||||
|
||||
#ifndef __ug_corpus_token_h
|
||||
#define __ug_corpus_token_h
|
||||
|
||||
// This file defines a few simple token classes for use with the Ttrack/TSA template classes
|
||||
// - SimpleWordId is a simple wrapper around an integer ID
|
||||
// - L2R_Token defines next() for building suffix arrays
|
||||
// - R2L_Token defines next() for building prefix arrays
|
||||
|
||||
|
||||
#include "tpt_typedefs.h"
|
||||
#include "ug_ttrack_base.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
/** Simple wrapper around id_type for use with the Ttrack/TSA template classes */
|
||||
|
||||
class SimpleWordId
|
||||
{
|
||||
id_type theID;
|
||||
public:
|
||||
SimpleWordId(id_type const& id);
|
||||
id_type const& id() const;
|
||||
int cmp(SimpleWordId const& other) const;
|
||||
bool operator==(SimpleWordId const& other) const;
|
||||
id_type remap(vector<id_type const*> const& m) const;
|
||||
};
|
||||
|
||||
/** Token class for suffix arrays */
|
||||
template<typename T>
|
||||
class
|
||||
L2R_Token : public T
|
||||
{
|
||||
public:
|
||||
typedef T Token;
|
||||
|
||||
L2R_Token() : T() {};
|
||||
L2R_Token(id_type id) : T(id) {};
|
||||
|
||||
L2R_Token const* next(int n=1) const { return this+n; }
|
||||
|
||||
/** return a pointer to the end of a sentence; used as a stopping criterion during
|
||||
* comparison of suffixes; see Ttrack::cmp() */
|
||||
template<typename TTRACK_TYPE>
|
||||
L2R_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
|
||||
{
|
||||
return reinterpret_cast<L2R_Token<T> const*>(C.sntEnd(sid));
|
||||
}
|
||||
|
||||
L2R_Token const* stop(L2R_Token const* seqStart, L2R_Token const* seqEnd) const
|
||||
{
|
||||
return seqEnd;
|
||||
}
|
||||
|
||||
bool operator<(T const& other) const { return this->cmp(other) < 0; }
|
||||
bool operator>(T const& other) const { return this->cmp(other) > 0; }
|
||||
bool operator==(T const& other) const { return this->cmp(other) == 0; }
|
||||
bool operator!=(T const& other) const { return this->cmp(other) != 0; }
|
||||
};
|
||||
|
||||
/** Token class for prefix arrays */
|
||||
template<typename T>
|
||||
class
|
||||
R2L_Token : public T
|
||||
{
|
||||
public:
|
||||
typedef T Token;
|
||||
|
||||
R2L_Token() : T() {};
|
||||
R2L_Token(id_type id) : T(id) {};
|
||||
|
||||
R2L_Token const* next(int n = 1) const { return this - n; }
|
||||
|
||||
template<typename TTRACK_TYPE>
|
||||
R2L_Token const* stop(TTRACK_TYPE const& C, id_type sid) const
|
||||
{
|
||||
return reinterpret_cast<R2L_Token<T> const*>(C.sntStart(sid) - 1);
|
||||
}
|
||||
|
||||
R2L_Token const* stop(R2L_Token const* seqStart, R2L_Token const* seqEnd) const
|
||||
{
|
||||
assert(seqStart);
|
||||
return seqStart - 1;
|
||||
}
|
||||
|
||||
bool operator<(T const& other) const { return this->cmp(other) < 0; }
|
||||
bool operator>(T const& other) const { return this->cmp(other) > 0; }
|
||||
bool operator==(T const& other) const { return this->cmp(other) == 0; }
|
||||
bool operator!=(T const& other) const { return this->cmp(other) != 0; }
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
323
moses/mm/ug_deptree.cc
Normal file
323
moses/mm/ug_deptree.cc
Normal file
@ -0,0 +1,323 @@
|
||||
#include <sstream>
|
||||
|
||||
#include "ug_deptree.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
|
||||
using namespace std;
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
bool
|
||||
Conll_Record::
|
||||
isDescendentOf(Conll_Record const* other) const
|
||||
{
|
||||
Conll_Record const* a = this;
|
||||
while (a != other && a->parent)
|
||||
a += a->parent;
|
||||
return a==other;
|
||||
}
|
||||
|
||||
Conll_Record&
|
||||
Conll_Record::
|
||||
operator=(Conll_Record const& o)
|
||||
{
|
||||
sform = o.sform;
|
||||
lemma = o.lemma;
|
||||
majpos = o.majpos;
|
||||
minpos = o.minpos;
|
||||
parent = o.parent;
|
||||
dtype = o.dtype;
|
||||
info[0] = o.info[0];
|
||||
info[1] = o.info[1];
|
||||
info[2] = o.info[2];
|
||||
return *this;
|
||||
}
|
||||
|
||||
Conll_Record::
|
||||
Conll_Record()
|
||||
: sform(0),lemma(0),majpos(0),minpos(0),parent(0),dtype(0)
|
||||
{
|
||||
info[0]=0;
|
||||
info[1]=0;
|
||||
info[2]=0;
|
||||
}
|
||||
|
||||
Conll_AllFields::
|
||||
Conll_AllFields()
|
||||
: Conll_Record::Conll_Record()
|
||||
{};
|
||||
|
||||
int
|
||||
Conll_AllFields::
|
||||
cmp(Conll_Record const& other) const
|
||||
{
|
||||
if (sform != other.sform) return sform < other.sform ? -1 : 1;
|
||||
if (lemma != other.lemma) return lemma < other.lemma ? -1 : 1;
|
||||
if (majpos != other.majpos) return majpos < other.majpos ? -1 : 1;
|
||||
if (minpos != other.minpos) return minpos < other.minpos ? -1 : 1;
|
||||
if (dtype != other.dtype) return dtype < other.dtype ? -1 : 1;
|
||||
if (info[0] != other.info[0]) return info[0] < other.info[0] ? -1 : 1;
|
||||
if (info[1] != other.info[1]) return info[1] < other.info[1] ? -1 : 1;
|
||||
if (info[2] != other.info[2]) return info[2] < other.info[2] ? -1 : 1;
|
||||
if (parent != other.parent) return parent < other.parent ? -1 : 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
Conll_WildCard::
|
||||
Conll_WildCard()
|
||||
: Conll_Record::Conll_Record()
|
||||
{};
|
||||
|
||||
int
|
||||
Conll_WildCard::
|
||||
cmp(Conll_Record const& other) const
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
#if 1
|
||||
bool
|
||||
Conll_AllFields::
|
||||
operator==(Conll_AllFields const& other) const
|
||||
{
|
||||
return (sform == other.sform
|
||||
&& lemma == other.lemma
|
||||
&& majpos == other.majpos
|
||||
&& minpos == other.minpos
|
||||
&& parent == other.parent
|
||||
&& dtype == other.dtype
|
||||
&& info[0] == other.info[0]
|
||||
&& info[1] == other.info[1]
|
||||
&& info[2] == other.info[2]
|
||||
);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
Conll_Record::
|
||||
Conll_Record(string const& line,
|
||||
TokenIndex const& SF, TokenIndex const& LM,
|
||||
TokenIndex const& PS, TokenIndex const& DT)
|
||||
{
|
||||
|
||||
string surf,lem,pos1,pos2,dummy,drel;
|
||||
short id,gov;
|
||||
istringstream buf(line);
|
||||
|
||||
buf >> id >> surf >> lem >> pos1 >> pos2 >> dummy >> gov >> drel;
|
||||
|
||||
sform = SF[surf];
|
||||
lemma = LM[lem];
|
||||
if (PS[pos1] > 255 || PS[pos2] > 255 || DT[drel] > 255)
|
||||
{
|
||||
cerr << "error at this line:\n" << line << endl;
|
||||
exit(1);
|
||||
}
|
||||
majpos = rangeCheck(PS[pos1],256);
|
||||
minpos = rangeCheck(PS[pos2],256);
|
||||
dtype = rangeCheck(DT[drel],256);
|
||||
parent = gov ? gov-id : 0;
|
||||
info[0]=info[1]=info[2]=0;
|
||||
}
|
||||
void
|
||||
Conll_Record::
|
||||
store(ostream& out)
|
||||
{
|
||||
out.write(reinterpret_cast<char const*>(this),sizeof(*this));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
Conll_Record const*
|
||||
Conll_Record::up(int length) const
|
||||
{
|
||||
Conll_Record const* ret = this;
|
||||
while (length-- > 0)
|
||||
if (!ret->parent) return NULL;
|
||||
else ret += ret->parent;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
Conll_Sform::
|
||||
Conll_Sform()
|
||||
: Conll_Record::Conll_Record()
|
||||
{};
|
||||
|
||||
Conll_MinPos::
|
||||
Conll_MinPos()
|
||||
: Conll_Record::Conll_Record()
|
||||
{};
|
||||
|
||||
Conll_MinPos_Lemma::
|
||||
Conll_MinPos_Lemma()
|
||||
: Conll_Record::Conll_Record()
|
||||
{};
|
||||
|
||||
Conll_Lemma::
|
||||
Conll_Lemma()
|
||||
: Conll_Record::Conll_Record()
|
||||
{};
|
||||
|
||||
Conll_Lemma::
|
||||
Conll_Lemma(id_type _id)
|
||||
: Conll_Record::Conll_Record()
|
||||
{
|
||||
this->lemma = _id;
|
||||
};
|
||||
|
||||
Conll_MinPos::
|
||||
Conll_MinPos(id_type _id)
|
||||
: Conll_Record::Conll_Record()
|
||||
{
|
||||
this->minpos = _id;
|
||||
};
|
||||
|
||||
id_type
|
||||
Conll_MinPos::
|
||||
id() const
|
||||
{
|
||||
return this->minpos;
|
||||
}
|
||||
|
||||
Conll_MajPos::
|
||||
Conll_MajPos(id_type _id)
|
||||
: Conll_Record::Conll_Record()
|
||||
{
|
||||
this->majpos = _id;
|
||||
};
|
||||
|
||||
id_type
|
||||
Conll_MajPos::
|
||||
id() const
|
||||
{
|
||||
return this->majpos;
|
||||
}
|
||||
|
||||
id_type
|
||||
Conll_MinPos_Lemma::
|
||||
id() const
|
||||
{
|
||||
return this->minpos;
|
||||
}
|
||||
|
||||
int
|
||||
Conll_MajPos::
|
||||
cmp(Conll_Record const& other) const
|
||||
{
|
||||
return this->majpos < other.majpos ? -1 : this->majpos > other.majpos ? 1 : 0;
|
||||
}
|
||||
|
||||
int
|
||||
Conll_MinPos::
|
||||
cmp(Conll_Record const& other) const
|
||||
{
|
||||
return this->minpos < other.minpos ? -1 : this->minpos > other.minpos ? 1 : 0;
|
||||
}
|
||||
|
||||
int
|
||||
Conll_MinPos_Lemma::
|
||||
cmp(Conll_Record const& other) const
|
||||
{
|
||||
if (this->minpos != 0 && other.minpos != 0 && this->minpos != other.minpos)
|
||||
return this->minpos < other.minpos ? -1 : 1;
|
||||
if (this->lemma != 0 && other.lemma != 0 && this->lemma != other.lemma)
|
||||
return this->lemma < other.lemma ? -1 : 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
id_type
|
||||
Conll_Lemma::
|
||||
id() const
|
||||
{
|
||||
return this->lemma;
|
||||
}
|
||||
|
||||
int
|
||||
Conll_Lemma::
|
||||
cmp(Conll_Record const& other) const
|
||||
{
|
||||
#if 0
|
||||
for (Conll_Record const* x = this; x; x = x->parent ? x+x->parent : NULL)
|
||||
cout << (x!=this?".":"") << x->lemma;
|
||||
cout << " <=> ";
|
||||
for (Conll_Record const* x = &other; x; x = x->parent ? x+x->parent : NULL)
|
||||
cout << (x!=&other?".":"") << x->lemma;
|
||||
cout << (this->lemma < other.lemma ? -1 : this->lemma > other.lemma ? 1 : 0);
|
||||
cout << endl;
|
||||
#endif
|
||||
return this->lemma < other.lemma ? -1 : this->lemma > other.lemma ? 1 : 0;
|
||||
}
|
||||
|
||||
Conll_Sform::
|
||||
Conll_Sform(id_type _id)
|
||||
: Conll_Record::Conll_Record()
|
||||
{
|
||||
this->sform = _id;
|
||||
};
|
||||
|
||||
id_type
|
||||
Conll_Sform
|
||||
::id() const
|
||||
{
|
||||
return this->sform;
|
||||
}
|
||||
|
||||
int
|
||||
Conll_Sform::
|
||||
cmp(Conll_Record const& other) const
|
||||
{
|
||||
return this->sform < other.sform ? -1 : this->sform > other.sform ? 1 : 0;
|
||||
}
|
||||
|
||||
#if 0
|
||||
dpSnt::
|
||||
dpSnt(Conll_Record const* first, Conll_Record const* last)
|
||||
{
|
||||
w.reserve(last-first);
|
||||
for (Conll_Record const* x = first; x < last; ++x)
|
||||
w.push_back(DTNode(x));
|
||||
for (size_t i = 0; i < w.size(); i++)
|
||||
{
|
||||
short p = w[i].rec->parent;
|
||||
if (p != 0)
|
||||
{
|
||||
if (p > 0) assert(i+p < w.size());
|
||||
else assert(i >= size_t(-p));
|
||||
w[i].parent = &(w[i+p]);
|
||||
w[i].parent->children.push_back(&(w[i]));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
/** @return true if the linear sequence of /Conll_Record/s is coherent,
|
||||
* i.e., a proper connected tree structure */
|
||||
bool
|
||||
isCoherent(Conll_Record const* const start, Conll_Record const* const stop)
|
||||
{
|
||||
int outOfRange=0;
|
||||
for (Conll_Record const* x = start; outOfRange <= 1 && x < stop; ++x)
|
||||
{
|
||||
Conll_Record const* n = x->up();
|
||||
if (!n || n < start || n >= stop)
|
||||
outOfRange++;
|
||||
}
|
||||
return outOfRange<=1;
|
||||
}
|
||||
|
||||
// this is for contigous word sequences extracted from longer sequences
|
||||
// adjust parent pointers to 0 (no parent) if they point out of the
|
||||
// subsequence
|
||||
void
|
||||
fixParse(Conll_Record* start, Conll_Record* stop)
|
||||
{
|
||||
int len = stop-start;
|
||||
int i = 0;
|
||||
for (Conll_Record* x = start; x < stop; ++x,++i)
|
||||
{
|
||||
int p = i+x->parent;
|
||||
if (p < 0 || p >= len) x->parent = 0;
|
||||
}
|
||||
}
|
||||
}
|
217
moses/mm/ug_deptree.h
Normal file
217
moses/mm/ug_deptree.h
Normal file
@ -0,0 +1,217 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2007-2012 Ulrich Germann
|
||||
// Stuff related to dependency trees
|
||||
|
||||
#ifndef __ug_deptree_h
|
||||
#define __ug_deptree_h
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_ttrack_base.h"
|
||||
|
||||
#include "ug_conll_record.h"
|
||||
#include "ug_conll_bottom_up_token.h"
|
||||
#include "ug_typedefs.h"
|
||||
|
||||
using namespace std;
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
// Fills the vector v with pointers to the internal root r_x for the
// stretch [start,x] for all x: start <= x < stop. If the stretch
// is incoherent (more than one root), r_x is NULL.
//
// The head of a token t is t + t->parent. A token is a root of the
// stretch [start,x] if its head lies before start, after x, or is the
// token itself (parent == 0). The previous implementation applied this
// rule only to the first token; later tokens were counted as roots only
// if their head was outside of [start,stop), so tokens waiting for a head
// further to the right (and tokens with parent == 0) were missed.
//
// Requires stop > start.
template<typename T>
void
fill_L2R_roots(T const* start, T const* stop, std::vector<T const*>& v)
{
  assert(stop>start);
  size_t const n = stop-start;
  v.resize(n);
  std::vector<T const*> head(n);  // head[i]: head of token start+i
  std::vector<char> isRoot(n,0);  // isRoot[i]: token i is currently a root
  size_t numRoots = 0;
  for (size_t p = 0; p < n; ++p)
    {
      T const* const x = start+p;
      head[p] = x + x->parent;

      // tokens to the left whose head is x are attached now
      for (size_t i = 0; i < p; ++i)
        if (isRoot[i] && head[i] == x)
          {
            isRoot[i] = 0;
            --numRoots;
          }

      // x is a root of [start,x] unless its head is in [start,x)
      if (head[p] < start || head[p] >= x)
        {
          isRoot[p] = 1;
          ++numRoots;
        }

      v[p] = NULL;
      if (numRoots == 1)
        for (size_t i = 0; i <= p; ++i)
          if (isRoot[i])
            {
              v[p] = start+i;
              break;
            }
    }
}
|
||||
|
||||
// Returns the root of the tree if the span [start,stop) constitutes a
// tree, NULL otherwise. The root is the only token in the span for which
// up() yields NULL or a token outside of the span; the scan stops early
// once a second such token is found. The span must contain at least one
// such token (asserted).
template<typename T>
T const*
findInternalRoot(T const* start, T const* stop)
{
  int dangling = 0;
  T const* candidate = NULL;
  T const* cur = start;
  while (cur < stop && dangling <= 1)
    {
      T const* const head = reinterpret_cast<T const*>(cur->up());
      bool const inside = head && head >= start && head < stop;
      if (!inside)
        {
          candidate = cur;
          ++dangling;
        }
      ++cur;
    }
  assert(dangling);
  return dangling == 1 ? candidate : NULL;
}
|
||||
|
||||
// Returns the governor of the tree given by [start,stop) if the span
// constitutes a tree, NULL otherwise. The governor is the head (as
// returned by up()) of the tokens whose head is missing or lies outside
// of the span; if two such tokens have different heads, the result is NULL.
// NOTE: as long as no non-NULL governor has been recorded, a later
// external head simply replaces it without counting as a second one.
template<typename T>
T const*
findExternalRoot(T const* start, T const* stop)
{
  int governors = 0;
  T const* governor = NULL;
  for (T const* cur = start; cur < stop && governors <= 1; ++cur)
    {
      T const* const head = reinterpret_cast<T const*>(cur->up());
      if (head && head >= start && head < stop)
        continue; // attached inside the span
      if (governor && head != governor)
        ++governors; // a second, different external head
      else
        {
          governor = head;
          if (governors == 0) governors = 1;
        }
    }
  assert(governors);
  return governors == 1 ? governor : NULL;
}
|
||||
|
||||
// Convenience overload for a token sequence stored in a vector: returns
// NULL for an empty vector, otherwise the result of
// findInternalRoot(first,last) for the vector's elements.
template<typename T>
T const*
findInternalRoot(std::vector<T> const& v)
{
  // Dereferencing v.end() (or v.begin() of an empty vector) is undefined
  // behaviour, so derive both pointers from the first element instead.
  if (v.empty()) return NULL;
  T const* a = &v[0];
  T const* b = a + v.size();
  return findInternalRoot<T>(a,b);
}
|
||||
|
||||
#if 1
|
||||
class DTNode
|
||||
{
|
||||
public:
|
||||
Conll_Record const* rec; // pointer to the record (see below) for this node
|
||||
DTNode* parent; // pointer to my parent
|
||||
vector<DTNode*> children; // children (in the order they appear in the sentence)
|
||||
DTNode(Conll_Record const* p);
|
||||
};
|
||||
|
||||
/** A parsed sentence */
|
||||
class
|
||||
DependencyTree
|
||||
{
|
||||
public:
|
||||
vector<DTNode> w;
|
||||
DependencyTree(Conll_Record const* first, Conll_Record const* last);
|
||||
};
|
||||
#endif
|
||||
|
||||
class
|
||||
Conll_Lemma : public Conll_Record
|
||||
{
|
||||
public:
|
||||
Conll_Lemma();
|
||||
Conll_Lemma(id_type _id);
|
||||
id_type id() const;
|
||||
int cmp(Conll_Record const& other) const;
|
||||
};
|
||||
|
||||
class
|
||||
Conll_Sform : public Conll_Record
|
||||
{
|
||||
public:
|
||||
Conll_Sform();
|
||||
Conll_Sform(id_type _id);
|
||||
id_type id() const;
|
||||
int cmp(Conll_Record const& other) const;
|
||||
};
|
||||
|
||||
class
|
||||
Conll_MajPos : public Conll_Record
|
||||
{
|
||||
public:
|
||||
Conll_MajPos();
|
||||
Conll_MajPos(id_type _id);
|
||||
id_type id() const;
|
||||
int cmp(Conll_Record const& other) const;
|
||||
};
|
||||
|
||||
|
||||
class
|
||||
Conll_MinPos : public Conll_Record
|
||||
{
|
||||
public:
|
||||
Conll_MinPos();
|
||||
Conll_MinPos(id_type _id);
|
||||
id_type id() const;
|
||||
int cmp(Conll_Record const& other) const;
|
||||
};
|
||||
|
||||
class
|
||||
Conll_MinPos_Lemma : public Conll_Record
|
||||
{
|
||||
public:
|
||||
Conll_MinPos_Lemma();
|
||||
id_type id() const;
|
||||
int cmp(Conll_Record const& other) const;
|
||||
};
|
||||
|
||||
class
|
||||
Conll_AllFields : public Conll_Record
|
||||
{
|
||||
public:
|
||||
Conll_AllFields();
|
||||
int cmp(Conll_Record const& other) const;
|
||||
bool operator==(Conll_AllFields const& other) const;
|
||||
};
|
||||
|
||||
class
|
||||
Conll_WildCard : public Conll_Record
|
||||
{
|
||||
public:
|
||||
Conll_WildCard();
|
||||
int cmp(Conll_Record const& other) const;
|
||||
};
|
||||
|
||||
/** @return true if the linear sequence of /Conll_Record/s is coherent,
|
||||
* i.e., a proper connected tree structure */
|
||||
bool
|
||||
isCoherent(Conll_Record const* start, Conll_Record const* const stop);
|
||||
|
||||
|
||||
/** @return the root node of the tree covering the span [start,stop), if
 *  the span is coherent; NULL otherwise. The root is the only node for
 *  which up() yields NULL or a node outside of the span. An empty span
 *  yields NULL. */
template<typename T>
T const* topNode(T const* start , T const* stop)
{
  T const* top = NULL;
  T const* cur = start;
  while (cur < stop)
    {
      T const* const head = reinterpret_cast<T const*>(cur->up());
      bool const inside = head && head >= start && head < stop;
      if (!inside)
        {
          if (top) return NULL; // second dangling node: span is not coherent
          top = cur;
        }
      ++cur;
    }
  return top;
}
|
||||
|
||||
}
|
||||
#endif
|
330
moses/mm/ug_im_tsa.h
Normal file
330
moses/mm/ug_im_tsa.h
Normal file
@ -0,0 +1,330 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2007-2009 Ulrich Germann. All rights reserved.
|
||||
#ifndef _ug_im_tsa_h
|
||||
#define _ug_im_tsa_h
|
||||
|
||||
// TO DO:
|
||||
// - multi-threaded sorting during TSA construction (currently painfully slow!)
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <boost/iostreams/device/mapped_file.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
#include "tpt_tightindex.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_tsa_base.h"
|
||||
#include "tpt_pickler.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
//-----------------------------------------------------------------------
|
||||
template<typename TOKEN>
|
||||
class imTSA : public TSA<TOKEN>
|
||||
{
|
||||
typedef typename Ttrack<TOKEN>::Position cpos;
|
||||
public:
|
||||
class tree_iterator;
|
||||
friend class tree_iterator;
|
||||
|
||||
private:
|
||||
vector<cpos> sufa; // stores the actual array
|
||||
vector<filepos_type> index; /* top-level index into regions in sufa
|
||||
* (for faster access) */
|
||||
|
||||
private:
|
||||
char const*
|
||||
index_jump(char const* a, char const* z, float ratio) const;
|
||||
|
||||
char const*
|
||||
getLowerBound(id_type id) const;
|
||||
|
||||
char const*
|
||||
getUpperBound(id_type id) const;
|
||||
|
||||
public:
|
||||
imTSA();
|
||||
imTSA(Ttrack<TOKEN> const* c, bdBitset const& filt, ostream* log = NULL);
|
||||
|
||||
count_type
|
||||
sntCnt(char const* p, char const * const q) const;
|
||||
|
||||
count_type
|
||||
rawCnt(char const* p, char const * const q) const;
|
||||
|
||||
void
|
||||
getCounts(char const* p, char const * const q,
|
||||
count_type& sids, count_type& raw) const;
|
||||
|
||||
char const*
|
||||
readSid(char const* p, char const* q, id_type& sid) const;
|
||||
|
||||
char const*
|
||||
readSid(char const* p, char const* q, uint64_t& sid) const;
|
||||
|
||||
char const*
|
||||
readOffset(char const* p, char const* q, uint16_t& offset) const;
|
||||
|
||||
char const*
|
||||
readOffset(char const* p, char const* q, uint64_t& offset) const;
|
||||
|
||||
void
|
||||
sanityCheck() const;
|
||||
|
||||
void
|
||||
save_as_mm_tsa(string fname) const;
|
||||
|
||||
};
|
||||
|
||||
template<typename TOKEN>
|
||||
class
|
||||
imTSA<TOKEN>::
|
||||
tree_iterator : public TSA<TOKEN>::tree_iterator
|
||||
{
|
||||
public:
|
||||
tree_iterator(imTSA<TOKEN> const* s);
|
||||
};
|
||||
|
||||
template<typename TOKEN>
|
||||
imTSA<TOKEN>::
|
||||
tree_iterator::
|
||||
tree_iterator(imTSA<TOKEN> const* s)
|
||||
: TSA<TOKEN>::tree_iterator::tree_iterator(reinterpret_cast<TSA<TOKEN> const*>(s))
|
||||
{};
|
||||
|
||||
/** jump to the point 1/ratio in a tightly packed index
|
||||
* assumes that keys are flagged with '1', values with '0'
|
||||
*/
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
imTSA<TOKEN>::
|
||||
index_jump(char const* a, char const* z, float ratio) const
|
||||
{
|
||||
typedef cpos cpos;
|
||||
assert(ratio >= 0 && ratio < 1);
|
||||
cpos const* xa = reinterpret_cast<cpos const*>(a);
|
||||
cpos const* xz = reinterpret_cast<cpos const*>(z);
|
||||
return reinterpret_cast<char const*>(xa+int(ratio*(xz-xa)));
|
||||
}
|
||||
|
||||
template<typename TOKEN>
|
||||
imTSA<TOKEN>::
|
||||
imTSA()
|
||||
{
|
||||
this->corpus = NULL;
|
||||
this->indexSize = 0;
|
||||
this->data = NULL;
|
||||
this->startArray = NULL;
|
||||
this->endArray = NULL;
|
||||
this->corpusSize=0;
|
||||
this->BitSetCachingThreshold=4096;
|
||||
};
|
||||
|
||||
// build an array from all the tokens in the sentences in *c that are
|
||||
// specified in filter
|
||||
template<typename TOKEN>
|
||||
imTSA<TOKEN>::
|
||||
imTSA(Ttrack<TOKEN> const* c, bdBitset const& filter, ostream* log)
|
||||
{
|
||||
assert(c);
|
||||
this->corpus = c;
|
||||
|
||||
// In the first iteration over the corpus, we obtain word counts.
|
||||
// They allows us to
|
||||
// a. allocate the exact amount of memory we need
|
||||
// b. place tokens into the right 'section' in the array, based on
|
||||
// the ID of the first token in the sequence. We can then sort
|
||||
// each section separately.
|
||||
|
||||
if (log) *log << "counting tokens ... ";
|
||||
int slimit = 65536;
|
||||
// slimit=65536 is the upper bound of what we can fit into a ushort which
|
||||
// we currently use for the offset. Actually, due to (memory) word
|
||||
// alignment in the memory, using a ushort instead of a uint32_t might not
|
||||
// even make a difference.
|
||||
|
||||
vector<count_type> wcnt; // word counts
|
||||
sufa.resize(c->count_tokens(wcnt,filter,slimit,log));
|
||||
|
||||
if (log) *log << sufa.size() << "." << endl;
|
||||
// exit(1);
|
||||
// we use a second vector that keeps track for each ID of the current insertion
|
||||
// position in the array
|
||||
vector<count_type> tmp(wcnt.size(),0);
|
||||
for (size_t i = 1; i < wcnt.size(); ++i)
|
||||
tmp[i] = tmp[i-1] + wcnt[i-1];
|
||||
|
||||
// Now dump all token positions into the right place in sufa
|
||||
this->corpusSize = 0;
|
||||
for (id_type sid = filter.find_first();
|
||||
sid < filter.size();
|
||||
sid = filter.find_next(sid))
|
||||
{
|
||||
TOKEN const* k = c->sntStart(sid);
|
||||
TOKEN const* const stop = c->sntEnd(sid);
|
||||
if (stop - k >= slimit) continue;
|
||||
this->corpusSize++;
|
||||
for (ushort p=0; k < stop; ++p,++k)
|
||||
{
|
||||
id_type wid = k->id();
|
||||
cpos& cpos = sufa[tmp[wid]++];
|
||||
cpos.sid = sid;
|
||||
cpos.offset = p;
|
||||
assert(p < c->sntLen(sid));
|
||||
}
|
||||
}
|
||||
|
||||
// Now sort the array
|
||||
if (log) *log << "sorting ...." << endl;
|
||||
index.resize(wcnt.size()+1,0);
|
||||
typename ttrack::Position::LESS<Ttrack<TOKEN> > sorter(c);
|
||||
for (size_t i = 0; i < wcnt.size(); i++)
|
||||
{
|
||||
if (log && wcnt[i] > 5000)
|
||||
*log << "sorting " << wcnt[i]
|
||||
<< " entries starting with id " << i << "." << endl;
|
||||
index[i+1] = index[i]+wcnt[i];
|
||||
assert(index[i+1]==tmp[i]); // sanity check
|
||||
if (wcnt[i]>1)
|
||||
sort(sufa.begin()+index[i],sufa.begin()+index[i+1],sorter);
|
||||
}
|
||||
this->startArray = reinterpret_cast<char const*>(&(*sufa.begin()));
|
||||
this->endArray = reinterpret_cast<char const*>(&(*sufa.end()));
|
||||
this->numTokens = sufa.size();
|
||||
this->indexSize = this->index.size();
|
||||
#if 1
|
||||
// Sanity check during code development. Can be removed once the thing is stable.
|
||||
typename vector<cpos>::iterator m = sufa.begin();
|
||||
for (size_t i = 0; i < wcnt.size(); i++)
|
||||
{
|
||||
for (size_t k = 0; k < wcnt[i]; ++k,++m)
|
||||
{
|
||||
assert(c->getToken(*m)->id()==i);
|
||||
assert(m->offset < c->sntLen(m->sid));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
} // end of imTSA constructor (corpus,filter,quiet)
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
imTSA<TOKEN>::
|
||||
getLowerBound(id_type id) const
|
||||
{
|
||||
if (id >= this->index.size())
|
||||
return NULL;
|
||||
return reinterpret_cast<char const*>(&(this->sufa[index[id]]));
|
||||
}
|
||||
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
imTSA<TOKEN>::
|
||||
getUpperBound(id_type id) const
|
||||
{
|
||||
if (id+1 >= this->index.size())
|
||||
return NULL;
|
||||
return reinterpret_cast<char const*>(&(this->sufa[index[id+1]]));
|
||||
}
|
||||
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
imTSA<TOKEN>::
|
||||
readSid(char const* p, char const* q, id_type& sid) const
|
||||
{
|
||||
sid = reinterpret_cast<cpos const*>(p)->sid;
|
||||
return p;
|
||||
}
|
||||
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
imTSA<TOKEN>::
|
||||
readSid(char const* p, char const* q, uint64_t& sid) const
|
||||
{
|
||||
sid = reinterpret_cast<cpos const*>(p)->sid;
|
||||
return p;
|
||||
}
|
||||
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
imTSA<TOKEN>::
|
||||
readOffset(char const* p, char const* q, uint16_t& offset) const
|
||||
{
|
||||
offset = reinterpret_cast<cpos const*>(p)->offset;
|
||||
return p+sizeof(cpos);
|
||||
}
|
||||
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
imTSA<TOKEN>::
|
||||
readOffset(char const* p, char const* q, uint64_t& offset) const
|
||||
{
|
||||
offset = reinterpret_cast<cpos const*>(p)->offset;
|
||||
return p+sizeof(cpos);
|
||||
}
|
||||
|
||||
template<typename TOKEN>
|
||||
count_type
|
||||
imTSA<TOKEN>::
|
||||
rawCnt(char const* p, char const* const q) const
|
||||
{
|
||||
cpos const* xp = reinterpret_cast<cpos const*>(p);
|
||||
cpos const* xq = reinterpret_cast<cpos const*>(q);
|
||||
return xq-xp;
|
||||
}
|
||||
|
||||
template<typename TOKEN>
|
||||
void
|
||||
imTSA<TOKEN>::
|
||||
getCounts(char const* p, char const* const q,
|
||||
count_type& sids, count_type& raw) const
|
||||
{
|
||||
id_type sid; uint16_t off;
|
||||
bdBitset check(this->corpus->size());
|
||||
cpos const* xp = reinterpret_cast<cpos const*>(p);
|
||||
cpos const* xq = reinterpret_cast<cpos const*>(q);
|
||||
raw = xq-xp;
|
||||
for (;xp < xq;xp++)
|
||||
{
|
||||
sid = xp->sid;
|
||||
off = xp->offset;
|
||||
check.set(sid);
|
||||
}
|
||||
sids = check.count();
|
||||
}
|
||||
|
||||
template<typename TOKEN>
|
||||
void
|
||||
imTSA<TOKEN>::
|
||||
save_as_mm_tsa(string fname) const
|
||||
{
|
||||
ofstream out(fname.c_str());
|
||||
filepos_type idxStart(0);
|
||||
id_type idxSize(index.size());
|
||||
numwrite(out,idxStart);
|
||||
numwrite(out,idxSize);
|
||||
vector<filepos_type> mmIndex;
|
||||
for (size_t i = 1; i < this->index.size(); i++)
|
||||
{
|
||||
mmIndex.push_back(out.tellp());
|
||||
for (size_t k = this->index[i-1]; k < this->index[i]; ++k)
|
||||
{
|
||||
tightwrite(out,sufa[k].sid,0);
|
||||
tightwrite(out,sufa[k].offset,1);
|
||||
}
|
||||
}
|
||||
mmIndex.push_back(out.tellp());
|
||||
idxStart = out.tellp();
|
||||
for (size_t i = 0; i < mmIndex.size(); i++)
|
||||
numwrite(out,mmIndex[i]-mmIndex[0]);
|
||||
out.seekp(0);
|
||||
numwrite(out,idxStart);
|
||||
out.close();
|
||||
}
|
||||
}
|
||||
#endif
|
431
moses/mm/ug_im_ttrack.d
Normal file
431
moses/mm/ug_im_ttrack.d
Normal file
@ -0,0 +1,431 @@
|
||||
ug_im_ttrack.o: tpt/ug_im_ttrack.cc tpt/ug_im_ttrack.h \
|
||||
/u/germann/opt64/include/boost/shared_ptr.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/shared_ptr.hpp \
|
||||
/u/germann/opt64/include/boost/config.hpp \
|
||||
/u/germann/opt64/include/boost/config/user.hpp \
|
||||
/u/germann/opt64/include/boost/config/select_compiler_config.hpp \
|
||||
/u/germann/opt64/include/boost/config/compiler/gcc.hpp \
|
||||
/u/germann/opt64/include/boost/config/select_stdlib_config.hpp \
|
||||
/u/germann/opt64/include/boost/config/no_tr1/utility.hpp \
|
||||
/u/germann/opt64/include/boost/config/stdlib/libstdcpp3.hpp \
|
||||
/u/germann/opt64/include/boost/config/select_platform_config.hpp \
|
||||
/u/germann/opt64/include/boost/config/platform/linux.hpp \
|
||||
/u/germann/opt64/include/boost/config/posix_features.hpp \
|
||||
/u/germann/opt64/include/boost/config/suffix.hpp \
|
||||
/u/germann/opt64/include/boost/config/no_tr1/memory.hpp \
|
||||
/u/germann/opt64/include/boost/assert.hpp \
|
||||
/u/germann/opt64/include/boost/checked_delete.hpp \
|
||||
/u/germann/opt64/include/boost/throw_exception.hpp \
|
||||
/u/germann/opt64/include/boost/exception/detail/attribute_noreturn.hpp \
|
||||
/u/germann/opt64/include/boost/detail/workaround.hpp \
|
||||
/u/germann/opt64/include/boost/exception/exception.hpp \
|
||||
/u/germann/opt64/include/boost/current_function.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/shared_count.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/bad_weak_ptr.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/sp_counted_base.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/sp_has_sync.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/sp_counted_base_gcc_x86.hpp \
|
||||
/u/germann/opt64/include/boost/detail/sp_typeinfo.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/sp_counted_impl.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/sp_convertible.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/spinlock_pool.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/spinlock.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/spinlock_sync.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/yield_k.hpp \
|
||||
/u/germann/opt64/include/boost/memory_order.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/detail/operator_bool.hpp \
|
||||
tpt/tpt_typedefs.h tpt/tpt_error.h tpt/tpt_tokenindex.h \
|
||||
/u/germann/opt64/include/boost/iostreams/device/mapped_file.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/close.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/categories.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/flush.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/dispatch.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/select.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_base_and_derived.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/intrinsics.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/config.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_class.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_union.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/remove_cv.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/broken_compiler_spec.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/lambda_support.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/lambda.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/ttp.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/msvc.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/gcc.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/workaround.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/ctps.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/cv_traits_impl.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/type_trait_def.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/template_arity_spec.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/int.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/int_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/adl_barrier.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/adl.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/intel.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/nttp_decl.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/nttp.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/integral_wrapper.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/integral_c_tag.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/static_constant.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/static_cast.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/cat.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/config/config.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/template_arity_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessor/params.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/preprocessor.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/comma_if.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/punctuation/comma_if.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/control/if.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/control/iif.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/logical/bool.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/facilities/empty.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/punctuation/comma.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/repeat.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/repetition/repeat.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/debug/error.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/detail/auto_rec.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/tuple/eat.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/inc.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/arithmetic/inc.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/overload_resolution.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/type_trait_undef.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/bool_trait_def.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/integral_constant.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/bool.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/bool_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/integral_c.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/integral_c_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/bool_trait_undef.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/ice_and.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/ice_not.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/yes_no_type.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_same.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_convertible.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_array.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/add_reference.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_reference.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_lvalue_reference.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_rvalue_reference.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/ice.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/ice_or.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/ice_eq.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_arithmetic.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_integral.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_float.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_void.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_abstract.hpp \
|
||||
/u/germann/opt64/include/boost/static_assert.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/eval_if.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/if.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/value_wknd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/integral.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/eti.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/na_spec.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/lambda_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/void_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/na.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/na_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/lambda_arity_param.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/arity.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/dtp.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessor/enum.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessor/def_params_tail.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/limits/arity.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/logical/and.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/logical/bitand.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/identity.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/facilities/identity.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/empty.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/arithmetic/add.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/arithmetic/dec.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/control/while.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/list/fold_left.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/list/detail/fold_left.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/control/expr_iif.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/list/adt.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/detail/is_binary.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/detail/check.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/logical/compl.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/list/fold_right.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/list/detail/fold_right.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/list/reverse.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/control/detail/while.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/tuple/elem.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/arithmetic/sub.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/identity.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/void.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/traits.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/bool_trait_def.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/template_params.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/control/expr_if.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/repetition/enum_params.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/wide_streams.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/is_iterator_range.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/disable_warnings.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/enable_warnings.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/select_by_size.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/iteration/local.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/slot/slot.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/slot/detail/def.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/iteration/detail/local.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/wrap_unwrap.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/enable_if_stream.hpp \
|
||||
/u/germann/opt64/include/boost/utility/enable_if.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/traits_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/ref.hpp \
|
||||
/u/germann/opt64/include/boost/utility/addressof.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/or.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/use_preprocessed.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/nested_type_wknd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/include_preprocessed.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/compiler.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/stringize.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/or.hpp \
|
||||
/u/germann/opt64/include/boost/range/iterator_range.hpp \
|
||||
/u/germann/opt64/include/boost/range/iterator_range_core.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/iterator_traits.hpp \
|
||||
/u/germann/opt64/include/boost/detail/iterator.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/iterator_facade.hpp \
|
||||
/u/germann/opt64/include/boost/iterator.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/interoperable.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/detail/config_def.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/detail/config_undef.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/detail/facade_iterator_category.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/iterator_categories.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/placeholders.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/arg.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/arg_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/na_assert.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/assert.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/not.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/yes_no.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/arrays.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/pp_counter.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/arity_spec.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/arg_typedef.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/arg.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/placeholders.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/and.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/and.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_const.hpp \
|
||||
/u/germann/opt64/include/boost/detail/indirect_traits.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_function.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/false_result.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/is_function_ptr_helper.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_pointer.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_member_pointer.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_member_function_pointer.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/is_mem_fun_pointer_impl.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_volatile.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/remove_reference.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/remove_pointer.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/detail/enable_if.hpp \
|
||||
/u/germann/opt64/include/boost/implicit_cast.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/add_const.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/add_pointer.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/remove_const.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_pod.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_scalar.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/is_enum.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/always.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/apply.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/apply_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/apply_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/apply_wrap.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/has_apply.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/has_xxx.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/type_wrapper.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/has_xxx.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/msvc_typename.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/array/elem.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/array/data.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/array/size.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/repetition/enum_trailing_params.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/has_apply.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/msvc_never_true.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/apply_wrap.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/lambda.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/bind.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/bind_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/bind.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/bind_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/next.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/next_prior.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/common_name_wknd.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/protect.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/bind.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/full_lambda.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/quote.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/has_type.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/config/bcc.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/quote.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/template_arity.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/template_arity.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/full_lambda.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/aux_/preprocessed/gcc/apply.hpp \
|
||||
/u/germann/opt64/include/boost/range/functions.hpp \
|
||||
/u/germann/opt64/include/boost/range/begin.hpp \
|
||||
/u/germann/opt64/include/boost/range/config.hpp \
|
||||
/u/germann/opt64/include/boost/range/iterator.hpp \
|
||||
/u/germann/opt64/include/boost/range/mutable_iterator.hpp \
|
||||
/u/germann/opt64/include/boost/range/detail/extract_optional_type.hpp \
|
||||
/u/germann/opt64/include/boost/range/const_iterator.hpp \
|
||||
/u/germann/opt64/include/boost/range/end.hpp \
|
||||
/u/germann/opt64/include/boost/range/detail/implementation_help.hpp \
|
||||
/u/germann/opt64/include/boost/range/detail/common.hpp \
|
||||
/u/germann/opt64/include/boost/range/detail/sfinae.hpp \
|
||||
/u/germann/opt64/include/boost/range/size.hpp \
|
||||
/u/germann/opt64/include/boost/range/difference_type.hpp \
|
||||
/u/germann/opt64/include/boost/range/distance.hpp \
|
||||
/u/germann/opt64/include/boost/range/empty.hpp \
|
||||
/u/germann/opt64/include/boost/range/rbegin.hpp \
|
||||
/u/germann/opt64/include/boost/range/reverse_iterator.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/reverse_iterator.hpp \
|
||||
/u/germann/opt64/include/boost/utility.hpp \
|
||||
/u/germann/opt64/include/boost/utility/base_from_member.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/repetition/enum_binary_params.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/tuple/rem.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/repetition/repeat_from_to.hpp \
|
||||
/u/germann/opt64/include/boost/utility/binary.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/control/deduce_d.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/seq/cat.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/seq/fold_left.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/seq/seq.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/seq/elem.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/seq/size.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/seq/transform.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/arithmetic/mod.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/arithmetic/detail/div_base.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/comparison/less_equal.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/logical/not.hpp \
|
||||
/u/germann/opt64/include/boost/next_prior.hpp \
|
||||
/u/germann/opt64/include/boost/noncopyable.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/iterator_adaptor.hpp \
|
||||
/u/germann/opt64/include/boost/range/rend.hpp \
|
||||
/u/germann/opt64/include/boost/range/algorithm/equal.hpp \
|
||||
/u/germann/opt64/include/boost/range/concepts.hpp \
|
||||
/u/germann/opt64/include/boost/concept_check.hpp \
|
||||
/u/germann/opt64/include/boost/concept/assert.hpp \
|
||||
/u/germann/opt64/include/boost/concept/detail/general.hpp \
|
||||
/u/germann/opt64/include/boost/concept/detail/backward_compatibility.hpp \
|
||||
/u/germann/opt64/include/boost/concept/detail/has_constraints.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/conversion_traits.hpp \
|
||||
/u/germann/opt64/include/boost/concept/usage.hpp \
|
||||
/u/germann/opt64/include/boost/concept/detail/concept_def.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/seq/for_each_i.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/repetition/for.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/repetition/detail/for.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/seq/enum.hpp \
|
||||
/u/germann/opt64/include/boost/concept/detail/concept_undef.hpp \
|
||||
/u/germann/opt64/include/boost/iterator/iterator_concepts.hpp \
|
||||
/u/germann/opt64/include/boost/limits.hpp \
|
||||
/u/germann/opt64/include/boost/range/value_type.hpp \
|
||||
/u/germann/opt64/include/boost/range/detail/misc_concept.hpp \
|
||||
/u/germann/opt64/include/boost/range/iterator_range_io.hpp \
|
||||
/u/germann/opt64/include/boost/range/iterator_range_core.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/streambuf.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/operations_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/adapter/non_blocking_adapter.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/ios.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/read.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/char_traits.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/char_traits.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/seek.hpp \
|
||||
/u/germann/opt64/include/boost/integer_traits.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/positioning.hpp \
|
||||
/u/germann/opt64/include/boost/cstdint.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/codecvt.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/fpos.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/write.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/concepts.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/default_arg.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/auto_link.hpp \
|
||||
/u/germann/opt64/include/boost/config/auto_link.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/dyn_link.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/path.hpp \
|
||||
/u/germann/opt64/include/boost/type.hpp \
|
||||
/u/germann/opt64/include/boost/config/abi_prefix.hpp \
|
||||
/u/germann/opt64/include/boost/config/abi_suffix.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/stream.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/constants.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/overload_resolution.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/gcc.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/forward.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/limits.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/push_params.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/iostream.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/stream_buffer.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/streambuf/direct_streambuf.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/error.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/execute.hpp \
|
||||
/u/germann/opt64/include/boost/utility/result_of.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/iteration/iterate.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/repetition/enum_shifted_params.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/iteration/detail/iter/forward1.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/iteration/detail/bounds/lower1.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/slot/detail/shared.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/iteration/detail/bounds/upper1.hpp \
|
||||
/u/germann/opt64/include/boost/utility/detail/result_of_iterate.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/functional.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/optional.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/aligned_storage.hpp \
|
||||
/u/germann/opt64/include/boost/aligned_storage.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/alignment_of.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/size_t_trait_def.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/size_t.hpp \
|
||||
/u/germann/opt64/include/boost/mpl/size_t_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/detail/size_t_trait_undef.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/type_with_alignment.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/list/for_each_i.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/tuple/to_list.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/list/transform.hpp \
|
||||
/u/germann/opt64/include/boost/preprocessor/list/append.hpp \
|
||||
/u/germann/opt64/include/boost/type_traits/alignment_of.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/streambuf/linked_streambuf.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/operations.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/imbue.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/input_sequence.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/optimal_buffer_size.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/output_sequence.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/streambuf/indirect_streambuf.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/adapter/concept_adapter.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/call_traits.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/config/unreachable_return.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/device/null.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/buffer.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/checked_operations.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/get.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/put.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/double_object.hpp \
|
||||
/u/germann/opt64/include/boost/call_traits.hpp \
|
||||
/u/germann/opt64/include/boost/detail/call_traits.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/push.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/adapter/range_adapter.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/pipeline.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/resolve.hpp \
|
||||
/u/germann/opt64/include/boost/detail/is_incrementable.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/adapter/mode_adapter.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/adapter/output_iterator_adapter.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/detail/is_dereferenceable.hpp \
|
||||
/u/germann/opt64/include/boost/iostreams/device/array.hpp \
|
||||
tpt/ug_ttrack_base.h /u/germann/opt64/include/boost/dynamic_bitset.hpp \
|
||||
/u/germann/opt64/include/boost/dynamic_bitset/dynamic_bitset.hpp \
|
||||
/u/germann/opt64/include/boost/dynamic_bitset/config.hpp \
|
||||
/u/germann/opt64/include/boost/dynamic_bitset_fwd.hpp \
|
||||
/u/germann/opt64/include/boost/detail/dynamic_bitset.hpp \
|
||||
/u/germann/opt64/include/boost/pending/lowest_bit.hpp \
|
||||
/u/germann/opt64/include/boost/pending/integer_log2.hpp \
|
||||
tpt/ug_ttrack_position.h /h/116/germann/diss/code/basic/ug_vocab.h \
|
||||
/h/116/germann/diss/code/tpt/tpt_typedefs.h \
|
||||
/h/116/germann/diss/code/tpt/ugdiss_typedefs.h \
|
||||
/u/germann/opt64/include/boost/scoped_ptr.hpp \
|
||||
/u/germann/opt64/include/boost/smart_ptr/scoped_ptr.hpp \
|
||||
/h/116/germann/diss/code/tpt/tplm.h \
|
||||
/h/116/germann/diss/code/tpt/tpt_tokenindex.h \
|
||||
/h/116/germann/diss/code/tpt/tpt_typedefs.h \
|
||||
/h/116/germann/diss/code/tpt/tpt_pickler.h \
|
||||
/h/116/germann/diss/code/tpt/num_read_write.h \
|
||||
/h/116/germann/diss/code/tpt/tpt_tightindex.h \
|
||||
/h/116/germann/diss/code/tpt/ug_mm_2d_table.h \
|
||||
/h/116/germann/diss/code/tpt/ugdiss_typedefs.h tpt/tpt_pickler.h
|
145
moses/mm/ug_im_ttrack.h
Normal file
145
moses/mm/ug_im_ttrack.h
Normal file
@ -0,0 +1,145 @@
|
||||
// -*- c++-mode -*-
|
||||
// In-memory corpus track
|
||||
// (c) 2006-2012 Ulrich Germann.
|
||||
|
||||
#ifndef __ug_im_ttrack
|
||||
#define __ug_im_ttrack
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/unordered_map.hpp>
|
||||
|
||||
#include "tpt_typedefs.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_ttrack_base.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
// #include "ug_vocab.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
template<typename Token=id_type>
|
||||
class imTtrack : public Ttrack<Token>
|
||||
{
|
||||
private:
|
||||
size_t numToks;
|
||||
boost::shared_ptr<vector<vector<Token> > > myData; // pointer to corpus data
|
||||
public:
|
||||
|
||||
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d);
|
||||
imTtrack(istream& in, TokenIndex const& V, ostream* log);
|
||||
imTtrack();
|
||||
// imTtrack(istream& in, Vocab& V);
|
||||
|
||||
/** return pointer to beginning of sentence */
|
||||
Token const* sntStart(size_t sid) const;
|
||||
|
||||
/** return pointer to beginning of sentence */
|
||||
Token const* sntEnd(size_t sid) const;
|
||||
|
||||
size_t size() const;
|
||||
size_t numTokens() const;
|
||||
|
||||
id_type findSid(Token const* t) const;
|
||||
|
||||
};
|
||||
|
||||
template<typename Token>
|
||||
Token const*
|
||||
imTtrack<Token>::
|
||||
sntStart(size_t sid) const // return pointer to beginning of sentence
|
||||
{
|
||||
assert(sid < size());
|
||||
if ((*myData)[sid].size() == 0) return NULL;
|
||||
return &((*myData)[sid].front());
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
Token const*
|
||||
imTtrack<Token>::
|
||||
sntEnd(size_t sid) const // return pointer to end of sentence
|
||||
{
|
||||
assert(sid < size());
|
||||
if ((*myData)[sid].size() == 0) return NULL;
|
||||
return &(*myData)[sid].back();
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
size_t
|
||||
imTtrack<Token>::
|
||||
size() const // return size of corpus (in number of sentences)
|
||||
{
|
||||
// we assume that myIndex has pointers to both the beginning of the
|
||||
// first sentence and the end point of the last, so there's one more
|
||||
// offset in the myIndex than there are sentences
|
||||
return myData.size();
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
size_t
|
||||
imTtrack<Token>::
|
||||
numTokens() const // return size of corpus (in number of words)
|
||||
{
|
||||
return numToks;
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
imTtrack<Token>::
|
||||
imTtrack(istream& in, TokenIndex const& V, ostream* log = NULL)
|
||||
{
|
||||
myData.reset(new vector<vector<Token> >());
|
||||
numToks = 0;
|
||||
string line,w;
|
||||
size_t linectr=0;
|
||||
boost::unordered_map<string,id_type> H;
|
||||
for (id_type i = 0; i < V.knownVocabSize(); ++i)
|
||||
H[V[i]] = i;
|
||||
while (getline(in,line))
|
||||
{
|
||||
myData->push_back(vector<Token>());
|
||||
if (log && ++linectr%1000000==0)
|
||||
*log << linectr/1000000 << "M lines of input processed" << endl;
|
||||
istringstream buf(line);
|
||||
while (buf>>w)
|
||||
myData->back().push_back(Token(H[w]));
|
||||
myData->back().resize(myData.back().size());
|
||||
numToks += myData->back().size();
|
||||
}
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
imTtrack<Token>::
|
||||
imTtrack()
|
||||
{
|
||||
myData.reset(new vector<vector<Token> >());
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
imTtrack<Token>::
|
||||
imTtrack(boost::shared_ptr<vector<vector<Token> > > const& d)
|
||||
{
|
||||
myData = d;
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
id_type
|
||||
imTtrack<Token>::
|
||||
findSid(Token const* t) const
|
||||
{
|
||||
id_type i;
|
||||
for (i = 0; i < myData->size(); ++i)
|
||||
{
|
||||
vector<Token> const& v = (*myData)[i];
|
||||
if (v.size() == 0) continue;
|
||||
if (&v.front() <= t && &v.back() >= t)
|
||||
break;
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
159
moses/mm/ug_lexical_phrase_scorer1.h
Normal file
159
moses/mm/ug_lexical_phrase_scorer1.h
Normal file
@ -0,0 +1,159 @@
|
||||
// -*- c++ -*-
|
||||
// lexical phrase scorer, version 1
|
||||
// written by Ulrich Germann
|
||||
|
||||
#ifndef __ug_lexical_phrase_scorer_h
|
||||
#define __ug_lexical_phrase_scorer_h
|
||||
|
||||
#include "ug_stream.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include <string>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include "tpt_pickler.h"
|
||||
|
||||
using namespace std;
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
template<typename TKN>
|
||||
class
|
||||
LexicalPhraseScorer1
|
||||
{
|
||||
typedef boost::unordered_map<id_type, float> inner_map_t;
|
||||
vector<inner_map_t> L1_given_L2;
|
||||
vector<inner_map_t> L2_given_L1;
|
||||
void load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
|
||||
vector<inner_map_t> & lex);
|
||||
public:
|
||||
void open(string const& bname, string const& L1, string const& L2,
|
||||
TokenIndex & V1, TokenIndex & V2);
|
||||
void score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
vector<ushort> aln, float & fwd_score, float& bwd_score);
|
||||
void score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
char const* const aln_start, char const* const aln_end,
|
||||
float & fwd_score, float& bwd_score);
|
||||
float permissive_lookup(vector<inner_map_t> const& lex,
|
||||
id_type const s, id_type const t) const;
|
||||
};
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
LexicalPhraseScorer1<TKN>::
|
||||
load_lex (string const& fname, TokenIndex & V1, TokenIndex & V2,
|
||||
vector<inner_map_t> & lex)
|
||||
{
|
||||
boost::iostreams::filtering_istream in;
|
||||
cout << fname << endl;
|
||||
open_input_stream(fname,in);
|
||||
lex.resize(V1.ksize());
|
||||
string w1,w2; float p;
|
||||
while (in >> w1 >> w2 >> p)
|
||||
{
|
||||
id_type id1 = V1[w1];
|
||||
while (lex.size() <= id1)
|
||||
lex.push_back(inner_map_t());
|
||||
lex[id1][V2[w2]] = p;
|
||||
}
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
LexicalPhraseScorer1<TKN>::
|
||||
open(string const& bname, string const& L1, string const& L2,
|
||||
TokenIndex & V1, TokenIndex & V2)
|
||||
{
|
||||
string lex1 = bname+L1+"-"+L2+"."+L1+"-given-"+L2+".lex.gz";
|
||||
string lex2 = bname+L1+"-"+L2+"."+L2+"-given-"+L1+".lex.gz";
|
||||
cout << lex1 << endl;
|
||||
cout << lex2 << endl;
|
||||
load_lex(lex1,V1,V2,L1_given_L2);
|
||||
load_lex(lex2,V2,V1,L2_given_L1);
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
LexicalPhraseScorer1<TKN>::
|
||||
score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
vector<ushort> aln, float & fwd_score, float& bwd_score)
|
||||
{
|
||||
vector<float> p1(e1,0), p2(e2,0);
|
||||
vector<int> c1(e1,0), c2(e2,0);
|
||||
size_t i1=0,i2=0;
|
||||
for (size_t k = 0; k < aln.size(); ++k)
|
||||
{
|
||||
i1 = aln[k]; i2 = aln[++k];
|
||||
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
|
||||
p1[i1] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
|
||||
++c1[i1];
|
||||
p2[i2] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
|
||||
++c2[i2];
|
||||
}
|
||||
fwd_score = 0;
|
||||
for (size_t i = s1; i < e1; ++i)
|
||||
{
|
||||
if (c1[i] == 1) fwd_score += log(p1[i]);
|
||||
else if (c1[i]) fwd_score += log(p1[i])-log(c1[i]);
|
||||
else fwd_score += log(L1_given_L2[snt1[i].id()][0]);
|
||||
}
|
||||
bwd_score = 0;
|
||||
for (size_t i = s2; i < e2; ++i)
|
||||
{
|
||||
if (c2[i] == 1) bwd_score += log(p2[i]);
|
||||
else if (c2[i]) bwd_score += log(p2[i])-log(c2[i]);
|
||||
else bwd_score += log(L2_given_L1[snt2[i].id()][0]);
|
||||
}
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
float
|
||||
LexicalPhraseScorer1<TKN>::
|
||||
permissive_lookup(vector<inner_map_t> const& lex,
|
||||
id_type const s, id_type const t) const
|
||||
{
|
||||
if (s >= lex.size()) return 1.0;
|
||||
inner_map_t::const_iterator m = lex[s].find(t);
|
||||
return m == lex[s].end() ? 1.0 : m->second;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
LexicalPhraseScorer1<TKN>::
|
||||
score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
char const* const aln_start, char const* const aln_end,
|
||||
float & fwd_score, float& bwd_score)
|
||||
{
|
||||
vector<float> p1(e1,0), p2(e2,0);
|
||||
vector<int> c1(e1,0), c2(e2,0);
|
||||
size_t i1=0,i2=0;
|
||||
for (char const* x = aln_start; x < aln_end;)
|
||||
{
|
||||
x = binread(binread(x,i1),i2);
|
||||
// assert(snt1[i2].id() < L1_given_L2.size());
|
||||
// assert(snt2[i2].id() < L2_given_L1.size());
|
||||
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
|
||||
p1[i1] += permissive_lookup(L1_given_L2, snt1[i1].id(), snt2[i2].id());
|
||||
++c1[i1];
|
||||
p2[i2] += permissive_lookup(L2_given_L1, snt2[i2].id(), snt1[i1].id());
|
||||
++c2[i2];
|
||||
}
|
||||
fwd_score = 0;
|
||||
for (size_t i = s1; i < e1; ++i)
|
||||
{
|
||||
if (c1[i] == 1) fwd_score += log(p1[i]);
|
||||
else if (c1[i]) fwd_score += log(p1[i])-log(c1[i]);
|
||||
else fwd_score += log(L1_given_L2[snt1[i].id()][0]);
|
||||
}
|
||||
bwd_score = 0;
|
||||
for (size_t i = s2; i < e2; ++i)
|
||||
{
|
||||
if (c2[i] == 1) bwd_score += log(p2[i]);
|
||||
else if (c2[i]) bwd_score += log(p2[i])-log(c2[i]);
|
||||
else bwd_score += log(L2_given_L1[snt2[i].id()][0]);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
149
moses/mm/ug_lexical_phrase_scorer2.h
Normal file
149
moses/mm/ug_lexical_phrase_scorer2.h
Normal file
@ -0,0 +1,149 @@
|
||||
// -*- c++ -*-
|
||||
// lexical phrase scorer, version 2
|
||||
// written by Ulrich Germann
|
||||
|
||||
#ifndef __ug_lexical_phrase_scorer_h
|
||||
#define __ug_lexical_phrase_scorer_h
|
||||
|
||||
#include "moses/generic/file_io/ug_stream.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include <string>
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include "tpt_pickler.h"
|
||||
#include "ug_mm_2d_table.h"
|
||||
using namespace std;
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
template<typename TKN>
|
||||
class
|
||||
LexicalPhraseScorer2
|
||||
{
|
||||
typedef mm2dTable<id_type,id_type,uint32_t,uint32_t> table_t;
|
||||
table_t COOC;
|
||||
public:
|
||||
void open(string const& fname);
|
||||
|
||||
template<typename someint>
|
||||
void
|
||||
score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
vector<someint> & aln, float & fwd_score, float& bwd_score) const;
|
||||
|
||||
void
|
||||
score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
char const* const aln_start, char const* const aln_end,
|
||||
float & fwd_score, float& bwd_score) const;
|
||||
// plup: permissive lookup
|
||||
float plup_fwd(id_type const s,id_type const t) const;
|
||||
float plup_bwd(id_type const s,id_type const t) const;
|
||||
// to be done:
|
||||
// - on-the-fly smoothing ?
|
||||
// - better (than permissive-lookup) treatment of unknown combinations
|
||||
// permissive lookup is currently used for compatibility reasons
|
||||
// - zens-ney smoothed scoring via noisy-or combination
|
||||
};
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
LexicalPhraseScorer2<TKN>::
|
||||
open(string const& fname)
|
||||
{
|
||||
COOC.open(fname);
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
template<typename someint>
|
||||
void
|
||||
LexicalPhraseScorer2<TKN>::
|
||||
score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
vector<someint> & aln, float & fwd_score, float& bwd_score) const
|
||||
{
|
||||
vector<float> p1(e1,0), p2(e2,0);
|
||||
vector<int> c1(e1,0), c2(e2,0);
|
||||
size_t i1=0,i2=0;
|
||||
for (size_t k = 0; k < aln.size(); ++k)
|
||||
{
|
||||
i1 = aln[k]; i2 = aln[++k];
|
||||
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
|
||||
p1[i1] += plup_fwd(snt1[i1].id(),snt2[i2].id());
|
||||
++c1[i1];
|
||||
p2[i2] += plup_bwd(snt1[i1].id(),snt2[i2].id());
|
||||
++c2[i2];
|
||||
}
|
||||
fwd_score = 0;
|
||||
for (size_t i = s1; i < e1; ++i)
|
||||
{
|
||||
if (c1[i] == 1) fwd_score += log(p1[i]);
|
||||
else if (c1[i]) fwd_score += log(p1[i])-log(c1[i]);
|
||||
else fwd_score += log(plup_fwd(snt1[i].id(),0));
|
||||
}
|
||||
bwd_score = 0;
|
||||
for (size_t i = s2; i < e2; ++i)
|
||||
{
|
||||
if (c2[i] == 1) bwd_score += log(p2[i]);
|
||||
else if (c2[i]) bwd_score += log(p2[i])-log(c2[i]);
|
||||
else bwd_score += log(plup_bwd(0,snt2[i].id()));
|
||||
}
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
float
|
||||
LexicalPhraseScorer2<TKN>::
|
||||
plup_fwd(id_type const s, id_type const t) const
|
||||
{
|
||||
if (COOC.m1(s) == 0 || COOC.m2(t) == 0) return 1.0;
|
||||
// if (!COOC[s][t]) cout << s << " " << t << endl;
|
||||
assert(COOC[s][t]);
|
||||
return float(COOC[s][t])/COOC.m1(s);
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
float
|
||||
LexicalPhraseScorer2<TKN>::
|
||||
plup_bwd(id_type const s, id_type const t) const
|
||||
{
|
||||
if (COOC.m1(s) == 0 || COOC.m2(t) == 0) return 1.0;
|
||||
assert(COOC[s][t]);
|
||||
return float(COOC[s][t])/COOC.m2(t);
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
LexicalPhraseScorer2<TKN>::
|
||||
score(TKN const* snt1, size_t const s1, size_t const e1,
|
||||
TKN const* snt2, size_t const s2, size_t const e2,
|
||||
char const* const aln_start, char const* const aln_end,
|
||||
float & fwd_score, float& bwd_score) const
|
||||
{
|
||||
vector<float> p1(e1,0), p2(e2,0);
|
||||
vector<int> c1(e1,0), c2(e2,0);
|
||||
size_t i1=0,i2=0;
|
||||
for (char const* x = aln_start; x < aln_end;)
|
||||
{
|
||||
x = binread(binread(x,i1),i2);
|
||||
if (i1 < s1 || i1 >= e1 || i2 < s2 || i2 >= e2) continue;
|
||||
p1[i1] += plup_fwd(snt1[i1].id(), snt2[i2].id());
|
||||
++c1[i1];
|
||||
p2[i2] += plup_bwd(snt1[i1].id(), snt2[i2].id());
|
||||
++c2[i2];
|
||||
}
|
||||
fwd_score = 0;
|
||||
for (size_t i = s1; i < e1; ++i)
|
||||
{
|
||||
if (c1[i] == 1) fwd_score += log(p1[i]);
|
||||
else if (c1[i]) fwd_score += log(p1[i])-log(c1[i]);
|
||||
else fwd_score += log(plup_fwd(snt1[i].id(),0));
|
||||
}
|
||||
bwd_score = 0;
|
||||
for (size_t i = s2; i < e2; ++i)
|
||||
{
|
||||
if (c2[i] == 1) bwd_score += log(p2[i]);
|
||||
else if (c2[i]) bwd_score += log(p2[i])-log(c2[i]);
|
||||
else bwd_score += log(plup_bwd(0,snt2[i].id()));
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
28
moses/mm/ug_load_primer.cc
Normal file
28
moses/mm/ug_load_primer.cc
Normal file
@ -0,0 +1,28 @@
|
||||
#include "ug_load_primer.h"
|
||||
#include <boost/interprocess/mapped_region.hpp>
|
||||
#include <boost/thread.hpp>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Remembers the mapped file whose pages operator() will touch.
FastLoader::FastLoader(boost::iostreams::mapped_file_source const& f)
  : file(f)
{
}
|
||||
|
||||
|
||||
void
|
||||
FastLoader::
|
||||
operator()() const
|
||||
{
|
||||
size_t const pagesize = boost::interprocess::mapped_region::get_page_size();
|
||||
char const* stop = file.data() + file.size();
|
||||
int dummy=0;
|
||||
for (char const* x = file.data(); x < stop; x += pagesize) dummy += *x;
|
||||
}
|
||||
|
||||
// Starts a detached background thread that runs a FastLoader over f.
//
// The first version read 'boost::thread foo(FastLoader(f));', which the
// compiler parses as the declaration of a function named foo, so no
// thread was ever created. The loader is now a named object and the
// thread is detached explicitly rather than left to the destructor.
//
// NOTE: the thread may still be running when prime() returns; f must
// stay open for as long as the loader can touch it.
void prime(boost::iostreams::mapped_file_source const& f)
{
  FastLoader loader(f);
  boost::thread worker(loader);
  worker.detach();
}
|
||||
|
||||
}
|
18
moses/mm/ug_load_primer.h
Normal file
18
moses/mm/ug_load_primer.h
Normal file
@ -0,0 +1,18 @@
|
||||
//-*- c++ -*-
|
||||
#pragma once
|
||||
#include <boost/iostreams/device/mapped_file.hpp>
|
||||
//
|
||||
namespace Moses
|
||||
{
|
||||
class FastLoader
|
||||
{
|
||||
boost::iostreams::mapped_file_source const& file;
|
||||
public:
|
||||
FastLoader(boost::iostreams::mapped_file_source const& f);
|
||||
void operator()() const;
|
||||
};
|
||||
|
||||
void prime(boost::iostreams::mapped_file_source const& f);
|
||||
|
||||
|
||||
};
|
228
moses/mm/ug_mm_2d_table.h
Normal file
228
moses/mm/ug_mm_2d_table.h
Normal file
@ -0,0 +1,228 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2007-2012 Ulrich Germann
|
||||
#ifndef __ug_mm_2d_table_h
|
||||
#define __ug_mm_2d_table_h
|
||||
#include <boost/iostreams/device/mapped_file.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "tpt_typedefs.h"
|
||||
#include "tpt_pickler.h"
|
||||
#include "ug_typedefs.h"
|
||||
namespace bio=boost::iostreams;
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
template<typename OFFSET, typename ID, typename VAL, typename INIT>
|
||||
class
|
||||
mm2dTable
|
||||
{
|
||||
public:
|
||||
struct Cell
|
||||
{
|
||||
ID id;
|
||||
VAL val;
|
||||
|
||||
bool
|
||||
operator<(ID const otherId) const
|
||||
{
|
||||
return id < otherId;
|
||||
}
|
||||
|
||||
bool
|
||||
operator<(Cell const& other) const
|
||||
{
|
||||
return id < other.id;
|
||||
}
|
||||
|
||||
struct SortDescendingByValue
|
||||
{
|
||||
bool operator()(Cell const& a, Cell const& b) const
|
||||
{
|
||||
return a.val > b.val;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
struct Row
|
||||
{
|
||||
Cell const* start;
|
||||
Cell const* stop;
|
||||
VAL operator[](ID key) const;
|
||||
};
|
||||
|
||||
Cell* data;
|
||||
VAL *M1, *M2;
|
||||
OFFSET * index;
|
||||
ID numRows;
|
||||
ID numCols;
|
||||
boost::shared_ptr<bio::mapped_file> file;
|
||||
|
||||
VAL m1(ID key) const
|
||||
{
|
||||
return (key < numRows) ? M1[key] : INIT(0);
|
||||
}
|
||||
|
||||
VAL m2(ID key) const
|
||||
{
|
||||
return (key < numCols) ? M2[key] : INIT(0);
|
||||
}
|
||||
|
||||
|
||||
void open(string fname);
|
||||
void close();
|
||||
|
||||
Row operator[](ID key) const;
|
||||
|
||||
mm2dTable(string const fname="") { if (!fname.empty()) open(fname); };
|
||||
~mm2dTable() { file.reset(); };
|
||||
};
|
||||
|
||||
template<typename OFFSET, typename ID, typename VAL, typename INIT>
|
||||
typename mm2dTable<OFFSET,ID,VAL,INIT>::Row
|
||||
mm2dTable<OFFSET,ID,VAL,INIT>::
|
||||
operator[](ID key) const
|
||||
{
|
||||
Row ret;
|
||||
if (key < numRows)
|
||||
{
|
||||
ret.start = data+index[key];
|
||||
ret.stop = data+index[key+1];
|
||||
}
|
||||
else
|
||||
ret.start = ret.stop = data+index[key+1];
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename OFFSET, typename ID, typename VAL, typename INIT>
|
||||
VAL
|
||||
mm2dTable<OFFSET,ID,VAL,INIT>::
|
||||
Row::
|
||||
operator[](ID key) const
|
||||
{
|
||||
if (start==stop) return INIT(0);
|
||||
Cell const* c = lower_bound(start,stop,key);
|
||||
return (c != stop && c->id == key ? c->val : INIT(0));
|
||||
}
|
||||
|
||||
template<typename OFFSET, typename ID, typename VAL, typename INIT>
|
||||
void
|
||||
mm2dTable<OFFSET,ID,VAL,INIT>::
|
||||
open(string fname)
|
||||
{
|
||||
// cout << "opening " << fname << " at " << __FILE__ << ":" << __LINE__ << endl;
|
||||
if (access(fname.c_str(),R_OK))
|
||||
{
|
||||
cerr << "[" << __FILE__ << ":" << __LINE__ <<"] FATAL ERROR: "
|
||||
<< "file '" << fname << " is not accessible." << endl;
|
||||
exit(1);
|
||||
}
|
||||
file.reset(new bio::mapped_file());
|
||||
file->open(fname,ios::in|ios::out);
|
||||
if (!file->is_open())
|
||||
{
|
||||
cerr << "Error opening file " << fname << endl;
|
||||
assert(0);
|
||||
}
|
||||
char* p = file->data();
|
||||
filepos_type offset = *reinterpret_cast<filepos_type*>(p);
|
||||
index = reinterpret_cast<OFFSET*>(p+offset); p += sizeof(offset);
|
||||
numRows = *reinterpret_cast<ID const*>(p); p += sizeof(id_type);
|
||||
numCols = *reinterpret_cast<ID const*>(p); p += sizeof(id_type);
|
||||
data = reinterpret_cast<Cell*>(p);
|
||||
// cout << numRows << " rows; " << numCols << " columns " << endl;
|
||||
M1 = reinterpret_cast<VAL*>(index+numRows+1);
|
||||
M2 = M1+numRows;
|
||||
// cout << "Table " << fname << " has " << numRows << " rows and "
|
||||
// << numCols << " columns." << endl;
|
||||
// cout << "File size is " << file.size()*1024 << " bytes; ";
|
||||
// cout << "M2 starts " << (reinterpret_cast<char const*>(M2) - file.data())
|
||||
// << " bytes into the file" << endl;
|
||||
// cout << M2[0] << endl;
|
||||
}
|
||||
|
||||
template<
|
||||
typename OFFSET, // integer type of file offsets
|
||||
typename ID, // integer type of column ids
|
||||
typename VAL, // type of cell values
|
||||
typename INIT, // INIT(0) initializes default values
|
||||
typename ICONT // inner container type
|
||||
>
|
||||
void
|
||||
write_mm_2d_table(ostream& out, vector<ICONT> const& T,
|
||||
vector<VAL> const* m1 = NULL,
|
||||
vector<VAL> const* m2 = NULL)
|
||||
{
|
||||
assert(T.size());
|
||||
typedef typename ICONT::const_iterator iter;
|
||||
|
||||
// compute marginals if necessary
|
||||
vector<VAL> m1x,m2x;
|
||||
if (!m1)
|
||||
{
|
||||
m1x.resize(T.size(),INIT(0));
|
||||
for (size_t r = 0; r < T.size(); ++r)
|
||||
for (iter c = T.at(r).begin(); c != T.at(r).end(); ++c)
|
||||
m1x[r] = m1x[r] + c->second;
|
||||
m1 = &m1x;
|
||||
}
|
||||
if (!m2)
|
||||
{
|
||||
for (size_t r = 0; r < T.size(); ++r)
|
||||
for (iter c = T.at(r).begin(); c != T.at(r).end(); ++c)
|
||||
{
|
||||
while (c->first >= m2x.size())
|
||||
m2x.push_back(INIT(0));
|
||||
m2x[c->first] = m2x[c->first] + c->second;
|
||||
}
|
||||
m2 = &m2x;
|
||||
}
|
||||
|
||||
filepos_type idxOffset=0;
|
||||
numwrite(out,idxOffset); // place holder, we'll return here at the end
|
||||
numwrite(out,id_type(m1->size())); // number of rows
|
||||
numwrite(out,id_type(m2->size())); // number of columns
|
||||
|
||||
// write actual table
|
||||
vector<OFFSET> index;
|
||||
size_t ctr =0;
|
||||
index.reserve(m1->size()+1);
|
||||
for (ID r = 0; r < ID(T.size()); ++r)
|
||||
{
|
||||
//index.push_back(out.tellp());
|
||||
index.push_back(ctr);
|
||||
ID lastId = 0;
|
||||
if (T.at(r).size())
|
||||
lastId = T.at(r).begin()->first;
|
||||
for (typename ICONT::const_iterator c = T.at(r).begin();
|
||||
c != T.at(r).end(); ++c)
|
||||
{
|
||||
ctr++;
|
||||
assert(c->first >= lastId);
|
||||
lastId = c->first;
|
||||
typename mm2dTable<OFFSET,ID,VAL,INIT>::Cell item;
|
||||
item.id = c->first;
|
||||
item.val = c->second;
|
||||
out.write(reinterpret_cast<char const*>(&item),sizeof(item));
|
||||
}
|
||||
}
|
||||
// index.push_back(out.tellp());
|
||||
index.push_back(ctr);
|
||||
idxOffset=out.tellp();
|
||||
|
||||
// write index
|
||||
for (size_t i = 0; i < index.size(); ++i)
|
||||
{
|
||||
OFFSET o = index[i]; // (index[i]-index[0])/sizeof(VAL);
|
||||
out.write(reinterpret_cast<char*>(&o),sizeof(OFFSET));
|
||||
}
|
||||
|
||||
// write marginals
|
||||
out.write(reinterpret_cast<char const*>(&(*m1)[0]),m1->size()*sizeof(VAL));
|
||||
out.write(reinterpret_cast<char const*>(&(*m2)[0]),m2->size()*sizeof(VAL));
|
||||
|
||||
out.seekp(0);
|
||||
numwrite(out,idxOffset);
|
||||
}
|
||||
}
|
||||
#endif
|
263
moses/mm/ug_mm_tsa.h
Normal file
263
moses/mm/ug_mm_tsa.h
Normal file
@ -0,0 +1,263 @@
|
||||
// -*- c++ -*-
|
||||
#ifndef _ug_mm_tsa_h
|
||||
#define _ug_mm_tsa_h
|
||||
|
||||
// (c) 2007-2009 Ulrich Germann. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <sstream>
|
||||
|
||||
#include <boost/iostreams/device/mapped_file.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
#include "tpt_tightindex.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "tpt_pickler.h"
|
||||
#include "ug_tsa_base.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
template<typename TOKEN>
|
||||
class mmTSA : public TSA<TOKEN>
|
||||
{
|
||||
public:
|
||||
typedef typename TSA<TOKEN>::tree_iterator tree_iterator;
|
||||
friend class TSA_tree_iterator<TOKEN>;
|
||||
private:
|
||||
bio::mapped_file_source file;
|
||||
|
||||
public: // temporarily for debugging
|
||||
|
||||
filepos_type const* index; // random access to top-level sufa ranges
|
||||
|
||||
private:
|
||||
|
||||
char const* index_jump(char const* a, char const* z, float ratio) const;
|
||||
char const* getLowerBound(id_type t) const;
|
||||
char const* getUpperBound(id_type t) const;
|
||||
|
||||
public:
|
||||
mmTSA();
|
||||
mmTSA(string fname, Ttrack<TOKEN> const* c);
|
||||
void open(string fname, Ttrack<TOKEN> const* c);
|
||||
|
||||
count_type
|
||||
sntCnt(char const* p, char const * const q) const;
|
||||
|
||||
count_type
|
||||
rawCnt(char const* p, char const * const q) const;
|
||||
|
||||
void
|
||||
getCounts(char const* p, char const * const q,
|
||||
count_type& sids, count_type& raw) const;
|
||||
|
||||
char const*
|
||||
readSid(char const* p, char const* q, id_type& sid) const;
|
||||
|
||||
char const*
|
||||
readSid(char const* p, char const* q, uint64_t& sid) const;
|
||||
|
||||
char const*
|
||||
readOffset(char const* p, char const* q, uint16_t& offset) const;
|
||||
|
||||
char const*
|
||||
readOffset(char const* p, char const* q, uint64_t& offset) const;
|
||||
|
||||
void sanityCheck() const;
|
||||
|
||||
};
|
||||
|
||||
// ======================================================================
|
||||
|
||||
/** jump to the point 1/ratio in a tightly packed index
|
||||
* assumes that keys are flagged with '1', values with '0'
|
||||
*/
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
mmTSA<TOKEN>::
|
||||
index_jump(char const* a, char const* z, float ratio) const
|
||||
{
|
||||
assert(ratio >= 0 && ratio < 1);
|
||||
char const* m = a+int(ratio*(z-a));
|
||||
if (m > a)
|
||||
{
|
||||
while (m > a && *m < 0) --m;
|
||||
while (m > a && *m >= 0) --m;
|
||||
if (*m < 0) ++m;
|
||||
}
|
||||
assert(*m >= 0);
|
||||
return m;
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
mmTSA<TOKEN>::
|
||||
mmTSA()
|
||||
{
|
||||
this->corpus = NULL;
|
||||
this->startArray = NULL;
|
||||
this->endArray = NULL;
|
||||
this->BitSetCachingThreshold=4096;
|
||||
};
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
mmTSA<TOKEN>::
|
||||
mmTSA(string fname, Ttrack<TOKEN> const* c)
|
||||
{
|
||||
open(fname,c);
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
void
|
||||
mmTSA<TOKEN>::
|
||||
open(string fname, Ttrack<TOKEN> const* c)
|
||||
{
|
||||
this->bsc.reset(new BitSetCache<TSA<TOKEN> >(this));
|
||||
if (access(fname.c_str(),F_OK))
|
||||
{
|
||||
ostringstream msg;
|
||||
msg << "mmTSA<>::open: File '" << fname << "' does not exist.";
|
||||
throw std::runtime_error(msg.str().c_str());
|
||||
}
|
||||
assert(c);
|
||||
this->corpus = c;
|
||||
file.open(fname);
|
||||
Moses::prime(file);
|
||||
char const* p = file.data();
|
||||
filepos_type idxOffset;
|
||||
p = numread(p,idxOffset);
|
||||
p = numread(p,this->indexSize);
|
||||
|
||||
// cerr << fname << ": " << idxOffset << " " << this->indexSize << endl;
|
||||
|
||||
this->startArray = p;
|
||||
this->index = reinterpret_cast<filepos_type const*>(file.data()+idxOffset);
|
||||
this->endArray = reinterpret_cast<char const*>(index);
|
||||
this->corpusSize = c->size();
|
||||
this->numTokens = c->numTokens();
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
mmTSA<TOKEN>::
|
||||
getLowerBound(id_type id) const
|
||||
{
|
||||
if (id >= this->indexSize)
|
||||
return NULL;
|
||||
return this->startArray + this->index[id];
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
mmTSA<TOKEN>::
|
||||
getUpperBound(id_type id) const
|
||||
{
|
||||
if (id >= this->indexSize)
|
||||
return NULL;
|
||||
// if (index[id] == index[id+1])
|
||||
// return NULL;
|
||||
else
|
||||
return this->startArray + this->index[id+1];
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
mmTSA<TOKEN>::
|
||||
readSid(char const* p, char const* q, id_type& sid) const
|
||||
{
|
||||
return tightread(p,q,sid);
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
char const*
|
||||
mmTSA<TOKEN>::
|
||||
readSid(char const* p, char const* q, uint64_t& sid) const
|
||||
{
|
||||
return tightread(p,q,sid);
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
inline
|
||||
char const*
|
||||
mmTSA<TOKEN>::
|
||||
readOffset(char const* p, char const* q, uint16_t& offset) const
|
||||
{
|
||||
return tightread(p,q,offset);
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
inline
|
||||
char const*
|
||||
mmTSA<TOKEN>::
|
||||
readOffset(char const* p, char const* q, uint64_t& offset) const
|
||||
{
|
||||
return tightread(p,q,offset);
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
count_type
|
||||
mmTSA<TOKEN>::
|
||||
rawCnt(char const* p, char const* const q) const
|
||||
{
|
||||
id_type sid; uint16_t off;
|
||||
size_t ret=0;
|
||||
while (p < q)
|
||||
{
|
||||
p = tightread(p,q,sid);
|
||||
p = tightread(p,q,off);
|
||||
ret++;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
template<typename TOKEN>
|
||||
void
|
||||
mmTSA<TOKEN>::
|
||||
getCounts(char const* p, char const* const q,
|
||||
count_type& sids, count_type& raw) const
|
||||
{
|
||||
raw = 0;
|
||||
id_type sid; uint16_t off;
|
||||
boost::dynamic_bitset<uint64_t> check(this->corpus->size());
|
||||
while (p < q)
|
||||
{
|
||||
p = tightread(p,q,sid);
|
||||
p = tightread(p,q,off);
|
||||
check.set(sid);
|
||||
raw++;
|
||||
}
|
||||
sids = check.count();
|
||||
}
|
||||
|
||||
// ======================================================================
|
||||
|
||||
} // end of namespace ugdiss
|
||||
|
||||
// #include "ug_mm_tsa_extra.h"
|
||||
#endif
|
53
moses/mm/ug_mm_tsa_tree_iterator.h
Normal file
53
moses/mm/ug_mm_tsa_tree_iterator.h
Normal file
@ -0,0 +1,53 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2007-2009 Ulrich Germann. All rights reserved.
|
||||
#if 0
|
||||
#ifndef __ug_mm_tsa_tree_iterator_h
|
||||
#define __ug_mm_tsa_tree_iterator_h
|
||||
|
||||
// namespace ugdiss
|
||||
// {
|
||||
// template<typename TOKEN>
|
||||
// class
|
||||
// mmTSA<TOKEN>::
|
||||
// tree_iterator : public TSA<TOKEN>::tree_iterator
|
||||
// {
|
||||
// public:
|
||||
// tree_iterator(TSA<TOKEN> const* s);
|
||||
// tree_iterator(TSA<TOKEN> const* s, Token const& t);
|
||||
// tree_iterator(TSA<TOKEN> const* s, Token const* kstart, Token const* kend);
|
||||
// bool down() { return TSA<TOKEN>::tree_iterator::down(); }
|
||||
// bool over() { return TSA<TOKEN>::tree_iterator::over(); }
|
||||
// };
|
||||
|
||||
// // ======================================================================
|
||||
// // ======================================================================
|
||||
// // ======================================================================
|
||||
|
||||
// template<typename TOKEN>
|
||||
// mmTSA<TOKEN>::
|
||||
// tree_iterator::
|
||||
// tree_iterator(TSA<TOKEN> const* s)
|
||||
// : TSA<TOKEN>::tree_iterator::tree_iterator(s)
|
||||
// {};
|
||||
|
||||
// template<typename TOKEN>
|
||||
// mmTSA<TOKEN>::
|
||||
// tree_iterator::
|
||||
// tree_iterator(TSA<TOKEN> const* s, Token const& t)
|
||||
// : TSA<TOKEN>::tree_iterator::tree_iterator(s,t)
|
||||
// {};
|
||||
|
||||
// template<typename TOKEN>
|
||||
// mmTSA<TOKEN>::
|
||||
// tree_iterator::
|
||||
// tree_iterator(TSA<TOKEN> const* s, Token const* kstart, Token const* kend)
|
||||
// : TSA<TOKEN>::tree_iterator::tree_iterator(s,kstart,kend)
|
||||
// {};
|
||||
|
||||
// // ======================================================================
|
||||
// // ======================================================================
|
||||
// // ======================================================================
|
||||
|
||||
// }
|
||||
#endif
|
||||
#endif
|
250
moses/mm/ug_mm_ttrack.h
Normal file
250
moses/mm/ug_mm_ttrack.h
Normal file
@ -0,0 +1,250 @@
|
||||
// -*- c++ -*-
|
||||
// Memory-mapped corpus track. The corpus (each Token occupying a fixed number
|
||||
// of bytes (must be compatible with the memory alignment in the OS)) is stored
|
||||
// as one huge array. The "index" maps from sentence IDs to positions within
|
||||
// that array.
|
||||
|
||||
// (c) 2007-2010 Ulrich Germann. All rights reserved
|
||||
|
||||
#ifndef __ug_mm_ttrack
|
||||
#define __ug_mm_ttrack
|
||||
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
|
||||
#include <boost/iostreams/device/mapped_file.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
|
||||
#include "tpt_typedefs.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_ttrack_base.h"
|
||||
#include "num_read_write.h"
|
||||
#include "ug_load_primer.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
template<typename TKN=id_type>
|
||||
class mmTtrack : public Ttrack<TKN>
|
||||
{
|
||||
public:
|
||||
typedef TKN Token;
|
||||
|
||||
private:
|
||||
bio::mapped_file_source file;
|
||||
Token const* data; // pointer to first word of first sentence
|
||||
id_type const* index; /* pointer to index (change data type for corpora
|
||||
* of more than four billion words)
|
||||
*/
|
||||
public:
|
||||
mmTtrack(string fname);
|
||||
mmTtrack();
|
||||
|
||||
// return pointer to beginning of sentence
|
||||
Token const* sntStart(size_t sid) const;
|
||||
|
||||
// return pointer to end of sentence
|
||||
Token const* sntEnd(size_t sid) const;
|
||||
|
||||
// return size of corpus (in number of sentences)
|
||||
size_t size() const;
|
||||
|
||||
// return size of corpus (in number of sentences)
|
||||
size_t numTokens() const;
|
||||
|
||||
// open an mmTtrack file
|
||||
void open(string fname);
|
||||
|
||||
// FUNCTIONS FOR BUILDING CORPUS TRACKS
|
||||
// write a blank file header at the beginning of a new ttrack file
|
||||
void write_blank_file_header(ostream& out) const;
|
||||
|
||||
// write the sentence index /idx/ and fill the file header
|
||||
void write_index_and_finalize(ostream& out,
|
||||
vector<id_type> const& idx,
|
||||
count_type tokenCount) const;
|
||||
|
||||
// copy a contiguous sequence of sentences to another stream
|
||||
// return the number of tokens copied
|
||||
id_type copySentences(ostream& trg, id_type start, id_type stop) const;
|
||||
|
||||
/** find the sentence id of a given token */
|
||||
id_type findSid(TKN const* t) const;
|
||||
|
||||
id_type findSid(id_type tokenOffset) const;
|
||||
|
||||
/// re-assign ids based on the id maps in /f/
|
||||
void remap(string const fname, vector<id_type const*> const & f) const;
|
||||
|
||||
};
|
||||
|
||||
/// re-assign ids based on the id maps in /f/
|
||||
template<typename TKN>
|
||||
void
|
||||
mmTtrack<TKN>::
|
||||
remap(string const fname, vector<id_type const*> const & f) const
|
||||
{
|
||||
bio::mapped_file myfile(fname);
|
||||
assert(myfile.is_open());
|
||||
Moses::prime(myfile);
|
||||
filepos_type idxOffset;
|
||||
char* p = myfile.data();
|
||||
id_type numSent,numWords;
|
||||
p = numread(p,idxOffset);
|
||||
p = numread(p,numSent);
|
||||
p = numread(p,numWords);
|
||||
data = reinterpret_cast<TKN*>(p);
|
||||
for (size_t i = 0; i < numWords; ++i)
|
||||
data[i] = data[i].remap(f);
|
||||
myfile.close();
|
||||
}
|
||||
|
||||
|
||||
template<typename TKN>
|
||||
size_t
|
||||
mmTtrack<TKN>::
|
||||
size() const
|
||||
{
|
||||
return this->numSent;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
size_t
|
||||
mmTtrack<TKN>::
|
||||
numTokens() const
|
||||
{
|
||||
return this->numWords;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
TKN const*
|
||||
mmTtrack<TKN>::
|
||||
sntStart(size_t sid) const // return pointer to beginning of sentence
|
||||
{
|
||||
if (sid >= this->numSent)
|
||||
{
|
||||
cerr << "Fatal error: requested sentence #"<<sid<<" is beyond corpus size ("
|
||||
<< this->numSent <<")" << endl;
|
||||
}
|
||||
assert(sid < this->numSent);
|
||||
return data+index[sid];
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
TKN const*
|
||||
mmTtrack<TKN>::
|
||||
sntEnd(size_t sid) const // return pointer to end of sentence
|
||||
{
|
||||
assert(sid < this->numSent);
|
||||
return data+index[sid+1];
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
mmTtrack<TKN>::
|
||||
mmTtrack()
|
||||
{
|
||||
data = NULL;
|
||||
index = NULL;
|
||||
this->numSent = this->numWords = 0;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
mmTtrack<TKN>::
|
||||
mmTtrack(string fname)
|
||||
{
|
||||
open(fname);
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
mmTtrack<TKN>::
|
||||
open(string fname)
|
||||
{
|
||||
if (access(fname.c_str(),F_OK))
|
||||
{
|
||||
ostringstream msg;
|
||||
msg << "mmTtrack<>::open: File '" << fname << "' does not exist.";
|
||||
throw std::runtime_error(msg.str().c_str());
|
||||
}
|
||||
file.open(fname);
|
||||
if (!file.is_open())
|
||||
{
|
||||
cerr << "Error opening file " << fname << endl;
|
||||
assert(0);
|
||||
}
|
||||
filepos_type idxOffset;
|
||||
char const* p = file.data();
|
||||
p = numread(p,idxOffset);
|
||||
p = numread(p,this->numSent);
|
||||
p = numread(p,this->numWords);
|
||||
data = reinterpret_cast<Token const*>(p);
|
||||
index = reinterpret_cast<id_type const*>(file.data()+idxOffset);
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
id_type
|
||||
mmTtrack<TKN>::
|
||||
findSid(TKN const* t) const
|
||||
{
|
||||
id_type tokenPos = t-data;
|
||||
id_type const* p = upper_bound(index,index+this->numSent,tokenPos);
|
||||
assert(p>index);
|
||||
return p-index-1;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
id_type
|
||||
mmTtrack<TKN>::
|
||||
findSid(id_type tokenPos) const
|
||||
{
|
||||
id_type const* p = upper_bound(index,index+this->numSent,tokenPos);
|
||||
assert(p>index);
|
||||
return p-index-1;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
mmTtrack<TKN>::
|
||||
write_blank_file_header(ostream& out) const
|
||||
{
|
||||
numwrite(out,filepos_type(0)); // place holder for index start
|
||||
numwrite(out,id_type(0)); // place holder for index size
|
||||
numwrite(out,id_type(0)); // place holder for token count
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
mmTtrack<TKN>::
|
||||
write_index_and_finalize(ostream& out,
|
||||
vector<id_type>const& idx,
|
||||
id_type tokenCount) const
|
||||
{
|
||||
id_type idxSize = idx.size();
|
||||
filepos_type idxStart = out.tellp();
|
||||
for (size_t i = 0; i < idx.size(); ++i)
|
||||
numwrite(out,idx[i]);
|
||||
out.seekp(0);
|
||||
numwrite(out,idxStart);
|
||||
numwrite(out,idxSize-1);
|
||||
numwrite(out,tokenCount);
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
id_type
|
||||
mmTtrack<TKN>::
|
||||
copySentences(ostream& trg, id_type start, id_type stop) const
|
||||
{
|
||||
assert(stop > start);
|
||||
TKN const* a = sntStart(start);
|
||||
TKN const* z = sntEnd(stop-1);
|
||||
size_t len = (z-a)*sizeof(TKN);
|
||||
if (!len) return 0;
|
||||
trg.write(reinterpret_cast<char const*>(a),len);
|
||||
return z-a;
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
450
moses/mm/ug_mmbitext.cc
Normal file
450
moses/mm/ug_mmbitext.cc
Normal file
@ -0,0 +1,450 @@
|
||||
// #include "ug_mmbitext.h"
|
||||
// #include <algorithm>
|
||||
|
||||
// namespace Moses
|
||||
// {
|
||||
// using namespace ugdiss;
|
||||
// using namespace std;
|
||||
|
||||
// mmbitext::
|
||||
// pstats::
|
||||
// pstats()
|
||||
// : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0)
|
||||
// {}
|
||||
|
||||
// void
|
||||
// mmbitext::
|
||||
// pstats::
|
||||
// register_worker()
|
||||
// {
|
||||
// this->lock.lock();
|
||||
// ++this->in_progress;
|
||||
// this->lock.unlock();
|
||||
// }
|
||||
|
||||
// void
|
||||
// pstats::
|
||||
// release()
|
||||
// {
|
||||
// this->lock.lock();
|
||||
// if (this->in_progress-- == 1) // last one - >we're done
|
||||
// this->ready.notify_all();
|
||||
// this->lock.unlock();
|
||||
// }
|
||||
|
||||
// void
|
||||
// mmbitext::
|
||||
// open(string const base, string const L1, string L2)
|
||||
// {
|
||||
// T1.open(base+L1+".mct");
|
||||
// T2.open(base+L2+".mct");
|
||||
// Tx.open(base+L1+"-"+L2+".mam");
|
||||
// V1.open(base+L1+".tdx"); V1.iniReverseIndex();
|
||||
// V2.open(base+L2+".tdx"); V2.iniReverseIndex();
|
||||
// I1.open(base+L1+".sfa",&T1);
|
||||
// I2.open(base+L2+".sfa",&T2);
|
||||
// // lexscorer.open(base+L1+"-"+L2+".lex");
|
||||
// assert(T1.size() == T2.size());
|
||||
// }
|
||||
|
||||
|
||||
// mmbitext::
|
||||
// mmbitext()
|
||||
// : ag(NULL)
|
||||
// {
|
||||
|
||||
// }
|
||||
|
||||
// bool
|
||||
// mmbitext::
|
||||
// find_trg_phr_bounds(size_t const sid, size_t const start, size_t const stop,
|
||||
// size_t & s1, size_t & s2, size_t & e1, size_t & e2,
|
||||
// vector<uchar>* core_alignment, bool const flip) const
|
||||
// {
|
||||
// // if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
|
||||
// // a word on the core_alignment:
|
||||
// // since fringe words ([s1,...,s2),[e1,..,e2) if s1 < s2, or e1 < e2, respectively)
|
||||
// // are by definition unaligned, we store only the core alignment in *core_alignment
|
||||
// // it is up to the calling function to shift alignment points over for start positions
|
||||
// // of extracted phrases that start with a fringe word
|
||||
// char const* p = Tx.sntStart(sid);
|
||||
// char const* x = Tx.sntEnd(sid);
|
||||
// bitvector forbidden((flip ? T1 : T2).sntLen(sid));
|
||||
// size_t src,trg;
|
||||
// size_t lft = forbidden.size();
|
||||
// size_t rgt = 0;
|
||||
// vector<vector<ushort> > aln(T1.sntLen(sid));
|
||||
// while (p < x)
|
||||
// {
|
||||
// if (flip) { p = binread(p,trg); assert(p<x); p = binread(p,src); }
|
||||
// else { p = binread(p,src); assert(p<x); p = binread(p,trg); }
|
||||
// if (src < start || src >= stop)
|
||||
// forbidden.set(trg);
|
||||
// else
|
||||
// {
|
||||
// lft = min(lft,trg);
|
||||
// rgt = max(rgt,trg);
|
||||
// if (core_alignment)
|
||||
// {
|
||||
// if (flip) aln[trg].push_back(src);
|
||||
// else aln[src].push_back(trg);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// #if 0
|
||||
// cout << setw(5) << mctr << " " << setw(3) << xctr << " ";
|
||||
// for (size_t i = 0; i < forbidden.size(); ++i)
|
||||
// {
|
||||
// if (i == lft) cout << '(';
|
||||
// cout << (forbidden[i] ? 'x' : '-');
|
||||
// if (i == rgt) cout << ')';
|
||||
// }
|
||||
// cout << endl;
|
||||
// #endif
|
||||
|
||||
// for (size_t i = lft; i <= rgt; ++i)
|
||||
// if (forbidden[i])
|
||||
// return false;
|
||||
|
||||
// s2 = lft; for (s1 = s2; s1 && !forbidden[s1-1]; --s1);
|
||||
// e1 = rgt+1; for (e2 = e1; e2 < forbidden.size() && !forbidden[e2]; ++e2);
|
||||
|
||||
// if (lft > rgt) return false;
|
||||
// if (core_alignment)
|
||||
// {
|
||||
// core_alignment->clear();
|
||||
// if (flip)
|
||||
// {
|
||||
// for (size_t i = lft; i <= rgt; ++i)
|
||||
// {
|
||||
// sort(aln[i].begin(),aln[i].end());
|
||||
// BOOST_FOREACH(ushort x, aln[i])
|
||||
// {
|
||||
// core_alignment->push_back(i-lft);
|
||||
// core_alignment->push_back(x-start);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// for (size_t i = start; i < stop; ++i)
|
||||
// {
|
||||
// BOOST_FOREACH(ushort x, aln[i])
|
||||
// {
|
||||
// core_alignment->push_back(i-start);
|
||||
// core_alignment->push_back(x-lft);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return lft <= rgt;
|
||||
// }
|
||||
|
||||
// void
|
||||
// mmbitext::
|
||||
// prep(iter const& phrase)
|
||||
// {
|
||||
// prep2(phrase);
|
||||
// }
|
||||
|
||||
// sptr<mmbitext::pstats>
|
||||
// mmbitext::
|
||||
// prep2(iter const& phrase)
|
||||
// {
|
||||
// if (!ag)
|
||||
// {
|
||||
// ag = new agenda(*this);
|
||||
// ag->add_workers(20);
|
||||
// }
|
||||
// typedef boost::unordered_map<uint64_t,sptr<pstats> > pcache_t;
|
||||
// uint64_t pid = phrase.getPid();
|
||||
// pcache_t & cache(phrase.root == &this->I1 ? cache1 : cache2);
|
||||
// pcache_t::value_type entry(pid,sptr<pstats>());
|
||||
// pair<pcache_t::iterator,bool> foo = cache.emplace(entry);
|
||||
// if (foo.second) foo.first->second = ag->add_job(phrase, 1000);
|
||||
// return foo.first->second;
|
||||
// }
|
||||
|
||||
// sptr<mmbitext::pstats>
|
||||
// mmbitext::
|
||||
// lookup(iter const& phrase)
|
||||
// {
|
||||
// sptr<pstats> ret = prep2(phrase);
|
||||
// boost::unique_lock<boost::mutex> lock(ret->lock);
|
||||
// while (ret->in_progress)
|
||||
// ret->ready.wait(lock);
|
||||
// return ret;
|
||||
// }
|
||||
|
||||
// void
|
||||
// mmbitext::
|
||||
// agenda::
|
||||
// worker::
|
||||
// operator()()
|
||||
// {
|
||||
// uint64_t sid=0, offset=0, len=0; // of the source phrase
|
||||
// bool fwd=false; // source phrase is L1
|
||||
// sptr<mmbitext::pstats> stats;
|
||||
// size_t s1=0, s2=0, e1=0, e2=0;
|
||||
// for (; ag.get_task(sid,offset,len,fwd,stats); )
|
||||
// {
|
||||
// if (!stats) break;
|
||||
// vector<uchar> aln;
|
||||
// if (!ag.bitext.find_trg_phr_bounds
|
||||
// (sid, offset, offset+len, s1, s2, e1, e2, fwd ? &aln : NULL, !fwd))
|
||||
// {
|
||||
// stats->release();
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// stats->lock.lock();
|
||||
// stats->good += 1;
|
||||
// stats->lock.unlock();
|
||||
|
||||
// for (size_t k = 0; k < aln.size(); k += 2)
|
||||
// aln[k] += s2 - s1;
|
||||
// Token const* o = (fwd ? ag.bitext.T2 : ag.bitext.T1).sntStart(sid);
|
||||
// float sample_weight = 1./((s2-s1+1)*(e2-e1+1));
|
||||
// for (size_t s = s1; s <= s2; ++s)
|
||||
// {
|
||||
// iter b(&(fwd ? ag.bitext.I2 : ag.bitext.I1));
|
||||
// for (size_t i = s; i < e1; ++i)
|
||||
// assert(b.extend(o[i].id()));
|
||||
// for (size_t i = e1; i <= e2; ++i)
|
||||
// {
|
||||
// stats->add(b,sample_weight,aln);
|
||||
// if (i < e2) assert(b.extend(o[i].id()));
|
||||
// }
|
||||
// if (fwd && s < s2)
|
||||
// for (size_t k = 0; k < aln.size(); k += 2)
|
||||
// --aln[k];
|
||||
// }
|
||||
// stats->release();
|
||||
// }
|
||||
// }
|
||||
|
||||
// void
|
||||
// mmbitext::
|
||||
// pstats::
|
||||
// add(mmbitext::iter const& trg_phrase, float const w, vector<uchar> const& a)
|
||||
// {
|
||||
// this->lock.lock();
|
||||
// jstats& entry = this->trg[trg_phrase.getPid()];
|
||||
// this->lock.unlock();
|
||||
// entry.add(w,a);
|
||||
// }
|
||||
|
||||
// mmbitext::
|
||||
// agenda::
|
||||
// agenda(mmbitext const& thebitext)
|
||||
// : shutdown(false), doomed(0), bitext(thebitext)
|
||||
// {
|
||||
|
||||
// }
|
||||
|
||||
// mmbitext::
|
||||
// agenda::
|
||||
// ~agenda()
|
||||
// {
|
||||
// this->lock.lock();
|
||||
// this->shutdown = true;
|
||||
// this->ready.notify_all();
|
||||
// this->lock.unlock();
|
||||
// for (size_t i = 0; i < workers.size(); ++i)
|
||||
// workers[i]->join();
|
||||
// }
|
||||
|
||||
// mmbitext::
|
||||
// ~mmbitext()
|
||||
// {
|
||||
// if (ag) delete ag;
|
||||
// }
|
||||
|
||||
// sptr<mmbitext::pstats>
|
||||
// mmbitext::
|
||||
// agenda::
|
||||
// add_job(mmbitext::iter const& phrase, size_t const max_samples)
|
||||
// {
|
||||
// static boost::posix_time::time_duration nodelay(0,0,0,0);
|
||||
|
||||
// job j;
|
||||
// j.stats.reset(new mmbitext::pstats());
|
||||
// j.stats->register_worker();
|
||||
// j.stats->raw_cnt = phrase.approxOccurrenceCount();
|
||||
// j.max_samples = max_samples;
|
||||
// j.next = phrase.lower_bound(-1);
|
||||
// j.stop = phrase.upper_bound(-1);
|
||||
// j.len = phrase.size();
|
||||
// j.ctr = 0;
|
||||
// j.fwd = phrase.root == &bitext.I1;
|
||||
|
||||
// boost::unique_lock<boost::mutex> lk(this->lock);
|
||||
// joblist.push_back(j);
|
||||
// if (joblist.size() == 1)
|
||||
// {
|
||||
// for (size_t i = 0; i < workers.size(); ++i)
|
||||
// {
|
||||
// if (workers[i]->timed_join(nodelay))
|
||||
// {
|
||||
// workers[i] = sptr<boost::thread>(new boost::thread(worker(*this)));
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return j.stats;
|
||||
// }
|
||||
|
||||
// bool
|
||||
// mmbitext::
|
||||
// agenda::
|
||||
// get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
|
||||
// bool & fwd, sptr<mmbitext::pstats> & stats)
|
||||
// {
|
||||
// boost::unique_lock<boost::mutex> lock(this->lock);
|
||||
// if (this->doomed || this->shutdown)
|
||||
// {
|
||||
// if (this->doomed) --this->doomed;
|
||||
// return false;
|
||||
// }
|
||||
// // while (joblist.empty())
|
||||
// // {
|
||||
// // cerr << "no jobs" << endl;
|
||||
// // this->ready.wait(lock);
|
||||
// // if (this->doomed || this->shutdown)
|
||||
// // {
|
||||
// // if (this->doomed) --this->doomed;
|
||||
// // return false;
|
||||
// // }
|
||||
// // }
|
||||
// while (joblist.size())
|
||||
// {
|
||||
// if (joblist.front().step(sid,offset))
|
||||
// {
|
||||
// job const& j = joblist.front();
|
||||
// len = j.len;
|
||||
// fwd = j.fwd;
|
||||
// stats = j.stats;
|
||||
// stats->register_worker();
|
||||
// return true;
|
||||
// }
|
||||
// joblist.front().stats->release();
|
||||
// joblist.pop_front();
|
||||
// }
|
||||
// stats.reset();
|
||||
// return true;
|
||||
// }
|
||||
|
||||
// bool
|
||||
// mmbitext::
|
||||
// agenda::
|
||||
// job::
|
||||
// step(uint64_t & sid, uint64_t & offset)
|
||||
// {
|
||||
// while (next < stop && stats->good < max_samples)
|
||||
// {
|
||||
// next = tightread(tightread(next,stop,sid),stop,offset);
|
||||
// {
|
||||
// boost::lock_guard<boost::mutex> lock(stats->lock);
|
||||
// if (stats->raw_cnt == ctr) ++stats->raw_cnt;
|
||||
// size_t rnum = randInt(stats->raw_cnt - ctr++);
|
||||
// // cout << stats->raw_cnt << " " << ctr-1 << " "
|
||||
// // << rnum << " " << max_samples - stats->good << endl;
|
||||
// if (rnum < max_samples - stats->good)
|
||||
// {
|
||||
// stats->sample_cnt++;
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
|
||||
|
||||
// void
|
||||
// mmbitext::
|
||||
// agenda::
|
||||
// add_workers(int n)
|
||||
// {
|
||||
// static boost::posix_time::time_duration nodelay(0,0,0,0);
|
||||
// boost::lock_guard<boost::mutex> lock(this->lock);
|
||||
// // house keeping: remove all workers that have finished
|
||||
// for (size_t i = 0; i < workers.size(); )
|
||||
// {
|
||||
// if (workers[i]->timed_join(nodelay))
|
||||
// {
|
||||
// if (i + 1 < workers.size())
|
||||
// workers[i].swap(workers.back());
|
||||
// workers.pop_back();
|
||||
// }
|
||||
// else ++i;
|
||||
// }
|
||||
// if (n < 0)
|
||||
// {
|
||||
// this->doomed -= n;
|
||||
// }
|
||||
// else
|
||||
// {
|
||||
// for (int i = 0; i < n; ++i)
|
||||
// {
|
||||
// sptr<boost::thread> w(new boost::thread(worker(*this)));
|
||||
// workers.push_back(w);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// mmbitext::
|
||||
// jstats::
|
||||
// jstats()
|
||||
// {
|
||||
// my_aln.reserve(1);
|
||||
// }
|
||||
|
||||
// mmbitext::
|
||||
// jstats::
|
||||
// jstats(jstats const& other)
|
||||
// {
|
||||
// my_rcnt = other.rcnt();
|
||||
// my_wcnt = other.wcnt();
|
||||
// my_aln = other.aln();
|
||||
// }
|
||||
|
||||
// void
|
||||
// mmbitext::
|
||||
// jstats::
|
||||
// add(float w, vector<uchar> const& a)
|
||||
// {
|
||||
// boost::lock_guard<boost::mutex> lk(this->lock);
|
||||
// my_rcnt += 1;
|
||||
// my_wcnt += w;
|
||||
// if (a.size())
|
||||
// {
|
||||
// size_t i = 0;
|
||||
// while (i < my_aln.size() && my_aln[i].second != a) ++i;
|
||||
// if (i == my_aln.size())
|
||||
// my_aln.push_back(pair<size_t,vector<uchar> >(1,a));
|
||||
// else
|
||||
// my_aln[i].first++;
|
||||
// if (my_aln[i].first > my_aln[i/2].first)
|
||||
// push_heap(my_aln.begin(),my_aln.begin()+i+1);
|
||||
// }
|
||||
// }
|
||||
|
||||
// uint32_t
|
||||
// mmbitext::
|
||||
// jstats::
|
||||
// rcnt() const
|
||||
// { return my_rcnt; }
|
||||
|
||||
// float
|
||||
// mmbitext::
|
||||
// jstats::
|
||||
// wcnt() const
|
||||
// { return my_wcnt; }
|
||||
|
||||
// vector<pair<size_t, vector<uchar> > > const&
|
||||
// mmbitext::
|
||||
// jstats::
|
||||
// aln() const
|
||||
// { return my_aln; }
|
||||
|
||||
// }
|
||||
|
191
moses/mm/ug_mmbitext.h
Normal file
191
moses/mm/ug_mmbitext.h
Normal file
@ -0,0 +1,191 @@
|
||||
#ifndef __ug_mm_bitext_h
|
||||
#define __ug_mm_bitext_h
|
||||
// Memory-mapped, word-aligned bitext
|
||||
// Written by Ulrich Germann
|
||||
|
||||
// things we can do to speed up things:
|
||||
// - set up threads at startup time that force the
|
||||
// data in to memory sequentially
|
||||
//
|
||||
// - use multiple agendas for better load balancing and to avoid
|
||||
// competition for locks
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <iomanip>
|
||||
#include <algorithm>
|
||||
|
||||
#include <boost/unordered_map.hpp>
|
||||
#include <boost/foreach.hpp>
|
||||
#include <boost/thread.hpp>
|
||||
|
||||
#include "moses/generic/sorting/VectorIndexSorter.h"
|
||||
#include "moses/generic/sampling/Sampling.h"
|
||||
#include "moses/generic/file_io/ug_stream.h"
|
||||
|
||||
#include "ug_typedefs.h"
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "ug_mm_tsa.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_corpus_token.h"
|
||||
#include "tpt_pickler.h"
|
||||
|
||||
using namespace ugdiss;
|
||||
using namespace std;
|
||||
namespace Moses {
|
||||
|
||||
typedef L2R_Token<SimpleWordId> Token;
|
||||
typedef mmTSA<Token>::tree_iterator iter;
|
||||
|
||||
class mmbitext
|
||||
{
|
||||
public:
|
||||
typedef mmTSA<Token>::tree_iterator iter;
|
||||
class pstats; // one-sided phrase statistics
|
||||
class jstats; // phrase pair ("joint") statistics
|
||||
class agenda
|
||||
{
|
||||
boost::mutex lock;
|
||||
boost::condition_variable ready;
|
||||
class job;
|
||||
class worker;
|
||||
list<job> joblist;
|
||||
vector<sptr<boost::thread> > workers;
|
||||
bool shutdown;
|
||||
size_t doomed;
|
||||
public:
|
||||
mmbitext const& bitext;
|
||||
agenda(mmbitext const& bitext);
|
||||
~agenda();
|
||||
void add_workers(int n);
|
||||
sptr<pstats> add_job(mmbitext::iter const& phrase,
|
||||
size_t const max_samples);
|
||||
bool get_task(uint64_t & sid, uint64_t & offset, uint64_t & len,
|
||||
bool & fwd, sptr<mmbitext::pstats> & stats);
|
||||
};
|
||||
|
||||
// stores the list of unfinished jobs;
|
||||
// maintains a pool of workers and assigns the jobs to them
|
||||
|
||||
agenda* ag;
|
||||
mmTtrack<char> Tx; // word alignments
|
||||
mmTtrack<Token> T1,T2; // token tracks
|
||||
TokenIndex V1,V2; // vocabs
|
||||
mmTSA<Token> I1,I2; // suffix arrays
|
||||
|
||||
/// given the source phrase sid[start:stop]
|
||||
// find the possible start (s1 .. s2) and end (e1 .. e2)
|
||||
// points of the target phrase; if non-NULL, store word
|
||||
// alignments in *core_alignment. If /flip/, source phrase is
|
||||
// L2.
|
||||
bool
|
||||
find_trg_phr_bounds
|
||||
(size_t const sid, size_t const start, size_t const stop,
|
||||
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
|
||||
vector<uchar> * core_alignment, bool const flip) const;
|
||||
|
||||
boost::unordered_map<uint64_t,sptr<pstats> > cache1,cache2;
|
||||
private:
|
||||
sptr<pstats>
|
||||
prep2(iter const& phrase);
|
||||
public:
|
||||
mmbitext();
|
||||
~mmbitext();
|
||||
|
||||
void open(string const base, string const L1, string const L2);
|
||||
|
||||
sptr<pstats> lookup(iter const& phrase);
|
||||
void prep(iter const& phrase);
|
||||
};
|
||||
|
||||
// "joint" (i.e., phrase pair) statistics
|
||||
class
|
||||
mmbitext::
|
||||
jstats
|
||||
{
|
||||
uint32_t my_rcnt; // unweighted count
|
||||
float my_wcnt; // weighted count
|
||||
vector<pair<size_t, vector<uchar> > > my_aln;
|
||||
boost::mutex lock;
|
||||
public:
|
||||
jstats();
|
||||
jstats(jstats const& other);
|
||||
uint32_t rcnt() const;
|
||||
float wcnt() const;
|
||||
vector<pair<size_t, vector<uchar> > > const & aln() const;
|
||||
void add(float w, vector<uchar> const& a);
|
||||
};
|
||||
|
||||
// struct
|
||||
// mmbitext:
|
||||
// phrasepair
|
||||
// {
|
||||
// Token const* t;
|
||||
// size_t len;
|
||||
// size_t cnt;
|
||||
// float fwd, bwd;
|
||||
|
||||
// map<uint32_t,uint32_t> aln;
|
||||
// string toString(TokenIndex const& V) const;
|
||||
// bool operator<(phrase const& other) const;
|
||||
// bool operator>(phrase const& other) const;
|
||||
// phrase(pair<pair<Token const*, size_t>,jstats> const & foo);
|
||||
|
||||
// };
|
||||
|
||||
|
||||
struct
|
||||
mmbitext::
|
||||
pstats
|
||||
{
|
||||
boost::mutex lock; // for parallel gathering of stats
|
||||
boost::condition_variable ready; // consumers can wait for this data structure to be ready.
|
||||
|
||||
size_t raw_cnt; // (approximate) raw occurrence count
|
||||
size_t sample_cnt; // number of instances selected during sampling
|
||||
size_t good; // number of selected instances with valid word alignments
|
||||
size_t sum_pairs;
|
||||
// size_t snt_cnt;
|
||||
// size_t sample_snt;
|
||||
size_t in_progress; // keeps track of how many threads are currently working on this
|
||||
boost::unordered_map<uint64_t, jstats> trg;
|
||||
pstats();
|
||||
// vector<phrase> nbest;
|
||||
// void select_nbest(size_t const N=10);
|
||||
void release();
|
||||
void register_worker();
|
||||
void add(mmbitext::iter const& trg_phrase, float const w, vector<uchar> const& a);
|
||||
};
|
||||
|
||||
class
|
||||
mmbitext::
|
||||
agenda::
|
||||
worker
|
||||
{
|
||||
agenda& ag;
|
||||
public:
|
||||
worker(agenda& a);
|
||||
void operator()();
|
||||
|
||||
};
|
||||
|
||||
class
|
||||
mmbitext::
|
||||
agenda::
|
||||
job
|
||||
{
|
||||
public:
|
||||
char const* next;
|
||||
char const* stop;
|
||||
size_t max_samples;
|
||||
size_t ctr;
|
||||
size_t len;
|
||||
bool fwd;
|
||||
sptr<mmbitext::pstats> stats;
|
||||
bool step(uint64_t & sid, uint64_t & offset);
|
||||
};
|
||||
|
||||
}
|
||||
#endif
|
||||
|
15
moses/mm/ug_tsa_array_entry.cc
Normal file
15
moses/mm/ug_tsa_array_entry.cc
Normal file
@ -0,0 +1,15 @@
|
||||
#include "ug_tsa_array_entry.h"
|
||||
#include "ug_ttrack_position.h"
|
||||
#include "moses/generic/sampling/Sampling.h"
|
||||
|
||||
// (c) 2007-2010 Ulrich Germann
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
namespace tsa
|
||||
{
|
||||
// Default constructor: position (0,0), both byte pointers NULL.
ArrayEntry::
ArrayEntry()
  : ttrack::Position(0,0)
  , pos(NULL)
  , next(NULL)
{ }
|
||||
// Construct an entry whose /next/ pointer is /p/; position is (0,0) and
// /pos/ is NULL.
ArrayEntry::
ArrayEntry(char const* p)
  : ttrack::Position(0,0)
  , pos(NULL)
  , next(p)
{ }
|
||||
|
||||
}
|
||||
}
|
83
moses/mm/ug_tsa_array_entry.h
Normal file
83
moses/mm/ug_tsa_array_entry.h
Normal file
@ -0,0 +1,83 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2007-2010 Ulrich Germann
|
||||
// implementation of stuff related to ArrayEntries
|
||||
// this file should only be included via ug_tsa_base.h,
|
||||
// never by itself
|
||||
#ifndef __ug_tsa_array_entry_h
|
||||
#define __ug_tsa_array_entry_h
|
||||
#include "ug_ttrack_position.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
namespace tsa
|
||||
{
|
||||
class
|
||||
ArrayEntry : public ttrack::Position
|
||||
{
|
||||
public:
|
||||
char const* pos;
|
||||
char const* next;
|
||||
ArrayEntry();
|
||||
|
||||
ArrayEntry(char const* p);
|
||||
|
||||
template<typename TSA_TYPE>
|
||||
ArrayEntry(TSA_TYPE const* S, char const* p);
|
||||
|
||||
};
|
||||
|
||||
template<typename TSA_TYPE>
|
||||
ArrayEntry::
|
||||
ArrayEntry(TSA_TYPE const* S, char const* p)
|
||||
{
|
||||
S->readEntry(p,*this);
|
||||
}
|
||||
|
||||
// template<typename TSA_TYPE>
|
||||
// class SamplingArrayEntryIterator
|
||||
// : public tsa::ArrayEntry
|
||||
// {
|
||||
// size_t const N; // (approximate) total number of occurrences
|
||||
// size_t const samplesize; // how many samples to chose from the range
|
||||
// size_t const sampled; // how many occurrences we've looked at so far
|
||||
// size_t const chosen; // how many we have chosen
|
||||
// TSA_TYPE const* root; // the underlying TSA
|
||||
// char const* stop; // end of the range
|
||||
// public:
|
||||
// SamplingArrayEntryIterator(TSA_TYPE::tree_iterator const& m, size_t const s);
|
||||
// bool step(); // returns false when at end of range
|
||||
// bool done(); //
|
||||
// };
|
||||
|
||||
// template<typename TSA_TYPE>
|
||||
// SamplingArrayEntryIterator::
|
||||
// SamplingArrayEntryIterator(typename TSA_TYPE::tree_iterator const& m, size_t const s)
|
||||
// : ArrayEntry<TSA_TYPE>(m.lower_bound(-1))
|
||||
// , N(m.approxOccurrenceCount())
|
||||
// , samplesize(min(s,N))
|
||||
// , sampled(0)
|
||||
// , chosen(0)
|
||||
// , root(m.root)
|
||||
// , stop(m.upper_bound(-1))
|
||||
// { }
|
||||
|
||||
// template<typename TSA_TYPE>
|
||||
// bool
|
||||
// SamplingArrayEntryIterator::
|
||||
// step()
|
||||
// {
|
||||
// while (chosen < samplesize && next < stop)
|
||||
// {
|
||||
// root->readEntry(next,*this);
|
||||
// if (randInt(N - sampled++) < samplesize - chosen)
|
||||
// {
|
||||
// ++chosen;
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
|
||||
} // end of namespace tsa
|
||||
} // end of namespace ugdiss
|
||||
#endif
|
827
moses/mm/ug_tsa_base.h
Normal file
827
moses/mm/ug_tsa_base.h
Normal file
@ -0,0 +1,827 @@
|
||||
// -*- c++ -*-
|
||||
// Base class for Token Sequence Arrays
|
||||
// (c) 2007-2010 Ulrich Germann. All rights reserved.
|
||||
#ifndef _ug_tsa_base_h
|
||||
#define _ug_tsa_base_h
|
||||
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
|
||||
#include <boost/iostreams/device/mapped_file.hpp>
|
||||
|
||||
#include "tpt_tokenindex.h"
|
||||
#include "ug_ttrack_base.h"
|
||||
#include "ug_corpus_token.h"
|
||||
#include "ug_tsa_tree_iterator.h"
|
||||
#include "ug_tsa_array_entry.h"
|
||||
#include "ug_tsa_bitset_cache.h"
|
||||
#include "ug_typedefs.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
using namespace std;
|
||||
using namespace boost;
|
||||
namespace bio=boost::iostreams;
|
||||
|
||||
// NULL-safe wrapper around TKN::next(): returns NULL for a NULL argument,
// otherwise x->next() cast to TKN const*.
template<typename TKN>
TKN const*
next(TKN const* x)
{
  if (!x) return NULL;
  return static_cast<TKN const*>(x->next());
}
|
||||
|
||||
/** Base class for [T]oken [S]equence [A]arrays, a generalization of
|
||||
* Suffix arrays.
|
||||
*
|
||||
* Token types (TKN) must provide a number of functions, see the
|
||||
* class SimpleWordId (as a simple example of a "core token base
|
||||
* class") and the template class L2R_Token (a class derived from
|
||||
* its template parameter (e.g. SimpleWordId) that handles the
|
||||
* ordering of sequences. Both are decleared/defined in
|
||||
* ug_corpus_token.{h|cc}
|
||||
*/
|
||||
template<typename TKN>
|
||||
class TSA
|
||||
{
|
||||
|
||||
public:
|
||||
virtual ~TSA() {};
|
||||
typedef TSA_tree_iterator<TKN> tree_iterator;
|
||||
// allows iteration over the array as if it were a trie
|
||||
typedef tsa::ArrayEntry ArrayEntry;
|
||||
/* an entry in the array, for iteration over all occurrences of a
|
||||
* particular sequence */
|
||||
// typedef boost::dynamic_bitset<uint64_t> bitset;
|
||||
typedef shared_ptr<bitvector> bitset_pointer;
|
||||
typedef TKN Token;
|
||||
typedef BitSetCache<TSA<TKN> > BSC_t;
|
||||
/* to allow caching of bit vectors that are expensive to create on
|
||||
* the fly */
|
||||
|
||||
friend class TSA_tree_iterator<TKN>;
|
||||
|
||||
protected:
|
||||
Ttrack<TKN> const* corpus; // pointer to the underlying corpus
|
||||
char const* startArray; // beginning ...
|
||||
char const* endArray; // ... and end ...
|
||||
// of memory block storing the actual TSA
|
||||
|
||||
size_t corpusSize;
|
||||
/** size of the corpus (in number of sentences) of the corpus
|
||||
* underlying the sequence array.
|
||||
*
|
||||
* ATTENTION: This number may differ from
|
||||
* corpus->size(), namely when the
|
||||
* suffix array is based on a subset
|
||||
* of the sentences of /corpus/.
|
||||
*/
|
||||
|
||||
id_type numTokens;
|
||||
/** size of the corpus (in number of tokens) of the corpus underlying the
|
||||
* sequence array.
|
||||
*
|
||||
* ATTENTION: This number may differ from corpus->numTokens(), namely when
|
||||
* the suffix array is based on a subset of the sentences of
|
||||
* /corpus/.
|
||||
*/
|
||||
|
||||
id_type indexSize;
|
||||
// (number of entries +1) in the index of root-level nodes
|
||||
|
||||
size_t BitSetCachingThreshold;
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// private member functions:
|
||||
|
||||
/** @return an index position approximately /fraction/ between
|
||||
* /startRange/ and /endRange/.
|
||||
*/
|
||||
virtual
|
||||
char const*
|
||||
index_jump(char const* startRange,
|
||||
char const* stopRange,
|
||||
float fraction) const = 0;
|
||||
|
||||
/** return the index position of the first item that
|
||||
* is equal to or includes [refStart,refStart+refLen) as a prefix
|
||||
*/
|
||||
char const*
|
||||
find_start(char const* lo, char const* const upX,
|
||||
TKN const* const refStart, int refLen,
|
||||
size_t d) const;
|
||||
|
||||
/** return the index position of the first item that is greater than
|
||||
* [refStart,refStart+refLen) and does not include it as a prefix
|
||||
*/
|
||||
char const*
|
||||
find_end(char const* lo, char const* const upX,
|
||||
TKN const* const refStart, int refLen,
|
||||
size_t d) const;
|
||||
|
||||
/** return the index position of the first item that is longer than
|
||||
* [refStart,refStart+refLen) and includes it as a prefix
|
||||
*/
|
||||
char const*
|
||||
find_longer(char const* lo, char const* const upX,
|
||||
TKN const* const refStart, int refLen,
|
||||
size_t d) const;
|
||||
|
||||
/** Returns a char const* pointing to the position in the data block
|
||||
* where the first item starting with token /id/ is located.
|
||||
*/
|
||||
virtual
|
||||
char const*
|
||||
getLowerBound(id_type id) const = 0;
|
||||
|
||||
virtual
|
||||
char const*
|
||||
getUpperBound(id_type id) const = 0;
|
||||
|
||||
public:
|
||||
shared_ptr<BSC_t> bsc;
|
||||
|
||||
char const* arrayStart() const { return startArray; }
|
||||
char const* arrayEnd() const { return endArray; }
|
||||
|
||||
/** @return a pointer to the beginning of the index entry range covering
|
||||
* [keyStart,keyStop)
|
||||
*/
|
||||
char const*
|
||||
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
|
||||
typename vector<TKN>::const_iterator const& keyStop) const;
|
||||
char const*
|
||||
lower_bound(TKN const* keyStart, TKN const* keyStop) const;
|
||||
|
||||
char const*
|
||||
lower_bound(TKN const* keyStart, int keyLen) const;
|
||||
|
||||
/** @return a pointer to the end point of the index entry range covering
|
||||
* [keyStart,keyStop)
|
||||
*/
|
||||
char const*
|
||||
upper_bound(typename vector<TKN>::const_iterator const& keyStart,
|
||||
typename vector<TKN>::const_iterator const& keyStop) const;
|
||||
|
||||
char const*
|
||||
upper_bound(TKN const* keyStart, int keyLength) const;
|
||||
|
||||
|
||||
/** dump all suffixes in order to /out/ */
|
||||
void dump(ostream& out, TokenIndex const& T) const;
|
||||
|
||||
/** fill the dynamic bit set with true for all sentences that contain
|
||||
* /phrase/.
|
||||
* @return the raw number of occurrences.
|
||||
*/
|
||||
count_type
|
||||
fillBitSet(vector<TKN> const& phrase, bdBitset& dest) const;
|
||||
|
||||
count_type
|
||||
fillBitSet(TKN const* key, size_t keyLen, bdBitset& dest) const;
|
||||
|
||||
count_type
|
||||
setBits(char const* startRange, char const* endRange,
|
||||
boost::dynamic_bitset<uint64_t>& bs) const;
|
||||
|
||||
void
|
||||
setTokenBits(char const* startRange, char const* endRange, size_t len,
|
||||
bitvector& bs) const;
|
||||
|
||||
/** read the sentence ID into /sid/
|
||||
* @return position of associated offset.
|
||||
*
|
||||
* The function provides an abstraction that uses the right
|
||||
* interpretation of the position based on the subclass
|
||||
* (memory-mapped or in-memory).
|
||||
*/
|
||||
virtual
|
||||
char const*
|
||||
readSid(char const* p, char const* q, id_type& sid) const = 0;
|
||||
|
||||
virtual
|
||||
char const*
|
||||
readSid(char const* p, char const* q, uint64_t& sid) const = 0;
|
||||
|
||||
/** read the offset part of the index entry into /offset/
|
||||
* @return position of the next entry in the index.
|
||||
*
|
||||
* The function provides an abstraction that uses the right
|
||||
* interpretation of the position based on the subclass
|
||||
* (memory-mapped or in-memory).
|
||||
*/
|
||||
virtual
|
||||
char const*
|
||||
readOffset(char const* p, char const* q, uint16_t& offset) const = 0;
|
||||
|
||||
virtual
|
||||
char const*
|
||||
readOffset(char const* p, char const* q, uint64_t& offset) const = 0;
|
||||
|
||||
/** @return sentence count
|
||||
*/
|
||||
count_type
|
||||
sntCnt(char const* p, char const* const q) const;
|
||||
|
||||
count_type
|
||||
rawCnt2(TKN const* keyStart, size_t keyLen) const;
|
||||
|
||||
/** @return raw occurrence count
|
||||
*
|
||||
* depending on the subclass, this is constant time (imTSA) or
|
||||
* linear in in the number of occurrences (mmTSA).
|
||||
*/
|
||||
virtual
|
||||
count_type
|
||||
rawCnt(char const* p, char const* const q) const = 0;
|
||||
|
||||
/** get both sentence and word counts.
|
||||
*
|
||||
* Avoids having to go over the byte range representing the range
|
||||
* of suffixes in question twice when dealing with memory-mapped
|
||||
* suffix arrays.
|
||||
*/
|
||||
virtual
|
||||
void
|
||||
getCounts(char const* p, char const* const q,
|
||||
count_type& sids, count_type& raw) const = 0;
|
||||
|
||||
string
|
||||
suffixAt(char const* p, TokenIndex const* V=NULL, size_t maxlen=0)
|
||||
const;
|
||||
|
||||
string
|
||||
suffixAt(ArrayEntry const& I, TokenIndex const* V=NULL, size_t maxlen=0)
|
||||
const;
|
||||
|
||||
tsa::ArrayEntry& readEntry(char const* p, tsa::ArrayEntry& I) const;
|
||||
|
||||
/** return pointer to the end of the data block */
|
||||
char const* dataEnd() const;
|
||||
|
||||
bool sanityCheck1() const;
|
||||
|
||||
/** Return an ID that represents a given phrase;
|
||||
This should NEVER be 0!
|
||||
Structure of a phrase ID:
|
||||
leftmost 32 bits: sentence ID in the corpus
|
||||
next 16 bits: offset from the start of the sentence
|
||||
next 16 bits: length of the phrase
|
||||
*/
|
||||
uint64_t
|
||||
getSequenceId(typename vector<TKN>::const_iterator const& pstart,
|
||||
typename vector<TKN>::const_iterator const& pstop) const;
|
||||
|
||||
uint64_t
|
||||
getSequenceId(TKN const* t, ushort plen) const;
|
||||
|
||||
/** Return the phrase represented by phrase ID pid_ */
|
||||
string
|
||||
getSequence(uint64_t pid, TokenIndex const& V) const;
|
||||
|
||||
/** Return the phrase represented by phrase ID pid_ */
|
||||
vector<TKN>
|
||||
getSequence(uint64_t pid) const;
|
||||
|
||||
TKN const*
|
||||
getSequenceStart(uint64_t) const;
|
||||
|
||||
ushort
|
||||
getSequenceLength(uint64_t) const;
|
||||
|
||||
size_t
|
||||
getCorpusSize() const;
|
||||
|
||||
Ttrack<TKN> const*
|
||||
getCorpus() const;
|
||||
|
||||
bitset_pointer
|
||||
getBitSet(TKN const* startKey, size_t keyLen) const;
|
||||
|
||||
shared_ptr<bitvector>
|
||||
findTree(TKN const* treeStart, TKN const* treeEnd,
|
||||
bitvector const* filter) const;
|
||||
|
||||
size_t markOccurrences(char const* lo, char const* up, size_t len,
|
||||
bitvector& bitset,
|
||||
bool markOnlyStartPosition) const;
|
||||
|
||||
bool
|
||||
findBranches(TKN const* base, bitvector const& terminals,
|
||||
vector<tree_iterator>& dest) const;
|
||||
|
||||
double aveIndexEntrySize() const
|
||||
{
|
||||
return (endArray-startArray)/double(numTokens);
|
||||
}
|
||||
|
||||
public:
|
||||
// virtual
|
||||
sptr<TSA_tree_iterator<TKN> >
|
||||
find(TKN const* start, size_t len) const
|
||||
{
|
||||
typedef TSA_tree_iterator<TKN> iter;
|
||||
sptr<iter> ret(new iter(this));
|
||||
size_t i = 0;
|
||||
while (i < len && ret->extend(start[i])) ++i;
|
||||
if (i < len) ret.reset();
|
||||
return ret;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// ======================================================================
|
||||
|
||||
// template<typename TOKEN>
|
||||
// sptr<TSA_tree_iterator<TOKEN> >
|
||||
// TSA<TOKEN>::
|
||||
// find(TOKEN const* start, size_t len) const
|
||||
// {
|
||||
// typedef TSA_tree_iterator<TOKEN> iter;
|
||||
// sptr<iter> ret(new iter(this));
|
||||
// size_t i = 0;
|
||||
// while (i < len && ret->extend(start[i])) ++i;
|
||||
// if (i < len) ret.reset();
|
||||
// return ret;
|
||||
// }
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
/** fill the dynamic bitset with information as to which sentences
|
||||
* the phrase occurs in
|
||||
* @return number of total occurrences of the phrase in the corpus
|
||||
*/
|
||||
template<typename TKN>
|
||||
count_type
|
||||
TSA<TKN>::
|
||||
fillBitSet(vector<TKN> const& key,
|
||||
bitvector& bitset) const
|
||||
{
|
||||
if (!key.size()) return 0;
|
||||
return fillBitset(&(key[0]),key.size(),bitset);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** fill the dynamic bitset with information as to which sentences
|
||||
* the phrase occurs in
|
||||
* @return number of total occurrences of the phrase in the corpus
|
||||
*/
|
||||
template<typename TKN>
|
||||
count_type
|
||||
TSA<TKN>::
|
||||
fillBitSet(TKN const* key, size_t keyLen,
|
||||
bitvector& bitset) const
|
||||
{
|
||||
char const* lo = lower_bound(key,keyLen);
|
||||
char const* up = upper_bound(key,keyLen);
|
||||
bitset.resize(corpus->size());
|
||||
bitset.reset();
|
||||
return setBits(lo,up,bitset);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
count_type
|
||||
TSA<TKN>::
|
||||
setBits(char const* startRange, char const* endRange,
|
||||
bitvector& bs) const
|
||||
{
|
||||
count_type wcount=0;
|
||||
char const* p = startRange;
|
||||
id_type sid;
|
||||
ushort off;
|
||||
while (p < endRange)
|
||||
{
|
||||
p = readSid(p,endRange,sid);
|
||||
p = readOffset(p,endRange,off);
|
||||
bs.set(sid);
|
||||
wcount++;
|
||||
}
|
||||
return wcount;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
void
|
||||
TSA<TKN>::
|
||||
setTokenBits(char const* startRange, char const* endRange, size_t len,
|
||||
bitvector& bs) const
|
||||
{
|
||||
ArrayEntry I;
|
||||
I.next = startRange;
|
||||
do {
|
||||
readEntry(I.next,I);
|
||||
Token const* t = corpus->getToken(I);
|
||||
Token const* stop = t->stop(*corpus,I.sid);
|
||||
for (size_t i = 1; i < len; ++i)
|
||||
{
|
||||
assert(t != stop);
|
||||
t = t->next();
|
||||
}
|
||||
assert(t != stop);
|
||||
bs.set(t - corpus->sntStart(0));
|
||||
} while (I.next != endRange);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
count_type
|
||||
TSA<TKN>::
|
||||
sntCnt(char const* p, char const* const q) const
|
||||
{
|
||||
id_type sid; uint16_t off;
|
||||
bitvector check(corpus->size());
|
||||
while (p < q)
|
||||
{
|
||||
p = readSid(p,q,sid);
|
||||
p = readOffset(p,q,off);
|
||||
check.set(sid);
|
||||
}
|
||||
return check.count();
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
/** return the lower bound (first matching entry)
|
||||
* of the token range matching [startKey,endKey)
|
||||
*/
|
||||
template<typename TKN>
|
||||
char const*
|
||||
TSA<TKN>::
|
||||
find_start(char const* lo, char const* const upX,
|
||||
TKN const* const refStart, int refLen,
|
||||
size_t d) const
|
||||
{
|
||||
char const* up = upX;
|
||||
if (lo >= up) return NULL;
|
||||
int x;
|
||||
ArrayEntry I;
|
||||
while (lo < up)
|
||||
{
|
||||
readEntry(index_jump(lo,up,.5),I);
|
||||
x = corpus->cmp(I,refStart,refLen,d);
|
||||
if (x >= 0) up = I.pos;
|
||||
else lo = I.next;
|
||||
}
|
||||
assert(lo==up);
|
||||
if (lo < upX)
|
||||
{
|
||||
readEntry(lo,I);
|
||||
x = corpus->cmp(I,refStart,refLen,d);
|
||||
}
|
||||
// return (x >= 0) ? lo : NULL;
|
||||
return (x == 0 || x == 1) ? lo : NULL;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
/** return the upper bound (first entry beyond)
|
||||
* of the token range matching [startKey,endKey)
|
||||
*/
|
||||
template<typename TKN>
|
||||
char const*
|
||||
TSA<TKN>::
|
||||
find_end(char const* lo, char const* const upX,
|
||||
TKN const* const refStart, int refLen,
|
||||
size_t d) const
|
||||
|
||||
{
|
||||
char const* up = upX;
|
||||
if (lo >= up) return NULL;
|
||||
int x;
|
||||
ArrayEntry I;
|
||||
// float ratio = .1;
|
||||
while (lo < up)
|
||||
{
|
||||
readEntry(index_jump(lo,up,.1),I);
|
||||
x = corpus->cmp(I,refStart,refLen,d);
|
||||
if (x == 2) up = I.pos;
|
||||
else lo = I.next;
|
||||
// ratio = .5;
|
||||
}
|
||||
assert(lo==up);
|
||||
if (lo < upX)
|
||||
{
|
||||
readEntry(lo,I);
|
||||
x = corpus->cmp(I,refStart,refLen,d);
|
||||
}
|
||||
return (x == 2) ? up : upX;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
/** return the first entry that has the prefix [refStart,refStart+refLen)
|
||||
* but continues on
|
||||
*/
|
||||
template<typename TKN>
|
||||
char const*
|
||||
TSA<TKN>::
|
||||
find_longer(char const* lo, char const* const upX,
|
||||
TKN const* const refStart, int refLen,
|
||||
size_t d) const
|
||||
{
|
||||
char const* up = upX;
|
||||
if (lo >= up) return NULL;
|
||||
int x;
|
||||
ArrayEntry I;
|
||||
while (lo < up)
|
||||
{
|
||||
readEntry(index_jump(lo,up,.5),I);
|
||||
x = corpus->cmp(I,refStart,refLen,d);
|
||||
if (x > 0) up = I.pos;
|
||||
else lo = I.next;
|
||||
}
|
||||
assert(lo==up);
|
||||
if (lo < upX)
|
||||
{
|
||||
readEntry(index_jump(lo,up,.5),I);
|
||||
x = corpus->cmp(I,refStart,refLen,d);
|
||||
}
|
||||
return (x == 1) ? up : NULL;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
/** returns the start position in the byte array representing
|
||||
* the tightly packed sorted list of corpus positions for the
|
||||
* given search phrase
|
||||
*/
|
||||
template<typename TKN>
|
||||
char const*
|
||||
TSA<TKN>::
|
||||
lower_bound(typename vector<TKN>::const_iterator const& keyStart,
|
||||
typename vector<TKN>::const_iterator const& keyStop) const
|
||||
{
|
||||
TKN const* const a = &(*keyStart);
|
||||
TKN const* const z = &(*keyStop);
|
||||
return lower_bound(a,z);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
/** returns the start position in the byte array representing
|
||||
* the tightly packed sorted list of corpus positions for the
|
||||
* given search phrase
|
||||
*/
|
||||
template<typename TKN>
|
||||
char const*
|
||||
TSA<TKN>::
|
||||
lower_bound(TKN const* const keyStart,
|
||||
TKN const* const keyStop) const
|
||||
{
|
||||
return lower_bound(keyStart,keyStop-keyStart);
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
char const*
|
||||
TSA<TKN>::
|
||||
lower_bound(TKN const* const keyStart, int keyLen) const
|
||||
{
|
||||
if (keyLen == 0) return startArray;
|
||||
char const* const lower = getLowerBound(keyStart->id());
|
||||
char const* const upper = getUpperBound(keyStart->id());
|
||||
return find_start(lower,upper,keyStart,keyLen,0);
|
||||
}
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
/** returns the upper bound in the byte array representing
|
||||
* the tightly packed sorted list of corpus positions for the
|
||||
* given search phrase (i.e., points just beyond the range)
|
||||
*/
|
||||
template<typename TKN>
|
||||
char const*
|
||||
TSA<TKN>::
|
||||
upper_bound(typename vector<TKN>::const_iterator const& keyStart,
|
||||
typename vector<TKN>::const_iterator const& keyStop) const
|
||||
{
|
||||
TKN const* const a = &((TKN)*keyStart);
|
||||
TKN const* const z = &((TKN)*keyStop);
|
||||
return upper_bound(a,z-a);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
/** returns the upper bound in the byte array representing
|
||||
* the tightly packed sorted list of corpus positions for the
|
||||
* given search phrase (i.e., points just beyond the range)
|
||||
*/
|
||||
template<typename TKN>
|
||||
char const*
|
||||
TSA<TKN>::
|
||||
upper_bound(TKN const* keyStart, int keyLength) const
|
||||
{
|
||||
if (keyLength == 0) return arrayEnd();
|
||||
char const* const lower = getLowerBound(keyStart->id());
|
||||
char const* const upper = getUpperBound(keyStart->id());
|
||||
return find_end(lower,upper,keyStart,keyLength,0);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
count_type
|
||||
TSA<TKN>::
|
||||
rawCnt2(TKN const* keyStart, size_t keyLen) const
|
||||
{
|
||||
char const* lo = lower_bound(keyStart,keyLen);
|
||||
char const* up = upper_bound(keyStart,keyLen);
|
||||
// cerr << up-lo << endl;
|
||||
return rawCnt(lo,up);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
uint64_t
|
||||
TSA<TKN>::
|
||||
getSequenceId(typename vector<TKN>::const_iterator const& pstart,
|
||||
typename vector<TKN>::const_iterator const& pstop) const
|
||||
{
|
||||
return getSequenceId(&(*pstart),pstop-pstart);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
uint64_t
|
||||
TSA<TKN>::
|
||||
getSequenceId(TKN const* pstart, ushort plen) const
|
||||
{
|
||||
char const* p = lower_bound(pstart,plen);
|
||||
if (!p) return 0; // not found!
|
||||
ArrayEntry I;
|
||||
readEntry(p,I);
|
||||
uint64_t ret = I.sid;
|
||||
ret <<= 16;
|
||||
ret += I.offset;
|
||||
ret <<= 16;
|
||||
ret += plen;
|
||||
return ret;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
vector<TKN>
|
||||
TSA<TKN>::
|
||||
getSequence(uint64_t pid) const
|
||||
{
|
||||
size_t plen = pid % 65536;
|
||||
size_t offset = (pid >> 16) % 65536;
|
||||
TKN const* w = corpus->sntStart(pid >> 32)+offset;
|
||||
vector<TKN> ret(plen);
|
||||
for (size_t i = 0; i < plen; i++, w = w->next())
|
||||
{
|
||||
assert(w);
|
||||
ret[i] = *w;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
string
|
||||
TSA<TKN>::
|
||||
getSequence(uint64_t pid, TokenIndex const& V) const
|
||||
{
|
||||
ostringstream buf;
|
||||
TKN const* a = getSequenceStart(pid);
|
||||
buf << V[a->id()];
|
||||
size_t len = getSequenceLength(pid);
|
||||
for (a = a->next(); --len>0; a = a->next())
|
||||
buf << " " << V[a->id()];
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
TKN const*
|
||||
TSA<TKN>::
|
||||
getSequenceStart(uint64_t pid) const
|
||||
{
|
||||
size_t offset = (pid >> 16) % 65536;
|
||||
return corpus->sntStart(pid >> 32)+offset;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
ushort
|
||||
TSA<TKN>::
|
||||
getSequenceLength(uint64_t pid) const
|
||||
{
|
||||
return (pid % 65536);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
size_t
|
||||
TSA<TKN>::
|
||||
getCorpusSize() const
|
||||
{
|
||||
return corpusSize;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
Ttrack<TKN> const*
|
||||
TSA<TKN>::
|
||||
getCorpus() const
|
||||
{
|
||||
return corpus;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
tsa::ArrayEntry &
|
||||
TSA<TKN>::
|
||||
readEntry(char const* p, tsa::ArrayEntry& I) const
|
||||
{
|
||||
I.pos = p;
|
||||
p = readSid(p,endArray,I.sid);
|
||||
I.next = readOffset(p,endArray,I.offset);
|
||||
assert(I.sid < corpus->size());
|
||||
assert(I.offset < corpus->sntLen(I.sid));
|
||||
return I;
|
||||
};
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
/// find all instances of the tree described by [treeStart, treeEnd)
|
||||
template<typename TKN>
|
||||
typename TSA<TKN>::bitset_pointer
|
||||
TSA<TKN>::
|
||||
getBitSet(TKN const* startKey, size_t keyLen) const
|
||||
{
|
||||
bitset_pointer ret;
|
||||
if (bsc != NULL)
|
||||
ret = bsc->get(startKey,keyLen);
|
||||
else
|
||||
{
|
||||
ret.reset(new bitvector(corpus->size()));
|
||||
fillBitSet(startKey,keyLen,*ret);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
size_t
|
||||
TSA<TKN>::
|
||||
markOccurrences(char const* lo, char const* up, size_t len,
|
||||
bitvector& bitset, bool markOnlyStartPosition) const
|
||||
{
|
||||
id_type sid;
|
||||
ushort off;
|
||||
count_type wcount=0;
|
||||
TKN const* crpStart = corpus->sntStart(0);
|
||||
char const* p = lo;
|
||||
while (p < up)
|
||||
{
|
||||
p = readSid(p,up,sid);
|
||||
p = readOffset(p,up,off);
|
||||
TKN const* t = corpus->sntStart(sid)+off;
|
||||
if (markOnlyStartPosition)
|
||||
bitset.set(t-crpStart);
|
||||
else
|
||||
for (size_t i = 0; i < len; ++i, t = t->next())
|
||||
bitset.set(t-crpStart);
|
||||
wcount++;
|
||||
}
|
||||
return wcount;
|
||||
}
|
||||
#if 1
|
||||
template<typename TKN>
|
||||
bool
|
||||
TSA<TKN>::
|
||||
findBranches(TKN const* base, bitvector const& terminals,
|
||||
vector<tree_iterator>& dest) const
|
||||
{
|
||||
dest.assign(terminals.count(),tree_iterator(this));
|
||||
for (size_t i = terminals.find_first(), k = 0;
|
||||
i < terminals.size();
|
||||
i = terminals.find_next(i),++k)
|
||||
{
|
||||
for (TKN const* x = base+i; x && x->id(); x = x->next())
|
||||
if (!dest[k].extend(x->id()))
|
||||
return false;
|
||||
}
|
||||
typename tree_iterator::SortByApproximateCount sorter;
|
||||
sort(dest.begin(),dest.end(),sorter);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
118
moses/mm/ug_tsa_bitset_cache.h
Normal file
118
moses/mm/ug_tsa_bitset_cache.h
Normal file
@ -0,0 +1,118 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2010 Ulrich Germann. All rights reserved.
|
||||
|
||||
#ifndef __ug_tsa_bitset_cache_h
|
||||
#define __ug_tsa_bitset_cache_h
|
||||
//#include "ug_tsa_base.h"
|
||||
#include <map>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <stdint.h>
|
||||
#include <iostream>
|
||||
// A simple mechanism for caching bit vectors representing occurrences of token
|
||||
// sequences in a corpus. Useful for very frequent items for which the bit
|
||||
// vector is expensive to create on the fly. The variable threshold determines
|
||||
// when bit vectors are cached and when they are created on the fly, using the
|
||||
// size of the range of entries in the TSA's index in bytes to determine
|
||||
// whether or not to store the respective bit vector in the cache.
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
template<typename TSA>
|
||||
class
|
||||
BitSetCache
|
||||
{
|
||||
public:
|
||||
typedef boost::dynamic_bitset<uint64_t> BitSet;
|
||||
typedef boost::shared_ptr<BitSet> bsptr;
|
||||
typedef map<pair<char const*,ushort>,bsptr> myMap;
|
||||
typedef myMap::iterator myMapIter;
|
||||
private:
|
||||
TSA const* tsa;
|
||||
myMap cached1,cached2;
|
||||
int threshold;
|
||||
public:
|
||||
|
||||
BitSetCache() : tsa(NULL), threshold(0) {};
|
||||
BitSetCache(TSA const* t, size_t th=4194304)
|
||||
{
|
||||
init(t,th);
|
||||
};
|
||||
|
||||
void
|
||||
init(TSA const* t, size_t th=4194304)
|
||||
{
|
||||
tsa = t;
|
||||
threshold = th;
|
||||
}
|
||||
|
||||
bsptr
|
||||
get(typename TSA::Token const* keyStart, size_t keyLen)
|
||||
{
|
||||
bsptr ret;
|
||||
char const* lo = tsa->lower_bound(keyStart,keyLen);
|
||||
char const* up = tsa->upper_bound(keyStart,keyLen);
|
||||
if (!lo) return ret;
|
||||
if (up-lo > threshold)
|
||||
{
|
||||
pair<char const*,ushort> k(lo,keyLen);
|
||||
myMapIter m = cached1.find(k);
|
||||
if (m != cached1.end())
|
||||
ret = m->second;
|
||||
else
|
||||
{
|
||||
ret.reset(new BitSet(tsa->getCorpus()->size()));
|
||||
cached1[k] = ret;
|
||||
}
|
||||
}
|
||||
else if (ret == NULL)
|
||||
ret.reset(new BitSet(tsa->getCorpus()->size()));
|
||||
if (ret->count() == 0)
|
||||
tsa->setBits(lo,up,*ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// get bitvector with the path occurrences marked
|
||||
bsptr
|
||||
get2(typename TSA::Token const* keyStart, size_t keyLen, bool onlyEndpoint=true)
|
||||
{
|
||||
bsptr ret;
|
||||
char const* lo = tsa->lower_bound(keyStart,keyLen);
|
||||
char const* up = tsa->upper_bound(keyStart,keyLen);
|
||||
if (!lo) return ret;
|
||||
if (up-lo > threshold)
|
||||
{
|
||||
pair<char const*,ushort> k(lo,keyLen);
|
||||
// cout << "bla " << keyStart->id() << " "
|
||||
// << cached2.size() << " " << up-lo << " " << k.second << endl;
|
||||
myMapIter m = cached2.find(k);
|
||||
if (m != cached2.end())
|
||||
ret = m->second;
|
||||
else
|
||||
{
|
||||
ret.reset(new BitSet(tsa->getCorpus()->numTokens()));
|
||||
cached2[k] = ret;
|
||||
}
|
||||
}
|
||||
else if (ret == NULL)
|
||||
ret.reset(new BitSet(tsa->getCorpus()->numTokens()));
|
||||
if (ret->count() == 0)
|
||||
{
|
||||
if (onlyEndpoint)
|
||||
tsa->setTokenBits(lo,up,keyLen,*ret);
|
||||
else
|
||||
tsa->markOccurrences(lo,up,keyLen,*ret,false);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
void clear()
|
||||
{
|
||||
cached1.clear();
|
||||
cached2.clear();
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
#endif
|
868
moses/mm/ug_tsa_tree_iterator.h
Normal file
868
moses/mm/ug_tsa_tree_iterator.h
Normal file
@ -0,0 +1,868 @@
|
||||
// -*- c++ -*-
|
||||
// (c) 2007 - 2010 Ulrich Germann. All rights reserved.
|
||||
#ifndef __ug_tsa_tree_iterator_h
|
||||
#define __ug_tsa_tree_iterator_h
|
||||
|
||||
#include "ug_tsa_array_entry.h"
|
||||
#include "ug_typedefs.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
#include <iostream>
|
||||
|
||||
// #include "ug_bv_iter.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
|
||||
#ifndef _DISPLAY_CHAIN
|
||||
#define _DISPLAY_CHAIN
|
||||
// for debugging only
|
||||
template<typename T>
|
||||
void display(T const* x, string label)
|
||||
{
|
||||
cout << label << ":"; for (;x;x=next(x)) cout << " " << x->lemma; cout << endl;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename T> class TSA;
|
||||
|
||||
// CLASS DEFINITION
|
||||
// The TSA_tree_iterator allows traversal of a Token Sequence Array as if it was a trie.
|
||||
// down(): go to first child
|
||||
// over(): go to next sibling
|
||||
// up(): go to parent
|
||||
// extend(id): go to a specific child node
|
||||
// all four functions return true if successful, false otherwise
|
||||
// lower_bound() and upper_bound() give the range of entries in the array covered by the
|
||||
// "virtual trie node".
|
||||
template<typename TKN>
|
||||
class
|
||||
TSA_tree_iterator
|
||||
{
|
||||
protected:
|
||||
vector<char const*> lower;
|
||||
vector<char const*> upper;
|
||||
|
||||
// for debugging ...
|
||||
void showBounds(ostream& out) const;
|
||||
public:
|
||||
typedef TKN Token;
|
||||
|
||||
virtual ~TSA_tree_iterator() {};
|
||||
|
||||
TSA<Token> const* root;
|
||||
// TO BE DONE: make the pointer private and add a const function to return the pointer
|
||||
|
||||
// TSA_tree_iterator(TSA_tree_iterator const& other);
|
||||
TSA_tree_iterator(TSA<Token> const* s);
|
||||
// TSA_tree_iterator(TSA<Token> const* s, Token const& t);
|
||||
// TSA_tree_iterator(TSA<Token> const* s, Token const* kstart, Token const* kend);
|
||||
// TSA_tree_iterator(TSA<Token> const* s,
|
||||
// TokenIndex const& V,
|
||||
// string const& key);
|
||||
|
||||
char const* lower_bound(int p) const;
|
||||
char const* upper_bound(int p) const;
|
||||
|
||||
size_t size() const;
|
||||
// Token const& wid(int p) const;
|
||||
Token const* getToken(int p) const;
|
||||
id_type getSid() const;
|
||||
ushort getOffset(int p) const;
|
||||
size_t sntCnt(int p=-1) const;
|
||||
size_t rawCnt(int p=-1) const;
|
||||
uint64_t getPid(int p=-1) const; // get phrase id
|
||||
|
||||
virtual bool extend(Token const& id);
|
||||
virtual bool extend(id_type id);
|
||||
virtual bool down();
|
||||
virtual bool over();
|
||||
virtual bool up();
|
||||
|
||||
string str(TokenIndex const* V=NULL, int start=0, int stop=0) const;
|
||||
|
||||
// checks if the sentence [start,stop) contains the given sequence.
|
||||
bool match(Token const* start, Token const* stop) const;
|
||||
// checks if the sentence /sid/ contains the given sequence.
|
||||
bool match(id_type sid) const;
|
||||
|
||||
// fillBitSet: deprecated; use markSentences() instead
|
||||
count_type
|
||||
fillBitSet(boost::dynamic_bitset<uint64_t>& bitset) const;
|
||||
|
||||
count_type
|
||||
markEndOfSequence(Token const* start, Token const* stop,
|
||||
boost::dynamic_bitset<uint64_t>& dest) const;
|
||||
count_type
|
||||
markSequence(Token const* start, Token const* stop, bitvector& dest) const;
|
||||
|
||||
count_type
|
||||
markSentences(boost::dynamic_bitset<uint64_t>& bitset) const;
|
||||
|
||||
count_type
|
||||
markOccurrences(boost::dynamic_bitset<uint64_t>& bitset,
|
||||
bool markOnlyStartPosition=false) const;
|
||||
|
||||
count_type
|
||||
markOccurrences(vector<ushort>& dest) const;
|
||||
|
||||
uint64_t
|
||||
getSequenceId() const;
|
||||
|
||||
// equivalent but more efficient than
|
||||
// bitvector tmp; markSentences(tmp); foo &= tmp;
|
||||
bitvector& filterSentences(bitvector& foo) const;
|
||||
|
||||
/// a special auxiliary function for finding trees
|
||||
void
|
||||
tfAndRoot(bitvector const& ref, // reference root positions
|
||||
bitvector const& snt, // relevant sentences
|
||||
bitvector& dest) const;
|
||||
|
||||
size_t arrayByteSpanSize(int p = -1) const
|
||||
{
|
||||
if (lower.size()==0) return 0; // or endArray-startArray???
|
||||
if (p < 0) p = lower.size()+p;
|
||||
assert(p >=0 && p < int(lower.size()));
|
||||
return lower.size() ? upper[p]-lower[p] : 0;
|
||||
}
|
||||
|
||||
struct SortByApproximateCount
|
||||
{
|
||||
bool operator()(TSA_tree_iterator const& a,
|
||||
TSA_tree_iterator const& b) const
|
||||
{
|
||||
if (a.size()==0) return b.size() ? true : false;
|
||||
if (b.size()==0) return false;
|
||||
return a.arrayByteSpanSize() < b.arrayByteSpanSize();
|
||||
}
|
||||
};
|
||||
|
||||
double approxOccurrenceCount(int p=-1) const
|
||||
{
|
||||
return arrayByteSpanSize(p)/root->aveIndexEntrySize();
|
||||
}
|
||||
|
||||
size_t grow(Token const* t, Token const* stop)
|
||||
{
|
||||
while ((t != stop) && extend(*t)) t = t->next();
|
||||
return this->size();
|
||||
}
|
||||
|
||||
size_t grow(Token const* snt, bitvector const& cov)
|
||||
{
|
||||
size_t x = cov.find_first();
|
||||
while (x < cov.size() && extend(snt[x]))
|
||||
x = cov.find_next(x);
|
||||
return this->size();
|
||||
}
|
||||
|
||||
sptr<vector<typename ttrack::Position> >
|
||||
randomSample(int level, size_t N) const;
|
||||
|
||||
};
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
// DOWN
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename TSA_TYPE>
|
||||
bool
|
||||
TSA_tree_iterator<TSA_TYPE>::
|
||||
down()
|
||||
{
|
||||
assert(root);
|
||||
if (lower.size() == 0)
|
||||
{
|
||||
char const* lo = root->arrayStart();
|
||||
assert(lo < root->arrayEnd());
|
||||
if (lo == root->arrayEnd()) return false; // array is empty, can't go down
|
||||
tsa::ArrayEntry A(root,lo);
|
||||
assert(root->corpus->getToken(A));
|
||||
assert(lo < root->getUpperBound(root->corpus->getToken(A)->id()));
|
||||
lower.push_back(lo);
|
||||
Token const* foo = this->getToken(0);
|
||||
upper.push_back(root->upper_bound(foo,lower.size()));
|
||||
return lower.size();
|
||||
}
|
||||
else
|
||||
{
|
||||
char const* lo = lower.back();
|
||||
tsa::ArrayEntry A(root,lo);
|
||||
Token const* a = root->corpus->getToken(A); assert(a);
|
||||
Token const* z = next(a);
|
||||
for (size_t i = 1; i < size(); ++i) z = next(z);
|
||||
if (z < root->corpus->sntStart(A.sid) || z >= root->corpus->sntEnd(A.sid))
|
||||
{
|
||||
char const* up = upper.back();
|
||||
lo = root->find_longer(lo,up,a,lower.size(),0);
|
||||
if (!lo) return false;
|
||||
root->readEntry(lo,A);
|
||||
a = root->corpus->getToken(A); assert(a);
|
||||
z = next(a);
|
||||
assert(z >= root->corpus->sntStart(A.sid) && z < root->corpus->sntEnd(A.sid));
|
||||
}
|
||||
lower.push_back(lo);
|
||||
char const* up = root->getUpperBound(a->id());
|
||||
char const* u = root->find_end(lo,up,a,lower.size(),0);
|
||||
assert(u);
|
||||
upper.push_back(u);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// OVER
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
bool
|
||||
TSA_tree_iterator<Token>::
|
||||
over()
|
||||
{
|
||||
if (lower.size() == 0)
|
||||
return false;
|
||||
if (lower.size() == 1)
|
||||
{
|
||||
Token const* t = this->getToken(0);
|
||||
id_type wid = t->id();
|
||||
char const* hi = root->getUpperBound(wid);
|
||||
if (upper[0] < hi)
|
||||
{
|
||||
lower[0] = upper[0];
|
||||
Token const* foo = this->getToken(0);
|
||||
upper.back() = root->upper_bound(foo,lower.size());
|
||||
}
|
||||
else
|
||||
{
|
||||
for (++wid; wid < root->indexSize; ++wid)
|
||||
{
|
||||
char const* lo = root->getLowerBound(wid);
|
||||
if (lo == root->endArray) return false;
|
||||
char const* hi = root->getUpperBound(wid);
|
||||
if (!hi) return false;
|
||||
if (lo == hi) continue;
|
||||
assert(lo);
|
||||
lower[0] = lo;
|
||||
Token const* foo = this->getToken(0);
|
||||
upper.back() = root->upper_bound(foo,lower.size());
|
||||
break;
|
||||
}
|
||||
}
|
||||
return wid < root->indexSize;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (upper.back() == root->arrayEnd())
|
||||
return false;
|
||||
tsa::ArrayEntry L(root,lower.back());
|
||||
tsa::ArrayEntry U(root,upper.back());
|
||||
|
||||
// display(root->corpus->getToken(L),"L1");
|
||||
// display(root->corpus->getToken(U),"U1");
|
||||
|
||||
int x = root->corpus->cmp(U,L,lower.size()-1);
|
||||
// cerr << "x=" << x << endl;
|
||||
if (x != 1)
|
||||
return false;
|
||||
lower.back() = upper.back();
|
||||
|
||||
// display(root->corpus->getToken(U),"L2");
|
||||
|
||||
Token const* foo = this->getToken(0);
|
||||
// display(foo,"F!");
|
||||
upper.back() = root->upper_bound(foo,lower.size());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// UP
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
bool
|
||||
TSA_tree_iterator<Token>::
|
||||
up()
|
||||
{
|
||||
if (lower.size())
|
||||
{
|
||||
lower.pop_back();
|
||||
upper.pop_back();
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// CONSTRUCTORS
|
||||
//----------------------------------------------------------------------------
|
||||
template<typename Token>
|
||||
TSA_tree_iterator<Token>::
|
||||
TSA_tree_iterator(TSA<Token> const* s)
|
||||
: root(s)
|
||||
{};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
#if 0
  // Disabled alternative constructors (not declared in the class).

  /// Builds the path from the whitespace-separated words of /key/,
  /// mapped through /V/; the path is emptied if a word cannot be appended.
  template<typename Token>
  TSA_tree_iterator<Token>::
  TSA_tree_iterator(TSA<Token> const* s,
                    TokenIndex const& V,
                    string const& key)
    : root(s)
  {
    istringstream buf(key);
    for (string w; buf >> w;)
      {
        if (!this->extend(V[w]))
          {
            lower.clear();
            upper.clear();
            break;
          }
      }
  }

  // ---------------------------------------------------------------------------

  /// Copy constructor.
  template<typename Token>
  TSA_tree_iterator<Token>::
  TSA_tree_iterator(TSA_tree_iterator<Token> const& other)
    : lower(other.lower), upper(other.upper), root(other.root)
  {}

  // ---------------------------------------------------------------------------

  /// Starts the path with the index range of token /t/; the path stays
  /// empty if getUpperBound() returns NULL for it.
  template<typename Token>
  TSA_tree_iterator<Token>::
  TSA_tree_iterator(TSA<Token> const* s, Token const& t)
    : root(s)
  {
    char const* const up = root->getUpperBound(t.id());
    if (up)
      {
        lower.push_back(root->getLowerBound(t.id()));
        upper.push_back(up);
      }
  }

  // ---------------------------------------------------------------------------

  /// Builds the path for the token chain [kstart,kend); the path is
  /// emptied if the chain cannot be matched completely.
  template<typename Token>
  TSA_tree_iterator<Token>::
  TSA_tree_iterator(TSA<Token> const* s, Token const* kstart, Token const* kend)
    : root(s)
  {
    while (kstart != kend && extend(*kstart))
      kstart = kstart->next();
    if (kstart != kend)
      {
        lower.clear();
        upper.clear();
      }
  }
#endif
|
||||
// ---------------------------------------------------------------------------
|
||||
// EXTEND
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
bool
|
||||
TSA_tree_iterator<Token>::
|
||||
extend(id_type const id)
|
||||
{
|
||||
return extend(Token(id));
|
||||
}
|
||||
|
||||
|
||||
template<typename Token>
|
||||
bool
|
||||
TSA_tree_iterator<Token>::
|
||||
extend(Token const& t)
|
||||
{
|
||||
if (lower.size())
|
||||
{
|
||||
char const* lo = lower.back();
|
||||
char const* hi = upper.back();
|
||||
lo = root->find_start(lo, hi, &t, 1, lower.size());
|
||||
if (!lo) return false;
|
||||
lower.push_back(lo);
|
||||
hi = root->find_end(lo, hi, getToken(-1), 1, lower.size()-1);
|
||||
upper.push_back(hi);
|
||||
}
|
||||
else
|
||||
{
|
||||
char const* lo = root->getLowerBound(t.id());
|
||||
char const* hi = root->getUpperBound(t.id());
|
||||
|
||||
if (lo==hi) return false;
|
||||
lo = root->find_start(lo, hi, &t, 1, lower.size());
|
||||
if (!lo) return false;
|
||||
lower.push_back(lo);
|
||||
#if 0
|
||||
tsa::ArrayEntry I;
|
||||
root->readEntry(lo,I);
|
||||
cout << I.sid << " " << I.offset << endl;
|
||||
cout << root->corpus->sntLen(I.sid) << endl;
|
||||
#endif
|
||||
hi = root->find_end(lo, hi, getToken(0), 1, 0);
|
||||
upper.push_back(hi);
|
||||
}
|
||||
return true;
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
size_t
|
||||
TSA_tree_iterator<Token>::
|
||||
size() const
|
||||
{
|
||||
return lower.size();
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
id_type
|
||||
TSA_tree_iterator<Token>::
|
||||
getSid() const
|
||||
{
|
||||
char const* p = (lower.size() ? lower.back() : root->startArray);
|
||||
char const* q = (upper.size() ? upper.back() : root->endArray);
|
||||
id_type sid;
|
||||
root->readSid(p,q,sid);
|
||||
return sid;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
uint64_t
|
||||
TSA_tree_iterator<Token>::
|
||||
getPid(int p) const
|
||||
{
|
||||
if (p < 0) p += upper.size();
|
||||
char const* lb = lower_bound(p);
|
||||
char const* ub = upper_bound(p);
|
||||
uint64_t sid,off;
|
||||
root->readOffset(root->readSid(lb,ub,sid),ub,off);
|
||||
uint64_t ret = (sid<<32) + (off<<16) + uint64_t(p+1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
char const*
|
||||
TSA_tree_iterator<Token>::
|
||||
lower_bound(int p) const
|
||||
{
|
||||
if (p < 0) p += lower.size();
|
||||
assert(p >= 0 && p < int(lower.size()));
|
||||
return lower[p];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
char const*
|
||||
TSA_tree_iterator<Token>::
|
||||
upper_bound(int p) const
|
||||
{
|
||||
if (p < 0) p += upper.size();
|
||||
assert(p >= 0 && p < int(upper.size()));
|
||||
return upper[p];
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/* @return a pointer to the position in the corpus
|
||||
* where this->wid(p) is read from
|
||||
*/
|
||||
template<typename Token>
|
||||
Token const*
|
||||
TSA_tree_iterator<Token>::
|
||||
getToken(int p) const
|
||||
{
|
||||
if (lower.size()==0) return NULL;
|
||||
tsa::ArrayEntry A(root,lower.back());
|
||||
Token const* t = root->corpus->getToken(A); assert(t);
|
||||
#ifndef NDEBUG
|
||||
Token const* bos = root->corpus->sntStart(A.sid);
|
||||
Token const* eos = root->corpus->sntEnd(A.sid);
|
||||
#endif
|
||||
if (p < 0) p += lower.size();
|
||||
// cerr << p << ". " << t->id() << endl;
|
||||
while (p-- > 0)
|
||||
{
|
||||
t = next(t);
|
||||
// if (t) cerr << p << ". " << t->id() << endl;
|
||||
assert(t >= bos && t < eos);
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
size_t
|
||||
TSA_tree_iterator<Token>::
|
||||
sntCnt(int p) const
|
||||
{
|
||||
if (p < 0)
|
||||
p = lower.size()+p;
|
||||
assert(p>=0);
|
||||
if (lower.size() == 0) return root->getCorpusSize();
|
||||
return reinterpret_cast<TSA<Token> const* const>(root)->sntCnt(lower[p],upper[p]);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
size_t
|
||||
TSA_tree_iterator<Token>::
|
||||
rawCnt(int p) const
|
||||
{
|
||||
if (p < 0)
|
||||
p = lower.size()+p;
|
||||
assert(p>=0);
|
||||
if (lower.size() == 0) return root->getCorpusSize();
|
||||
return root->rawCnt(lower[p],upper[p]);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
fillBitSet(boost::dynamic_bitset<uint64_t>& bitset) const
|
||||
{
|
||||
return markSentences(bitset);
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
markSentences(boost::dynamic_bitset<uint64_t>& bitset) const
|
||||
{
|
||||
assert(root && root->corpus);
|
||||
bitset.resize(root->corpus->size());
|
||||
bitset.reset();
|
||||
if (lower.size()==0) return 0;
|
||||
char const* lo = lower.back();
|
||||
char const* up = upper.back();
|
||||
char const* p = lo;
|
||||
id_type sid;
|
||||
ushort off;
|
||||
count_type wcount=0;
|
||||
while (p < up)
|
||||
{
|
||||
p = root->readSid(p,up,sid);
|
||||
p = root->readOffset(p,up,off);
|
||||
bitset.set(sid);
|
||||
wcount++;
|
||||
}
|
||||
return wcount;
|
||||
}
|
||||
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
markOccurrences(boost::dynamic_bitset<uint64_t>& bitset, bool markOnlyStartPosition) const
|
||||
{
|
||||
assert(root && root->corpus);
|
||||
if (bitset.size() != root->corpus->numTokens())
|
||||
bitset.resize(root->corpus->numTokens());
|
||||
bitset.reset();
|
||||
if (lower.size()==0) return 0;
|
||||
char const* lo = lower.back();
|
||||
char const* up = upper.back();
|
||||
return root->markOccurrences(lo,up,lower.size(),bitset,markOnlyStartPosition);
|
||||
}
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
markOccurrences(vector<ushort>& dest) const
|
||||
{
|
||||
assert(root && root->corpus);
|
||||
assert(dest.size() == root->corpus->numTokens());
|
||||
if (lower.size()==0) return 0;
|
||||
char const* lo = lower.back();
|
||||
char const* up = upper.back();
|
||||
char const* p = lo;
|
||||
id_type sid;
|
||||
ushort off;
|
||||
count_type wcount=0;
|
||||
Token const* crpStart = root->corpus->sntStart(0);
|
||||
while (p < up)
|
||||
{
|
||||
p = root->readSid(p,up,sid);
|
||||
p = root->readOffset(p,up,off);
|
||||
Token const* t = root->corpus->sntStart(sid)+off;
|
||||
for (size_t i = 1; i < lower.size(); ++i, t = t->next());
|
||||
dest[t-crpStart]++;
|
||||
wcount++;
|
||||
}
|
||||
return wcount;
|
||||
}
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
// mark all endpoints of instances of the path represented by this
|
||||
// iterator in the sentence [start,stop)
|
||||
template<typename Token>
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
markEndOfSequence(Token const* start, Token const* stop,
|
||||
boost::dynamic_bitset<uint64_t>& dest) const
|
||||
{
|
||||
count_type matchCount=0;
|
||||
Token const* a = getToken(0);
|
||||
for (Token const* x = start; x < stop; ++x)
|
||||
{
|
||||
if (*x != *a) continue;
|
||||
Token const* y = x;
|
||||
Token const* b = a;
|
||||
size_t i;
|
||||
for (i = 0; *b==*y && ++i < this->size();)
|
||||
{
|
||||
b = b->next();
|
||||
y = y->next();
|
||||
if (y < start || y >= stop) break;
|
||||
}
|
||||
if (i == this->size())
|
||||
{
|
||||
dest.set(y-start);
|
||||
++matchCount;
|
||||
}
|
||||
}
|
||||
return matchCount;
|
||||
}
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
// mark all occurrences of the sequence represented by this
|
||||
// iterator in the sentence [start,stop)
|
||||
template<typename Token>
|
||||
count_type
|
||||
TSA_tree_iterator<Token>::
|
||||
markSequence(Token const* start,
|
||||
Token const* stop,
|
||||
bitvector& dest) const
|
||||
{
|
||||
count_type numMatches=0;
|
||||
Token const* a = getToken(0);
|
||||
for (Token const* x = start; x < stop; ++x)
|
||||
{
|
||||
if (*x != *a) continue;
|
||||
Token const* y = x;
|
||||
Token const* b = a;
|
||||
size_t i;
|
||||
for (i = 0; *b==*y && i++ < this->size();)
|
||||
{
|
||||
dest.set(y-start);
|
||||
b = b->next();
|
||||
y = y->next();
|
||||
if (y < start || y >= stop) break;
|
||||
}
|
||||
if (i == this->size()) ++numMatches;
|
||||
}
|
||||
return numMatches;
|
||||
}
|
||||
//---------------------------------------------------------------------------
|
||||
|
||||
template<typename Token>
|
||||
uint64_t
|
||||
TSA_tree_iterator<Token>::
|
||||
getSequenceId() const
|
||||
{
|
||||
if (this->size() == 0) return 0;
|
||||
char const* p = this->lower_bound(-1);
|
||||
typename Token::ArrayEntry I;
|
||||
root->readEntry(p,I);
|
||||
return (uint64_t(I.sid)<<32)+(I.offset<<16)+this->size();
|
||||
}
|
||||
|
||||
template<typename Token>
|
||||
string
|
||||
TSA_tree_iterator<Token>::
|
||||
str(TokenIndex const* V, int start, int stop) const
|
||||
{
|
||||
if (this->size()==0) return "";
|
||||
if (start < 0) start = this->size()+start;
|
||||
if (stop <= 0) stop = this->size()+stop;
|
||||
assert(start>=0 && start < int(this->size()));
|
||||
assert(stop > 0 && stop <= int(this->size()));
|
||||
Token const* x = this->getToken(0);
|
||||
ostringstream buf;
|
||||
for (int i = start; i < stop; ++i, x = x->next())
|
||||
{
|
||||
assert(x);
|
||||
buf << (i > start ? " " : "");
|
||||
if (V) buf << (*V)[x->id()];
|
||||
else buf << x->id();
|
||||
}
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
#if 0
  // Disabled variant of str() that looks tokens up in a Vocab.
  template<typename Token>
  string
  TSA_tree_iterator<Token>::
  str(Vocab const& V, int start, int stop) const
  {
    if (this->size()==0) return "";
    if (start < 0) start += this->size();
    if (stop <= 0) stop  += this->size();
    assert(start>=0 && start < int(this->size()));
    assert(stop > 0 && stop <= int(this->size()));
    ostringstream buf;
    Token const* x = this->getToken(0);
    int i = start;
    while (i < stop)
      {
        assert(x);
        buf << (i > start ? " " : "");
        buf << V[x->id()].str;
        ++i;
        x = x->next();
      }
    return buf.str();
  }
#endif
|
||||
|
||||
/// @return true if the sentence [start,stop) contains the sequence
|
||||
template<typename Token>
|
||||
bool
|
||||
TSA_tree_iterator<Token>::
|
||||
match(Token const* start, Token const* stop) const
|
||||
{
|
||||
Token const* a = getToken(0);
|
||||
for (Token const* t = start; t < stop; ++t)
|
||||
{
|
||||
if (*t != *a) continue;
|
||||
Token const* b = a;
|
||||
Token const* y = t;
|
||||
size_t i;
|
||||
for (i = 1; i < lower.size(); ++i)
|
||||
{
|
||||
y = y->next();
|
||||
if (y < start || y >= stop) break;
|
||||
b = b->next();
|
||||
if (*b != *y) break;
|
||||
}
|
||||
if (i == lower.size()) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// @return true if the sentence /sid/ contains the sequence
|
||||
template<typename Token>
|
||||
bool
|
||||
TSA_tree_iterator<Token>::
|
||||
match(id_type sid) const
|
||||
{
|
||||
return match(root->corpus->sntStart(sid),root->corpus->sntEnd(sid));
|
||||
}
|
||||
|
||||
/// a special auxiliary function for finding trees
|
||||
// @param sntcheck: number of roots in the respective sentence
|
||||
// @param dest: bitvector to keep track of the exact root location
|
||||
template<typename Token>
|
||||
void
|
||||
TSA_tree_iterator<Token>::
|
||||
tfAndRoot(bitvector const& ref, // reference root positions
|
||||
bitvector const& snt, // relevant sentences
|
||||
bitvector& dest) const
|
||||
{
|
||||
tsa::ArrayEntry I(lower.back());
|
||||
Token const* crpStart = root->corpus->sntStart(0);
|
||||
do
|
||||
{
|
||||
root->readEntry(I.next,I);
|
||||
if (!snt.test(I.sid)) continue; // skip, no root there
|
||||
// find my endpoint:
|
||||
Token const* t = root->corpus->getToken(I)->next(lower.size()-1);
|
||||
assert(t >= crpStart);
|
||||
size_t p = t-crpStart;
|
||||
if (ref.test(p)) // it's a valid root
|
||||
dest.set(p);
|
||||
} while (I.next != upper.back());
|
||||
}
|
||||
|
||||
// @param bv: bitvector with bits set for selected sentences
|
||||
// @return: reference to bv
|
||||
template<typename Token>
|
||||
bitvector&
|
||||
TSA_tree_iterator<Token>::
|
||||
filterSentences(bitvector& bv) const
|
||||
{
|
||||
float aveSntLen = root->corpus->numTokens()/root->corpus->size();
|
||||
size_t ANDcost = bv.size()/8; // cost of dest&=ref;
|
||||
float aveEntrySize = ((root->endArray-root->startArray)
|
||||
/root->corpus->numTokens());
|
||||
if (arrayByteSpanSize()+ANDcost < aveEntrySize*aveSntLen*bv.count())
|
||||
{
|
||||
bitvector tmp(bv.size());
|
||||
markSentences(tmp);
|
||||
bv &= tmp;
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t i = bv.find_first(); i < bv.size(); i = bv.find_next(i))
|
||||
if (!match(i)) bv.reset(i);
|
||||
}
|
||||
return bv;
|
||||
}
|
||||
|
||||
/// Draw a pseudo-random integer from rand(), scaled into [0,N).
/// @param N exclusive upper bound of the result
/// @return N times a rand()-based fraction in [0,1), truncated to size_t
inline
size_t
randInt(size_t N)
{
  double const fraction = rand() / (RAND_MAX + 1.);
  return size_t(N * fraction);
}
|
||||
|
||||
/// randomly select up to N occurrences of the sequence
|
||||
template<typename Token>
|
||||
sptr<vector<typename ttrack::Position> >
|
||||
TSA_tree_iterator<Token>::
|
||||
randomSample(int level, size_t N) const
|
||||
{
|
||||
if (level < 0) level += lower.size();
|
||||
assert(level >=0);
|
||||
|
||||
sptr<vector<typename ttrack::Position> >
|
||||
ret(new vector<typename ttrack::Position>(N));
|
||||
|
||||
size_t m=0; // number of samples selected so far
|
||||
typename Token::ArrayEntry I(lower.at(level));
|
||||
char const* stop = upper.at(level);
|
||||
while (m < N && I.next < stop)
|
||||
{
|
||||
root->readEntry(I.next,I);
|
||||
|
||||
// t: expected number of remaining samples
|
||||
double t = (stop - I.pos)/root->aveIndexEntrySize();
|
||||
double r = t*rand()/(RAND_MAX+1.);
|
||||
if (r < N-m)
|
||||
{
|
||||
ret->at(m).offset = I.offset;
|
||||
ret->at(m++).sid = I.sid;
|
||||
}
|
||||
}
|
||||
ret->resize(m);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
} // end of namespace ugdiss
|
||||
#endif
|
55
moses/mm/ug_ttrack_base.cc
Normal file
55
moses/mm/ug_ttrack_base.cc
Normal file
@ -0,0 +1,55 @@
|
||||
// Memory-mapped corpus track
|
||||
// (c) Ulrich Germann. All rights reserved
|
||||
|
||||
#include <sstream>
|
||||
|
||||
#include "ug_mm_ttrack.h"
|
||||
#include "tpt_pickler.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
#if 0
|
||||
template<>
|
||||
id_type
|
||||
Ttrack<id_type>::
|
||||
toID(id_type const& t)
|
||||
{
|
||||
return t;
|
||||
}
|
||||
#endif
|
||||
|
||||
/** @return string representation of sentence /sid/ */
|
||||
template<>
|
||||
string
|
||||
Ttrack<id_type>::
|
||||
str(id_type sid, TokenIndex const& T) const
|
||||
{
|
||||
assert(sid < numTokens());
|
||||
id_type const* stop = sntEnd(sid);
|
||||
id_type const* strt = sntStart(sid);
|
||||
ostringstream buf;
|
||||
if (strt < stop) buf << T[*strt];
|
||||
while (++strt < stop)
|
||||
buf << " " << T[*strt];
|
||||
return buf.str();
|
||||
}
|
||||
|
||||
#if 0
|
||||
template<>
|
||||
string
|
||||
Ttrack<id_type>::
|
||||
str(id_type sid, Vocab const& V) const
|
||||
{
|
||||
assert(sid < numTokens());
|
||||
id_type const* stop = sntEnd(sid);
|
||||
id_type const* strt = sntStart(sid);
|
||||
ostringstream buf;
|
||||
if (strt < stop) buf << V[*strt].str;
|
||||
while (++strt < stop)
|
||||
buf << " " << V[*strt].str;
|
||||
return buf.str();
|
||||
}
|
||||
#endif
|
||||
}
|
348
moses/mm/ug_ttrack_base.h
Normal file
348
moses/mm/ug_ttrack_base.h
Normal file
@ -0,0 +1,348 @@
|
||||
// -*- c++ -*-
|
||||
// Base class for corpus tracks. mmTtrack (memory-mapped Ttrack) and imTtrack (in-memory Ttrack)
|
||||
// are derived from this class.
|
||||
|
||||
// This code is part of a refactorization of the earlier Ttrack class as a template class for
|
||||
// tokens of arbitrary fixed-length size.
|
||||
// (c) 2007-2009 Ulrich Germann. All rights reserved.
|
||||
|
||||
#ifndef __ug_ttrack_base
|
||||
#define __ug_ttrack_base
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
|
||||
#include "ug_ttrack_position.h"
|
||||
#include "tpt_typedefs.h"
|
||||
#include "tpt_tokenindex.h"
|
||||
// #include "ug_vocab.h"
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
|
||||
typedef boost::dynamic_bitset<uint64_t> bdBitset;
|
||||
|
||||
template<typename TKN=id_type>
|
||||
class Ttrack
|
||||
{
|
||||
protected:
|
||||
id_type numSent;
|
||||
id_type numWords;
|
||||
|
||||
public:
|
||||
|
||||
virtual ~Ttrack() {};
|
||||
typedef typename ttrack::Position Position;
|
||||
typedef TKN Token;
|
||||
|
||||
/** @return a pointer to beginning of sentence /sid/ */
|
||||
virtual
|
||||
TKN const*
|
||||
sntStart(size_t sid) const = 0;
|
||||
|
||||
/** @return end point of sentence /sid/ */
|
||||
virtual
|
||||
TKN const*
|
||||
sntEnd(size_t sid) const = 0;
|
||||
|
||||
TKN const*
|
||||
getToken(Position const& p) const;
|
||||
|
||||
template<typename T>
|
||||
T const*
|
||||
getTokenAs(Position const& p) const
|
||||
{ return reinterpret_cast<T const*>(getToken(p)); }
|
||||
|
||||
template<typename T>
|
||||
T const*
|
||||
sntStartAs(id_type sid) const
|
||||
{ return reinterpret_cast<T const*>(sntStart(sid)); }
|
||||
|
||||
template<typename T>
|
||||
T const*
|
||||
sntEndAs(id_type sid) const
|
||||
{ return reinterpret_cast<T const*>(sntEnd(sid)); }
|
||||
|
||||
/** @return length of sentence /sid/ */
|
||||
size_t sntLen(size_t sid) const { return sntEnd(sid) - sntStart(sid); }
|
||||
|
||||
size_t
|
||||
startPos(id_type sid) const { return sntStart(sid)-sntStart(0); }
|
||||
|
||||
size_t
|
||||
endPos(id_type sid) const { return sntEnd(sid)-sntStart(0); }
|
||||
|
||||
/** Don't use this unless you want a copy of the sentence */
|
||||
vector<TKN>
|
||||
operator[](id_type sid) const
|
||||
{
|
||||
return vector<TKN>(sntStart(sid),sntEnd(sid));
|
||||
}
|
||||
|
||||
/** @return size of corpus in number of sentences */
|
||||
virtual size_t size() const = 0;
|
||||
|
||||
/** @return size of corpus in number of words/tokens */
|
||||
virtual size_t numTokens() const = 0;
|
||||
|
||||
/** @return string representation of sentence /sid/
|
||||
* Currently only defined for Ttrack<id_type> */
|
||||
string str(id_type sid, TokenIndex const& T) const;
|
||||
|
||||
// /** @return string representation of sentence /sid/
|
||||
// * Currently only defined for Ttrack<id_type> */
|
||||
// string str(id_type sid, Vocab const& V) const;
|
||||
|
||||
/** counts the tokens in the corpus; used for example in the construction of
|
||||
* token sequence arrays */
|
||||
count_type count_tokens(vector<count_type>& cnt, bdBitset const& filter,
|
||||
int lengthCutoff=0, ostream* log=NULL) const;
|
||||
|
||||
// static id_type toID(TKN const& t);
|
||||
|
||||
int cmp(Position const& A, Position const& B, int keyLength) const;
|
||||
int cmp(Position const& A, TKN const* keyStart, int keyLength=-1,
|
||||
int depth=0) const;
|
||||
|
||||
virtual id_type findSid(TKN const* t) const = 0; // find the sentence id of a given token
|
||||
// virtual id_type findSid(id_type TokenOffset) const = 0; // find the sentence id of a given token
|
||||
|
||||
|
||||
// the following three functions are currently not used by any program ... (deprecate?)
|
||||
TKN const*
|
||||
find_next_within_sentence(TKN const* startKey,
|
||||
int keyLength,
|
||||
Position startHere) const;
|
||||
|
||||
Position
|
||||
find_first(TKN const* startKey, int keyLength,
|
||||
bdBitset const* filter=NULL) const;
|
||||
|
||||
Position
|
||||
find_next(TKN const* startKey, int keyLength, Position startAfter,
|
||||
bdBitset const* filter=NULL) const;
|
||||
|
||||
|
||||
virtual size_t offset(TKN const* t) const { return t-sntStart(0); }
|
||||
};
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
TKN const*
|
||||
Ttrack<TKN>::
|
||||
getToken(Position const& p) const
|
||||
{
|
||||
TKN const* ret = sntStart(p.sid)+p.offset;
|
||||
return (ret < sntEnd(p.sid)) ? ret : NULL;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template<typename TKN>
|
||||
count_type
|
||||
Ttrack<TKN>::
|
||||
count_tokens(vector<count_type>& cnt, bdBitset const& filter,
|
||||
int lengthCutoff, ostream* log) const
|
||||
{
|
||||
cnt.clear();
|
||||
cnt.reserve(500000);
|
||||
count_type totalCount=0;
|
||||
int64_t expectedTotal=numTokens();
|
||||
for (size_t sid = filter.find_first();
|
||||
sid < filter.size();
|
||||
sid = filter.find_next(sid))
|
||||
{
|
||||
TKN const* k = sntStart(sid);
|
||||
TKN const* const stop = sntEnd(sid);
|
||||
if (lengthCutoff && stop-k >= lengthCutoff)
|
||||
{
|
||||
if (log)
|
||||
*log << "WARNING: skipping sentence #" << sid
|
||||
<< " with more than 65536 tokens" << endl;
|
||||
expectedTotal -= stop-k;
|
||||
}
|
||||
else
|
||||
{
|
||||
totalCount += stop-k;
|
||||
for (; k < stop; ++k)
|
||||
{
|
||||
// cout << sid << " " << stop-k << " " << k->lemma << " " << k->id() << " " << sizeof(*k) << endl;
|
||||
id_type wid = k->id();
|
||||
while (wid >= cnt.size()) cnt.push_back(0);
|
||||
cnt[wid]++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (this->size() == filter.count())
|
||||
{
|
||||
if (totalCount != expectedTotal)
|
||||
cerr << "OOPS: expected " << expectedTotal
|
||||
<< " tokens but counted " << totalCount << endl;
|
||||
assert(totalCount == expectedTotal);
|
||||
}
|
||||
return totalCount;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
int
|
||||
Ttrack<TKN>::
|
||||
cmp(Position const& A, Position const& B, int keyLength) const
|
||||
{
|
||||
if (keyLength==0) return 2;
|
||||
assert(A.sid < this->size());
|
||||
assert(B.sid < this->size());
|
||||
|
||||
TKN const* a = getToken(A);
|
||||
TKN const* bosA = sntStart(A.sid);
|
||||
TKN const* eosA = sntEnd(A.sid);
|
||||
|
||||
TKN const* b = getToken(B);
|
||||
TKN const* bosB = sntStart(B.sid);
|
||||
TKN const* eosB = sntEnd(B.sid);
|
||||
|
||||
int ret=-1;
|
||||
|
||||
#if 0
|
||||
cerr << "A: "; for (TKN const* x = a; x; x = next(x)) cerr << x->lemma << " "; cerr << endl;
|
||||
cerr << "B: "; for (TKN const* x = b; x; x = next(x)) cerr << x->lemma << " "; cerr << endl;
|
||||
#endif
|
||||
|
||||
while (a >= bosA && a < eosA)
|
||||
{
|
||||
// cerr << keyLength << "a. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << endl;
|
||||
if (*a < *b) { break; } // return -1;
|
||||
if (*a > *b) { ret = 2; break; } // return 2;
|
||||
a = next(a);
|
||||
b = next(b);
|
||||
// cerr << keyLength << "b. " << (a ? a->lemma : 0) << " " << (b ? b->lemma : 0) << endl;
|
||||
if (--keyLength==0 || b < bosB || b >= eosB)
|
||||
{
|
||||
ret = (a < bosA || a >= eosA) ? 0 : 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// cerr << "RETURNING " << ret << endl;
|
||||
return ret;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
int
|
||||
Ttrack<TKN>::
|
||||
cmp(Position const& A, TKN const* key, int keyLength, int depth) const
|
||||
{
|
||||
if (keyLength==0 || !key) return 2;
|
||||
assert(A.sid < this->size());
|
||||
TKN const* x = getToken(A);
|
||||
TKN const* stopx = x->stop(*this,A.sid);
|
||||
for (int i = 0; i < depth; ++i)
|
||||
{
|
||||
x = x->next();
|
||||
if (x == stopx) return -1;
|
||||
// assert(x != stopx);
|
||||
}
|
||||
while (x != stopx)
|
||||
{
|
||||
if (*x < *key) return -1;
|
||||
if (*x > *key) return 2;
|
||||
key = key->next();
|
||||
x = x->next();
|
||||
if (--keyLength==0) // || !key)
|
||||
return (x == stopx) ? 0 : 1;
|
||||
assert(key);
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
TKN const*
|
||||
Ttrack<TKN>::
|
||||
find_next_within_sentence(TKN const* startKey, int keyLength,
|
||||
Position startHere) const
|
||||
{
|
||||
for (TKN const* t = getToken(startHere); t; t = getToken(startHere))
|
||||
{
|
||||
#if 0
|
||||
int foo = cmp(startHere,startKey,1);
|
||||
if (foo == 0 || foo ==1)
|
||||
{
|
||||
TKN const* k = startKey->next();
|
||||
TKN const* t2 = t->next();
|
||||
if (t2)
|
||||
{
|
||||
cout << t2->lemma << "." << int(t2->minpos) << " "
|
||||
<< k->lemma << "." << int(k->minpos) << " "
|
||||
<< t2->cmp(*k) << endl;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
int x = cmp(startHere,startKey,keyLength,0);
|
||||
if (x == 0 || x == 1) return t;
|
||||
startHere.offset++;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
typename Ttrack<TKN>::Position
|
||||
Ttrack<TKN>::
|
||||
find_first(TKN const* startKey, int keyLength, bdBitset const* filter) const
|
||||
{
|
||||
if (filter)
|
||||
{
|
||||
for (size_t sid = filter->find_first();
|
||||
sid < filter->size();
|
||||
sid = filter->find_next(sid))
|
||||
{
|
||||
TKN const* x = find_next_within_sentence(startKey,keyLength,Position(sid,0));
|
||||
if (x) return Position(sid,x-sntStart(sid));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (size_t sid = 0; sid < this->size(); ++sid)
|
||||
{
|
||||
TKN const* x = find_next_within_sentence(startKey,keyLength,Position(sid,0));
|
||||
if (x) return Position(sid,x-sntStart(sid));
|
||||
}
|
||||
}
|
||||
return Position(this->size(),0);
|
||||
}
|
||||
|
||||
template<typename TKN>
|
||||
typename Ttrack<TKN>::Position
|
||||
Ttrack<TKN>::
|
||||
find_next(TKN const* startKey, int keyLength, Position startAfter, bdBitset const* filter) const
|
||||
{
|
||||
id_type sid = startAfter.sid;
|
||||
startAfter.offset++;
|
||||
if (filter) assert(filter->test(sid));
|
||||
TKN const* x = find_next_within_sentence(startKey,keyLength,startAfter);
|
||||
if (x) return Position(sid,x -sntStart(sid));
|
||||
if (filter)
|
||||
{
|
||||
for (sid = filter->find_next(sid); sid < filter->size(); sid = filter->find_next(sid))
|
||||
{
|
||||
x = find_next_within_sentence(startKey,keyLength,Position(sid,0));
|
||||
if (x) break;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (++sid; sid < this->size(); sid++)
|
||||
{
|
||||
x = find_next_within_sentence(startKey,keyLength,Position(sid,0));
|
||||
if (x) break;
|
||||
}
|
||||
}
|
||||
if (x)
|
||||
return Position(sid,x-sntStart(sid));
|
||||
else
|
||||
return Position(this->size(),0);
|
||||
}
|
||||
|
||||
}
|
||||
#endif
|
9
moses/mm/ug_ttrack_position.cc
Normal file
9
moses/mm/ug_ttrack_position.cc
Normal file
@ -0,0 +1,9 @@
|
||||
#include "ug_ttrack_position.h"
|
||||
namespace ugdiss
|
||||
{
|
||||
namespace ttrack
|
||||
{
|
||||
Position::Position() : sid(0), offset(0) {};
|
||||
Position::Position(id_type _sid, ushort _off) : sid(_sid), offset(_off) {};
|
||||
}
|
||||
}
|
89
moses/mm/ug_ttrack_position.h
Normal file
89
moses/mm/ug_ttrack_position.h
Normal file
@ -0,0 +1,89 @@
|
||||
// -*- c++ -*-
|
||||
#ifndef __ug_ttrack_position_h
|
||||
#define __ug_ttrack_position_h
|
||||
|
||||
#include <cassert>
|
||||
#include "ug_typedefs.h"
|
||||
|
||||
// A token position in a Ttrack, with a LESS functor for comparing token
|
||||
// positions in whatever sorting order the underlying token type implies.
|
||||
//
|
||||
// (c) 2007-2010 Ulrich Germann. All rights reserved.
|
||||
|
||||
namespace ugdiss
|
||||
{
|
||||
namespace ttrack
|
||||
{
|
||||
/** Represents a position in a corpus (sentence Id + offset from beginning
|
||||
* of sentence) */
|
||||
class
|
||||
Position
|
||||
{
|
||||
public:
|
||||
id_type sid;
|
||||
ushort offset;
|
||||
Position();
|
||||
Position(id_type _sid, ushort _off);
|
||||
template<typename TTRACK_TYPE> class LESS; // probably abandoned
|
||||
}; // end of deklaration of Position
|
||||
|
||||
#if 1
|
||||
template<typename TTRACK_TYPE>
|
||||
class
|
||||
Position::
|
||||
LESS
|
||||
{
|
||||
TTRACK_TYPE const* c;
|
||||
public:
|
||||
typedef typename TTRACK_TYPE::Token Token;
|
||||
|
||||
LESS(TTRACK_TYPE const* crp) : c(crp) {};
|
||||
|
||||
bool operator()(Position const& A, Position const& B) const
|
||||
{
|
||||
Token const* a = c->getToken(A); assert(a);
|
||||
Token const* b = c->getToken(B); assert(b);
|
||||
|
||||
if (a == b) return false;
|
||||
|
||||
Token const* bosA = c->sntStart(A.sid);
|
||||
Token const* eosA = c->sntEnd(A.sid);
|
||||
|
||||
Token const* bosB = c->sntStart(B.sid);
|
||||
Token const* eosB = c->sntEnd(B.sid);
|
||||
|
||||
#if 0
|
||||
Token const* z = a;
|
||||
cout << "A: " << z->id();
|
||||
for (z = next(z); z >= bosA && z < eosA; z = next(z))
|
||||
cout << "-" << z->id();
|
||||
cout << endl;
|
||||
|
||||
z = b;
|
||||
cout << "B: " << z->id();
|
||||
for (z = next(z); z >= bosB && z < eosB; z = next(z))
|
||||
cout << "-" << z->id();
|
||||
cout << endl;
|
||||
#endif
|
||||
while (*a == *b)
|
||||
{
|
||||
a = next(a);
|
||||
b = next(b);
|
||||
if (a < bosA || a >= eosA)
|
||||
return (b >= bosB && b < eosB);
|
||||
if (b < bosB || b >= eosB)
|
||||
return false;
|
||||
}
|
||||
int x = a->cmp(*b);
|
||||
|
||||
// cout << " " << (x < 0 ? "YES" : "NO") << endl;
|
||||
|
||||
assert (x != 0);
|
||||
return x < 0;
|
||||
}
|
||||
}; // end of definition of LESS
|
||||
#endif
|
||||
} // end of namespace ttrack
|
||||
} // end of namespace ugdiss
|
||||
#endif
|
||||
|
36
moses/mm/ug_typedefs.h
Normal file
36
moses/mm/ug_typedefs.h
Normal file
@ -0,0 +1,36 @@
|
||||
// -*- c++ -*-
|
||||
// typedefs for Uli Germann's stuff
|
||||
#ifndef __ug_typedefs_h
|
||||
#define __ug_typedefs_h
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <boost/shared_ptr.hpp>
|
||||
#include <boost/scoped_ptr.hpp>
|
||||
#include <vector>
|
||||
#include <stdint.h>
|
||||
#include "tpt_typedefs.h"
|
||||
namespace ugdiss
|
||||
{
|
||||
using namespace std;
|
||||
typedef boost::dynamic_bitset<uint64_t> bitvector;
|
||||
|
||||
typedef vector<vector<float> > flt_2d_table;
|
||||
typedef vector<flt_2d_table> flt_3d_table;
|
||||
typedef vector<flt_3d_table> flt_4d_table;
|
||||
|
||||
typedef vector<vector<ushort> > ushort_2d_table;
|
||||
typedef vector<ushort_2d_table> ushort_3d_table;
|
||||
typedef vector<ushort_3d_table> ushort_4d_table;
|
||||
|
||||
typedef vector<vector<short> > short_2d_table;
|
||||
typedef vector<short_2d_table> short_3d_table;
|
||||
typedef vector<short_3d_table> short_4d_table;
|
||||
|
||||
typedef vector<vector<int> > int_2d_table;
|
||||
typedef vector<int_2d_table> int_3d_table;
|
||||
typedef vector<int_3d_table> int_4d_table;
|
||||
}
|
||||
|
||||
#define sptr boost::shared_ptr
|
||||
#define scoptr boost::scoped_ptr
|
||||
#define rcast reinterpret_cast
|
||||
#endif
|
Loading…
Reference in New Issue
Block a user