Made a few things compile with Makefile outside of the bjam compile.

This commit is contained in:
Ulrich Germann 2015-10-30 14:31:24 +00:00
parent 07c8614877
commit b40bc81811
18 changed files with 190 additions and 66 deletions

View File

@ -1,5 +1,5 @@
#BUILDING MOSES
#
#PACKAGES
#Language models (optional):
#--with-irstlm=/path/to/irstlm
@ -245,7 +245,7 @@ if [ option.get "with-mm" : : "yes" ]
moses/TranslationModel/UG//ptable-describe-features
moses/TranslationModel/UG//count-ptable-features
moses/TranslationModel/UG//ptable-lookup
# moses/TranslationModel/UG//spe-check-coverage
moses/TranslationModel/UG//check-coverage
moses/TranslationModel/UG/mm//mtt-demo1
moses/TranslationModel/UG/mm//mtt-build
moses/TranslationModel/UG/mm//mtt-dump
@ -256,6 +256,7 @@ if [ option.get "with-mm" : : "yes" ]
moses/TranslationModel/UG/mm//mmlex-lookup
moses/TranslationModel/UG/mm//mtt-count-words
moses/TranslationModel/UG/mm//calc-coverage
moses/TranslationModel/UG//check-coverage
moses/TranslationModel/UG//try-align
;
}

View File

@ -59,6 +59,18 @@ $(TOP)/moses/TranslationModel/UG//mmsapt
$(TOP)/util//kenutil
;
exe check-coverage :
check-coverage.cc
$(TOP)/moses//moses
$(TOP)/moses/TranslationModel/UG/generic//generic
$(TOP)//boost_iostreams
$(TOP)//boost_filesystem
$(TOP)//boost_program_options
$(TOP)/moses/TranslationModel/UG/mm//mm
$(TOP)/moses/TranslationModel/UG//mmsapt
$(TOP)/util//kenutil
;
exe sim-pe :
sim-pe.cc
$(TOP)/moses//moses

View File

@ -17,7 +17,7 @@ echo $$d
endef
MOSES_ROOT := $(shell $(find_moses_root))
$(info MOSES_ROOT=${MOSES_ROOT})
# ===============================================================================
# COMPILATION PREFERENCES
# ===============================================================================
@ -35,7 +35,9 @@ CXXFLAGS += -DMAX_NUM_FACTORS=4
CXXFLAGS += -DKENLM_MAX_ORDER=5
CXXFLAGS += -DWITH_THREADS
CXXFLAGS += -DNO_MOSES
CXXFLAGS += -I${MOSES_ROOT} -I.
CXXFLAGS += -DMMT
CXXFLAGS += -I$(dir ${MOSES_ROOT})mmt-only
CXXFLAGS += -I${MOSES_ROOT} -I. -I${MOSES_ROOT}/opt/include
ifeq ($(variant),debug)
CXXFLAGS += -ggdb -O0
@ -45,7 +47,7 @@ else ifeq ($(variant),syntax)
CXXFLAGS += -fsyntax-only
endif
# LDFLAGS = -L${MOSES_ROOT}/lib -L ./lib/
LDFLAGS = -L${MOSES_ROOT}/opt/lib64 -L./lib/
# WDIR = build/$(variant)/${HOSTTYPE}/${KERNEL}
WDIR = build/$(variant)
@ -60,14 +62,22 @@ nil:
# libraries required
LIBS = m z bz2 pthread dl ${BOOSTLIBS}
#LIBS += tcmalloc
BOOSTLIBS := thread system filesystem program_options iostreams
BOOSTLIBS := $(addprefix boost_,${BOOSTLIBS})
ifdef ($(BOOSTLIBTAG),"")
BOOSTLIBS := program_options iostreams thread system filesystem
BOOSTLIBS := $(addprefix -lboost_,${BOOSTLIBS})
ifeq ($(BOOSTLIBTAG),"")
BOOSTLIBS := $(addsuffix ${BOOSTLIBTAG},${BOOSTLIBS})
endif
STATIC_LIBS = m bz2 z dl rt
DYNAMIC_LIBS = pthread
#DYNAMIC_LIBS += tcmalloc
LIBS = -Wl,-B$(link)
LIBS += -L${MOSES_ROOT}/opt/lib64 ${BOOSTLIBS}
LIBS += $(addprefix -l,${STATIC_LIBS})
LIBS += -Wl,-Bdynamic
LIBS += $(addprefix -l,${DYNAMIC_LIBS})
cc2obj = $(addsuffix .o,$(patsubst ${MOSES_ROOT}%,$(WDIR)%,\
$(patsubst .%,$(WDIR)%,$(basename $1))))
cc2exe = $(addprefix ./bin/$(variant)/,$(basename $(notdir $1)))
@ -79,7 +89,7 @@ DEP += $(basename $(call cc2obj,$1)).d
$(call cc2obj,$1): $1
@echo -e "COMPILING $1"
@mkdir -p $$(@D)
@${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@
${CXX} ${CXXFLAGS} -MD -MP -c $$< -o $$@
endef
@ -90,7 +100,7 @@ $(call cc2exe,$1): $(call cc2obj,$1) $(LIBOBJ)
ifneq ($(variant),syntax)
@echo -e "LINKING $$@"
@mkdir -p $${@D}
@${CXX} ${CXXFLAGS} -o $$@ $(LIBOBJ) $(addprefix -l,${LIBS}) $$<
${CXX} ${CXXFLAGS} -o $$@ $$< $(LIBOBJ) ${LIBS}
endif
endef
@ -106,7 +116,8 @@ skip += ug_splice_arglist.cc
# skip += ug_lexical_reordering.cc
# objects from elsewhere in the moses tree that are needed
extra = ${MOSES_ROOT}/util/exception.cc
extra = ${MOSES_ROOT}/util/exception.cc
extra += ${MOSES_ROOT}/util/integer_to_string.cc
$(foreach f,$(skip),$(eval broken+=$(shell find -name $f)))
broken += $(wildcard ./mm/stashed/*)

View File

@ -0,0 +1,81 @@
// #include "mmsapt.h"
// #include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h"
// #include "moses/TranslationTask.h"
#include <boost/foreach.hpp>
#include <boost/format.hpp>
#include <boost/tokenizer.hpp>
#include <boost/shared_ptr.hpp>
#include <algorithm>
#include <iostream>
#include "mm/ug_bitext.h"
#include "generic/file_io/ug_stream.h"
#include <string>
#include <sstream>
using namespace Moses;
using namespace sapt;
using namespace std;
using namespace boost;
typedef sapt::L2R_Token<sapt::SimpleWordId> Token;
typedef mmBitext<Token> bitext_t;
// Ordering predicate for (name, count) pairs: an entry with a larger
// count sorts before an entry with a smaller one, so std::sort with this
// comparator yields a descending-by-count ranking. The name component
// (pair::first) is not consulted; entries with equal counts compare as
// equivalent.
struct mycmp
{
  bool operator() (std::pair<std::string,uint32_t> const& lhs,
                   std::pair<std::string,uint32_t> const& rhs) const
  {
    // "lhs goes first" iff rhs has the strictly smaller count
    return rhs.second < lhs.second;
  }
};
// Return the last path component of /path/, with /suffix/ removed from
// its end if (and only if) the component actually ends in /suffix/.
//
//   basename("/a/b/doc.en.gz", ".en.gz") -> "doc"
//   basename("doc.en.gz",      ".en.gz") -> "doc"      (no directory part)
//   basename("/a/b/doc.txt",   ".en.gz") -> "doc.txt"  (suffix absent)
//
// The result never contains the separating '/'. A path without any '/'
// is treated as a bare file name, and a suffix longer than the file name
// is simply ignored. The function only computes a value; it performs no
// output.
std::string
basename(std::string const& path, std::string const& suffix)
{
  // Start right behind the last '/', or at the very beginning if the
  // path has no directory part (find_last_of returns npos in that case).
  std::string::size_type const slash = path.find_last_of('/');
  std::string::size_type const start
    = (slash == std::string::npos) ? 0 : slash + 1;

  // Length of the file name proper; start <= path.size() always holds.
  std::string::size_type len = path.size() - start;

  // Only compare when the suffix fits into the file name; this keeps the
  // offset computation below from wrapping around.
  if (suffix.size() <= len &&
      path.compare(path.size() - suffix.size(), suffix.size(), suffix) == 0)
    len -= suffix.size();

  return path.substr(start, len);
}
int main(int argc, char* argv[])
{
bitext_t B;
B.open(argv[1],argv[2],argv[3]);
string line;
string ifile = argv[4];
string docname = basename(ifile, string(".") + argv[2] + ".gz");
boost::iostreams::filtering_istream in;
ugdiss::open_input_stream(ifile,in);
while(getline(in,line))
{
cout << line << " [" << docname << "]" << endl;
vector<id_type> snt;
B.V1->fillIdSeq(line,snt);
for (size_t i = 0; i < snt.size(); ++i)
{
bitext_t::iter m(B.I1.get());
for (size_t k = i; k < snt.size() && m.extend(snt[k]); ++k)
{
if (m.ca() > 500) continue;
sapt::tsa::ArrayEntry I(m.lower_bound(-1));
char const* stop = m.upper_bound(-1);
map<string,uint32_t> cnt;
while (I.next != stop)
{
m.root->readEntry(I.next,I);
++cnt[B.docname(I.sid)];
}
cout << setw(8) << int(m.ca()) << " " << B.V1->toString(&snt[i],&snt[k+1]) << endl;
typedef pair<string,uint32_t> entry;
vector<entry> ranked; ranked.reserve(cnt.size());
BOOST_FOREACH(entry const& e, cnt) ranked.push_back(e);
sort(ranked.begin(),ranked.end(),mycmp());
BOOST_FOREACH(entry const& e, ranked)
cout << setw(12) << " " << e.second << " " << e.first << endl;
cout << endl;
}
}
}
}

View File

@ -29,7 +29,7 @@ HOST ?= $(shell hostname)
HOSTTYPE ?= $(shell uname -m)
KERNEL = $(shell uname -r)
MOSES_ROOT = ${HOME}/code/mosesdecoder
MOSES_ROOT ?= ${HOME}/code/mosesdecoder
WDIR = build/${HOSTTYPE}/${KERNEL}/${OPTI}
VPATH = ${HOME}/code/mosesdecoder/
CXXFLAGS = ${PROF} -ggdb -Wall -O${OPTI} ${INCLUDES}

View File

@ -28,8 +28,8 @@ Bitext<Token>::agenda
while (j->nextSample(sid,offset))
{
aln.clear();
int po_fwd = Moses::LRModel::NONE;
int po_bwd = Moses::LRModel::NONE;
int po_fwd = LRModel::NONE;
int po_bwd = LRModel::NONE;
int docid = j->m_bias ? j->m_bias->GetClass(sid) : -1;
bitvector* full_aln = j->fwd ? &full_alignment : NULL;

View File

@ -17,7 +17,7 @@ namespace sapt
jstats()
: my_rcnt(0), my_cnt2(0), my_wcnt(0), my_bcnt(0)
{
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
for (int i = 0; i <= LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0;
my_aln.reserve(1);
}
@ -30,7 +30,7 @@ namespace sapt
my_bcnt = other.bcnt();
my_aln = other.aln();
indoc = other.indoc;
for (int i = 0; i <= Moses::LRModel::NONE; i++)
for (int i = 0; i <= LRModel::NONE; i++)
{
ofwd[i] = other.ofwd[i];
obwd[i] = other.obwd[i];
@ -41,7 +41,7 @@ namespace sapt
jstats::
dcnt_fwd(PhraseOrientation const idx) const
{
assert(idx <= Moses::LRModel::NONE);
assert(idx <= LRModel::NONE);
return ofwd[idx];
}
@ -49,7 +49,7 @@ namespace sapt
jstats::
dcnt_bwd(PhraseOrientation const idx) const
{
assert(idx <= Moses::LRModel::NONE);
assert(idx <= LRModel::NONE);
return obwd[idx];
}

View File

@ -24,8 +24,8 @@ namespace sapt
std::vector<std::pair<size_t, std::vector<unsigned char> > > my_aln;
// internal word alignment
uint32_t ofwd[Moses::LRModel::NONE+1]; // forward distortion type counts
uint32_t obwd[Moses::LRModel::NONE+1]; // backward distortion type counts
uint32_t ofwd[LRModel::NONE+1]; // forward distortion type counts
uint32_t obwd[LRModel::NONE+1]; // backward distortion type counts
public:
std::map<uint32_t,uint32_t> indoc;
@ -48,8 +48,8 @@ namespace sapt
bool valid();
uint32_t dcnt_fwd(PhraseOrientation const idx) const;
uint32_t dcnt_bwd(PhraseOrientation const idx) const;
void fill_lr_vec(Moses::LRModel::Direction const& dir,
Moses::LRModel::ModelType const& mdl,
void fill_lr_vec(LRModel::Direction const& dir,
LRModel::ModelType const& mdl,
std::vector<float>& v);
};
}

View File

@ -12,7 +12,7 @@ namespace sapt
pstats::
pstats() : raw_cnt(0), sample_cnt(0), good(0), sum_pairs(0), in_progress(0)
{
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
for (int i = 0; i <= LRModel::NONE; ++i)
ofwd[i] = obwd[i] = 0;
}

View File

@ -30,8 +30,8 @@ namespace sapt
size_t sum_pairs; // total number of target phrases extracted (can be > raw_cnt)
size_t in_progress; // how many threads are currently working on this?
uint32_t ofwd[Moses::LRModel::NONE+1]; // distribution of fwd phrase orientations
uint32_t obwd[Moses::LRModel::NONE+1]; // distribution of bwd phrase orientations
uint32_t ofwd[LRModel::NONE+1]; // distribution of fwd phrase orientations
uint32_t obwd[LRModel::NONE+1]; // distribution of bwd phrase orientations
indoc_map_t indoc;
trg_map_t trg;
@ -43,14 +43,14 @@ namespace sapt
bool
add(uint64_t const pid, // target phrase id
float const w, // sample weight (1./(# of phrases extractable))
float const b, // sample bias score
alnvec const& a, // local alignment
uint32_t const cnt2, // raw target phrase count
uint32_t fwd_o, // fwd. phrase orientation
uint32_t bwd_o, // bwd. phrase orientation
int const docid); // document where sample was found
float const w, // sample weight (1./(# of phrases extractable))
float const b, // sample bias score
alnvec const& a, // local alignment
uint32_t const cnt2, // raw target phrase count
uint32_t fwd_o, // fwd. phrase orientation
uint32_t bwd_o, // bwd. phrase orientation
int const docid); // document where sample was found
void
count_sample(int const docid, // document where sample was found
size_t const num_pairs, // # of phrases extractable here

View File

@ -74,8 +74,11 @@ BitextSampler : public Moses::reference_counter
public:
BitextSampler(BitextSampler const& other);
BitextSampler const& operator=(BitextSampler const& other);
BitextSampler(bitext const* const bitext, typename bitext::iter const& phrase,
SPTR<SamplingBias const> const& bias, size_t const min_samples, size_t const max_samples,
BitextSampler(bitext const* const bitext,
typename bitext::iter const& phrase,
SPTR<SamplingBias const> const& bias,
size_t const min_samples,
size_t const max_samples,
sampling_method const method);
~BitextSampler();
SPTR<pstats> stats();

View File

@ -227,7 +227,9 @@ namespace sapt
// Now sort the array
if (log) *log << "sorting .... with " << threads << " threads." << std::endl;
#ifndef NO_MOSES
double start_time = util::WallTime();
#endif
boost::scoped_ptr<ug::ThreadPool> tpool;
tpool.reset(new ug::ThreadPool(threads));
@ -252,8 +254,10 @@ namespace sapt
}
}
tpool.reset();
#ifndef NO_MOSES
if (log) *log << "Done sorting after " << util::WallTime() - start_time
<< " seconds." << std::endl;
#endif
this->startArray = reinterpret_cast<char const*>(&(*sufa.begin()));
this->endArray = reinterpret_cast<char const*>(&(*sufa.end()));
this->numTokens = sufa.size();

View File

@ -4,7 +4,7 @@ namespace sapt
{
using namespace std;
Moses::LRModel::ReorderingType po_other = Moses::LRModel::NONE;
LRModel::ReorderingType po_other = LRModel::NONE;
// check if min and max in the alignment vector v are within the
// bounds LFT and RGT and update the actual bounds L and R; update
// the total count of alignment links in the underlying phrase
@ -83,54 +83,56 @@ namespace sapt
return ret;
}
Moses::LRModel::ReorderingType
// LRModel::ReorderingType
sapt::PhraseOrientation
find_po_fwd(vector<vector<ushort> >& a1,
vector<vector<ushort> >& a2,
size_t s1, size_t e1,
size_t s2, size_t e2)
{
if (e2 == a2.size()) // end of target sentence
return Moses::LRModel::M;
return LRModel::M;
size_t y = e2, L = e2, R = a2.size()-1; // won't change
size_t x = e1, T = e1, B = a1.size()-1;
if (e1 < a1.size() && expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::M;
return LRModel::M;
B = x = s1-1; T = 0;
if (s1 && expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::S;
return LRModel::S;
while (e2 < a2.size() && a2[e2].size() == 0) ++e2;
if (e2 == a2.size()) // should never happen, actually
return Moses::LRModel::NONE;
return LRModel::NONE;
if (a2[e2].back() < s1)
return Moses::LRModel::DL;
return LRModel::DL;
if (a2[e2].front() >= e1)
return Moses::LRModel::DR;
return Moses::LRModel::NONE;
return LRModel::DR;
return LRModel::NONE;
}
Moses::LRModel::ReorderingType
// LRModel::ReorderingType
PhraseOrientation
find_po_bwd(vector<vector<ushort> >& a1,
vector<vector<ushort> >& a2,
size_t s1, size_t e1,
size_t s2, size_t e2)
{
if (s1 == 0 && s2 == 0) return Moses::LRModel::M;
if (s2 == 0) return Moses::LRModel::DR;
if (s1 == 0) return Moses::LRModel::DL;
if (s1 == 0 && s2 == 0) return LRModel::M;
if (s2 == 0) return LRModel::DR;
if (s1 == 0) return LRModel::DL;
size_t y = s2-1, L = 0, R = s2-1; // won't change
size_t x = s1-1, T = 0, B = s1-1;
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::M;
return LRModel::M;
T = x = e1; B = a1.size()-1;
if (expand_block(a1,a2,x,y,T,L,B,R) >= 0)
return Moses::LRModel::S;
return LRModel::S;
while (s2-- && a2[s2].size() == 0);
Moses::LRModel::ReorderingType ret;
LRModel::ReorderingType ret;
ret = (a2[s2].size() == 0 ? po_other :
a2[s2].back() < s1 ? Moses::LRModel::DR :
a2[s2].front() >= e1 ? Moses::LRModel::DL :
a2[s2].back() < s1 ? LRModel::DR :
a2[s2].front() >= e1 ? LRModel::DL :
po_other);
#if 0
cout << "s1=" << s1 << endl;

View File

@ -12,7 +12,7 @@ namespace sapt {
#ifdef NO_MOSES
class LRModel{
public:
enum ModelType { Monotonic, MSD, MSLR, LeftRight, None };
enum Direction { Forward, Backward, Bidirectional };

View File

@ -26,8 +26,8 @@ namespace sapt
uint32_t raw1, raw2, sample1, sample2, good1, good2, joint;
float cum_bias;
std::vector<float> fvals;
float dfwd[Moses::LRModel::NONE+1]; // distortion counts // counts or probs?
float dbwd[Moses::LRModel::NONE+1]; // distortion counts
float dfwd[LRModel::NONE+1]; // distortion counts // counts or probs?
float dbwd[LRModel::NONE+1]; // distortion counts
std::vector<unsigned char> aln;
float score;
bool inverse;
@ -125,7 +125,7 @@ namespace sapt
// }
// should we do that here or leave the raw counts?
for (int i = 0; i <= Moses::LRModel::NONE; i++)
for (int i = 0; i <= LRModel::NONE; i++)
{
PhraseOrientation po = static_cast<PhraseOrientation>(i);
dfwd[i] = js.dcnt_fwd(po);
@ -201,7 +201,7 @@ namespace sapt
, inverse(o.inverse)
, indoc(o.indoc)
{
for (int i = 0; i <= Moses::LRModel::NONE; ++i)
for (int i = 0; i <= LRModel::NONE; ++i)
{
dfwd[i] = o.dfwd[i];
dbwd[i] = o.dbwd[i];

View File

@ -63,7 +63,9 @@ namespace Moses
, btfix(new mmbitext)
, m_bias_log(NULL)
, m_bias_loglevel(0)
#ifndef NO_MOSES
, m_lr_func(NULL)
#endif
, m_sampling_method(random_sampling)
, bias_key(((char*)this)+3)
, cache_key(((char*)this)+2)
@ -597,6 +599,7 @@ namespace Moses
// Evaluate with all features that can be computed using available factors
tp->EvaluateInIsolation(src, m_featuresToApply);
#ifndef NO_MOSES
if (m_lr_func)
{
LRModel::ModelType mdl = m_lr_func->GetModel().GetModelType();
@ -605,6 +608,7 @@ namespace Moses
pool.fill_lr_vec(dir, mdl, *scores);
tp->SetExtraScores(m_lr_func, scores);
}
#endif
return tp;
}
@ -879,6 +883,7 @@ namespace Moses
if (!context->cache1) context->cache1.reset(new pstats::cache_t);
if (!context->cache2) context->cache2.reset(new pstats::cache_t);
#ifndef NO_MOSES
if (m_lr_func_name.size() && m_lr_func == NULL)
{
FeatureFunction* lr = &FeatureFunction::FindFeatureFunction(m_lr_func_name);
@ -887,6 +892,7 @@ namespace Moses
<< " does not seem to be a lexical reordering function!");
// todo: verify that lr_func implements a hierarchical reordering model
}
#endif
}
bool

View File

@ -26,7 +26,9 @@
#include "moses/TranslationModel/UG/TargetPhraseCollectionCache.h"
#ifndef NO_MOSES
#include "moses/FF/LexicalReordering/LexicalReordering.h"
#endif
#include "moses/InputFileStream.h"
#include "moses/FactorTypeSet.h"
@ -82,7 +84,9 @@ namespace Moses
boost::scoped_ptr<std::ofstream> m_bias_logger; // for logging to a file
std::ostream* m_bias_log;
int m_bias_loglevel;
#ifndef NO_MOSES
LexicalReordering* m_lr_func; // associated lexical reordering function
#endif
std::string m_lr_func_name; // name of associated lexical reordering function
sapt::sampling_method m_sampling_method; // sampling method, see ug_bitext_sampler
boost::scoped_ptr<ug::ThreadPool> m_thread_pool;

View File

@ -1,7 +1,7 @@
# -*- makefile -*-
# # -*- makefile -*-
MOSES_CODE=/fs/gna0/germann/code/mosesdecoder
MOSES_ROOT=/fs/gna0/germann/moses
LIBS = $(addprefix -l,moses icuuc icuio icui18n boost_iostreams)
ibm1-align: ibm1-align.cc
g++ -o $@ -L ${MOSES_ROOT}/lib -I ${MOSES_CODE} $^ ${LIBS} -ggdb
# MOSES_CODE=/fs/gna0/germann/code/mosesdecoder
# MOSES_ROOT=/fs/gna0/germann/moses
# LIBS = $(addprefix -l,moses icuuc icuio icui18n boost_iostreams)
# ibm1-align: ibm1-align.cc
# g++ -o $@ -L ${MOSES_ROOT}/lib -I ${MOSES_CODE} $^ ${LIBS} -ggdb