mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-25 04:43:03 +03:00
Added member function find_trg_phr_bounds(PhraseExtractionRecord& rec) to Bitext class.
This commit is contained in:
parent
8ae2894107
commit
f87f123366
@ -35,6 +35,7 @@
|
|||||||
#include "moses/TranslationModel/UG/generic/sampling/Sampling.h"
|
#include "moses/TranslationModel/UG/generic/sampling/Sampling.h"
|
||||||
#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"
|
#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"
|
||||||
#include "moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h"
|
#include "moses/TranslationModel/UG/generic/threading/ug_thread_safe_counter.h"
|
||||||
|
#include "moses/TranslationModel/UG/generic/threading/ug_ref_counter.h"
|
||||||
// #include "moses/FF/LexicalReordering/LexicalReorderingState.h"
|
// #include "moses/FF/LexicalReordering/LexicalReorderingState.h"
|
||||||
#include "moses/Util.h"
|
#include "moses/Util.h"
|
||||||
|
|
||||||
@ -97,6 +98,7 @@ namespace Moses {
|
|||||||
class Bitext : public reference_counter
|
class Bitext : public reference_counter
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
template<typename Token> friend class BitextSampler;
|
||||||
typedef TKN Token;
|
typedef TKN Token;
|
||||||
typedef typename TSA<Token>::tree_iterator iter;
|
typedef typename TSA<Token>::tree_iterator iter;
|
||||||
typedef typename std::vector<PhrasePair<Token> > vec_ppair;
|
typedef typename std::vector<PhrasePair<Token> > vec_ppair;
|
||||||
@ -136,6 +138,7 @@ namespace Moses {
|
|||||||
// points of the target phrase; if non-NULL, store word
|
// points of the target phrase; if non-NULL, store word
|
||||||
// alignments in *core_alignment. If /flip/, source phrase is
|
// alignments in *core_alignment. If /flip/, source phrase is
|
||||||
// L2.
|
// L2.
|
||||||
|
bool find_trg_phr_bounds(PhraseExtractionRecord& rec);
|
||||||
bool find_trg_phr_bounds
|
bool find_trg_phr_bounds
|
||||||
( size_t const sid, // sentence to investigate
|
( size_t const sid, // sentence to investigate
|
||||||
size_t const start, // start of source phrase
|
size_t const start, // start of source phrase
|
||||||
@ -147,8 +150,6 @@ namespace Moses {
|
|||||||
bitvector* full_alignment, // stores full word alignment for this sent.
|
bitvector* full_alignment, // stores full word alignment for this sent.
|
||||||
bool const flip) const; // flip source and target (reverse lookup)
|
bool const flip) const; // flip source and target (reverse lookup)
|
||||||
|
|
||||||
bool find_trg_phr_bounds(PhraseExtractionRecord& rec) const;
|
|
||||||
|
|
||||||
// prep2 launches sampling and returns immediately.
|
// prep2 launches sampling and returns immediately.
|
||||||
// lookup (below) waits for the job to finish before it returns
|
// lookup (below) waits for the job to finish before it returns
|
||||||
sptr<pstats>
|
sptr<pstats>
|
||||||
@ -182,7 +183,6 @@ namespace Moses {
|
|||||||
void prep(ttasksptr const& ttask, iter const& phrase) const;
|
void prep(ttasksptr const& ttask, iter const& phrase) const;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
void setDefaultSampleSize(size_t const max_samples);
|
void setDefaultSampleSize(size_t const max_samples);
|
||||||
size_t getDefaultSampleSize() const;
|
size_t getDefaultSampleSize() const;
|
||||||
|
|
||||||
@ -337,21 +337,24 @@ namespace Moses {
|
|||||||
bool
|
bool
|
||||||
Bitext<Token>::
|
Bitext<Token>::
|
||||||
find_trg_phr_bounds
|
find_trg_phr_bounds
|
||||||
(size_t const sid, size_t const start, size_t const stop,
|
( size_t const sid, // sentence to investigate
|
||||||
size_t & s1, size_t & s2, size_t & e1, size_t & e2,
|
size_t const start, // start of source phrase
|
||||||
int & po_fwd, int & po_bwd,
|
size_t const stop, // last position of source phrase
|
||||||
std::vector<uchar>* core_alignment, bitvector* full_alignment,
|
size_t & s1, size_t & s2, // beginning and end of target start
|
||||||
bool const flip) const
|
size_t & e1, size_t & e2, // beginning and end of target end
|
||||||
|
int& po_fwd, int& po_bwd, // phrase orientations
|
||||||
|
std::vector<uchar> * core_alignment, // stores the core alignment
|
||||||
|
bitvector* full_alignment, // stores full word alignment for this sent.
|
||||||
|
bool const flip) const // flip source and target (reverse lookup)
|
||||||
{
|
{
|
||||||
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
|
// if (core_alignment) cout << "HAVE CORE ALIGNMENT" << endl;
|
||||||
|
// a word on the core alignment (core_alignment):
|
||||||
// a word on the core_alignment:
|
|
||||||
//
|
//
|
||||||
// since fringe words ([s1,...,s2),[e1,..,e2) if s1 < s2, or e1
|
// Since fringe words ([s1,...,s2),[e1,..,e2) if s1 < s2, or e1
|
||||||
// < e2, respectively) are by definition unaligned, we store
|
// < e2, respectively) are by definition unaligned, we store
|
||||||
// only the core alignment in *core_alignment it is up to the
|
// only the core alignment in *core_alignment. It is up to the calling
|
||||||
// calling function to shift alignment points over for start
|
// function to shift alignment points over for start positions
|
||||||
// positions of extracted phrases that start with a fringe word
|
// of extracted phrases that start with a fringe word
|
||||||
assert(T1);
|
assert(T1);
|
||||||
assert(T2);
|
assert(T2);
|
||||||
assert(Tx);
|
assert(Tx);
|
||||||
@ -378,18 +381,28 @@ namespace Moses {
|
|||||||
size_t lft = forbidden.size();
|
size_t lft = forbidden.size();
|
||||||
size_t rgt = 0;
|
size_t rgt = 0;
|
||||||
std::vector<std::vector<ushort> > aln1(slen1),aln2(slen2);
|
std::vector<std::vector<ushort> > aln1(slen1),aln2(slen2);
|
||||||
|
|
||||||
|
// process word alignment for this sentence
|
||||||
char const* p = Tx->sntStart(sid);
|
char const* p = Tx->sntStart(sid);
|
||||||
char const* x = Tx->sntEnd(sid);
|
char const* x = Tx->sntEnd(sid);
|
||||||
|
|
||||||
while (p < x)
|
while (p < x)
|
||||||
{
|
{
|
||||||
if (flip) { p = binread(p,trg); assert(p<x); p = binread(p,src); }
|
if (flip)
|
||||||
else { p = binread(p,src); assert(p<x); p = binread(p,trg); }
|
{
|
||||||
|
p = binread(p,trg);
|
||||||
|
assert(p<x);
|
||||||
|
p = binread(p,src);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
p = binread(p,src);
|
||||||
|
assert(p<x);
|
||||||
|
p = binread(p,trg);
|
||||||
|
}
|
||||||
|
|
||||||
UTIL_THROW_IF2((src >= slen1 || trg >= slen2),
|
UTIL_THROW_IF2((src >= slen1 || trg >= slen2),
|
||||||
"Alignment range error at sentence " << sid << "!\n"
|
"Alignment range error at sentence " << sid << "!\n"
|
||||||
<< src << "/" << slen1 << " " <<
|
<< src << "/" << slen1 << " " << trg << "/" << slen2);
|
||||||
trg << "/" << slen2);
|
|
||||||
|
|
||||||
if (src < start || src >= stop)
|
if (src < start || src >= stop)
|
||||||
forbidden.set(trg);
|
forbidden.set(trg);
|
||||||
|
Loading…
Reference in New Issue
Block a user