mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-05 15:58:03 +03:00
Support storing coordinates of target phrase
- Keep track of named spaces in StaticData
- Adding coords to phrases implemented for Mmsapt
This commit is contained in:
parent
d29916bbb3
commit
ae1e51d81a
@ -68,13 +68,8 @@ public:
|
||||
size_t m_frontSpanCoveredLength;
|
||||
// how many words from the beginning are covered
|
||||
|
||||
// Coordinates in user-defined spaces, indexed by phrase dictionary pointer
|
||||
// Looking up PD* returns a vector of the input's coordinates in each space
|
||||
// known to the PD, in order (vector of pointers to float vectors). This
|
||||
// allows different models to use different subsets of all named spaces.
|
||||
typedef std::vector<boost::shared_ptr<std::vector<float> > > INCOORD;
|
||||
typedef std::map<PhraseDictionary const*, INCOORD> PD2IC;
|
||||
boost::shared_ptr<PD2IC> m_pd2InputCoord;
|
||||
// Coordinates in user-defined spaces (see "coord" XML tag)
|
||||
SPTR<std::map<size_t const, std::vector<float> > > m_coordMap;
|
||||
|
||||
InputType(AllOptions::ptr const& opts, long translationId = 0);
|
||||
virtual ~InputType();
|
||||
|
@ -936,4 +936,25 @@ void StaticData::ResetWeights(const std::string &denseWeights, const std::string
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Look up the numeric ID registered for a named coordinate space.
 *
 * @param space  name of the coordinate space
 * @return the ID stored in m_coordSpaceMap for this name, or 0 when the name
 *         is not present in the map
 */
size_t StaticData::GetCoordSpace(string space) const
{
  // m_coordSpaceMap is declared with a `std::string const` key, so the
  // iterator must come from that same specialization: the iterator of
  // map<string, size_t> belongs to a different class template instance and
  // is not guaranteed to be convertible.
  std::map<std::string const, size_t>::const_iterator const found
    = m_coordSpaceMap.find(space);
  return (found == m_coordSpaceMap.end()) ? 0 : found->second;
}
|
||||
|
||||
/**
 * Return the numeric ID of a named coordinate space, registering the name
 * first if it has not been seen before.
 *
 * A new name receives the current value of m_coordSpaceNextID, which is then
 * incremented; a known name keeps the ID it was given earlier.
 *
 * @param space  name of the coordinate space
 * @return the ID associated with the name in m_coordSpaceMap
 */
size_t StaticData::MapCoordSpace(string space)
{
  // NOTE(review): no locking is visible here; presumably only called while
  // models are being loaded -- confirm before calling from decoder threads.

  // Use the map's own type (key is `std::string const`) so the iterator type
  // matches the container exactly.
  typedef std::map<std::string const, size_t> SpaceMap;

  // One lookup instead of find() + operator[]: insert() leaves an existing
  // entry untouched and reports whether a new one was created.
  std::pair<SpaceMap::iterator, bool> const slot =
    m_coordSpaceMap.insert(SpaceMap::value_type(space, m_coordSpaceNextID));
  if (slot.second) {
    // The candidate ID was consumed by the new entry
    m_coordSpaceNextID += 1;
  }
  return slot.first->second;
}
|
||||
|
||||
} // namespace
|
||||
|
@ -60,7 +60,7 @@ class PhraseDictionaryDynamicCacheBased;
|
||||
typedef std::pair<std::string, float> UnknownLHSEntry;
|
||||
typedef std::vector<UnknownLHSEntry> UnknownLHSList;
|
||||
|
||||
/** Contains global variables and contants.
|
||||
/** Contains global variables and constants.
|
||||
* Only 1 object of this class should be instantiated.
|
||||
* A const object of this class is accessible by any function during decoding by calling StaticData::Instance();
|
||||
*/
|
||||
@ -152,6 +152,12 @@ protected:
|
||||
bool ini_performance_options();
|
||||
|
||||
void initialize_features();
|
||||
|
||||
// Coordinate space name map for matching spaces across XML input ("coord"
|
||||
// tag) and feature functions that assign or use coordinates on target phrases
|
||||
std::map< std::string const, size_t > m_coordSpaceMap;
|
||||
size_t m_coordSpaceNextID = 1;
|
||||
|
||||
public:
|
||||
|
||||
//! destructor
|
||||
@ -394,6 +400,9 @@ public:
|
||||
return m_requireSortingAfterSourceContext;
|
||||
}
|
||||
|
||||
// Coordinate spaces
|
||||
size_t GetCoordSpace(std::string space) const;
|
||||
size_t MapCoordSpace(std::string space);
|
||||
};
|
||||
|
||||
}
|
||||
|
@ -333,6 +333,29 @@ SetExtraScores(FeatureFunction const* ff,
|
||||
m_cached_scores[ff] = s;
|
||||
}
|
||||
|
||||
// Return the list of coordinate vectors stored for this target phrase in the
// coordinate space identified by spaceID. The returned reference points into
// the phrase's coordinate cache.
//
// Raises through UTIL_THROW_IF2 when the phrase has no coordinate cache at
// all, or when the cache has no entry for spaceID.
vector<vector<float> const*> const& TargetPhrase::GetCoordList(size_t const spaceID) const
{
  // The cache pointer stays empty until PushCoord allocates it
  UTIL_THROW_IF2(!m_cached_coord, "No coordinates known for target phrase");

  CoordCache_t::const_iterator m = m_cached_coord->find(spaceID);
  UTIL_THROW_IF2(m == m_cached_coord->end(), "No coordinates known in given space for target phrase");

  return m->second;
}
|
||||
|
||||
void
|
||||
TargetPhrase::
|
||||
PushCoord(size_t const spaceID,
|
||||
vector<float> const* coord)
|
||||
{
|
||||
if (!m_cached_coord) {
|
||||
m_cached_coord.reset(new CoordCache_t);
|
||||
}
|
||||
vector<vector<float> const *>& coordList = (*m_cached_coord)[spaceID];
|
||||
coordList.push_back(coord);
|
||||
}
|
||||
|
||||
void TargetPhrase::SetProperties(const StringPiece &str)
|
||||
{
|
||||
|
@ -56,9 +56,16 @@ public:
|
||||
Scores const* GetExtraScores(FeatureFunction const* ff) const;
|
||||
void SetExtraScores(FeatureFunction const* ff,boost::shared_ptr<Scores> const& scores);
|
||||
|
||||
typedef std::map<size_t const, std::vector<std::vector<float> const*> > CoordCache_t;
|
||||
std::vector<std::vector<float> const*> const& GetCoordList(size_t const spaceID) const;
|
||||
void PushCoord(size_t const spaceID, std::vector<float> const* coord);
|
||||
|
||||
private:
|
||||
ScoreCache_t m_cached_scores;
|
||||
// The coordinate cache stores vectors of pointers to vectors. The coordinate
|
||||
// vectors referenced by the pointers should be owned by the phrase dictionary
|
||||
// implementation.
|
||||
SPTR<CoordCache_t> m_cached_coord;
|
||||
WPTR<ContextScope> m_scope;
|
||||
|
||||
private:
|
||||
|
@ -147,14 +147,6 @@ public:
|
||||
|
||||
void SetParameter(const std::string& key, const std::string& value);
|
||||
|
||||
void AddKnownSpace(const std::string& name) {
|
||||
m_knownSpaces.push_back(name);
|
||||
}
|
||||
|
||||
const std::vector<std::string> &GetKnownSpaces() const {
|
||||
return m_knownSpaces;
|
||||
}
|
||||
|
||||
// LEGACY
|
||||
//! find list of translations that can translates a portion of src. Used by confusion network decoding
|
||||
virtual
|
||||
@ -179,9 +171,6 @@ protected:
|
||||
// cache
|
||||
size_t m_maxCacheSize; // 0 = no caching
|
||||
|
||||
// Named coordinate spaces used by this model, in order (see "coord" XML tag)
|
||||
std::vector<std::string> m_knownSpaces;
|
||||
|
||||
#ifdef WITH_THREADS
|
||||
//reader-writer lock
|
||||
mutable boost::thread_specific_ptr<CacheColl> m_cache;
|
||||
|
@ -286,16 +286,17 @@ namespace Moses
|
||||
BOOST_FOREACH(std::string instance, coord_instances)
|
||||
{
|
||||
vector<string> toks = Moses::Tokenize(instance, ":");
|
||||
string name = toks[0];
|
||||
string space = toks[0];
|
||||
string file = toks[1];
|
||||
//TODO: register this space for this model
|
||||
// Register that this model uses the given space
|
||||
m_coord_spaces.push_back(StaticData::InstanceNonConst().MapCoordSpace(space));
|
||||
// Load sid coordinates from file
|
||||
m_sid_coord_list.push_back(vector<vector<float> >());
|
||||
vector<vector<float> >& sid_coord = m_sid_coord_list[m_sid_coord_list.size() - 1];
|
||||
//TODO: support extra data for btdyn, here? extra?
|
||||
sid_coord.reserve(btfix->T1->size());
|
||||
string line;
|
||||
cerr << "Loading coordinate lines for space \"" << name << "\" from " << file << endl;
|
||||
cerr << "Loading coordinate lines for space \"" << space << "\" from " << file << endl;
|
||||
iostreams::filtering_istream in;
|
||||
ugdiss::open_input_stream(file, in);
|
||||
while(getline(in, line))
|
||||
@ -648,19 +649,27 @@ namespace Moses
|
||||
}
|
||||
#endif
|
||||
|
||||
// Track stats for rescoring non-cacheable phrases as needed
|
||||
// Track coordinates if requested
|
||||
if (m_track_coord)
|
||||
{
|
||||
cerr << btfix->toString(pool.p1, 0) << " ::: " << btfix->toString(pool.p2, 1) << endl;
|
||||
BOOST_FOREACH(uint32_t const sid, *pool.sids)
|
||||
{
|
||||
BOOST_FOREACH(vector<vector<float> > coord, m_sid_coord_list)
|
||||
for(size_t i = 0; i < m_coord_spaces.size(); ++i)
|
||||
{
|
||||
//TODO: store coord[sid] in tp
|
||||
cerr << " : " << Join(" ", coord[sid]);
|
||||
tp->PushCoord(m_coord_spaces[i], &m_sid_coord_list[i][sid]);
|
||||
}
|
||||
cerr << endl;
|
||||
}
|
||||
/*
|
||||
cerr << btfix->toString(pool.p1, 0) << " ::: " << btfix->toString(pool.p2, 1);
|
||||
BOOST_FOREACH(size_t id, m_coord_spaces)
|
||||
{
|
||||
cerr << " [" << id << "]";
|
||||
vector<vector<float> const*> const& coordList = tp->GetCoordList(id);
|
||||
BOOST_FOREACH(vector<float> const* coord, coordList)
|
||||
cerr << " : " << Join(" ", *coord);
|
||||
}
|
||||
cerr << endl;
|
||||
*/
|
||||
}
|
||||
|
||||
return tp;
|
||||
|
@ -119,8 +119,10 @@ namespace Moses
|
||||
std::vector<SPTR<pscorer > > m_active_ff_common;
|
||||
// activated feature functions (dyn)
|
||||
|
||||
bool m_track_coord = false; // track coordinates? Effectively: track sids when sampling bitext?
|
||||
bool m_track_coord = false; // track coordinates? Track sids when sampling
|
||||
// from bitext, append coords to target phrases
|
||||
std::vector<std::vector<std::vector<float> > > m_sid_coord_list;
|
||||
std::vector<size_t> m_coord_spaces;
|
||||
|
||||
void
|
||||
parse_factor_spec(std::vector<FactorType>& flist, std::string const key);
|
||||
|
@ -405,33 +405,21 @@ ProcessAndStripXMLTags(AllOptions const& opts, string &line,
|
||||
// Coord: coordinates of the input sentence in a user-defined space
|
||||
// <coord space="NAME" coord="X Y Z ..." />
|
||||
// where NAME is the name of the space and X Y Z ... are floats. See
|
||||
// PScoreDist in PhraseDictionaryBitextSampling (Mmsapt) for an example
|
||||
// of using this information for feature scoring.
|
||||
// TODO for an example of using this information for feature scoring.
|
||||
else if (tagName == "coord") {
|
||||
// Parse tag
|
||||
string space = ParseXmlTagAttribute(tagContent, "space");
|
||||
vector<string> toks = Tokenize(ParseXmlTagAttribute(tagContent, "coord"));
|
||||
boost::shared_ptr<vector<float> > coord(new vector<float>);
|
||||
Scan<float>(*coord, toks);
|
||||
// Init if needed
|
||||
if (!input.m_pd2InputCoord) {
|
||||
input.m_pd2InputCoord.reset(new std::map<PhraseDictionary const*, std::vector<boost::shared_ptr<std::vector<float> > > >);
|
||||
}
|
||||
// Scan phrase dictionaries to see which (if any) use this space
|
||||
BOOST_FOREACH(PhraseDictionary const* pd, PhraseDictionary::GetColl()) {
|
||||
const vector<string>& pdKnownSpaces = pd->GetKnownSpaces();
|
||||
for (size_t i = 0; i < pdKnownSpaces.size(); ++i) {
|
||||
// Match
|
||||
if (pdKnownSpaces[i] == space) {
|
||||
// Make sure a slot to store the coordinates exists
|
||||
std::vector<boost::shared_ptr<std::vector<float> > >& inputCoord = (*input.m_pd2InputCoord)[pd];
|
||||
if (inputCoord.size() < i + 1) {
|
||||
inputCoord.resize(i + 1);
|
||||
}
|
||||
// Store
|
||||
inputCoord[i] = coord;
|
||||
}
|
||||
vector<string> tok = Tokenize(ParseXmlTagAttribute(tagContent, "coord"));
|
||||
size_t id = StaticData::Instance().GetCoordSpace(space);
|
||||
if (!id) {
|
||||
TRACE_ERR("ERROR: no models use space " << space << ", will be ignored" << endl);
|
||||
} else {
|
||||
// Init if needed
|
||||
if (!input.m_coordMap) {
|
||||
input.m_coordMap.reset(new std::map<size_t const, std::vector<float> >);
|
||||
}
|
||||
vector<float>& coord = (*input.m_coordMap)[id];
|
||||
Scan<float>(coord, tok);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user