2011-11-21 20:21:04 +04:00
|
|
|
/***********************************************************************
|
|
|
|
Moses - statistical machine translation system
|
|
|
|
Copyright (C) 2006-2011 University of Edinburgh
|
2013-05-29 21:16:15 +04:00
|
|
|
|
2011-11-21 20:21:04 +04:00
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
2013-05-29 21:16:15 +04:00
|
|
|
|
2011-11-21 20:21:04 +04:00
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
2013-05-29 21:16:15 +04:00
|
|
|
|
2011-11-21 20:21:04 +04:00
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <set>
|
|
|
|
#include <vector>
|
|
|
|
|
2015-06-01 18:40:35 +03:00
|
|
|
#include "Node.h"
|
|
|
|
|
|
|
|
namespace MosesTraining
|
2013-05-29 21:16:15 +04:00
|
|
|
{
|
2015-07-06 16:41:34 +03:00
|
|
|
namespace Syntax
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
namespace GHKM
|
|
|
|
{
|
2011-11-21 20:21:04 +04:00
|
|
|
|
|
|
|
class Node;
|
|
|
|
|
|
|
|
class Subgraph
|
|
|
|
{
|
2013-05-29 21:16:15 +04:00
|
|
|
public:
|
2011-11-21 20:21:04 +04:00
|
|
|
Subgraph(const Node *root)
|
2013-05-29 21:16:15 +04:00
|
|
|
: m_root(root)
|
|
|
|
, m_depth(0)
|
|
|
|
, m_size(root->GetType() == TREE ? 1 : 0)
|
|
|
|
, m_nodeCount(1)
|
|
|
|
, m_pcfgScore(0.0f) {}
|
2011-11-21 20:21:04 +04:00
|
|
|
|
|
|
|
Subgraph(const Node *root, const std::set<const Node *> &leaves)
|
2013-05-29 21:16:15 +04:00
|
|
|
: m_root(root)
|
|
|
|
, m_leaves(leaves)
|
|
|
|
, m_depth(-1)
|
|
|
|
, m_size(-1)
|
|
|
|
, m_nodeCount(-1)
|
|
|
|
, m_pcfgScore(0.0f) {
|
2011-11-21 20:21:04 +04:00
|
|
|
m_depth = CalcDepth(m_root);
|
|
|
|
m_size = CalcSize(m_root);
|
|
|
|
m_nodeCount = CountNodes(m_root);
|
2012-05-25 20:29:47 +04:00
|
|
|
m_pcfgScore = CalcPcfgScore();
|
2011-11-21 20:21:04 +04:00
|
|
|
}
|
|
|
|
|
2014-11-21 14:30:29 +03:00
|
|
|
Subgraph(const Subgraph &other, bool targetOnly=false)
|
2015-01-14 14:07:42 +03:00
|
|
|
: m_root(other.m_root)
|
|
|
|
, m_leaves(other.m_leaves)
|
|
|
|
, m_depth(other.m_depth)
|
|
|
|
, m_size(other.m_size)
|
|
|
|
, m_nodeCount(other.m_nodeCount)
|
|
|
|
, m_pcfgScore(other.m_pcfgScore) {
|
2014-11-21 14:30:29 +03:00
|
|
|
if (targetOnly && m_root->GetType() != SOURCE) {
|
|
|
|
// Replace any source-word sink nodes with their parents (except for
|
|
|
|
// the special case where the parent is a non-word tree node -- see
|
|
|
|
// below).
|
|
|
|
std::set<const Node *> targetLeaves;
|
|
|
|
for (std::set<const Node *>::const_iterator p = m_leaves.begin();
|
|
|
|
p != m_leaves.end(); ++p) {
|
|
|
|
const Node *leaf = *p;
|
|
|
|
if (leaf->GetType() != SOURCE) {
|
|
|
|
targetLeaves.insert(leaf);
|
|
|
|
} else {
|
|
|
|
const std::vector<Node*> &parents = leaf->GetParents();
|
|
|
|
for (std::vector<Node*>::const_iterator q = parents.begin();
|
|
|
|
q != parents.end(); ++q) {
|
|
|
|
const Node *parent = *q;
|
|
|
|
// Only add parents that are words, not tree nodes since those
|
|
|
|
// are never sink nodes. (A source word can have a tree node as
|
|
|
|
// its parent due to the heuristic for handling unaligned source
|
|
|
|
// words).
|
|
|
|
if (parent->GetType() == TARGET) {
|
|
|
|
targetLeaves.insert(*q);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
m_leaves.swap(targetLeaves);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
const Node *GetRoot() const {
|
|
|
|
return m_root;
|
|
|
|
}
|
|
|
|
const std::set<const Node *> &GetLeaves() const {
|
|
|
|
return m_leaves;
|
|
|
|
}
|
|
|
|
int GetDepth() const {
|
|
|
|
return m_depth;
|
|
|
|
}
|
|
|
|
int GetSize() const {
|
|
|
|
return m_size;
|
|
|
|
}
|
|
|
|
int GetNodeCount() const {
|
|
|
|
return m_nodeCount;
|
|
|
|
}
|
|
|
|
float GetPcfgScore() const {
|
|
|
|
return m_pcfgScore;
|
|
|
|
}
|
2011-11-21 20:21:04 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
bool IsTrivial() const {
|
|
|
|
return m_leaves.empty();
|
|
|
|
}
|
2011-11-21 20:21:04 +04:00
|
|
|
|
|
|
|
void GetTargetLeaves(std::vector<const Node *> &) const;
|
2013-09-10 17:41:26 +04:00
|
|
|
void PrintTree(std::ostream &out) const;
|
2015-03-05 00:40:56 +03:00
|
|
|
void PrintPartsOfSpeech(std::ostream &out) const;
|
2015-03-10 00:54:03 +03:00
|
|
|
void GetPartsOfSpeech(std::vector<std::string> &out) const;
|
2013-09-10 17:41:26 +04:00
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
private:
|
2011-11-21 20:21:04 +04:00
|
|
|
void GetTargetLeaves(const Node *, std::vector<const Node *> &) const;
|
|
|
|
int CalcDepth(const Node *) const;
|
|
|
|
int CalcSize(const Node *) const;
|
2012-05-25 20:29:47 +04:00
|
|
|
float CalcPcfgScore() const;
|
2011-11-21 20:21:04 +04:00
|
|
|
int CountNodes(const Node *) const;
|
2013-09-10 17:41:26 +04:00
|
|
|
void RecursivelyPrintTree(const Node *n, std::ostream &out) const;
|
2015-03-05 00:40:56 +03:00
|
|
|
void RecursivelyPrintPartsOfSpeech(const Node *n, std::ostream &out) const;
|
2015-03-10 00:54:03 +03:00
|
|
|
void RecursivelyGetPartsOfSpeech(const Node *n, std::vector<std::string> &out) const;
|
2011-11-21 20:21:04 +04:00
|
|
|
|
|
|
|
const Node *m_root;
|
|
|
|
std::set<const Node *> m_leaves;
|
|
|
|
int m_depth;
|
|
|
|
int m_size;
|
|
|
|
int m_nodeCount;
|
2012-05-25 20:29:47 +04:00
|
|
|
float m_pcfgScore;
|
2011-11-21 20:21:04 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
} // namespace GHKM
|
2015-07-06 16:41:34 +03:00
|
|
|
} // namespace Syntax
|
2015-06-01 18:40:35 +03:00
|
|
|
} // namespace MosesTraining
|