2012-05-25 20:29:47 +04:00
|
|
|
/***********************************************************************
|
|
|
|
Moses - statistical machine translation system
|
|
|
|
Copyright (C) 2006-2012 University of Edinburgh
|
2015-04-30 08:05:11 +03:00
|
|
|
|
2012-05-25 20:29:47 +04:00
|
|
|
This library is free software; you can redistribute it and/or
|
|
|
|
modify it under the terms of the GNU Lesser General Public
|
|
|
|
License as published by the Free Software Foundation; either
|
|
|
|
version 2.1 of the License, or (at your option) any later version.
|
2015-04-30 08:05:11 +03:00
|
|
|
|
2012-05-25 20:29:47 +04:00
|
|
|
This library is distributed in the hope that it will be useful,
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
Lesser General Public License for more details.
|
2015-04-30 08:05:11 +03:00
|
|
|
|
2012-05-25 20:29:47 +04:00
|
|
|
You should have received a copy of the GNU Lesser General Public
|
|
|
|
License along with this library; if not, write to the Free Software
|
|
|
|
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
***********************************************************************/
|
|
|
|
|
|
|
|
#include "rule_collection.h"
|
|
|
|
|
2015-06-04 16:36:39 +03:00
|
|
|
#include "syntax-common/pcfg.h"
|
2012-05-25 20:29:47 +04:00
|
|
|
|
|
|
|
#include <cmath>
|
|
|
|
|
2015-05-15 20:09:38 +03:00
|
|
|
namespace MosesTraining
|
|
|
|
{
|
|
|
|
namespace Syntax
|
|
|
|
{
|
|
|
|
namespace PCFG
|
|
|
|
{
|
2012-05-25 20:29:47 +04:00
|
|
|
|
2015-05-15 20:09:38 +03:00
|
|
|
void RuleCollection::Add(std::size_t lhs, const std::vector<std::size_t> &rhs)
|
|
|
|
{
|
2012-05-25 20:29:47 +04:00
|
|
|
++collection_[lhs][rhs];
|
|
|
|
}
|
|
|
|
|
2015-05-15 20:09:38 +03:00
|
|
|
// Emits every collected rule into pcfg. For each left-hand side, the counts
// of all its right-hand sides are summed, and each rule is then added with
// the score log(count / total), i.e. the natural log of its relative
// frequency among rules sharing that left-hand side. The key passed to
// pcfg.Add is the left-hand side followed by the right-hand side symbols.
void RuleCollection::CreatePcfg(Pcfg &pcfg)
{
  // Scratch buffer reused across rules; holds [lhs, rhs...].
  std::vector<std::size_t> rule_key;

  for (const_iterator lhs_it = begin(); lhs_it != end(); ++lhs_it) {
    const std::size_t lhs = lhs_it->first;
    const RhsCountMap &expansions = lhs_it->second;

    // Pass 1: total number of observations for this left-hand side.
    std::size_t lhs_total = 0;
    RhsCountMap::const_iterator rhs_it;
    for (rhs_it = expansions.begin(); rhs_it != expansions.end(); ++rhs_it) {
      lhs_total += rhs_it->second;
    }
    const double denominator = static_cast<double>(lhs_total);

    // Pass 2: score each expansion against that total and hand it to pcfg.
    for (rhs_it = expansions.begin(); rhs_it != expansions.end(); ++rhs_it) {
      const std::vector<std::size_t> &rhs_symbols = rhs_it->first;
      const std::size_t rule_count = rhs_it->second;
      const double log_prob =
        std::log(static_cast<double>(rule_count) / denominator);

      rule_key.assign(1, lhs);
      rule_key.insert(rule_key.end(), rhs_symbols.begin(), rhs_symbols.end());
      pcfg.Add(rule_key, log_prob);
    }
  }
}
|
|
|
|
|
|
|
|
} // namespace PCFG
|
2014-12-07 17:27:51 +03:00
|
|
|
} // namespace Syntax
|
|
|
|
} // namespace MosesTraining
|