Merge branch 'master' of github.com:moses-smt/mosesdecoder

This commit is contained in:
Hieu Hoang 2015-05-31 21:28:10 +04:00
commit 559cc4dac0
208 changed files with 917 additions and 243 deletions

View File

@ -28,7 +28,7 @@
#include "RuleExtractionOptions.h"
#include "SentenceAlignment.h"
#include "SyntaxTree.h"
#include "SyntaxNodeCollection.h"
namespace MosesTraining
{
@ -36,8 +36,8 @@ namespace MosesTraining
class SentenceAlignmentWithSyntax : public SentenceAlignment
{
public:
SyntaxTree targetTree;
SyntaxTree sourceTree;
SyntaxNodeCollection targetTree;
SyntaxNodeCollection sourceTree;
std::set<std::string> & m_targetLabelCollection;
std::set<std::string> & m_sourceLabelCollection;
std::map<std::string, int> & m_targetTopLabelCollection;

View File

@ -0,0 +1,75 @@
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <map>
#include <sstream>
#include <string>
#include <vector>
namespace MosesTraining
{
/**
 * A labelled node covering a span of positions, with links to a parent node
 * and to child nodes.
 *
 * The node stores raw pointers to its parent and children and declares no
 * destructor, so it never deletes them; whoever allocates the nodes is
 * responsible for freeing them.
 */
class SyntaxNode
{
protected:
  int m_start, m_end;                    // span boundaries, stored exactly as supplied
  std::string m_label;                   // label given at construction
  std::vector< SyntaxNode* > m_children; // children in the order they were added (not owned)
  SyntaxNode* m_parent;                  // 0 until SetParent() is called (not owned)
  float m_pcfgScore;                     // 0.0f until SetPcfgScore() is called
public:
  /**
   * Creates a node with no parent, no children and a PCFG score of 0.
   *
   * @param startPos first position of the span
   * @param endPos   last position of the span (stored as given)
   * @param label    node label; taken by const reference so that only the
   *                 copy into m_label is made
   */
  SyntaxNode( int startPos, int endPos, const std::string &label )
    : m_start(startPos)
    , m_end(endPos)
    , m_label(label)
    , m_parent(0)
    , m_pcfgScore(0.0f) {
  }

  /** @return the start position passed to the constructor. */
  int GetStart() const {
    return m_start;
  }

  /** @return the end position passed to the constructor. */
  int GetEnd() const {
    return m_end;
  }

  /** @return a copy of the node label. */
  std::string GetLabel() const {
    return m_label;
  }

  /** @return the score last set with SetPcfgScore(), or 0.0f if never set. */
  float GetPcfgScore() const {
    return m_pcfgScore;
  }

  /** Overwrites the stored PCFG score. */
  void SetPcfgScore(float score) {
    m_pcfgScore = score;
  }

  /** @return the parent node, or 0 if none has been set. */
  SyntaxNode *GetParent() {
    return m_parent;
  }

  /** Const overload so that the parent can be read through a const node. */
  const SyntaxNode *GetParent() const {
    return m_parent;
  }

  /** Records @p parent as this node's parent; the child list of @p parent is not touched. */
  void SetParent(SyntaxNode *parent) {
    m_parent = parent;
  }

  /** Appends @p child to the child list; the parent link of @p child is not touched. */
  void AddChild(SyntaxNode* child) {
    m_children.push_back(child);
  }

  /** @return the children in insertion order. */
  const std::vector< SyntaxNode* > &GetChildren() const {
    return m_children;
  }
};
} // namespace MosesTraining

View File

@ -1,6 +1,3 @@
// $Id: SyntaxTree.cpp 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
@ -21,7 +18,7 @@
***********************************************************************/
#include "SyntaxTree.h"
#include "SyntaxNodeCollection.h"
#include <cassert>
#include <iostream>
@ -29,12 +26,12 @@
namespace MosesTraining
{
SyntaxTree::~SyntaxTree()
// Tear-down is delegated to Clear(), so destruction and an explicit reset
// follow the same code path.
SyntaxNodeCollection::~SyntaxNodeCollection()
{
  this->Clear();
}
void SyntaxTree::Clear()
void SyntaxNodeCollection::Clear()
{
m_top = 0;
// loop through all m_nodes, delete them
@ -45,7 +42,8 @@ void SyntaxTree::Clear()
m_index.clear();
}
SyntaxNode *SyntaxTree::AddNode( int startPos, int endPos, std::string label )
SyntaxNode *SyntaxNodeCollection::AddNode(int startPos, int endPos,
const std::string &label)
{
SyntaxNode* newNode = new SyntaxNode( startPos, endPos, label );
m_nodes.push_back( newNode );
@ -54,7 +52,7 @@ SyntaxNode *SyntaxTree::AddNode( int startPos, int endPos, std::string label )
return newNode;
}
ParentNodes SyntaxTree::Parse()
ParentNodes SyntaxNodeCollection::Parse()
{
ParentNodes parents;
@ -94,12 +92,12 @@ ParentNodes SyntaxTree::Parse()
return parents;
}
bool SyntaxTree::HasNode( int startPos, int endPos ) const
bool SyntaxNodeCollection::HasNode( int startPos, int endPos ) const
{
return GetNodes( startPos, endPos).size() > 0;
}
const std::vector< SyntaxNode* >& SyntaxTree::GetNodes( int startPos, int endPos ) const
const std::vector< SyntaxNode* >& SyntaxNodeCollection::GetNodes( int startPos, int endPos ) const
{
SyntaxTreeIndexIterator startIndex = m_index.find( startPos );
if (startIndex == m_index.end() )
@ -112,15 +110,7 @@ const std::vector< SyntaxNode* >& SyntaxTree::GetNodes( int startPos, int endPos
return endIndex->second;
}
// for printing out tree
std::string SyntaxTree::ToString() const
{
std::stringstream out;
out << *this;
return out.str();
}
void SyntaxTree::ConnectNodes()
void SyntaxNodeCollection::ConnectNodes()
{
typedef SyntaxTreeIndex2::const_reverse_iterator InnerIterator;
@ -162,27 +152,4 @@ void SyntaxTree::ConnectNodes()
}
}
std::ostream& operator<<(std::ostream& os, const SyntaxTree& t)
{
size_t size = t.m_index.size();
for(size_t length=1; length<=size; length++) {
for(size_t space=0; space<length; space++) {
os << " ";
}
for(size_t start=0; start<=size-length; start++) {
if (t.HasNode( start, start+(length-1) )) {
std::string label = t.GetNodes( start, start+(length-1) )[0]->GetLabel() + "#######";
os << label.substr(0,7) << " ";
} else {
os << "------- ";
}
}
os << std::endl;
}
return os;
}
}
} // namespace MosesTraining

View File

@ -1,6 +1,3 @@
// $Id: SyntaxTree.h 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2009 University of Edinburgh
@ -20,66 +17,22 @@
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <string>
#include <vector>
#include <map>
#include <sstream>
#include <string>
#include <vector>
#include "SyntaxNode.h"
namespace MosesTraining
{
class SyntaxNode
{
protected:
int m_start, m_end;
std::string m_label;
std::vector< SyntaxNode* > m_children;
SyntaxNode* m_parent;
float m_pcfgScore;
public:
SyntaxNode( int startPos, int endPos, std::string label )
:m_start(startPos)
,m_end(endPos)
,m_label(label)
,m_parent(0)
,m_pcfgScore(0.0f) {
}
int GetStart() const {
return m_start;
}
int GetEnd() const {
return m_end;
}
std::string GetLabel() const {
return m_label;
}
float GetPcfgScore() const {
return m_pcfgScore;
}
void SetPcfgScore(float score) {
m_pcfgScore = score;
}
SyntaxNode *GetParent() {
return m_parent;
}
void SetParent(SyntaxNode *parent) {
m_parent = parent;
}
void AddChild(SyntaxNode* child) {
m_children.push_back(child);
}
const std::vector< SyntaxNode* > &GetChildren() const {
return m_children;
}
};
typedef std::vector< int > SplitPoints;
typedef std::vector< SplitPoints > ParentNodes;
class SyntaxTree
class SyntaxNodeCollection
{
protected:
std::vector< SyntaxNode* > m_nodes;
@ -93,16 +46,14 @@ protected:
int m_size;
std::vector< SyntaxNode* > m_emptyNode;
friend std::ostream& operator<<(std::ostream&, const SyntaxTree&);
public:
SyntaxTree()
SyntaxNodeCollection()
: m_top(0) // m_top doesn't get set unless ConnectNodes is called.
, m_size(0) {}
~SyntaxTree();
~SyntaxNodeCollection();
SyntaxNode *AddNode( int startPos, int endPos, std::string label );
SyntaxNode *AddNode( int startPos, int endPos, const std::string &label );
SyntaxNode *GetTop() {
return m_top;
@ -119,10 +70,6 @@ public:
}
void ConnectNodes();
void Clear();
std::string ToString() const;
};
std::ostream& operator<<(std::ostream&, const SyntaxTree&);
}
} // namespace MosesTraining

View File

@ -1,6 +1,3 @@
// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
@ -27,7 +24,8 @@
#include <iostream>
#include <cstdlib>
#include <sstream>
#include "SyntaxTree.h"
#include "SyntaxNodeCollection.h"
#include "XmlException.h"
using namespace std;
@ -228,7 +226,10 @@ vector<string> TokenizeXml(const string& str)
parse because we don't have the completed source parsed until after this function
removes all the markup from it (CreateFromString in Sentence::Read).
*/
bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &labelCollection, map< string, int > &topLabelCollection, bool unescapeSpecialChars )
bool ProcessAndStripXMLTags(string &line, SyntaxNodeCollection &nodeCollection,
set< string > &labelCollection,
map< string, int > &topLabelCollection,
bool unescapeSpecialChars )
{
//parse XML markup in translation line
@ -374,7 +375,7 @@ bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &label
cerr << "XML TAG LABEL IS: '" << label << "'" << endl;
cerr << "XML SPAN IS: " << startPos << "-" << (endPos-1) << endl;
}
SyntaxNode *node = tree.AddNode( startPos, endPos-1, label );
SyntaxNode *node = nodeCollection.AddNode( startPos, endPos-1, label );
node->SetPcfgScore(pcfgScore);
}
}
@ -386,7 +387,7 @@ bool ProcessAndStripXMLTags(string &line, SyntaxTree &tree, set< string > &label
}
// collect top labels
const vector< SyntaxNode* >& topNodes = tree.GetNodes( 0, wordPos-1 );
const vector< SyntaxNode* >& topNodes = nodeCollection.GetNodes( 0, wordPos-1 );
for( vector< SyntaxNode* >::const_iterator node = topNodes.begin(); node != topNodes.end(); node++ ) {
SyntaxNode *n = *node;
const string &label = n->GetLabel();

View File

@ -1,6 +1,3 @@
// $Id: XmlOption.cpp 1960 2008-12-15 12:52:38Z phkoehn $
// vim:tabstop=2
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
@ -21,11 +18,13 @@
***********************************************************************/
#pragma once
#include <string>
#include <vector>
#include <set>
#include <map>
#include "SyntaxTree.h"
#include "SyntaxNodeCollection.h"
namespace MosesTraining
{
@ -35,9 +34,8 @@ std::string Trim(const std::string& str, const std::string dropChars = " \t\n\r"
std::string TrimXml(const std::string& str);
bool isXmlTag(const std::string& tag);
std::vector<std::string> TokenizeXml(const std::string& str);
bool ProcessAndStripXMLTags(std::string &line, SyntaxTree &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection, bool unescape = true);
bool ProcessAndStripXMLTags(std::string &line, SyntaxNodeCollection &tree, std::set< std::string > &labelCollection, std::map< std::string, int > &topLabelCollection, bool unescape = true);
std::string unescape(const std::string &str);
} // namespace
} // namespace MosesTraining

View File

@ -33,7 +33,8 @@
#include "Span.h"
#include "StsgRule.h"
#include "StsgRuleWriter.h"
#include "SyntaxTree.h"
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
@ -172,7 +173,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
// Parse source tree and construct a SyntaxNodeCollection object.
MosesTraining::SyntaxTree sourceSyntaxTree;
MosesTraining::SyntaxNodeCollection sourceSyntaxTree;
MosesTraining::SyntaxNode *sourceSyntaxTreeRoot=NULL;
if (options.sourceLabels) {
@ -196,7 +197,7 @@ int ExtractGHKM::Main(int argc, char *argv[])
// Read source tokens.
std::vector<std::string> sourceTokens(ReadTokens(sourceLine));
// Construct a source ParseTree object from the SyntaxTree object.
// Construct a source ParseTree object from the SyntaxNodeCollection object.
std::auto_ptr<ParseTree> sourceParseTree;
if (options.sourceLabels) {

View File

@ -19,11 +19,12 @@
#include "ScfgRule.h"
#include <algorithm>
#include "Node.h"
#include "Subgraph.h"
#include "SyntaxTree.h"
#include <algorithm>
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
namespace Moses
{
@ -31,7 +32,7 @@ namespace GHKM
{
ScfgRule::ScfgRule(const Subgraph &fragment,
const MosesTraining::SyntaxTree *sourceSyntaxTree)
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree)
: m_graphFragment(fragment)
, m_sourceLHS("X", NonTerminal)
, m_targetLHS(fragment.GetRoot()->GetLabel(), NonTerminal)
@ -133,9 +134,9 @@ ScfgRule::ScfgRule(const Subgraph &fragment,
}
}
void ScfgRule::PushSourceLabel(const MosesTraining::SyntaxTree *sourceSyntaxTree,
const Node *node,
const std::string &nonMatchingLabel)
void ScfgRule::PushSourceLabel(
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree,
const Node *node, const std::string &nonMatchingLabel)
{
ContiguousSpan span = Closure(node->GetSpan());
if (sourceSyntaxTree->HasNode(span.first,span.second)) { // does a source constituent match the span?

View File

@ -19,16 +19,16 @@
#pragma once
#include "Alignment.h"
#include "Rule.h"
#include "SyntaxTree.h"
#include <string>
#include <vector>
#include <list>
#include <memory>
#include <iostream>
#include "Alignment.h"
#include "Rule.h"
#include "SyntaxNodeCollection.h"
namespace Moses
{
namespace GHKM
@ -41,7 +41,7 @@ class ScfgRule : public Rule
{
public:
ScfgRule(const Subgraph &fragment,
const MosesTraining::SyntaxTree *sourceSyntaxTree = 0);
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree = 0);
const Subgraph &GetGraphFragment() const {
return m_graphFragment;
@ -78,9 +78,9 @@ public:
}
private:
void PushSourceLabel(const MosesTraining::SyntaxTree *sourceSyntaxTree,
const Node *node,
const std::string &nonMatchingLabel);
void PushSourceLabel(
const MosesTraining::SyntaxNodeCollection *sourceSyntaxTree,
const Node *node, const std::string &nonMatchingLabel);
const Subgraph& m_graphFragment;
Symbol m_sourceLHS;
@ -95,4 +95,3 @@ private:
} // namespace GHKM
} // namespace Moses

View File

@ -23,14 +23,15 @@
#include "Exception.h"
#include "SyntaxTree.h"
#include <map>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
namespace Moses
{
namespace GHKM
@ -58,7 +59,7 @@ private:
std::set<std::string> &m_labelSet;
std::map<std::string, int> &m_topLabelSet;
std::string m_line;
MosesTraining::SyntaxTree m_tree;
MosesTraining::SyntaxNodeCollection m_tree;
std::vector<std::string> m_words;
};

View File

@ -41,7 +41,7 @@
#include "HoleCollection.h"
#include "RuleExist.h"
#include "SentenceAlignmentWithSyntax.h"
#include "SyntaxTree.h"
#include "SyntaxNode.h"
#include "tables-core.h"
#include "XmlTree.h"
#include "InputFileStream.h"

View File

@ -28,7 +28,8 @@
#include <vector>
#include "pcfg_tree.h"
#include "SyntaxTree.h"
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
namespace MosesTraining {
namespace Syntax {
@ -47,7 +48,7 @@ class XmlTreeParser {
std::set<std::string> m_labelSet;
std::map<std::string, int> m_topLabelSet;
std::string m_line;
MosesTraining::SyntaxTree m_tree;
MosesTraining::SyntaxNodeCollection m_tree;
std::vector<std::string> m_words;
};

View File

@ -43,7 +43,7 @@ int main(int argc, char* argv[])
// process into syntax tree representation
set< string > labelCollection; // set of labels, not used
map< string, int > topLabelCollection; // count of top labels, not used
SyntaxTree tree;
SyntaxNodeCollection tree;
ProcessAndStripXMLTags( inBufferString, tree, labelCollection, topLabelCollection, false );
const vector< string > inWords = util::tokenize( inBufferString );
@ -105,7 +105,7 @@ void init(int argc, char* argv[])
}
}
void store( SyntaxTree &tree, const vector< string > &words )
void store( SyntaxNodeCollection &tree, const vector< string > &words )
{
// output words
for( size_t i=0; i<words.size(); i++ ) {
@ -126,7 +126,7 @@ void store( SyntaxTree &tree, const vector< string > &words )
cout << endl;
}
void LeftBinarize( SyntaxTree &tree, ParentNodes &parents )
void LeftBinarize( SyntaxNodeCollection &tree, ParentNodes &parents )
{
for(ParentNodes::const_iterator p = parents.begin(); p != parents.end(); p++) {
const SplitPoints &point = *p;
@ -143,7 +143,7 @@ void LeftBinarize( SyntaxTree &tree, ParentNodes &parents )
}
}
void RightBinarize( SyntaxTree &tree, ParentNodes &parents )
void RightBinarize( SyntaxNodeCollection &tree, ParentNodes &parents )
{
for(ParentNodes::const_iterator p = parents.begin(); p != parents.end(); p++) {
const SplitPoints &point = *p;
@ -161,11 +161,11 @@ void RightBinarize( SyntaxTree &tree, ParentNodes &parents )
}
}
void SAMT( SyntaxTree &tree, ParentNodes &parents )
void SAMT( SyntaxNodeCollection &tree, ParentNodes &parents )
{
int numWords = tree.GetNumWords();
SyntaxTree newTree; // to store new nodes
SyntaxNodeCollection newTree; // to store new nodes
// look through parents to combine children
for(ParentNodes::const_iterator p = parents.begin(); p != parents.end(); p++) {

View File

@ -28,7 +28,7 @@
#include <algorithm>
#include <cstring>
#include "SyntaxTree.h"
#include "SyntaxNodeCollection.h"
#include "XmlTree.h"
#define LINE_MAX_LENGTH 1000000
@ -39,8 +39,8 @@ char SAMTLevel = 0;
// functions
void init(int argc, char* argv[]);
void store( MosesTraining::SyntaxTree &tree, const std::vector<std::string> &words );
void LeftBinarize( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
void RightBinarize( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
void SAMT( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
void store( MosesTraining::SyntaxNodeCollection &tree, const std::vector<std::string> &words );
void LeftBinarize( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
void RightBinarize( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );
void SAMT( MosesTraining::SyntaxNodeCollection &tree, MosesTraining::ParentNodes &parents );

View File

@ -13,17 +13,17 @@ namespace Syntax {
StringTree *XmlTreeParser::Parse(const std::string &line) {
line_ = line;
tree_.Clear();
node_collection_.Clear();
try {
if (!ProcessAndStripXMLTags(line_, tree_, label_set_, top_label_set_,
false)) {
if (!ProcessAndStripXMLTags(line_, node_collection_, label_set_,
top_label_set_, false)) {
throw Exception("");
}
} catch (const XmlException &e) {
throw Exception(e.getMsg());
}
tree_.ConnectNodes();
SyntaxNode *root = tree_.GetTop();
node_collection_.ConnectNodes();
SyntaxNode *root = node_collection_.GetTop();
assert(root);
words_ = util::tokenize(line_);
return ConvertTree(*root, words_);

View File

@ -5,7 +5,8 @@
#include <string>
#include <vector>
#include "SyntaxTree.h"
#include "SyntaxNode.h"
#include "SyntaxNodeCollection.h"
#include "exception.h"
#include "string_tree.h"
@ -26,7 +27,7 @@ class XmlTreeParser {
std::set<std::string> label_set_;
std::map<std::string, int> top_label_set_;
std::string line_;
MosesTraining::SyntaxTree tree_;
MosesTraining::SyntaxNodeCollection node_collection_;
std::vector<std::string> words_;
};

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
#use strict;
use warnings;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# Input: Hindi word, Urdu word. Deletes all entries that have a number on either side.
use warnings;
@ -314,4 +317,4 @@ sub charFreqFilter{
}
}
}
}
}

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use utf8;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use utf8;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use utf8;
###############################################

View File

@ -1,9 +1,13 @@
#!/usr/bin/env python
# Usage: extract-target-trees.py [FILE]
#
# Reads moses-chart's -T output from FILE or standard input and writes trees to
# standard output in Moses' XML tree format.
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
"""Usage: extract-target-trees.py [FILE]
Reads moses-chart's -T output from FILE or standard input and writes trees to
standard output in Moses' XML tree format.
"""
import re
import sys

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
# Reads a source and hypothesis file and counts equal tokens. Some of these

View File

@ -1,6 +1,9 @@
#!/usr/bin/env perl
# Display OOV rate of a test set against a training corpus or a phrase table.
# Ondrej Bojar
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use strict;
use warnings;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
#sentence-by-sentence: take in a system output, with any number of factors, and a reference translation, also maybe with factors, and show each sentence and its errors

View File

@ -3,6 +3,8 @@
# Author : Loic BARRAULT
# Script to convert MOSES searchgraph to DOT format
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
#show-phrases-used: display all source and target phrases for each sentence in a corpus, and give average phrase length used

View File

@ -1,5 +1,8 @@
#package Corpus: hold a bunch of sentences in any language, with translation factors and stats about individual sentences and the corpus as a whole
#Evan Herbst, 7 / 25 / 06
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
package Corpus;
BEGIN

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
#by Philipp Koehn, de-augmented by Evan Herbst

View File

@ -1,4 +1,7 @@
#!/usr/bin/perl -w
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
use strict;

View File

@ -2,6 +2,9 @@
# Collects and prints all n-grams that appear in the given corpus both
# tokenized as well as untokenized.
# Ondrej Bojar
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use strict;
use warnings;

View File

@ -1,4 +1,8 @@
#!/bin/bash
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# Hackish summarization of weight-scan.pl results, heavily relies on tools by
# Ondrej Bojar (bojar@ufal.mff.cuni.cz), some of which need Mercury; beware.

View File

@ -1,4 +1,8 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# runs Moses many times changing the values of one weight, all others fixed
# nbest lists are always produced to allow for comparison of real and
# 'projected' BLEU (BLEU estimated from n-best lists collected at a neighbouring

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# Experiment Management System
# Documentation at http://www.statmt.org/moses/?n=FactoredTraining.EMS

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/bin/sh
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
if [ $# -lt 8 ]
then

View File

@ -1,4 +1,7 @@
#!/bin/sh
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
if [ $# -lt 6 ]
then

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id: consolidate-training-data.perl 928 2009-09-02 02:58:01Z philipp $

View File

@ -1,4 +1,7 @@
#!/usr/bin/env python2
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
"""Version of ConfigParser which accepts default values."""

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
#######################
# Revision history

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id: report-experiment-scores.perl 407 2008-11-10 14:43:31Z philipp $

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# Based on Preprocessor written by Philipp Koehn

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,5 +1,10 @@
<?php
/*
This file is part of moses. Its use is licensed under the GNU Lesser General
Public License version 2.1 or, at your option, any later version.
*/
# main page frame, triggers the loading of parts
function show_analysis() {
global $task,$user,$setup,$id,$set;

View File

@ -1,5 +1,9 @@
<?php
/*
This file is part of moses. Its use is licensed under the GNU Lesser General
Public License version 2.1 or, at your option, any later version.
*/
function diff_analysis() {
global $task,$user,$setup,$id,$id2,$set;
global $comment,$dir;

View File

@ -1,5 +1,10 @@
<?php
/*
This file is part of moses. Its use is licensed under the GNU Lesser General
Public License version 2.1 or, at your option, any later version.
*/
function diff() {
global $experiment;
$display = $_GET["run"];

View File

@ -1,3 +1,7 @@
/*
This file is part of moses. Its use is licensed under the GNU Lesser General
Public License version 2.1 or, at your option, any later version.
*/
var nodeIn = [];
var nodeOut = [];
var nodeChildren = [];

View File

@ -1,5 +1,10 @@
<?php
/*
This file is part of moses. Its use is licensed under the GNU Lesser General
Public License version 2.1 or, at your option, any later version.
*/
require("lib.php");
require("overview.php");
require("analysis.php");

View File

@ -1,5 +1,10 @@
<?php
/*
This file is part of moses. Its use is licensed under the GNU Lesser General
Public License version 2.1 or, at your option, any later version.
*/
function load_experiment_info() {
global $dir,$task,$user,$setup;
global $evalset;

View File

@ -1,5 +1,9 @@
<?php
/*
This file is part of moses. Its use is licensed under the GNU Lesser General
Public License version 2.1 or, at your option, any later version.
*/
function setup() {
$setup = file("setup");

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,3 +1,7 @@
/*
This file is part of moses. Its use is licensed under the GNU Lesser General
Public License version 2.1 or, at your option, any later version.
*/
var xmlns="http://www.w3.org/2000/svg";
var RECOMBINED = 0;
var FROM = 1;

View File

@ -1,4 +1,10 @@
<?php
/*
This file is part of moses. Its use is licensed under the GNU Lesser General
Public License version 2.1 or, at your option, any later version.
*/
function sgviz($sentence) {
global $setup,$dir,$id,$set;
?><html><head><title>Search Graph Visualization, Sentence <?php $sentence ?></title>

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
binmode( STDIN, ":utf8" );
binmode( STDOUT, ":utf8" );

View File

@ -1,6 +1,9 @@
#!/usr/bin/env python
# compute Bleu scores with confidence intervals via bootstrap resampling
# written by Ulrich Germann
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
from argparse import ArgumentParser
import math

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
#extract-factors.pl: extract only the desired factors from a factored corpus

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# example
# ./extract-parallel.perl 8 ./coreutils-8.9/src/split "./coreutils-8.9/src/sort --batch-size=253" ./extract ./corpus.5.en ./corpus.5.ar ./align.ar-en.grow-diag-final-and ./extracted 7 --NoFileLimit orientation --GZOutput

View File

@ -4,6 +4,9 @@
# ' ' to delimit nodes (i.e. original lines).
# Some rudimentary sanity checks are done on the fly.
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -7,6 +7,9 @@
# final nodes.
# Note that the output format may not contain any spaces.
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,6 +1,9 @@
#!/usr/bin/env perl
# A very simple script that converts fsal back to fsa format (openfst lattices)
# Ondrej Bojar, bojar@ufal.mff.cuni.cz
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# example
# ~/giza-parallel.perl 10 split ~/workspace/sourceforge/trunk/scripts/training/train-model.perl ar en train align

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
#lopar2pos: extract POSs from LOPAR output

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
#######################

View File

@ -1,20 +1,25 @@
#!/usr/bin/env python
# Written by Michael Denkowski
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# This script parallelizes decoding with simulated post-editing via moses XML
# input (XML entities need to be escaped in tokenization). Memory mapped
# dynamic phrase tables (Ulrich Germann,
# www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc40) and language models
# (Kenneth Heafield,
# http://www.statmt.org/moses/?n=FactoredTraining.BuildingLanguageModel#ntoc19)
# facilitate memory efficient multi process decoding. Input is divided into
# batches, each of which is decoded sequentially. Each batch pre-loads the
# data from previous batches.
"""Parallelize decoding with simulated post-editing via moses XML input.
# To use in tuning, run mert-moses.pl with --sim-pe=SYMAL where SYMAL is the
# alignment from input to references. Specify the number of jobs with
# --decoder-flags="-threads N".
(XML entities need to be escaped in tokenization). Memory mapped
dynamic phrase tables (Ulrich Germann,
www.statmt.org/moses/?n=Moses.AdvancedFeatures#ntoc40) and language models
(Kenneth Heafield,
http://www.statmt.org/moses/?n=FactoredTraining.BuildingLanguageModel#ntoc19)
facilitate memory efficient multi process decoding. Input is divided into
batches, each of which is decoded sequentially. Each batch pre-loads the
data from previous batches.
To use in tuning, run mert-moses.pl with --sim-pe=SYMAL where SYMAL is the
alignment from input to references. Specify the number of jobs with
--decoder-flags="-threads N".
"""
import gzip
import itertools

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
use warnings;

View File

@ -6,6 +6,9 @@ package ph_numbers;
# and decoder input
#
# (c) 2013 TAUS
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;

View File

@ -1,4 +1,7 @@
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
# $Id$
use warnings;

Some files were not shown because too many files have changed in this diff Show More