2008-06-11 14:52:57 +04:00
|
|
|
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
|
|
|
|
|
2015-03-10 00:44:54 +03:00
|
|
|
#pragma once
|
2008-06-11 14:52:57 +04:00
|
|
|
|
|
|
|
#include <list>
|
|
|
|
#include <limits>
|
|
|
|
#include <vector>
|
2008-10-17 01:14:38 +04:00
|
|
|
#include <string>
|
2015-03-25 03:55:39 +03:00
|
|
|
#include <stdint.h>
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2015-03-26 19:37:04 +03:00
|
|
|
#include <boost/shared_ptr.hpp>
|
|
|
|
#include <boost/weak_ptr.hpp>
|
|
|
|
|
2012-06-29 02:29:46 +04:00
|
|
|
//! all the typedefs and enums go here
|
|
|
|
|
2009-10-16 16:22:28 +04:00
|
|
|
|
2008-10-09 03:51:26 +04:00
|
|
|
namespace Moses
|
|
|
|
{
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
// Project-wide preprocessor constants.

// Name of the project as a string literal.
#define PROJECT_NAME "moses"

// Sentence boundary symbols. Each is only defined here if it has not already
// been defined elsewhere (e.g. by the build or by an earlier header).
#ifndef BOS_
#define BOS_ "<s>" //Beginning of sentence symbol
#endif
#ifndef EOS_
#define EOS_ "</s>" //End of sentence symbol
#endif

// Special token strings.
#define UNKNOWN_FACTOR "UNK"
#define EPSILON "*EPS*"

// Expands to the largest value representable by size_t; the expansion needs
// <limits> to be visible at the point of use.
#define NOT_FOUND std::numeric_limits<size_t>::max()
// Upper bound on n-gram size (value: 20).
#define MAX_NGRAM_SIZE 20
|
|
|
|
|
|
|
|
// Compile-time default values. Each constant's name states the setting it is
// the default for; the code that consumes them lives outside this file.

// Cube pruning defaults.
const size_t DEFAULT_CUBE_PRUNING_POP_LIMIT = 1000;
const size_t DEFAULT_CUBE_PRUNING_DIVERSITY = 0;

// Size limits.
const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
const size_t DEFAULT_MAX_TRANS_OPT_SIZE = 5000;
const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;

//#ifdef PT_UG
// setting to std::numeric_limits<size_t>::max() makes the regression test for (deprecated) PhraseDictionaryDynamicSuffixArray fail.
// const size_t DEFAULT_MAX_PHRASE_LENGTH = 100000;
//#else
const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
//#endif
const size_t DEFAULT_MAX_CHART_SPAN = 20;
const size_t ARRAY_SIZE_INCR = 10; //amount by which a phrase gets resized when necessary

// Score and threshold defaults.
const float LOWEST_SCORE = -100.0f;
const float DEFAULT_BEAM_WIDTH = 0.00001f;
const float DEFAULT_EARLY_DISCARDING_THRESHOLD = 0.0f;
const float DEFAULT_TRANSLATION_OPTION_THRESHOLD = 0.0f;

// Default verbosity level.
const size_t DEFAULT_VERBOSE_LEVEL = 1;
|
|
|
|
|
2014-09-30 15:25:36 +04:00
|
|
|
// precision (number of digits) used when outputting floats; the value is 3.
// NOTE(review): this comment used to say "five significant digits", which did
// not match the value -- confirm the intended precision with the code that
// applies it (fixed vs. significant digits cannot be told from this file).
static const size_t PRECISION = 3;
|
|
|
|
|
2015-01-06 12:55:11 +03:00
|
|
|
// tolerance for equality in floating point comparisons
// (float literal, so no implicit double -> float conversion takes place)
const float FLOAT_EPSILON = 0.0001f;
|
|
|
|
|
2008-11-04 21:03:03 +03:00
|
|
|
// enums.
|
2008-06-11 14:52:57 +04:00
|
|
|
// must be 0, 1, 2, ..., unless otherwise stated
|
|
|
|
|
|
|
|
// number of languages; can only be 2 at the moment
const int NUM_LANGUAGES = 2;
|
|
|
|
|
2012-10-19 13:16:45 +04:00
|
|
|
// Looking for MAX_NUM_FACTORS? It's defined by the build system: bjam --max-factors=4
|
2008-06-11 14:52:57 +04:00
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//! Distinguishes source-side factors from target-side factors.
//! Values are written out explicitly; they equal the former implicit values.
enum FactorDirection {
  Input = 0,  //! Source factors
  Output = 1  //! Target factors
};
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//! Kind of decode step (translate or generate, per the enumerator names).
//! Values are written out explicitly; they equal the former implicit values.
enum DecodeType {
  Translate = 0,
  Generate = 1
};
|
|
|
|
|
|
|
|
//! Lexical reordering type. The enum is wrapped in a namespace of the same
//! name, so enumerators are referred to as LexReorderType::Backward etc.
namespace LexReorderType
{
// TODO: explain values (the meaning of each enumerator is not documented here)
// Values are written out explicitly; they equal the former implicit values.
enum LexReorderType {
  Backward = 0,
  Forward = 1,
  Bidirectional = 2,
  Fe = 3,
  F = 4
};
}
|
|
|
|
|
|
|
|
//! Granularity of the orientations that are distinguished for distortion.
//! Enumerators are referred to as DistortionOrientationType::Monotone etc.
namespace DistortionOrientationType
{
// Values are written out explicitly; they equal the former implicit values.
enum DistortionOrientationOptions {
  Monotone = 0, //distinguish only between monotone and non-monotone as possible orientations
  Msd = 1       //further separate non-monotone into swapped and discontinuous
};
}
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//! Identifies the type of input (see the enumerator names).
//! The numeric values are fixed explicitly; value 4 is currently unused
//! because its enumerator is commented out.
enum InputTypeEnum {
  SentenceInput = 0,
  ConfusionNetworkInput = 1,
  WordLatticeInput = 2,
  TreeInputType = 3,
  //,WordLatticeInput2 = 4,
  TabbedSentenceInput = 5,
  ForestInputType = 6
};
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//! Modes for handling XML in the input (see the enumerator names).
//! NOTE(review): the exact semantics of each mode are defined by the code
//! that consumes this enum, which is outside this file.
enum XmlInputType {
  XmlPassThrough = 0,
  XmlIgnore = 1,
  XmlExclusive = 2,
  XmlInclusive = 3,
  XmlConstraint = 4
};
|
|
|
|
|
2011-02-24 16:14:42 +03:00
|
|
|
//! Selects between a "best" and an "all" dictionary lookup mode.
//! NOTE(review): presumably best match vs. all matches -- confirm against
//! the callers, which are outside this file.
enum DictionaryFind {
  Best = 0,
  All = 1
};
|
|
|
|
|
2015-02-12 16:03:26 +03:00
|
|
|
// Note: StaticData uses SearchAlgorithm to determine whether the translation
|
|
|
|
// model is phrase-based or syntax-based. If you add a syntax-based search
|
|
|
|
// algorithm here then you should also update StaticData::IsSyntax().
|
2011-02-24 16:14:42 +03:00
|
|
|
//! Available search algorithms.
//! The numeric values are fixed explicitly; value 2 is currently unused
//! because its enumerator (CubeGrowing) is commented out.
enum SearchAlgorithm {
  Normal = 0,
  CubePruning = 1,
  //,CubeGrowing = 2
  CYKPlus = 3,
  NormalBatch = 4,
  ChartIncremental = 5,
  SyntaxS2T = 6,
  SyntaxT2S = 7,
  SyntaxT2S_SCFG = 8,
  SyntaxF2S = 9,
  DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
};
|
|
|
|
|
|
|
|
//! Policy for overlapping source labels: add, replace or discard
//! (per the enumerator names).
enum SourceLabelOverlap {
  SourceLabelOverlapAdd = 0,
  SourceLabelOverlapReplace = 1,
  SourceLabelOverlapDiscard = 2
};
|
2011-02-24 16:14:42 +03:00
|
|
|
|
2011-08-26 06:37:52 +04:00
|
|
|
//! Sort order for word alignments: unsorted, or in target order
//! (per the enumerator names).
enum WordAlignmentSort {
  NoSort = 0,
  TargetOrder = 1
};
|
|
|
|
|
2013-05-29 21:16:15 +04:00
|
|
|
//! Identifies a format: Moses or Hiero (per the enumerator names).
//! Values are written out explicitly; they equal the former implicit values.
enum FormatType {
  MosesFormat = 0,
  HieroFormat = 1
};
|
|
|
|
|
2014-11-04 16:13:56 +03:00
|
|
|
//! Parsing algorithm choices for S2T (per the enum and enumerator names).
//! Values are written out explicitly; they equal the former implicit values.
enum S2TParsingAlgorithm {
  RecursiveCYKPlus = 0,
  Scope3 = 1
};
|
|
|
|
|
2008-06-11 14:52:57 +04:00
|
|
|
// typedef
|
|
|
|
//! Factor type, represented as a size_t.
typedef size_t FactorType;

//! A sequence of float scores.
typedef std::vector<float> Scores;
//! A sequence of word alignments, each stored as a string.
typedef std::vector<std::string> WordAlignments;

//! A list of factor types.
typedef std::vector<FactorType> FactorList;

//! Pairs a vector of pointers to const strings (first) with its word
//! alignments (second). The pointers are raw and non-owning as far as this
//! typedef is concerned.
typedef std::pair<std::vector<std::string const*>,WordAlignments > StringWordAlignmentCand;
|
2008-10-09 03:51:26 +04:00
|
|
|
|
2015-03-26 19:37:04 +03:00
|
|
|
class TranslationTask;
|
|
|
|
typedef boost::shared_ptr<TranslationTask> ttasksptr;
|
|
|
|
typedef boost::weak_ptr<TranslationTask> ttaskwptr;
|
2008-10-09 03:51:26 +04:00
|
|
|
}
|
2015-03-10 00:44:54 +03:00
|
|
|
|