Merge branch 'master' of https://github.com/moses-smt/mosesdecoder into mmt-dev

Conflicts:
	moses/Syntax/F2S/Manager-inl.h
	moses/TranslationModel/UG/mmsapt.cpp
This commit is contained in:
Ulrich Germann 2015-04-26 02:12:16 +01:00
commit 0d72cdd72c
222 changed files with 2014 additions and 538 deletions

View File

@ -109,14 +109,17 @@ size_t lookup( string query )
return suffixArray.Count( queryString );
}
vector<string> tokenize( const char input[] )
// Duplicate of definition in util/tokenize.hh.
// TODO: Can we de-duplicate this? At the time of writing biconcor does not
// use util at all.
vector<string> tokenize(const char input[])
{
vector< string > token;
bool betweenWords = true;
int start=0;
int i=0;
for(; input[i] != '\0'; i++) {
bool isSpace = (input[i] == ' ' || input[i] == '\t');
int i;
for(i = 0; input[i] != '\0'; i++) {
const bool isSpace = (input[i] == ' ' || input[i] == '\t');
if (!isSpace && betweenWords) {
start = i;

View File

@ -45,8 +45,8 @@ struct LMClient {
exit(1);
}
bzero((char *)&server, sizeof(server));
bcopy(hp->h_addr, (char *)&server.sin_addr, hp->h_length);
memset(&server, '\0', sizeof(server));
memcpy((char *)&server.sin_addr, hp->h_addr, hp->h_length);
server.sin_family = hp->h_addrtype;
server.sin_port = htons(port);

46
contrib/mada/qsub-madamira.perl Executable file
View File

@ -0,0 +1,46 @@
#!/usr/bin/env perl
#
# qsub-madamira.perl: generate one PBS job script per file in a directory of
# split corpus chunks and submit each with qsub.  Every job runs MADAMIRA
# (Arabic morphological analysis) on its chunk, writing results to a sibling
# "out2" directory.  Job scripts are kept in a sibling "qsub" directory.
#
# Usage: qsub-madamira.perl <split-dir>
use warnings;
use strict;
use File::Slurp;
use File::Basename;
use Cwd 'abs_path';

# The directory holding the split input files is the sole, required argument.
die "Usage: $0 <split-dir>\n" unless defined $ARGV[0];
my $splitDir = abs_path($ARGV[0]);

my @files = read_dir($splitDir);

# Job scripts live in a "qsub" directory next to the split directory.
my $qsubDir = dirname($splitDir) . "/qsub";
print STDERR "qsubDir=$qsubDir\n";
# List-form system bypasses the shell, so paths with spaces or metacharacters
# cannot be misinterpreted; also check the exit status instead of ignoring it.
system('mkdir', '-p', $qsubDir) == 0
  or die "Cannot create directory $qsubDir\n";

# MADAMIRA writes its output into a sibling "out2" directory.
my $out2Dir = dirname($splitDir) . "/out2";
print STDERR "out2Dir=$out2Dir\n";
system('mkdir', '-p', $out2Dir) == 0
  or die "Cannot create directory $out2Dir\n";

for my $file ( @files ) {
  print STDERR "$file ";
  my $qsubFile = "$qsubDir/$file.sh";
  # Three-argument open with a lexical handle; abort on failure rather than
  # silently submitting an empty job script.
  open(my $runFh, '>', $qsubFile)
    or die "Cannot write $qsubFile: $!\n";
  print $runFh "#!/usr/bin/env bash\n"
    ."#PBS -d/scratch/hh65/workspace/experiment/ar-en \n"
    ."#PBS -l mem=5gb \n\n"
    ."export PATH=\"/scratch/statmt/bin:/share/apps/NYUAD/perl/gcc_4.9.1/5.20.1/bin:/share/apps/NYUAD/jdk/1.8.0_31/bin:/share/apps/NYUAD/zlib/gcc_4.9.1/1.2.8/bin:/share/apps/NYUAD/cmake/gcc_4.9.1/3.1.0-rc3/bin:/share/apps/NYUAD/boost/gcc_4.9.1/openmpi_1.8.3/1.57.0/bin:/share/apps/NYUAD/openmpi/gcc_4.9.1/1.8.3/bin:/share/apps/NYUAD/python/gcc_4.9.1/2.7.9/bin:/share/apps/NYUAD/gcc/binutils/2.21/el6/bin:/share/apps/NYUAD/gcc/gcc/4.9.1/el6/bin:/usr/lib64/qt-3.3/bin:/usr/local/bin:/bin:/usr/bin:/usr/local/sbin:/usr/sbin:/sbin:/opt/bio/ncbi/bin:/opt/bio/mpiblast/bin:/opt/bio/EMBOSS/bin:/opt/bio/clustalw/bin:/opt/bio/tcoffee/bin:/opt/bio/hmmer/bin:/opt/bio/phylip/exe:/opt/bio/mrbayes:/opt/bio/fasta:/opt/bio/glimmer/bin:/opt/bio/glimmer/scripts:/opt/bio/gromacs/bin:/opt/bio/gmap/bin:/opt/bio/tigr/bin:/opt/bio/autodocksuite/bin:/opt/bio/wgs/bin:/opt/ganglia/bin:/opt/ganglia/sbin:/opt/bin:/usr/java/latest/bin:/opt/pdsh/bin:/opt/rocks/bin:/opt/rocks/sbin:/opt/torque/bin:/opt/torque/sbin:/home/hh65/bin:/home/hh65/bin\" \n"
    ."module load NYUAD/2.0 \n"
    ."module load gcc python/2.7.9 openmpi/1.8.3 boost cmake zlib jdk perl expat \n"
    ."cd /scratch/statmt/MADAMIRA-release-20140709-1.0 \n";
  print $runFh "java -Xmx2500m -Xms2500m -XX:NewRatio=3 -jar /scratch/statmt/MADAMIRA-release-20140709-1.0/MADAMIRA.jar "
    ."-rawinput $splitDir/$file -rawoutdir $out2Dir -rawconfig /scratch/statmt/MADAMIRA-release-20140709-1.0/samples/sampleConfigFile.xml \n";
  # Buffered write errors surface at close; check it before submitting.
  close($runFh) or die "Cannot close $qsubFile: $!\n";
  # Submit the job; warn (best-effort, like the original) if qsub fails.
  system('qsub', $qsubFile) == 0
    or warn "qsub failed for $qsubFile\n";
}

View File

@ -46,6 +46,7 @@ namespace mpi = boost::mpi;
#include "moses/FF/PhrasePairFeature.h"
#include "moses/FF/WordPenaltyProducer.h"
#include "moses/LM/Base.h"
#include "util/random.hh"
using namespace Mira;
using namespace std;
@ -54,6 +55,7 @@ namespace po = boost::program_options;
int main(int argc, char** argv)
{
util::rand_init();
size_t rank = 0;
size_t size = 1;
#ifdef MPI_ENABLE

View File

@ -25,6 +25,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/Word.h"
#include "moses/FF/FeatureFunction.h"
#include "Decoder.h"
#include "util/random.hh"
typedef std::map<const Moses::FeatureFunction*, std::vector< float > > ProducerWeightMap;
typedef std::pair<const Moses::FeatureFunction*, std::vector< float > > ProducerWeightPair;
@ -37,8 +38,7 @@ template <class T> bool from_string(T& t, const std::string& s, std::ios_base& (
struct RandomIndex {
ptrdiff_t operator()(ptrdiff_t max) {
srand(time(0)); // Initialize random number generator with current time.
return static_cast<ptrdiff_t> (rand() % max);
return util::rand_excl(max);
}
};

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="manual-label" InternalType="Console">
<Plugins>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="manual-label">
@ -14,6 +31,8 @@
<File Name="Main.cpp"/>
<File Name="Main.h"/>
</VirtualDirectory>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
<Settings Type="Executable">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -33,6 +52,8 @@
<Linker Options="" Required="yes">
<LibraryPath Value="/Users/hieu/workspace/github/mosesdecoder/boost/lib64"/>
<Library Value="boost_program_options"/>
<Library Value="boost_filesystem"/>
<Library Value="boost_system"/>
</Linker>
<ResourceCompiler Options="" Required="no"/>
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
@ -107,6 +128,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Debug"/>
<Dependencies Name="Release"/>
</CodeLite_Project>

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="moses-cmd" InternalType="Console">
<Plugins>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src"/>
@ -9,6 +26,14 @@
<File Name="../../../moses-cmd/MainVW.cpp" ExcludeProjConfig="Debug"/>
<File Name="../../../moses-cmd/MainVW.h" ExcludeProjConfig="Debug"/>
</VirtualDirectory>
<Dependencies Name="Release"/>
<Dependencies Name="Debug">
<Project Name="OnDiskPt"/>
<Project Name="lm"/>
<Project Name="moses"/>
<Project Name="search"/>
<Project Name="util"/>
</Dependencies>
<Settings Type="Executable">
<GlobalSettings>
<Compiler Options="" C_Options="" Assembler="">
@ -53,7 +78,7 @@
<Library Value="rt"/>
</Linker>
<ResourceCompiler Options="" Required="no"/>
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="-f /var/folders/c4/2p48fcwx611dmkdqq44mbblm0000gn/T/ZVd8xvuJAR.ini -i /Users/hieu/workspace/github/moses-regression-tests/tests/phrase.basic-surface-binptable.oldformat/to-translate.txt" UseSeparateDebugArgs="yes" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
<Environment EnvVarSetName="&lt;Use Defaults&gt;" DbgSetName="&lt;Use Defaults&gt;">
<![CDATA[]]>
</Environment>
@ -125,12 +150,4 @@
</Completion>
</Configuration>
</Settings>
<Dependencies Name="Release"/>
<Dependencies Name="Debug">
<Project Name="OnDiskPt"/>
<Project Name="lm"/>
<Project Name="moses"/>
<Project Name="search"/>
<Project Name="util"/>
</Dependencies>
</CodeLite_Project>

View File

@ -474,8 +474,6 @@
<File Name="../../../moses/FF/DistortionScoreProducer.h"/>
<File Name="../../../moses/FF/DynamicCacheBasedLanguageModel.cpp"/>
<File Name="../../../moses/FF/DynamicCacheBasedLanguageModel.h"/>
<File Name="../../../moses/FF/ExternalFeature.cpp"/>
<File Name="../../../moses/FF/ExternalFeature.h"/>
<File Name="../../../moses/FF/Factory.cpp"/>
<File Name="../../../moses/FF/Factory.h"/>
<File Name="../../../moses/FF/FeatureFunction.cpp"/>

View File

@ -42,6 +42,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "RelativeEntropyCalc.h"
#include "LexicalReordering.h"
#include "LexicalReorderingState.h"
#include "util/random.hh"
#ifdef HAVE_PROTOBUF
#include "hypergraph.pb.h"
@ -205,7 +206,7 @@ int main(int argc, char** argv)
//initialise random numbers
srand(time(NULL));
rand_init();
// set up read/writing class
IOWrapper* ioWrapper = GetIOWrapper(staticData);

View File

@ -536,7 +536,7 @@ public:
{
// should the score breakdown be reported in a more structured manner?
ostringstream buf;
path.GetScoreBreakdown().OutputAllFeatureScores(buf);
path.GetScoreBreakdown()->OutputAllFeatureScores(buf);
nBestXMLItem["fvals"] = xmlrpc_c::value_string(buf.str());
}

View File

@ -17,6 +17,7 @@
#include "util/exception.hh"
#include "util/file_piece.hh"
#include "util/random.hh"
#include "util/tokenize_piece.hh"
#include "util/string_piece.hh"
#include "FeatureDataIterator.h"
@ -286,7 +287,7 @@ void Data::createShards(size_t shard_count, float shard_size, const string& scor
} else {
//create shards by randomly sampling
for (size_t i = 0; i < floor(shard_size+0.5); ++i) {
shard_contents.push_back(rand() % data_size);
shard_contents.push_back(util::rand_excl(data_size));
}
}

View File

@ -13,6 +13,8 @@
#include <iostream>
#include <string>
#include "util/unistd.hh"
#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
#include <ext/stdio_filebuf.h>

View File

@ -40,28 +40,3 @@ inputfilestream::~inputfilestream()
void inputfilestream::close()
{
}
outputfilestream::outputfilestream(const std::string &filePath)
: std::ostream(0), m_streambuf(0), m_is_good(false)
{
// check if file is readable
std::filebuf* fb = new std::filebuf();
m_is_good = (fb->open(filePath.c_str(), std::ios::out) != NULL);
if (IsGzipFile(filePath)) {
throw runtime_error("Output to a zipped file not supported!");
} else {
m_streambuf = fb;
}
this->init(m_streambuf);
}
outputfilestream::~outputfilestream()
{
delete m_streambuf;
m_streambuf = 0;
}
void outputfilestream::close()
{
}

View File

@ -22,20 +22,4 @@ public:
void close();
};
class outputfilestream : public std::ostream
{
protected:
std::streambuf *m_streambuf;
bool m_is_good;
public:
explicit outputfilestream(const std::string &filePath);
virtual ~outputfilestream();
bool good() const {
return m_is_good;
}
void close();
};
#endif // MERT_FILE_STREAM_H_

View File

@ -1,6 +1,9 @@
#include <iostream>
#include "util/tokenize_piece.hh"
#include "ForestRescore.h"
#include "MiraFeatureVector.h"
#define BOOST_TEST_MODULE MertForestRescore
#include <boost/test/unit_test.hpp>
@ -10,8 +13,7 @@
using namespace std;
using namespace MosesTuning;
BOOST_AUTO_TEST_CASE(viterbi_simple_lattice)
{
BOOST_AUTO_TEST_CASE(viterbi_simple_lattice) {
Vocab vocab;
WordVec words;
string wordStrings[] =
@ -242,5 +244,101 @@ BOOST_AUTO_TEST_CASE(viterbi_3branch_lattice)
BOOST_CHECK_EQUAL(6, hopeHypo.bleuStats[8]);
}
// Regression test on a real decoder hypergraph: load mert/hgtest/0.gz from
// disk, prune it to a fixed edge budget under a realistic weight vector, run
// Viterbi search, and compare both the 1-best surface string and every
// per-feature score against known-good values.
// NOTE(review): requires the binary fixture mert/hgtest/0.gz relative to the
// working directory — presumably the test is run from the repository root.
BOOST_AUTO_TEST_CASE(viterbi_full_hypergraph) {
Vocab vocab;
// Reference translation; index 0 matches the sentence id used below.
ReferenceSet references;
references.AddLine(0,"in addition to EU support for businesses , also the administration of national business support will be concentrated in four Centres for Economic Development , Transport and Environment ( ELY Centres ) , starting from mid @-@ September .",vocab);
// Load the hypergraph from the gzipped fixture file.
Graph graph(vocab);
util::scoped_fd fd(util::OpenReadOrThrow("mert/hgtest/0.gz"));
util::FilePiece file(fd.release());
ReadGraph(file,graph);
// Weights for pruning: one entry per dense feature of the model that
// produced the hypergraph.  Values mirror a realistic tuned configuration.
SparseVector weights;
weights.set("OpSequenceModel0_1",0.011187);
weights.set("OpSequenceModel0_2",-0.002797);
weights.set("OpSequenceModel0_3",0.002797);
weights.set("OpSequenceModel0_4",-0.000140);
weights.set("OpSequenceModel0_5",0.004195);
weights.set("Distortion0",0.041952);
weights.set("PhrasePenalty0",0.027968);
weights.set("WordPenalty0",-0.139841);
weights.set("UnknownWordPenalty0",1.000000);
weights.set("LM0",0.069920);
weights.set("LexicalReordering0_1",0.041952);
weights.set("LexicalReordering0_2",0.041952);
weights.set("LexicalReordering0_3",0.041952);
weights.set("LexicalReordering0_4",0.041952);
weights.set("LexicalReordering0_5",0.041952);
weights.set("LexicalReordering0_6",0.041952);
weights.set("LexicalReordering0_7",0.041952);
weights.set("LexicalReordering0_8",0.041952);
weights.set("TranslationModel0_1",0.027968);
weights.set("TranslationModel0_2",0.027968);
weights.set("TranslationModel0_3",0.027968);
weights.set("TranslationModel0_4",0.027968);
weights.set("TranslationModel0_5",0.027968);
weights.set("TranslationModel0_6",0.027968);
weights.set("TranslationModel0_7",0.027968);
weights.set("TranslationModel0_8",0.027968);
weights.set("TranslationModel0_9",0.027968);
weights.set("TranslationModel0_10",0.027968);
weights.set("TranslationModel0_11",0.027968);
weights.set("TranslationModel0_12",0.027968);
weights.set("TranslationModel0_13",0.027968);
// Prune to at most 500 edges; the expected values below depend on this cap.
size_t edgeCount = 500;
boost::shared_ptr<Graph> prunedGraph;
prunedGraph.reset(new Graph(vocab));
graph.Prune(prunedGraph.get(), weights, edgeCount);
vector<ValType> bg(9);
HgHypothesis bestHypo;
// Viterbi search for the best hypothesis on the pruned graph.
Viterbi(*prunedGraph, weights, 0, references, 0, bg, &bestHypo);
// Check the 1-best surface string token by token.
string expectedStr = "<s> the EU matters , but also the national matters management focus since mid @-@ September four ely @-@ centre . </s>";
util::TokenIter<util::SingleCharacter, true> expected(expectedStr, util::SingleCharacter(' '));
for (size_t i = 0; i < bestHypo.text.size(); ++i) {
//cerr << bestHypo.text[i]->first << " ";
BOOST_CHECK_EQUAL(*expected,bestHypo.text[i]->first);
++expected;
}
// Both sequences must be exhausted: no extra expected tokens left over.
BOOST_CHECK(!expected);
//cerr << endl;
// Check every accumulated feature score of the best hypothesis against
// known-good values (relative tolerance of 0.001 percent).
BOOST_CHECK_CLOSE(-80.062,bestHypo.featureVector.get("OpSequenceModel0_1"), 0.001);
BOOST_CHECK_CLOSE(2,bestHypo.featureVector.get("OpSequenceModel0_2"), 0.001);
BOOST_CHECK_CLOSE(2,bestHypo.featureVector.get("OpSequenceModel0_3"), 0.001);
BOOST_CHECK_CLOSE(3,bestHypo.featureVector.get("OpSequenceModel0_4"), 0.001);
BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("OpSequenceModel0_5"), 0.001);
BOOST_CHECK_CLOSE(-6,bestHypo.featureVector.get("Distortion0"), 0.001);
BOOST_CHECK_CLOSE(14,bestHypo.featureVector.get("PhrasePenalty0"), 0.001);
BOOST_CHECK_CLOSE(-20,bestHypo.featureVector.get("WordPenalty0"), 0.001);
BOOST_CHECK_CLOSE(-100,bestHypo.featureVector.get("UnknownWordPenalty0"), 0.001);
BOOST_CHECK_CLOSE(-126.616,bestHypo.featureVector.get("LM0"), 0.001);
BOOST_CHECK_CLOSE(-5.2238,bestHypo.featureVector.get("LexicalReordering0_1"), 0.001);
BOOST_CHECK_CLOSE(-0.29515,bestHypo.featureVector.get("LexicalReordering0_2"), 0.001);
BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("LexicalReordering0_3"), 0.001);
BOOST_CHECK_CLOSE(-0.470004,bestHypo.featureVector.get("LexicalReordering0_4"), 0.001);
BOOST_CHECK_CLOSE(-9.28267,bestHypo.featureVector.get("LexicalReordering0_5"), 0.001);
BOOST_CHECK_CLOSE(-0.470004,bestHypo.featureVector.get("LexicalReordering0_6"), 0.001);
BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("LexicalReordering0_7"), 0.001);
BOOST_CHECK_CLOSE(-0.402678,bestHypo.featureVector.get("LexicalReordering0_8"), 0.001);
BOOST_CHECK_CLOSE(-54.3119,bestHypo.featureVector.get("TranslationModel0_1"), 0.001);
BOOST_CHECK_CLOSE(-62.2619,bestHypo.featureVector.get("TranslationModel0_2"), 0.001);
BOOST_CHECK_CLOSE(-23.8782,bestHypo.featureVector.get("TranslationModel0_3"), 0.001);
BOOST_CHECK_CLOSE(-25.1626,bestHypo.featureVector.get("TranslationModel0_4"), 0.001);
BOOST_CHECK_CLOSE(12.9986,bestHypo.featureVector.get("TranslationModel0_5"), 0.001);
BOOST_CHECK_CLOSE(3.99959,bestHypo.featureVector.get("TranslationModel0_6"), 0.001);
BOOST_CHECK_CLOSE(1.99979,bestHypo.featureVector.get("TranslationModel0_7"), 0.001);
BOOST_CHECK_CLOSE(1.99979,bestHypo.featureVector.get("TranslationModel0_8"), 0.001);
BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("TranslationModel0_9"), 0.001);
BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("TranslationModel0_10"), 0.001);
BOOST_CHECK_CLOSE(0,bestHypo.featureVector.get("TranslationModel0_11"), 0.001);
BOOST_CHECK_CLOSE(0.999896,bestHypo.featureVector.get("TranslationModel0_12"), 0.001);
BOOST_CHECK_CLOSE(7.99917,bestHypo.featureVector.get("TranslationModel0_13"), 0.001);
}

View File

@ -18,6 +18,7 @@
#include "ScoreStats.h"
#include "Util.h"
#include "util/unistd.hh"
using namespace std;
@ -25,7 +26,7 @@ namespace MosesTuning
{
// Meteor supported
#if defined(__GLIBCXX__) || defined(__GLIBCPP__)
#if (defined(__GLIBCXX__) || defined(__GLIBCPP__)) && !defined(_WIN32)
// for clarity
#define CHILD_STDIN_READ pipefds_input[0]

View File

@ -3,6 +3,7 @@
#include <cmath>
#include <cstdlib>
#include "util/exception.hh"
#include "util/random.hh"
#include "FeatureStats.h"
#include "Optimizer.h"
@ -57,10 +58,8 @@ void Point::Randomize()
UTIL_THROW_IF(m_min.size() != Point::m_dim, util::Exception, "Error");
UTIL_THROW_IF(m_max.size() != Point::m_dim, util::Exception, "Error");
for (unsigned int i = 0; i < size(); i++) {
operator[](i) = m_min[i] +
static_cast<float>(random()) / static_cast<float>(RAND_MAX) * (m_max[i] - m_min[i]);
}
for (unsigned int i = 0; i < size(); i++)
operator[](i) = util::rand_incl(m_min[i], m_max[i]);
}
double Point::operator*(const FeatureStats& F) const

View File

@ -5,11 +5,8 @@
- check that --pairwise-ranked is compatible with all optimization metrics
- Replace the standard rand() currently used in MERT and PRO with better
random generators such as Boost's random generators (e.g., boost::mt19937).
- create a Random class to hide the details, i.e., how to generate
random numbers, which allows us to use custom random generators more
easily.
- Use better random generators in util/random.cc, e.g. boost::mt19937.
- Support plugging of custom random generators.
Pros:
- In MERT, you might want to use the random restarting technique to avoid

View File

@ -11,7 +11,20 @@ using namespace MosesTuning;
BOOST_AUTO_TEST_CASE(timer_basic_test)
{
Timer timer;
const int sleep_time_microsec = 40; // ad-hoc microseconds to pass unit tests.
// Sleep time. The test will sleep for this number of microseconds, and
// expect the elapsed time to be noticeable.
// Keep this number low to avoid wasting test time sleeping, but at least as
// high as the Boost timer's resolution. Tests must pass consistently, not
// just on lucky runs.
#if defined(WIN32)
// Timer resolution on Windows seems to be a millisecond. Anything less and
// the test fails consistently.
const int sleep_time_microsec = 1000;
#else
// Unix-like systems seem to have more fine-grained clocks.
const int sleep_time_microsec = 40;
#endif
timer.start();
BOOST_REQUIRE(timer.is_running());

View File

@ -1,3 +1,4 @@
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
@ -15,6 +16,7 @@
#include "Timer.h"
#include "Util.h"
#include "Data.h"
#include "util/random.hh"
using namespace std;
using namespace MosesTuning;
@ -91,17 +93,15 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_i
if (bootstrap) {
vector<float> scores;
for (int i = 0; i < bootstrap; ++i) {
// TODO: Use smart pointer for exceptional-safety.
ScoreData* scoredata = new ScoreData(g_scorer);
ScoreData scoredata(g_scorer);
for (int j = 0; j < n; ++j) {
int randomIndex = random() % n;
scoredata->add(entries[randomIndex], j);
const int randomIndex = util::rand_excl(n);
scoredata.add(entries[randomIndex], j);
}
g_scorer->setScoreData(scoredata);
g_scorer->setScoreData(&scoredata);
candidates_t candidates(n, 0);
float score = g_scorer->score(candidates);
scores.push_back(score);
delete scoredata;
}
float avg = average(scores);
@ -121,15 +121,13 @@ void EvaluatorUtil::evaluate(const string& candFile, int bootstrap, bool nbest_i
cout.precision(4);
cout << avg << "\t[" << lb << "," << rb << "]" << endl;
} else {
// TODO: Use smart pointer for exceptional-safety.
ScoreData* scoredata = new ScoreData(g_scorer);
ScoreData scoredata(g_scorer);
for (int sid = 0; sid < n; ++sid) {
scoredata->add(entries[sid], sid);
scoredata.add(entries[sid], sid);
}
g_scorer->setScoreData(scoredata);
g_scorer->setScoreData(&scoredata);
candidates_t candidates(n, 0);
float score = g_scorer->score(candidates);
delete scoredata;
if (g_has_more_files) cout << candFile << "\t";
if (g_has_more_scorers) cout << g_scorer->getName() << "\t";
@ -287,10 +285,10 @@ void InitSeed(const ProgramOption *opt)
{
if (opt->has_seed) {
cerr << "Seeding random numbers with " << opt->seed << endl;
srandom(opt->seed);
util::rand_init(opt->seed);
} else {
cerr << "Seeding random numbers with system clock " << endl;
srandom(time(NULL));
util::rand_init();
}
}

BIN
mert/hgtest/0.gz Normal file

Binary file not shown.

View File

@ -40,6 +40,7 @@ de recherches du Canada
#include <boost/scoped_ptr.hpp>
#include "util/exception.hh"
#include "util/random.hh"
#include "BleuScorer.h"
#include "HopeFearDecoder.h"
@ -122,10 +123,10 @@ int main(int argc, char** argv)
if (vm.count("random-seed")) {
cerr << "Initialising random seed to " << seed << endl;
srand(seed);
util::rand_init(seed);
} else {
cerr << "Initialising random seed from system clock" << endl;
srand(time(NULL));
util::rand_init();
}
// Initialize weights

View File

@ -24,6 +24,7 @@
#include "Types.h"
#include "Timer.h"
#include "Util.h"
#include "util/random.hh"
#include "moses/ThreadPool.h"
@ -289,10 +290,10 @@ int main(int argc, char **argv)
if (option.has_seed) {
cerr << "Seeding random numbers with " << option.seed << endl;
srandom(option.seed);
util::rand_init(option.seed);
} else {
cerr << "Seeding random numbers with system clock " << endl;
srandom(time(NULL));
util::rand_init();
}
if (option.sparse_weights_file.size()) ++option.pdim;

View File

@ -43,6 +43,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "ScoreDataIterator.h"
#include "BleuScorer.h"
#include "Util.h"
#include "util/random.hh"
using namespace std;
using namespace MosesTuning;
@ -141,10 +142,10 @@ int main(int argc, char** argv)
if (vm.count("random-seed")) {
cerr << "Initialising random seed to " << seed << endl;
srand(seed);
util::rand_init(seed);
} else {
cerr << "Initialising random seed from system clock" << endl;
srand(time(NULL));
util::rand_init();
}
if (scoreFiles.size() == 0 || featureFiles.size() == 0) {
@ -211,11 +212,11 @@ int main(int argc, char** argv)
vector<float> scores;
size_t n_translations = hypotheses.size();
for(size_t i=0; i<n_candidates; i++) {
size_t rand1 = rand() % n_translations;
size_t rand1 = util::rand_excl(n_translations);
pair<size_t,size_t> translation1 = hypotheses[rand1];
float bleu1 = smoothedSentenceBleu(scoreDataIters[translation1.first]->operator[](translation1.second), bleuSmoothing, smoothBP);
size_t rand2 = rand() % n_translations;
size_t rand2 = util::rand_excl(n_translations);
pair<size_t,size_t> translation2 = hypotheses[rand2];
float bleu2 = smoothedSentenceBleu(scoreDataIters[translation2.first]->operator[](translation2.second), bleuSmoothing, smoothBP);

View File

@ -45,6 +45,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "moses/FF/StatefulFeatureFunction.h"
#include "moses/FF/StatelessFeatureFunction.h"
#include "moses/TrainingTask.h"
#include "util/random.hh"
#ifdef HAVE_PROTOBUF
#include "hypergraph.pb.h"
@ -117,7 +118,7 @@ int main(int argc, char** argv)
//initialise random numbers
srand(time(NULL));
util::rand_init();
// set up read/writing class
IFVERBOSE(1) {

View File

@ -27,6 +27,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <sstream>
#include <vector>
#include "util/random.hh"
#include "util/usage.hh"
#ifdef WIN32
@ -91,7 +92,7 @@ SimpleTranslationInterface::SimpleTranslationInterface(const string &mosesIni):
exit(1);
}
srand(time(NULL));
util::rand_init();
}
@ -185,7 +186,7 @@ batch_run()
const StaticData& staticData = StaticData::Instance();
//initialise random numbers
srand(time(NULL));
util::rand_init();
IFVERBOSE(1) PrintUserTime("Created input-output object");

View File

@ -13,8 +13,11 @@
#include "LexicalReordering.h"
#include "SparseReordering.h"
#include <boost/algorithm/string/predicate.hpp>
using namespace std;
using namespace boost::algorithm;
namespace Moses
{
@ -57,6 +60,7 @@ const std::string& SparseReorderingFeatureKey::Name (const string& wordListId)
SparseReordering::SparseReordering(const map<string,string>& config, const LexicalReordering* producer)
: m_producer(producer)
, m_useWeightMap(false)
{
static const string kSource= "source";
static const string kTarget = "target";
@ -80,6 +84,14 @@ SparseReordering::SparseReordering(const map<string,string>& config, const Lexic
} else {
UTIL_THROW(util::Exception, "Sparse reordering requires source or target, not " << fields[1]);
}
} else if (fields[0] == "weights") {
ReadWeightMap(i->second);
m_useWeightMap = true;
for (int reoType=0; reoType<=LRModel::MAX; ++reoType) {
ostringstream buf;
buf << reoType;
m_featureMap2.push_back(m_producer->GetFeatureName(buf.str()));
}
} else if (fields[0] == "phrase") {
m_usePhrase = true;
@ -175,7 +187,16 @@ void SparseReordering::AddFeatures(
SparseReorderingFeatureKey key(id, type, wordFactor, false, position, side, reoType);
FeatureMap::const_iterator fmi = m_featureMap.find(key);
assert(fmi != m_featureMap.end());
scores->SparsePlusEquals(fmi->second, 1.0);
if (m_useWeightMap) {
WeightMap::const_iterator wmi = m_weightMap.find(fmi->second.name());
if (wmi != m_weightMap.end()) {
if (wmi->second != 0) {
scores->SparsePlusEquals(m_featureMap2[reoType], wmi->second);
}
}
} else {
scores->SparsePlusEquals(fmi->second, 1.0);
}
}
for (size_t id = 0; id < clusterMaps->size(); ++id) {
@ -186,7 +207,16 @@ void SparseReordering::AddFeatures(
SparseReorderingFeatureKey key(id, type, clusterIter->second, true, position, side, reoType);
FeatureMap::const_iterator fmi = m_featureMap.find(key);
assert(fmi != m_featureMap.end());
scores->SparsePlusEquals(fmi->second, 1.0);
if (m_useWeightMap) {
WeightMap::const_iterator wmi = m_weightMap.find(fmi->second.name());
if (wmi != m_weightMap.end()) {
if (wmi->second != 0) {
scores->SparsePlusEquals(m_featureMap2[reoType], wmi->second);
}
}
} else {
scores->SparsePlusEquals(fmi->second, 1.0);
}
}
}
@ -256,5 +286,29 @@ void SparseReordering::CopyScores(
}
void SparseReordering::ReadWeightMap(const string& filename)
{
util::FilePiece file(filename.c_str());
StringPiece line;
while (true) {
try {
line = file.ReadLine();
} catch (const util::EndOfFileException &e) {
break;
}
util::TokenIter<util::SingleCharacter, true> lineIter(line,util::SingleCharacter(' '));
UTIL_THROW_IF2(!lineIter, "Malformed weight line: '" << line << "'");
const std::string& name = lineIter->as_string();
++lineIter;
UTIL_THROW_IF2(!lineIter, "Malformed weight line: '" << line << "'");
float weight = Moses::Scan<float>(lineIter->as_string());
std::pair< WeightMap::iterator, bool> inserted = m_weightMap.insert( std::make_pair(name, weight) );
UTIL_THROW_IF2(!inserted.second, "Duplicate weight: '" << name << "'");
}
}
} //namespace

View File

@ -112,10 +112,16 @@ private:
typedef boost::unordered_map<SparseReorderingFeatureKey, FName, HashSparseReorderingFeatureKey, EqualsSparseReorderingFeatureKey> FeatureMap;
FeatureMap m_featureMap;
typedef boost::unordered_map<std::string, float> WeightMap;
WeightMap m_weightMap;
bool m_useWeightMap;
std::vector<FName> m_featureMap2;
void ReadWordList(const std::string& filename, const std::string& id,
SparseReorderingFeatureKey::Side side, std::vector<WordList>* pWordLists);
void ReadClusterMap(const std::string& filename, const std::string& id, SparseReorderingFeatureKey::Side side, std::vector<ClusterMap>* pClusterMaps);
void PreCalculateFeatureNames(size_t index, const std::string& id, SparseReorderingFeatureKey::Side side, const Factor* factor, bool isCluster);
void ReadWeightMap(const std::string& filename);
void AddFeatures(
SparseReorderingFeatureKey::Type type, SparseReorderingFeatureKey::Side side,

View File

@ -86,6 +86,10 @@ struct VWTargetSentence {
int src = it->first;
int tgt = it->second;
if (src >= m_sourceConstraints.size() || tgt >= m_targetConstraints.size()) {
UTIL_THROW2("VW :: alignment point out of bounds: " << src << "-" << tgt);
}
m_sourceConstraints[src].Update(tgt);
m_targetConstraints[tgt].Update(src);
}

View File

@ -98,6 +98,7 @@ HypergraphOutput<M>::HypergraphOutput(size_t precision) :
// If this line gives you compile errors,
// contact Lane Schwartz on the Moses mailing list
m_hypergraphDir = nbestPath.parent_path().string();
if (m_hypergraphDir.empty()) m_hypergraphDir=".";
} else {
stringstream hypergraphDirName;

View File

@ -1,14 +1,15 @@
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <unistd.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>
#include "Remote.h"
#include "moses/Factor.h"
#if !defined(_WIN32) && !defined(_WIN64)
#include <arpa/inet.h>
#endif
namespace Moses
{
@ -41,12 +42,16 @@ bool LanguageModelRemote::start(const std::string& host, int port)
sock = socket(AF_INET, SOCK_STREAM, 0);
hp = gethostbyname(host.c_str());
if (hp==NULL) {
#if defined(_WIN32) || defined(_WIN64)
fprintf(stderr, "gethostbyname failed\n");
#else
herror("gethostbyname failed");
#endif
exit(1);
}
bzero((char *)&server, sizeof(server));
bcopy(hp->h_addr, (char *)&server.sin_addr, hp->h_length);
memset(&server, '\0', sizeof(server));
memcpy((char *)&server.sin_addr, hp->h_addr, hp->h_length);
server.sin_family = hp->h_addrtype;
server.sin_port = htons(port);

View File

@ -4,9 +4,15 @@
#include "SingleFactor.h"
#include "moses/TypeDef.h"
#include "moses/Factor.h"
#include <sys/socket.h>
#include <sys/types.h>
#if defined(_WIN32) || defined(_WIN64)
#include <winsock2.h>
#else
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#endif
namespace Moses
{

View File

@ -55,6 +55,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#endif
#include "util/exception.hh"
#include "util/random.hh"
using namespace std;
@ -426,7 +427,7 @@ void Manager::CalcLatticeSamples(size_t count, TrellisPathList &ret) const
//cerr << endl;
//draw the sample
float frandom = log((float)rand()/RAND_MAX);
const float frandom = log(util::rand_incl(0.0f, 1.0f));
size_t position = 1;
float sum = candidateScores[0];
for (; position < candidateScores.size() && sum < frandom; ++position) {
@ -1645,7 +1646,7 @@ void Manager::OutputNBest(std::ostream& out
out << " |||";
// print scores with feature names
path.GetScoreBreakdown().OutputAllFeatureScores(out );
path.GetScoreBreakdown()->OutputAllFeatureScores(out);
// total
out << " ||| " << path.GetTotalScore();

View File

@ -31,6 +31,7 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "InputFileStream.h"
#include "StaticData.h"
#include "util/exception.hh"
#include "util/random.hh"
#include <boost/program_options.hpp>
@ -1393,7 +1394,7 @@ struct Credit {
this->contact = contact ;
this->currentPursuits = currentPursuits ;
this->areaResponsibility = areaResponsibility;
this->sortId = rand() % 1000;
this->sortId = util::rand_excl(1000);
}
bool operator<(const Credit &other) const {

View File

@ -40,12 +40,12 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
const std::vector<FactorType> &output,
const std::string &inFile,
const RuleTableFF &ff,
HyperTree &trie)
HyperTree &trie,
boost::unordered_set<std::size_t> &sourceTermSet)
{
PrintUserTime(std::string("Start loading HyperTree"));
// const StaticData &staticData = StaticData::Instance();
// const std::string &factorDelimiter = staticData.GetFactorDelimiter();
sourceTermSet.clear();
std::size_t count = 0;
@ -106,6 +106,7 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
// Source-side
HyperPath sourceFragment;
hyperPathLoader.Load(sourceString, sourceFragment);
ExtractSourceTerminalSetFromHyperPath(sourceFragment, sourceTermSet);
// Target-side
TargetPhrase *targetPhrase = new TargetPhrase(&ff);
@ -144,6 +145,23 @@ bool HyperTreeLoader::Load(const std::vector<FactorType> &input,
return true;
}
void HyperTreeLoader::ExtractSourceTerminalSetFromHyperPath(
const HyperPath &hp, boost::unordered_set<std::size_t> &sourceTerminalSet)
{
for (std::vector<HyperPath::NodeSeq>::const_iterator p = hp.nodeSeqs.begin();
p != hp.nodeSeqs.end(); ++p) {
for (std::vector<std::size_t>::const_iterator q = p->begin();
q != p->end(); ++q) {
const std::size_t factorId = *q;
if (factorId >= moses_MaxNumNonterminals &&
factorId != HyperPath::kComma &&
factorId != HyperPath::kEpsilon) {
sourceTerminalSet.insert(factorId);
}
}
}
}
} // namespace F2S
} // namespace Syntax
} // namespace Moses

View File

@ -3,9 +3,12 @@
#include <istream>
#include <vector>
#include <boost/unordered_set.hpp>
#include "moses/TypeDef.h"
#include "moses/Syntax/RuleTableFF.h"
#include "HyperPath.h"
#include "HyperTree.h"
#include "HyperTreeCreator.h"
@ -23,7 +26,12 @@ public:
const std::vector<FactorType> &output,
const std::string &inFile,
const RuleTableFF &,
HyperTree &);
HyperTree &,
boost::unordered_set<std::size_t> &);
private:
void ExtractSourceTerminalSetFromHyperPath(
const HyperPath &, boost::unordered_set<std::size_t> &);
};
} // namespace F2S

View File

@ -39,6 +39,7 @@ Manager<RuleMatcher>::Manager(ttasksptr const& ttask)
if (const ForestInput *p = dynamic_cast<const ForestInput*>(&m_source)) {
m_forest = p->GetForest();
m_rootVertex = p->GetRootVertex();
m_sentenceLength = p->GetSize();
} else if (const TreeInput *p = dynamic_cast<const TreeInput*>(&m_source)) {
T2S::InputTreeBuilder builder;
T2S::InputTree tmpTree;
@ -46,6 +47,7 @@ Manager<RuleMatcher>::Manager(ttasksptr const& ttask)
boost::shared_ptr<Forest> forest = boost::make_shared<Forest>();
m_rootVertex = T2S::InputTreeToForest(tmpTree, *forest);
m_forest = forest;
m_sentenceLength = p->GetSize();
} else {
UTIL_THROW2("ERROR: F2S::Manager requires input to be a tree or forest");
}
@ -83,8 +85,13 @@ void Manager<RuleMatcher>::Decode()
p = sortedVertices.begin(); p != sortedVertices.end(); ++p) {
const Forest::Vertex &vertex = **p;
// Skip terminal vertices.
// Skip terminal vertices (after checking if they are OOVs).
if (vertex.incoming.empty()) {
if (vertex.pvertex.span.GetStartPos() > 0 &&
vertex.pvertex.span.GetEndPos() < m_sentenceLength-1 &&
IsUnknownSourceWord(vertex.pvertex.symbol)) {
m_oovs.insert(vertex.pvertex.symbol);
}
continue;
}
@ -190,6 +197,21 @@ void Manager<RuleMatcher>::InitializeStacks()
}
}
template<typename RuleMatcher>
bool Manager<RuleMatcher>::IsUnknownSourceWord(const Word &w) const
{
const std::size_t factorId = w[0]->GetId();
const std::vector<RuleTableFF*> &ffs = RuleTableFF::Instances();
for (std::size_t i = 0; i < ffs.size(); ++i) {
RuleTableFF *ff = ffs[i];
const boost::unordered_set<std::size_t> &sourceTerms =
ff->GetSourceTerminalSet();
if (sourceTerms.find(factorId) != sourceTerms.end()) {
return false;
}
}
return true;
}
template<typename RuleMatcher>
const SHyperedge *Manager<RuleMatcher>::GetBestSHyperedge() const

View File

@ -50,10 +50,13 @@ private:
void InitializeStacks();
bool IsUnknownSourceWord(const Word &) const;
void RecombineAndSort(const std::vector<SHyperedge*> &, SVertexStack &);
boost::shared_ptr<const Forest> m_forest;
const Forest::Vertex *m_rootVertex;
std::size_t m_sentenceLength; // Includes <s> and </s>
PVertexToStackMap m_stackMap;
boost::shared_ptr<HyperTree> m_glueRuleTrie;
std::vector<boost::shared_ptr<RuleMatcher> > m_mainRuleMatchers;

View File

@ -35,7 +35,8 @@ void RuleTableFF::Load()
staticData.GetSearchAlgorithm() == SyntaxT2S) {
F2S::HyperTree *trie = new F2S::HyperTree(this);
F2S::HyperTreeLoader loader;
loader.Load(m_input, m_output, m_filePath, *this, *trie);
loader.Load(m_input, m_output, m_filePath, *this, *trie,
m_sourceTerminalSet);
m_table = trie;
} else if (staticData.GetSearchAlgorithm() == SyntaxS2T) {
S2TParsingAlgorithm algorithm = staticData.GetS2TParsingAlgorithm();

View File

@ -43,10 +43,17 @@ public:
return 0;
}
// Get the source terminal vocabulary for this table's grammar (as a set of
// factor IDs)
const boost::unordered_set<std::size_t> &GetSourceTerminalSet() const {
return m_sourceTerminalSet;
}
private:
static std::vector<RuleTableFF*> s_instances;
const RuleTable *m_table;
boost::unordered_set<std::size_t> m_sourceTerminalSet;
};
} // Syntax

View File

@ -24,14 +24,18 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include <limits>
#include <iostream>
#include <sys/mman.h>
#include <cstdio>
#include <unistd.h>
#ifndef __MMAN_PAGE_SIZE__
#define __MMAN_PAGE_SIZE__ sysconf(_SC_PAGE_SIZE)
#if defined(_WIN32) || defined(_WIN64)
#include <windows.h>
#include <io.h>
#else
#include <sys/mman.h>
#endif
#include "util/mmap.hh"
namespace Moses
{
template <class T>
@ -60,25 +64,25 @@ public:
MmapAllocator() throw()
: m_file_ptr(std::tmpfile()), m_file_desc(fileno(m_file_ptr)),
m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(0),
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
}
MmapAllocator(std::FILE* f_ptr) throw()
: m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(0),
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
}
MmapAllocator(std::FILE* f_ptr, size_t data_offset) throw()
: m_file_ptr(f_ptr), m_file_desc(fileno(m_file_ptr)),
m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(0),
m_data_offset(data_offset), m_fixed(true), m_count(new size_t(0)) {
}
MmapAllocator(std::string fileName) throw()
: m_file_ptr(std::fopen(fileName.c_str(), "wb+")), m_file_desc(fileno(m_file_ptr)),
m_page_size(__MMAN_PAGE_SIZE__), m_map_size(0), m_data_ptr(0),
m_page_size(util::SizePage()), m_map_size(0), m_data_ptr(0),
m_data_offset(0), m_fixed(false), m_count(new size_t(0)) {
}
@ -92,7 +96,7 @@ public:
~MmapAllocator() throw() {
if(m_data_ptr && *m_count == 0) {
munmap(m_data_ptr, m_map_size);
util::UnmapOrThrow(m_data_ptr, m_map_size);
if(!m_fixed && std::ftell(m_file_ptr) != -1)
std::fclose(m_file_ptr);
}
@ -119,13 +123,17 @@ public:
pointer allocate (size_type num, const void* = 0) {
m_map_size = num * sizeof(T);
#if defined(_WIN32) || defined(_WIN64)
// On Windows, MAP_SHARED is not defined and MapOrThrow ignores the flags.
const int map_shared = 0;
#else
const int map_shared = MAP_SHARED;
#endif
if(!m_fixed) {
size_t read = 0;
read += ftruncate(m_file_desc, m_map_size);
m_data_ptr = (char*)mmap(0, m_map_size, PROT_READ|PROT_WRITE, MAP_SHARED,
m_file_desc, 0);
if(m_data_ptr == MAP_FAILED)
std::cerr << "Error: mmapping" << std::endl;
m_data_ptr = (char *)util::MapOrThrow(
m_map_size, true, map_shared, false, m_file_desc, 0);
return (pointer)m_data_ptr;
} else {
size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
@ -133,8 +141,8 @@ public:
size_t map_size = m_map_size + relative_offset;
m_data_ptr = (char*)mmap(0, map_size, PROT_READ, MAP_SHARED,
m_file_desc, map_offset);
m_data_ptr = (char *)util::MapOrThrow(
m_map_size, false, map_shared, false, m_file_desc, map_offset);
return (pointer)(m_data_ptr + relative_offset);
}
@ -142,11 +150,11 @@ public:
void deallocate (pointer p, size_type num) {
if(!m_fixed) {
munmap(p, num * sizeof(T));
util::UnmapOrThrow(p, num * sizeof(T));
} else {
size_t map_offset = (m_data_offset / m_page_size) * m_page_size;
size_t relative_offset = m_data_offset - map_offset;
munmap((pointer)((char*)p - relative_offset), num * sizeof(T));
util::UnmapOrThrow((pointer)((char*)p - relative_offset), num * sizeof(T));
}
}

View File

@ -1,7 +1,9 @@
#include "FileHandler.h"
#include <cstdio>
#ifdef WIN32
// Workaround: plain Windows does not have popen()/pclose().
// (MinGW already #define's them, so skip the workaround there.)
#if defined(WIN32) && !defined(__MINGW32__)
#define popen(A, B) _popen(A, B)
#define pclose(A) _pclose(A)
#endif

View File

@ -6,6 +6,7 @@
#include "utils.h"
#include "FileHandler.h"
#include "util/exception.hh"
#include "util/random.hh"
using namespace Moses;
typedef uint64_t P; // largest input range is 2^64
@ -162,7 +163,7 @@ void Hash_shiftAddXOR<T>::initSeeds()
{
v_ = new T[this->H_];
for(count_t i=0; i < this->H_; i++)
v_[i] = Utils::rand<T>() + 1;
v_[i] = util::wide_rand<T>() + 1;
}
template <typename T>
T Hash_shiftAddXOR<T>::hash(const char* s, count_t h)
@ -187,9 +188,8 @@ void UnivHash_tableXOR<T>::initSeeds()
// fill with random values
for(count_t j=0; j < this->H_; j++) {
table_[j] = new T[tblLen_];
for(count_t i=0; i < tblLen_; i++) {
table_[j][i] = Utils::rand<T>(this->m_-1);
}
for(count_t i=0; i < tblLen_; i++)
table_[j][i] = util::wide_rand_excl(this->m_-1);
}
}
template <typename T>
@ -218,7 +218,7 @@ void UnivHash_noPrimes<T>::initSeeds()
{
a_ = new P[this->H_];
for(T i=0; i < this->H_; i++) {
a_[i] = Utils::rand<P>();
a_[i] = util::wide_rand<P>();
if(a_[i] % 2 == 0) a_[i]++; // a must be odd
}
}
@ -284,8 +284,8 @@ void UnivHash_linear<T>::initSeeds()
a_[i] = new T[MAX_NGRAM_ORDER];
b_[i] = new T[MAX_NGRAM_ORDER];
for(count_t j=0; j < MAX_NGRAM_ORDER; j++) {
a_[i][j] = 1 + Utils::rand<T>();
b_[i][j] = Utils::rand<T>();
a_[i][j] = 1 + util::wide_rand<T>();
b_[i][j] = util::wide_rand<T>();
}
}
}

View File

@ -302,7 +302,8 @@ float OnlineRLM<T>::getProb(const wordID_t* ngram, int len,
}
while(num_fnd > 1) { // get lower order count
//get sub-context of size one less than length found (exluding target)
if(((den_val = query(&ngram[len - num_fnd], num_fnd - 1)) > 0) &&
den_val = query(&ngram[len - num_fnd], num_fnd - 1);
if((den_val > 0) &&
(den_val >= in[len - num_fnd]) && (in[len - num_fnd] > 0)) {
break;
} else --num_fnd; // else backoff to lower ngram order

View File

@ -62,22 +62,6 @@ public:
str[i] = tolower(str[i]);
}
}
// TODO: interface with decent PRG
template<typename T>
static T rand(T mod_bnd = 0) {
T random = 0;
if(sizeof(T) <= 4) {
random = static_cast<T>(std::rand());
} else if(sizeof(T) == 8) {
random = static_cast<T>(std::rand());
random <<= 31;
random <<= 1;
random |= static_cast<T>(std::rand());
}
if(mod_bnd != 0)
return random % mod_bnd;
else return random;
}
};
#endif

View File

@ -1,4 +1,6 @@
#include "DynSuffixArray.h"
#include "util/random.hh"
#include <iostream>
#include <boost/foreach.hpp>
@ -315,33 +317,31 @@ int DynSuffixArray::Compare(int pos1, int pos2, int max)
return 0;
}
namespace
{
/// Helper: swap two entries in an int array.
inline void swap_ints(int array[], int one, int other)
{
const int tmp = array[one];
array[one] = array[other];
array[other] = tmp;
}
}
void DynSuffixArray::Qsort(int* array, int begin, int end)
{
if(end > begin) {
int index;
int index = util::rand_incl(begin, end);
{
index = begin + (rand() % (end - begin + 1));
int pivot = array[index];
{
int tmp = array[index];
array[index] = array[end];
array[end] = tmp;
}
const int pivot = array[index];
swap_ints(array, index, end);
for(int i=index=begin; i < end; ++i) {
if (Compare(array[i], pivot, 20) <= 0) {
{
int tmp = array[index];
array[index] = array[i];
array[i] = tmp;
index++;
}
swap_ints(array, index, i);
index++;
}
}
{
int tmp = array[index];
array[index] = array[end];
array[end] = tmp;
}
swap_ints(array, index, end);
}
Qsort(array, begin, index - 1);
Qsort(array, index + 1, end);

View File

@ -17,6 +17,7 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include "util/exception.hh"
#include "util/tokenize.hh"
#include "moses/TranslationModel/PhraseDictionaryMultiModelCounts.h"
using namespace std;
@ -30,29 +31,6 @@ void OutputVec(const vector<T> &vec)
cerr << endl;
}
// from phrase-extract/tables-core.cpp
inline vector<string> tokenize( const char* input )
{
vector< string > token;
bool betweenWords = true;
int start=0;
int i=0;
for(; input[i] != '\0'; i++) {
bool isSpace = (input[i] == ' ' || input[i] == '\t');
if (!isSpace && betweenWords) {
start = i;
betweenWords = false;
} else if (isSpace && !betweenWords) {
token.push_back( string( input+start, i-start ) );
betweenWords = true;
}
}
if (!betweenWords)
token.push_back( string( input+start, i-start ) );
return token;
}
namespace Moses
{
@ -464,7 +442,7 @@ void PhraseDictionaryMultiModelCounts::LoadLexicalTable( string &fileName, lexic
i++;
if (i%100000 == 0) cerr << "." << flush;
vector<string> token = tokenize( line.c_str() );
const vector<string> token = util::tokenize( line );
if (token.size() != 4) {
cerr << "line " << i << " in " << fileName
<< " has wrong number of tokens, skipping:\n"

View File

@ -1,11 +1,11 @@
// vim:tabstop=2
#include <cstdlib>
#include <boost/filesystem.hpp>
#include "PhraseDictionaryTransliteration.h"
#include "moses/TranslationModel/CYKPlusParser/ChartRuleLookupManagerSkeleton.h"
#include "moses/DecodeGraph.h"
#include "moses/DecodeStep.h"
#include "util/tempfile.hh"
using namespace std;
@ -70,11 +70,10 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
inputPath.SetTargetPhrases(*this, tpColl, NULL);
} else {
// TRANSLITERATE
const boost::filesystem::path
inFile = boost::filesystem::unique_path(),
outDir = boost::filesystem::unique_path();
const util::temp_file inFile;
const util::temp_dir outDir;
ofstream inStream(inFile.c_str());
ofstream inStream(inFile.path().c_str());
inStream << sourcePhrase.ToString() << endl;
inStream.close();
@ -84,14 +83,14 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
" --external-bin-dir " + m_externalDir +
" --input-extension " + m_inputLang +
" --output-extension " + m_outputLang +
" --oov-file " + inFile.native() +
" --out-dir " + outDir.native();
" --oov-file " + inFile.path() +
" --out-dir " + outDir.path();
int ret = system(cmd.c_str());
UTIL_THROW_IF2(ret != 0, "Transliteration script error");
TargetPhraseCollection *tpColl = new TargetPhraseCollection();
vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir.native());
vector<TargetPhrase*> targetPhrases = CreateTargetPhrases(sourcePhrase, outDir.path());
vector<TargetPhrase*>::const_iterator iter;
for (iter = targetPhrases.begin(); iter != targetPhrases.end(); ++iter) {
TargetPhrase *tp = *iter;
@ -102,10 +101,6 @@ void PhraseDictionaryTransliteration::GetTargetPhraseCollection(InputPath &input
cache[hash] = value;
inputPath.SetTargetPhrases(*this, tpColl, NULL);
// clean up temporary files
remove(inFile.c_str());
boost::filesystem::remove_all(outDir);
}
}

View File

@ -45,6 +45,7 @@
#include "moses/TranslationModel/fuzzy-match/SentenceAlignment.h"
#include "util/file.hh"
#include "util/exception.hh"
#include "util/random.hh"
using namespace std;
@ -62,8 +63,8 @@ char *mkdtemp(char *tempbuf)
return NULL;
}
srand((unsigned)time(0));
rand_value = (int)((rand() / ((double)RAND_MAX+1.0)) * 1e6);
util::rand_init();
rand_value = util::rand_excl(1e6);
tempbase = strrchr(tempbuf, '/');
tempbase = tempbase ? tempbase+1 : tempbuf;
strcpy(tempbasebuf, tempbase);
@ -130,10 +131,6 @@ int removedirectoryrecursively(const char *dirname)
struct dirent *entry;
char path[PATH_MAX];
if (path == NULL) {
fprintf(stderr, "Out of memory error\n");
return 0;
}
dir = opendir(dirname);
if (dir == NULL) {
perror("Error opendir()");

View File

@ -2,19 +2,16 @@
#define __sampling_h
#include <boost/dynamic_bitset.hpp>
#include <vector>
#include "util/random.hh"
// Utility functions for proper sub-sampling.
// (c) 2007-2012 Ulrich Germann
namespace Moses
{
using namespace std;
inline
size_t
randInt(size_t N)
{
return N*(rand()/(RAND_MAX+1.));
}
using namespace std;
// select a random sample of size /s/ without restitution from the range of
// integers [0,N);
@ -35,15 +32,15 @@ randomSample(vector<idx_t>& v, size_t s, size_t N)
if (s*10<N) {
boost::dynamic_bitset<uint64_t> check(N,0);
for (size_t i = 0; i < v.size(); i++) {
size_t x = randInt(N);
while (check[x]) x = randInt(N);
size_t x = util::rand_excl(N);
while (check[x]) x = util::rand_excl(N);
check[x]=true;
v[i] = x;
}
} else {
size_t m=0;
for (size_t t = 0; m <= s && t < N; t++)
if (s==N || randInt(N-t) < s-m) v[m++] = t;
if (s==N || util::rand_excl(N-t) < s-m) v[m++] = t;
}
}

View File

@ -345,7 +345,7 @@
// {
// boost::lock_guard<boost::mutex> lock(stats->lock);
// if (stats->raw_cnt == ctr) ++stats->raw_cnt;
// size_t rnum = randInt(stats->raw_cnt - ctr++);
// size_t rnum = util::rand_excl(stats->raw_cnt - ctr++);
// // cout << stats->raw_cnt << " " << ctr-1 << " "
// // << rnum << " " << max_samples - stats->good << endl;
// if (rnum < max_samples - stats->good)

View File

@ -69,7 +69,7 @@ namespace ugdiss
// while (chosen < samplesize && next < stop)
// {
// root->readEntry(next,*this);
// if (randInt(N - sampled++) < samplesize - chosen)
// if (util::rand_excl(N - sampled++) < samplesize - chosen)
// {
// ++chosen;
// return true;

View File

@ -9,6 +9,7 @@
#include <iostream>
#include "util/exception.hh"
#include "moses/Util.h"
#include "util/random.hh"
//#include <cassert>
// #include "ug_bv_iter.h"
@ -896,13 +897,6 @@ namespace ugdiss
return bv;
}
inline
size_t
randInt(size_t N)
{
return size_t(N*(rand()/(RAND_MAX+1.)));
}
/// randomly select up to N occurrences of the sequence
template<typename Token>
sptr<vector<typename ttrack::Position> >
@ -924,8 +918,8 @@ namespace ugdiss
root->readEntry(I.next,I);
// t: expected number of remaining samples
double t = (stop - I.pos)/root->aveIndexEntrySize();
double r = t*rand()/(RAND_MAX+1.);
const double t = (stop - I.pos)/root->aveIndexEntrySize();
const double r = util::rand_excl(t);
if (r < N-m)
{
ret->at(m).offset = I.offset;

View File

@ -16,7 +16,7 @@ namespace Moses
{
using namespace bitext;
using namespace std;
// using namespace boost;
using namespace boost;
void
fillIdSeq(Phrase const& mophrase, size_t const ifactor,
@ -155,6 +155,10 @@ namespace Moses
input_factor = atoi(param.insert(dflt).first->second.c_str());
// shouldn't that be a string?
dflt = pair<string,string> ("output-factor","0");
output_factor = atoi(param.insert(dflt).first->second.c_str());
ofactor.assign(1,output_factor);
dflt = pair<string,string> ("smooth",".01");
m_lbop_conf = atof(param.insert(dflt).first->second.c_str());

View File

@ -31,7 +31,6 @@ namespace Moses
TrellisPath::TrellisPath(const Hypothesis *hypo)
: m_prevEdgeChanged(NOT_FOUND)
{
m_scoreBreakdown = hypo->GetScoreBreakdown();
m_totalScore = hypo->GetTotalScore();
// enumerate path using prevHypo
@ -41,10 +40,9 @@ TrellisPath::TrellisPath(const Hypothesis *hypo)
}
}
void TrellisPath::InitScore()
void TrellisPath::InitTotalScore()
{
m_totalScore = m_path[0]->GetWinningHypo()->GetTotalScore();
m_scoreBreakdown= m_path[0]->GetWinningHypo()->GetScoreBreakdown();
//calc score
size_t sizePath = m_path.size();
@ -53,12 +51,8 @@ void TrellisPath::InitScore()
const Hypothesis *winningHypo = hypo->GetWinningHypo();
if (hypo != winningHypo) {
m_totalScore = m_totalScore - winningHypo->GetTotalScore() + hypo->GetTotalScore();
m_scoreBreakdown.MinusEquals(winningHypo->GetScoreBreakdown());
m_scoreBreakdown.PlusEquals(hypo->GetScoreBreakdown());
}
}
}
TrellisPath::TrellisPath(const TrellisPath &copy, size_t edgeIndex, const Hypothesis *arc)
@ -80,7 +74,7 @@ TrellisPath::TrellisPath(const TrellisPath &copy, size_t edgeIndex, const Hypoth
prevHypo = prevHypo->GetPrevHypo();
}
InitScore();
InitTotalScore();
}
TrellisPath::TrellisPath(const vector<const Hypothesis*> edges)
@ -88,9 +82,7 @@ TrellisPath::TrellisPath(const vector<const Hypothesis*> edges)
{
m_path.resize(edges.size());
copy(edges.rbegin(),edges.rend(),m_path.begin());
InitScore();
InitTotalScore();
}
@ -172,6 +164,32 @@ void TrellisPath::CreateDeviantPaths(TrellisPathList &pathColl) const
}
}
const boost::shared_ptr<ScoreComponentCollection> TrellisPath::GetScoreBreakdown() const
{
if (!m_scoreBreakdown) {
float totalScore = m_path[0]->GetWinningHypo()->GetTotalScore(); // calculated for sanity check only
m_scoreBreakdown = boost::shared_ptr<ScoreComponentCollection>(new ScoreComponentCollection());
m_scoreBreakdown->PlusEquals(ScoreComponentCollection(m_path[0]->GetWinningHypo()->GetScoreBreakdown()));
//calc score
size_t sizePath = m_path.size();
for (size_t pos = 0 ; pos < sizePath ; pos++) {
const Hypothesis *hypo = m_path[pos];
const Hypothesis *winningHypo = hypo->GetWinningHypo();
if (hypo != winningHypo) {
totalScore = totalScore - winningHypo->GetTotalScore() + hypo->GetTotalScore();
m_scoreBreakdown->MinusEquals(winningHypo->GetScoreBreakdown());
m_scoreBreakdown->PlusEquals(hypo->GetScoreBreakdown());
}
}
assert(totalScore == m_totalScore);
}
return m_scoreBreakdown;
}
Phrase TrellisPath::GetTargetPhrase() const
{
Phrase targetPhrase(ARRAY_SIZE_INCR);

View File

@ -19,14 +19,14 @@ License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#ifndef moses_TrellisPath_h
#define moses_TrellisPath_h
#pragma once
#include <iostream>
#include <vector>
#include <limits>
#include "Hypothesis.h"
#include "TypeDef.h"
#include <boost/shared_ptr.hpp>
namespace Moses
{
@ -50,13 +50,13 @@ protected:
, or NOT_FOUND if this path is the best trans so consist of only hypos
*/
ScoreComponentCollection m_scoreBreakdown;
float m_totalScore;
mutable boost::shared_ptr<ScoreComponentCollection> m_scoreBreakdown;
//Used by Manager::LatticeSample()
explicit TrellisPath(const std::vector<const Hypothesis*> edges);
void InitScore();
void InitTotalScore();
public:
TrellisPath(); // not implemented
@ -91,9 +91,7 @@ public:
//! create a list of next best paths by wiggling 1 of the node at a time.
void CreateDeviantPaths(TrellisPathList &pathColl) const;
inline const ScoreComponentCollection &GetScoreBreakdown() const {
return m_scoreBreakdown;
}
const boost::shared_ptr<ScoreComponentCollection> GetScoreBreakdown() const;
//! get target words range of the hypo within n-best trellis. not necessarily the same as hypo.GetCurrTargetWordsRange()
WordsRange GetTargetWordsRange(const Hypothesis &hypo) const;
@ -123,4 +121,4 @@ inline std::ostream& operator<<(std::ostream& out, const TrellisPath& path)
}
}
#endif

View File

@ -502,13 +502,11 @@ inline std::string GetFirstString(const std::string& str, int& first_pos, const
template<class T>
T log_sum (T log_a, T log_b)
{
T v;
if (log_a < log_b) {
v = log_b+log ( 1 + exp ( log_a-log_b ));
return log_b + log1p(exp(log_a - log_b));
} else {
v = log_a+log ( 1 + exp ( log_b-log_a ));
return log_a + log1p(exp(log_b - log_a));
}
return ( v );
}
/**

View File

@ -105,13 +105,13 @@ const TrellisPath doMBR(const TrellisPathList& nBestList)
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
const TrellisPath &path = **iter;
float score = StaticData::Instance().GetMBRScale()
* path.GetScoreBreakdown().GetWeightedScore();
* path.GetScoreBreakdown()->GetWeightedScore();
if (maxScore < score) maxScore = score;
}
for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) {
const TrellisPath &path = **iter;
joint_prob = UntransformScore(StaticData::Instance().GetMBRScale() * path.GetScoreBreakdown().GetWeightedScore() - maxScore);
joint_prob = UntransformScore(StaticData::Instance().GetMBRScale() * path.GetScoreBreakdown()->GetWeightedScore() - maxScore);
marginal += joint_prob;
joint_prob_vec.push_back(joint_prob);

View File

@ -166,7 +166,7 @@ namespace MosesServer
{
// should the score breakdown be reported in a more structured manner?
ostringstream buf;
path->GetScoreBreakdown().OutputAllFeatureScores(buf);
path->GetScoreBreakdown()->OutputAllFeatureScores(buf);
nBestXmlItem["fvals"] = xmlrpc_c::value_string(buf.str());
}

View File

@ -2,6 +2,7 @@
#include "ExtractionPhrasePair.h"
#include "tables-core.h"
#include "InputFileStream.h"
#include "util/tokenize.hh"
using namespace std;
@ -17,7 +18,7 @@ void Domain::load( const std::string &domainFileName )
string line;
while(getline(*fileP, line)) {
// read
vector< string > domainSpecLine = tokenize( line.c_str() );
const vector< string > domainSpecLine = util::tokenize( line );
int lineNumber;
if (domainSpecLine.size() != 2 ||
! sscanf(domainSpecLine[0].c_str(), "%d", &lineNumber)) {
@ -25,7 +26,7 @@ void Domain::load( const std::string &domainFileName )
exit(1);
}
// store
string &name = domainSpecLine[1];
const string &name = domainSpecLine[1];
spec.push_back( make_pair( lineNumber, name ));
if (name2id.find( name ) == name2id.end()) {
name2id[ name ] = list.size();

View File

@ -14,8 +14,6 @@
#include "ScoreFeature.h"
extern std::vector<std::string> tokenize( const char*);
namespace MosesTraining
{

View File

@ -24,6 +24,7 @@
#include <string>
#include "tables-core.h"
#include "util/tokenize.hh"
using namespace std;
@ -40,7 +41,7 @@ void addBoundaryWords(vector<string> &phrase)
bool SentenceAlignment::processTargetSentence(const char * targetString, int, bool boundaryRules)
{
target = tokenize(targetString);
target = util::tokenize(targetString);
if (boundaryRules)
addBoundaryWords(target);
return true;
@ -48,7 +49,7 @@ bool SentenceAlignment::processTargetSentence(const char * targetString, int, bo
bool SentenceAlignment::processSourceSentence(const char * sourceString, int, bool boundaryRules)
{
source = tokenize(sourceString);
source = util::tokenize(sourceString);
if (boundaryRules)
addBoundaryWords(source);
return true;
@ -89,7 +90,7 @@ bool SentenceAlignment::create(const char targetString[],
}
// reading in alignments
vector<string> alignmentSequence = tokenize( alignmentString );
vector<string> alignmentSequence = util::tokenize( alignmentString );
for(size_t i=0; i<alignmentSequence.size(); i++) {
int s,t;
// cout << "scaning " << alignmentSequence[i].c_str() << endl;

View File

@ -26,6 +26,7 @@
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include "util/tokenize.hh"
using namespace std;
@ -49,7 +50,7 @@ bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetStrin
<< sentenceID << ": " << e.getMsg() << std::endl;
return false;
}
target = tokenize(targetStringCPP.c_str());
target = util::tokenize(targetStringCPP);
return true;
}
@ -70,11 +71,8 @@ bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceStrin
<< sentenceID << ": " << e.getMsg() << std::endl;
return false;
}
source = tokenize(sourceStringCPP.c_str());
source = util::tokenize(sourceStringCPP);
return true;
}
} // namespace

View File

@ -25,11 +25,10 @@
#include <cstdlib>
#include "InputFileStream.h"
#include "OutputFileStream.h"
#include "util/tokenize.hh"
using namespace std;
std::vector<std::string> tokenize( const char [] );
vector< string > splitLine(const char *line)
{
vector< string > item;
@ -109,7 +108,7 @@ int main(int argc, char* argv[])
if (! getLine(fileDirectP, itemDirect ))
break;
vector< string > count = tokenize( itemDirect[4].c_str() );
const vector< string > count = util::tokenize( itemDirect[4] );
float countEF = atof(count[0].c_str());
float countF = atof(count[1].c_str());
float prob = countF/countEF;

View File

@ -28,6 +28,7 @@
#include "tables-core.h"
#include "InputFileStream.h"
#include "util/tokenize.hh"
using namespace std;
@ -165,8 +166,8 @@ void processFiles( char* fileNameDirect, char* fileNameIndirect, char* fileNameC
fileConsolidated << " ||| " << reverseAlignment(itemDirect[3]);
// counts, for debugging
vector<string> directCounts = tokenize(itemDirect[4].c_str());
vector<string> indirectCounts = tokenize(itemIndirect[4].c_str());
const vector<string> directCounts = util::tokenize(itemDirect[4]);
const vector<string> indirectCounts = util::tokenize(itemIndirect[4]);
fileConsolidated << "||| " << directCounts[0] << " " << indirectCounts[0];
// output rule count if present in either file
if (indirectCounts.size() > 1) {
@ -199,7 +200,6 @@ bool getLine( istream &fileP, vector< string > &item )
vector< string > splitLine(const char *line)
{
vector< string > item;
bool betweenWords = true;
int start=0;
int i=0;
for(; line[i] != '\0'; i++) {
@ -223,10 +223,10 @@ string reverseAlignment(const string &alignments)
{
stringstream ret("");
vector<string> alignToks = tokenize(alignments.c_str());
const vector<string> alignToks = util::tokenize(alignments);
for (size_t i = 0; i < alignToks.size(); ++i) {
string &alignPair = alignToks[i];
const string &alignPair = alignToks[i];
vector<string> alignPoints;
Tokenize(alignPoints, alignPair, "-");
assert(alignPoints.size() == 2);

View File

@ -23,6 +23,7 @@
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include "util/tokenize.hh"
#include <cassert>
#include <vector>
@ -56,7 +57,7 @@ std::auto_ptr<ParseTree> XmlTreeParser::Parse(const std::string &line)
m_tree.ConnectNodes();
SyntaxNode *root = m_tree.GetTop();
assert(root);
m_words = tokenize(m_line.c_str());
m_words = util::tokenize(m_line);
return ConvertTree(*root, m_words);
}

View File

@ -25,6 +25,7 @@
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include "util/tokenize.hh"
#include "syntax-common/exception.h"
@ -51,7 +52,7 @@ std::auto_ptr<PcfgTree> XmlTreeParser::Parse(const std::string &line) {
// There is no XML tree.
return std::auto_ptr<PcfgTree>();
}
m_words = tokenize(m_line.c_str());
m_words = util::tokenize(m_line);
return ConvertTree(*root, m_words);
}

View File

@ -21,6 +21,7 @@
#include "relax-parse.h"
#include "tables-core.h"
#include "util/tokenize.hh"
using namespace std;
using namespace MosesTraining;
@ -44,7 +45,7 @@ int main(int argc, char* argv[])
map< string, int > topLabelCollection; // count of top labels, not used
SyntaxTree tree;
ProcessAndStripXMLTags( inBufferString, tree, labelCollection, topLabelCollection, false );
vector< string > inWords = tokenize( inBufferString.c_str() );
const vector< string > inWords = util::tokenize( inBufferString );
// output tree
// cerr << "BEFORE:" << endl << tree;
@ -104,7 +105,7 @@ void init(int argc, char* argv[])
}
}
void store( SyntaxTree &tree, vector< string > &words )
void store( SyntaxTree &tree, const vector< string > &words )
{
// output words
for( size_t i=0; i<words.size(); i++ ) {

View File

@ -39,7 +39,7 @@ char SAMTLevel = 0;
// functions
void init(int argc, char* argv[]);
void store( MosesTraining::SyntaxTree &tree, std::vector<std::string> &words );
void store( MosesTraining::SyntaxTree &tree, const std::vector<std::string> &words );
void LeftBinarize( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
void RightBinarize( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );
void SAMT( MosesTraining::SyntaxTree &tree, MosesTraining::ParentNodes &parents );

View File

@ -14,6 +14,7 @@
#include "AlignmentPhrase.h"
#include "tables-core.h"
#include "InputFileStream.h"
#include "util/tokenize.hh"
using namespace std;
using namespace MosesTraining;
@ -237,7 +238,7 @@ void processPhrasePairs( vector< PhraseAlignment > &phrasePair )
bool PhraseAlignment::create(const char line[], int lineID )
{
vector< string > token = tokenize( line );
const vector< string > token = util::tokenize( line );
int item = 1;
PHRASE phraseF, phraseE;
for (size_t j=0; j<token.size(); j++) {
@ -321,7 +322,7 @@ void LexicalTable::load( const string &filePath )
i++;
if (i%100000 == 0) cerr << "." << flush;
vector<string> token = tokenize( line.c_str() );
const vector<string> token = util::tokenize( line );
if (token.size() != 3) {
cerr << "line " << i << " in " << filePath << " has wrong number of tokens, skipping:\n" <<
token.size() << " " << token[0] << " " << line << endl;

View File

@ -3,6 +3,7 @@
#include "tables-core.h"
#include "XmlException.h"
#include "XmlTree.h"
#include "util/tokenize.hh"
#include <cassert>
#include <vector>
@ -24,7 +25,7 @@ StringTree *XmlTreeParser::Parse(const std::string &line) {
tree_.ConnectNodes();
SyntaxNode *root = tree_.GetTop();
assert(root);
words_ = tokenize(line_.c_str());
words_ = util::tokenize(line_);
return ConvertTree(*root, words_);
}

View File

@ -1,5 +1,6 @@
// $Id$
//#include "beammain.h"
#include "util/tokenize.hh"
#include "tables-core.h"
#define TABLE_LINE_MAX_LENGTH 1000
@ -7,37 +8,9 @@
using namespace std;
// as in beamdecoder/tables.cpp
vector<string> tokenize( const char* input )
{
vector< string > token;
bool betweenWords = true;
int start=0;
int i=0;
for(; input[i] != '\0'; i++) {
bool isSpace = (input[i] == ' ' || input[i] == '\t');
if (!isSpace && betweenWords) {
start = i;
betweenWords = false;
} else if (isSpace && !betweenWords) {
token.push_back( string( input+start, i-start ) );
betweenWords = true;
}
}
if (!betweenWords)
token.push_back( string( input+start, i-start ) );
return token;
}
namespace MosesTraining
{
bool isNonTerminal( const WORD &symbol )
{
return symbol.substr(0, 1) == "[" && symbol.substr(symbol.size()-1, 1) == "]";
}
WORD_ID Vocabulary::storeIfNew( const WORD& word )
{
map<WORD, WORD_ID>::iterator i = lookup.find( word );
@ -107,7 +80,7 @@ void DTable::load( const string& fileName )
abort();
}
vector<string> token = tokenize(line.c_str());
const vector<string> token = util::tokenize(line);
if (token.size() < 2) {
cerr << "line " << i << " in " << fileName << " too short, skipping\n";
continue;

View File

@ -12,8 +12,6 @@
#include <map>
#include <cmath>
extern std::vector<std::string> tokenize( const char*);
namespace MosesTraining
{

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";
use FindBin qw($RealBin);

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use Getopt::Std;
getopts('q');

View File

@ -1,5 +1,7 @@
#!/usr/bin/env perl
use strict;
use warnings;
use strict;
my $file = shift(@ARGV);
open(MYFILE, $file);

View File

@ -1,6 +1,7 @@
#!/usr/bin/env perl
#input hindi word urdu word, delete all those entries that have number on any side
use warnings;
use utf8;
use Getopt::Std;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use utf8;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use utf8;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use utf8;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use utf8;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use utf8;
require Encode;
use IO::Handle;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use utf8;
use strict;
use Getopt::Long "GetOptions";

View File

@ -14,6 +14,7 @@ use utf8;
# 23.01.2010: added NIST p-value and interval computation
###############################################
use warnings;
use strict;
#constants

View File

@ -4,6 +4,7 @@
#sentence-by-sentence: take in a system output, with any number of factors, and a reference translation, also maybe with factors, and show each sentence and its errors
#usage: sentence-by-sentence SYSOUT [REFERENCE]+ > sentences.html
use warnings;
use strict;
use Getopt::Long;

View File

@ -4,6 +4,7 @@
# Script to convert MOSES searchgraph to DOT format
#
use warnings;
use strict;
use File::Path;
use File::Basename;

View File

@ -5,7 +5,9 @@
#usage: show-phrases-used DECODER_OUTFILE > output.html
# where DECODER_OUTFILE is the output of moses with the -T (show alignments) option
use warnings;
use strict;
BEGIN
{
my $wd= `pawd 2>/dev/null`;

View File

@ -9,6 +9,7 @@
#similar function to filter-model-given-input.pl, but only operates
#on the phrase table and doesn't require that any subdirectories exist
use warnings;
use strict;
my $MAX_LENGTH = 10;

View File

@ -7,8 +7,15 @@ get-corpus
default-name: corpus/txt
rerun-on-change: input-extension output-extension
template: IN OUT $input-extension $output-extension
pre-tok-clean
in: raw-stem
out: pre-tok-cleaned
default-name: corpus/pre-tok-cleaned
pass-unless: pre-tok-clean
template: $pre-tok-clean IN $input-extension $output-extension OUT OUT.lines-retained
parallelizable: yes
tokenize
in: raw-stem
in: pre-tok-cleaned
out: tokenized-stem
default-name: corpus/tok
pass-unless: input-tokenizer output-tokenizer
@ -158,11 +165,18 @@ get-corpus
pass-unless: get-corpus-script
default-name: lm/txt
template: $get-corpus-script > OUT
use-parallel-corpus
in: parallel-corpus-stem
out: tokenized-corpus
default-name: lm/tok
ignore-unless: parallel-corpus-stem
template: ln -s IN.$output-extension OUT
tokenize
in: raw-corpus
out: tokenized-corpus
default-name: lm/tok
pass-unless: output-tokenizer
ignore-if: parallel-corpus-stem
template: $output-tokenizer < IN > OUT
parallelizable: yes
mock-parse
@ -204,8 +218,14 @@ split
default-name: lm/split
pass-unless: output-splitter
template: $output-splitter -model IN1.$output-extension < IN > OUT
strip
in: split-corpus
out: stripped-corpus
default-name: lm/stripped
pass-unless: mock-output-parser-lm
template: $moses-script-dir/training/strip-xml.perl < IN > OUT
train
in: split-corpus
in: stripped-corpus
out: lm
default-name: lm/lm
ignore-if: rlm-training
@ -220,7 +240,7 @@ randomize
pass-unless: lm-randomizer
ignore-if: rlm-training
train-randomized
in: split-corpus
in: stripped-corpus
out: rlm
default-name: lm/rlm
ignore-unless: rlm-training
@ -953,21 +973,21 @@ split-reference-devtest
ignore-unless: use-mira
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $output-splitter -model IN1.$output-extension < IN > OUT
reduce-reference
strip-reference
in: split-ref
out: reference
default-name: tuning/reference.reduced
default-name: tuning/reference.stripped
pass-unless: mock-output-parser-references
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $moses-script-dir/training/reduce-factors.perl --factor 0 --xml 1 --corpus IN --reduced-corpus OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
reduce-reference-devtest
template: $moses-script-dir/training/strip-xml.perl < IN > OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
strip-reference-devtest
in: split-ref-devtest
out: reference
default-name: tuning/reference.devtest.reduced
default-name: tuning/reference.devtest.stripped
pass-unless: mock-output-parser-references
ignore-unless: use-mira
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $moses-script-dir/training/reduce-factors.perl --factor 0 --xml 1 --corpus IN --reduced-corpus OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
template: $moses-script-dir/training/strip-xml.perl < IN > OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
filter
in: input TRAINING:sigtest-filter-phrase-translation-table TRAINING:sigtest-filter-reordering-table TRAINING:corpus-mml-prefilter=OR=TRAINING:corpus-mml-postfilter=OR=TRAINING:domains TRAINING:transliteration-table
out: filtered-dir
@ -1224,13 +1244,13 @@ lowercase-reference
pass-if: recaser
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $output-lowercaser < IN > OUT
reduce-reference
strip-reference
in: lowercased-reference
out: reference
default-name: evaluation/reference
pass-unless: mock-output-parser-references
multiref: $moses-script-dir/ems/support/run-command-on-multiple-refsets.perl
template: $moses-script-dir/training/reduce-factors.perl --factor 0 --xml 1 --corpus IN --reduced-corpus OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
template: $moses-script-dir/training/strip-xml.perl < IN > OUT && $moses-script-dir/training/wrappers/mosesxml2brackets.py < IN > OUT.trees
wade
in: filtered-dir truecased-input tokenized-reference alignment system-output
out: wade-analysis

View File

@ -3,6 +3,7 @@
# Experiment Management System
# Documentation at http://www.statmt.org/moses/?n=FactoredTraining.EMS
use warnings;
use strict;
use Getopt::Long "GetOptions";
use FindBin qw($RealBin);

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my ($file,$step) = @ARGV;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
# Create domain file from corpora

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
# Build necessary files for sparse lexical features

View File

@ -2,6 +2,7 @@
# $Id: consolidate-training-data.perl 928 2009-09-02 02:58:01Z philipp $
use warnings;
use strict;
my ($in,$out,$consolidated,@PART) = @ARGV;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my $cores = 8;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
my $jobs = 20;

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
die("ERROR syntax: input-from-sgm.perl < in.sgm > in.txt")

View File

@ -1,5 +1,6 @@
#!/usr/bin/env perl
use warnings;
use strict;
use IPC::Open3;
use File::Temp qw/tempdir/;

View File

@ -1,10 +1,13 @@
#!/usr/bin/env perl
use warnings;
use strict;
use Getopt::Long "GetOptions";
Getopt::Long::config("no_auto_abbrev");
Getopt::Long::config("pass_through");
my ($TEXT,$ORDER,$BIN,$LM);
&GetOptions('text=s' => \$TEXT,
@ -15,8 +18,9 @@ my ($TEXT,$ORDER,$BIN,$LM);
die("ERROR: specify at least --bin BIN --text CORPUS --lm LM and --order N!")
unless defined($BIN) && defined($TEXT) && defined($LM) && defined($ORDER);
my $cmd = "$BIN --text $TEXT --order $ORDER --arpa $LM";
$cmd .= " " . join(' ', @ARGV) if scalar(@ARGV); # Pass remaining args through.
my $settings = join(' ', @ARGV);
#print STDERR "settngs=$settings \n";
my $cmd = "$BIN --text $TEXT --order $ORDER --arpa $LM $settings";
print "exec: $cmd\n";
`$cmd`;

Some files were not shown because too many files have changed in this diff Show More