diff --git a/Jamroot b/Jamroot index ccd792283..00a13d110 100644 --- a/Jamroot +++ b/Jamroot @@ -167,7 +167,7 @@ project : requirements ; #Add directories here if you want their incidental targets too (i.e. tests). -build-projects lm util phrase-extract search moses moses/LM mert moses-cmd moses-chart-cmd mira scripts regression-testing ; +build-projects lm util phrase-extract search moses moses/LM mert moses-cmd mira scripts regression-testing ; if [ option.get "with-mm" : : "yes" ] { @@ -195,7 +195,6 @@ else alias programs : lm//programs -moses-chart-cmd//moses_chart moses-cmd//programs OnDiskPt//CreateOnDiskPt OnDiskPt//queryOnDiskPt @@ -216,7 +215,7 @@ mm install-bin-libs programs ; -install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-chart-cmd moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ; +install-headers headers-base : [ path.glob-tree biconcor contrib lm mert misc moses-cmd OnDiskPt phrase-extract symal util : *.hh *.h ] : . ; install-headers headers-moses : moses//headers-to-install : moses ; alias install : prefix-bin prefix-lib headers-base headers-moses ; diff --git a/contrib/other-builds/moses-chart-cmd/.cproject b/contrib/other-builds/moses-chart-cmd/.cproject deleted file mode 100644 index e244f8ac8..000000000 --- a/contrib/other-builds/moses-chart-cmd/.cproject +++ /dev/null @@ -1,169 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/contrib/other-builds/moses-chart-cmd/.project b/contrib/other-builds/moses-chart-cmd/.project deleted file mode 100644 index 16eb28b21..000000000 --- a/contrib/other-builds/moses-chart-cmd/.project +++ /dev/null @@ -1,115 +0,0 @@ - - - moses-chart-cmd - - - lm - moses - OnDiskPt - search - util - - - - org.eclipse.cdt.managedbuilder.core.genmakebuilder - clean,full,incremental, - - - ?name? - - - - org.eclipse.cdt.make.core.append_environment - true - - - org.eclipse.cdt.make.core.autoBuildTarget - all - - - org.eclipse.cdt.make.core.buildArguments - -j3 - - - org.eclipse.cdt.make.core.buildCommand - make - - - org.eclipse.cdt.make.core.buildLocation - ${workspace_loc:/moses-chart-cmd/Debug} - - - org.eclipse.cdt.make.core.cleanBuildTarget - clean - - - org.eclipse.cdt.make.core.contents - org.eclipse.cdt.make.core.activeConfigSettings - - - org.eclipse.cdt.make.core.enableAutoBuild - false - - - org.eclipse.cdt.make.core.enableCleanBuild - true - - - org.eclipse.cdt.make.core.enableFullBuild - true - - - org.eclipse.cdt.make.core.fullBuildTarget - all - - - org.eclipse.cdt.make.core.stopOnError - true - - - org.eclipse.cdt.make.core.useDefaultBuildCmd - true - - - - - org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder - full,incremental, - - - - - - org.eclipse.cdt.core.cnature - org.eclipse.cdt.core.ccnature - org.eclipse.cdt.managedbuilder.core.managedBuildNature - org.eclipse.cdt.managedbuilder.core.ScannerConfigNature - - - - Jamfile - 1 - PARENT-3-PROJECT_LOC/moses-chart-cmd/Jamfile - - - Main.cpp - 1 - PARENT-3-PROJECT_LOC/moses-chart-cmd/Main.cpp - - - Main.h - 1 - PARENT-3-PROJECT_LOC/moses-chart-cmd/Main.h - - - mbr.cpp - 1 - PARENT-3-PROJECT_LOC/moses-chart-cmd/mbr.cpp - - - mbr.h - 1 - PARENT-3-PROJECT_LOC/moses-chart-cmd/mbr.h - - - diff --git a/moses-chart-cmd/Jamfile b/moses-chart-cmd/Jamfile deleted file mode 100644 index fe84871c7..000000000 --- a/moses-chart-cmd/Jamfile +++ /dev/null @@ -1,2 +0,0 @@ -exe moses_chart : Main.cpp mbr.cpp ../moses//moses $(TOP)//boost_iostreams ..//boost_filesystem ..//z ; - diff --git a/moses-chart-cmd/Main.cpp b/moses-chart-cmd/Main.cpp deleted file mode 100644 index 30f847079..000000000 --- a/moses-chart-cmd/Main.cpp +++ /dev/null @@ -1,169 +0,0 @@ -// $Id$ - -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (c) 2006 University of Edinburgh -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the University of Edinburgh nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS -BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -// example file on how to use moses library - -#ifdef WIN32 -// Include Visual Leak Detector -//#include -#endif - -#include -#include -#include "Main.h" -#include "moses/TranslationAnalysis.h" -#include "mbr.h" -#include "moses/IOWrapper.h" - -#include "moses/FactorCollection.h" -#include "moses/HypergraphOutput.h" -#include "moses/Manager.h" -#include "moses/Phrase.h" -#include "moses/Util.h" -#include "moses/Timer.h" -#include "moses/Sentence.h" -#include "moses/ConfusionNet.h" -#include "moses/WordLattice.h" -#include "moses/TreeInput.h" -#include "moses/ThreadPool.h" -#include "moses/ChartManager.h" -#include "moses/ChartHypothesis.h" -#include "moses/FF/StatefulFeatureFunction.h" -#include "moses/FF/StatelessFeatureFunction.h" -#include "moses/TranslationTask.h" - -#include "util/usage.hh" -#include "util/exception.hh" - - - -using namespace std; -using namespace Moses; - -int main(int argc, char* argv[]) -{ - try { - IFVERBOSE(1) { - TRACE_ERR("command: "); - for(int i=0; i > hypergraphOutput; - if (staticData.GetOutputSearchGraphHypergraph()) { - hypergraphOutput.reset(new HypergraphOutput(3)); - } - - if (ioWrapper == NULL) - return EXIT_FAILURE; - -#ifdef WITH_THREADS - ThreadPool pool(staticData.ThreadCount()); -#endif - - // read each sentence & decode - InputType *source=NULL; - size_t lineCount = staticData.GetStartTranslationId(); - while(ioWrapper->ReadInput(*ioWrapper,staticData.GetInputType(),source)) { - source->SetTranslationId(lineCount); - IFVERBOSE(1) - ResetUserTime(); - - FeatureFunction::CallChangeSource(source); - - TranslationTask *task = new TranslationTask(source, *ioWrapper, hypergraphOutput); - source = NULL; // task will delete source -#ifdef WITH_THREADS - pool.Submit(task); // pool will delete task -#else - task->Run(); - delete task; -#endif - ++lineCount; - } - -#ifdef WITH_THREADS - pool.Stop(true); // flush remaining jobs -#endif - - delete ioWrapper; - FeatureFunction::Destroy(); - - IFVERBOSE(1) - PrintUserTime("End."); - - } catch (const std::exception &e) { - std::cerr << "Exception: " << e.what() << std::endl; - return EXIT_FAILURE; - } - - IFVERBOSE(1) util::PrintUsage(std::cerr); - -#ifndef EXIT_RETURN - //This avoids that detructors are called (it can take a long time) - exit(EXIT_SUCCESS); -#else - return EXIT_SUCCESS; -#endif -} - diff --git a/moses-chart-cmd/Main.h b/moses-chart-cmd/Main.h deleted file mode 100644 index 5c660b826..000000000 --- a/moses-chart-cmd/Main.h +++ /dev/null @@ -1,39 +0,0 @@ -// $Id$ - -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (c) 2006 University of Edinburgh -All rights reserved. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - - * Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - * Neither the name of the University of Edinburgh nor the names of its contributors - may be used to endorse or promote products derived from this software - without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS -BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER -IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -POSSIBILITY OF SUCH DAMAGE. -***********************************************************************/ - -// example file on how to use moses library - -#pragma once - -#include "moses/StaticData.h" - -int main(int argc, char* argv[]); diff --git a/moses-chart-cmd/mbr.cpp b/moses-chart-cmd/mbr.cpp deleted file mode 100644 index 551378054..000000000 --- a/moses-chart-cmd/mbr.cpp +++ /dev/null @@ -1,172 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include "moses/TrellisPathList.h" -#include "moses/TrellisPath.h" -#include "moses/StaticData.h" -#include "moses/Util.h" - -#include "mbr.h" - -using namespace std ; -using namespace Moses; - - -/* Input : - 1. a sorted n-best list, with duplicates filtered out in the following format - 0 ||| amr moussa is currently on a visit to libya , tomorrow , sunday , to hold talks with regard to the in sudan . ||| 0 -4.94418 0 0 -2.16036 0 0 -81.4462 -106.593 -114.43 -105.55 -12.7873 -26.9057 -25.3715 -52.9336 7.99917 -24 ||| -4.58432 - - 2. a weight vector - 3. bleu order ( default = 4) - 4. scaling factor to weigh the weight vector (default = 1.0) - - Output : - translations that minimise the Bayes Risk of the n-best list - - -*/ - -int BLEU_ORDER = 4; -int SMOOTH = 1; -int DEBUG = 0; -float min_interval = 1e-4; -void extract_ngrams(const vector& sentence, map < vector < const Factor* >, int > & allngrams) -{ - vector< const Factor* > ngram; - for (int k = 0; k < BLEU_ORDER; k++) { - for(int i =0; i < max((int)sentence.size()-k,0); i++) { - for ( int j = i; j<= i+k; j++) { - ngram.push_back(sentence[j]); - } - ++allngrams[ngram]; - ngram.clear(); - } - } -} - -float calculate_score(const vector< vector > & sents, int ref, int hyp, vector < map < vector < const Factor *>, int > > & ngram_stats ) -{ - int comps_n = 2*BLEU_ORDER+1; - vector comps(comps_n); - float logbleu = 0.0, brevity; - - int hyp_length = sents[hyp].size(); - - for (int i =0; i ,int > & hyp_ngrams = ngram_stats[hyp] ; - map< vector < const Factor * >, int > & ref_ngrams = ngram_stats[ref] ; - - for (map< vector< const Factor * >, int >::iterator it = hyp_ngrams.begin(); - it != hyp_ngrams.end(); it++) { - map< vector< const Factor * >, int >::iterator ref_it = ref_ngrams.find(it->first); - if(ref_it != ref_ngrams.end()) { - comps[2* (it->first.size()-1)] += min(ref_it->second,it->second); - } - } - comps[comps_n-1] = sents[ref].size(); - - if (DEBUG) { - for ( int i = 0; i < comps_n; i++) - cerr << "Comp " << i << " : " << comps[i]; - } - - for (int i=0; i 0 ) - logbleu += log((float)comps[2*i]+SMOOTH)-log((float)comps[2*i+1]+SMOOTH); - else - logbleu += log((float)comps[2*i])-log((float)comps[2*i+1]); - } - logbleu /= BLEU_ORDER; - brevity = 1.0-(float)comps[comps_n-1]/comps[1]; // comps[comps_n-1] is the ref length, comps[1] is the test length - if (brevity < 0.0) - logbleu += brevity; - return exp(logbleu); -} - -vector doMBR(const TrellisPathList& nBestList) -{ -// cerr << "Sentence " << sent << " has " << sents.size() << " candidate translations" << endl; - float marginal = 0; - - vector joint_prob_vec; - vector< vector > translations; - float joint_prob; - vector< map < vector , int > > ngram_stats; - - TrellisPathList::const_iterator iter; - //TrellisPath* hyp = NULL; - for (iter = nBestList.begin() ; iter != nBestList.end() ; ++iter) { - const TrellisPath &path = **iter; - joint_prob = UntransformScore(StaticData::Instance().GetMBRScale() * path.GetScoreBreakdown().GetWeightedScore()); - marginal += joint_prob; - joint_prob_vec.push_back(joint_prob); - //Cache ngram counts - map < vector < const Factor *>, int > counts; - vector translation; - GetOutputFactors(path, translation); - - //TO DO - extract_ngrams(translation,counts); - ngram_stats.push_back(counts); - translations.push_back(translation); - } - - vector mbr_loss; - float bleu, weightedLoss; - float weightedLossCumul = 0; - float minMBRLoss = 1000000; - int minMBRLossIdx = -1; - - /* Main MBR computation done here */ - for (size_t i = 0; i < nBestList.GetSize(); i++) { - weightedLossCumul = 0; - for (size_t j = 0; j < nBestList.GetSize(); j++) { - if ( i != j) { - bleu = calculate_score(translations, j, i,ngram_stats ); - weightedLoss = ( 1 - bleu) * ( joint_prob_vec[j]/marginal); - weightedLossCumul += weightedLoss; - if (weightedLossCumul > minMBRLoss) - break; - } - } - if (weightedLossCumul < minMBRLoss) { - minMBRLoss = weightedLossCumul; - minMBRLossIdx = i; - } - } - /* Find sentence that minimises Bayes Risk under 1- BLEU loss */ - return translations[minMBRLossIdx]; -} - -void GetOutputFactors(const TrellisPath &path, vector &translation) -{ - const std::vector &edges = path.GetEdges(); - const std::vector& outputFactorOrder = StaticData::Instance().GetOutputFactorOrder(); - assert (outputFactorOrder.size() == 1); - - // print the surface factor of the translation - for (int currEdge = (int)edges.size() - 1 ; currEdge >= 0 ; currEdge--) { - const Hypothesis &edge = *edges[currEdge]; - const Phrase &phrase = edge.GetCurrTargetPhrase(); - size_t size = phrase.GetSize(); - for (size_t pos = 0 ; pos < size ; pos++) { - - const Factor *factor = phrase.GetFactor(pos, outputFactorOrder[0]); - translation.push_back(factor); - } - } -} - diff --git a/moses-chart-cmd/mbr.h b/moses-chart-cmd/mbr.h deleted file mode 100644 index cd40a13b1..000000000 --- a/moses-chart-cmd/mbr.h +++ /dev/null @@ -1,33 +0,0 @@ -// $Id$ - -/*********************************************************************** -Moses - factored phrase-based language decoder -Copyright (C) 2006 University of Edinburgh - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA -***********************************************************************/ - -#pragma once - -#include -#include -#include "moses/TrellisPathList.h" -#include "moses/TrellisPath.h" -#include "moses/Factor.h" - -std::vector doMBR(const Moses::TrellisPathList& nBestList); -void GetOutputFactors(const Moses::TrellisPath &path, std::vector &translation); -float calculate_score(const std::vector< std::vector > & sents, int ref, int hyp, std::vector < std::map < std::vector < const Moses::Factor *>, int > > & ngram_stats ); - diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp index f7d9a44da..5758103f3 100644 --- a/moses-cmd/Main.cpp +++ b/moses-cmd/Main.cpp @@ -84,8 +84,8 @@ int main(int argc, char** argv) } // set number of significant decimals in output - fix(cout,PRECISION); - fix(cerr,PRECISION); + IOWrapper::FixPrecision(cout); + IOWrapper::FixPrecision(cerr); // load all the settings into the Parameter class // (stores them as strings, or array of strings) @@ -128,9 +128,17 @@ int main(int argc, char** argv) TRACE_ERR(weights); TRACE_ERR("\n"); } + boost::shared_ptr > hypergraphOutput; + boost::shared_ptr > hypergraphOutputChart; + if (staticData.GetOutputSearchGraphHypergraph()) { - hypergraphOutput.reset(new HypergraphOutput(PRECISION)); + if (staticData.IsChart()) { + hypergraphOutputChart.reset(new HypergraphOutput(PRECISION)); + } + else { + hypergraphOutput.reset(new HypergraphOutput(PRECISION)); + } } #ifdef WITH_THREADS @@ -149,10 +157,18 @@ int main(int argc, char** argv) FeatureFunction::CallChangeSource(source); // set up task of translating one sentence - TranslationTask* task = - new TranslationTask(source, *ioWrapper, - staticData.GetOutputSearchGraphSLF(), - hypergraphOutput); + TranslationTask* task; + if (staticData.IsChart()) { + // scfg + task = new TranslationTask(source, *ioWrapper, hypergraphOutputChart); + } + else { + // pb + task = new TranslationTask(source, *ioWrapper, + staticData.GetOutputSearchGraphSLF(), + hypergraphOutput); + } + // execute task #ifdef WITH_THREADS pool.Submit(task); diff --git a/moses-cmd/Main.h b/moses-cmd/Main.h index 362c1f245..49fee0219 100644 --- a/moses-cmd/Main.h +++ b/moses-cmd/Main.h @@ -1,3 +1,4 @@ +#pragma once // $Id$ /*********************************************************************** @@ -32,12 +33,10 @@ POSSIBILITY OF SUCH DAMAGE. // example file on how to use moses library -#ifndef moses_cmd_Main_h -#define moses_cmd_Main_h #include "moses/StaticData.h" class IOWrapper; int main(int argc, char* argv[]); -#endif + diff --git a/regression-testing/Jamfile b/regression-testing/Jamfile index 78349ea2c..b2ba7cce1 100644 --- a/regression-testing/Jamfile +++ b/regression-testing/Jamfile @@ -25,7 +25,7 @@ if $(with-regtest) { $(TOP)/regression-testing/run-single-test.perl --decoder=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<) } reg_test phrase : [ glob $(test-dir)/phrase.* ] : ../moses-cmd//moses : @reg_test_decode ; - reg_test chart : [ glob $(test-dir)/chart.* ] : ../moses-chart-cmd//moses_chart : @reg_test_decode ; + reg_test chart : [ glob $(test-dir)/chart.* ] : ../moses-cmd//moses : @reg_test_decode ; actions reg_test_score { $(TOP)/regression-testing/run-test-scorer.perl --scorer=$(>) --test=$(<:B) --data-dir=$(with-regtest) --test-dir=$(test-dir) && touch $(<)