From 8573a66da0363f7b60cba8abdf3fb57f702289e8 Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Mon, 4 Mar 2013 09:15:46 -0500 Subject: [PATCH 1/3] Close hypergraph output files. --- moses-cmd/Main.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp index 0e48ae64f..f93bc4fce 100644 --- a/moses-cmd/Main.cpp +++ b/moses-cmd/Main.cpp @@ -332,6 +332,10 @@ public: m_searchGraphSLFStream->close(); } + if (m_searchGraphHypergraphStream) { + m_searchGraphHypergraphStream->close(); + } + delete m_searchGraphSLFStream; delete m_searchGraphHypergraphStream; delete m_source; From 26bf04df5da3c756a37c93cbb4bcc0647df73aca Mon Sep 17 00:00:00 2001 From: Christian Buck Date: Mon, 4 Mar 2013 15:29:13 +0000 Subject: [PATCH 2/3] added unbuffered mode for casers (using -b) --- scripts/recaser/detruecase.perl | 7 ++++--- scripts/recaser/recase.perl | 6 ++++-- scripts/recaser/truecase.perl | 8 +++++--- 3 files changed, 13 insertions(+), 8 deletions(-) diff --git a/scripts/recaser/detruecase.perl b/scripts/recaser/detruecase.perl index 49c89c299..012c143ac 100755 --- a/scripts/recaser/detruecase.perl +++ b/scripts/recaser/detruecase.perl @@ -6,11 +6,12 @@ use Getopt::Long "GetOptions"; binmode(STDIN, ":utf8"); binmode(STDOUT, ":utf8"); - -my ($SRC,$INFILE); +my ($SRC,$INFILE,$UNBUFFERED); die("detruecase.perl < in > out") unless &GetOptions('headline=s' => \$SRC, - 'in=s' => \$INFILE); + 'in=s' => \$INFILE, + 'b|unbuffered' => \$UNBUFFERED); +if (defined($UNBUFFERED) && $UNBUFFERED) { $|=1; } my %SENTENCE_END = ("."=>1,":"=>1,"?"=>1,"!"=>1); my %DELAYED_SENTENCE_START = ("("=>1,"["=>1,"\""=>1,"'"=>1,"""=>1,"'"=>1,"["=>1,"]"=>1); diff --git a/scripts/recaser/recase.perl b/scripts/recaser/recase.perl index c83c30daa..2858cda61 100755 --- a/scripts/recaser/recase.perl +++ b/scripts/recaser/recase.perl @@ -4,7 +4,7 @@ use strict; use Getopt::Long "GetOptions"; -my ($SRC,$INFILE,$RECASE_MODEL); +my ($SRC,$INFILE,$RECASE_MODEL,$UNBUFFERED); my $MOSES = "moses"; my $LANGUAGE = "en"; # English by default; die("recase.perl --in file --model ini-file > out") @@ -12,9 +12,11 @@ die("recase.perl --in file --model ini-file > out") 'headline=s' => \$SRC, 'lang=s' => \$LANGUAGE, 'moses=s' => \$MOSES, - 'model=s' => \$RECASE_MODEL) + 'model=s' => \$RECASE_MODEL, + 'b|unbuffered' => \$UNBUFFERED) && defined($INFILE) && defined($RECASE_MODEL); +if (defined($UNBUFFERED) && $UNBUFFERED) { $|=1; } my %treated_languages = map { ($_,1) } qw/en cs/; die "I don't know any rules for $LANGUAGE. Use 'en' as the default." diff --git a/scripts/recaser/truecase.perl b/scripts/recaser/truecase.perl index 0e2df27a2..517f5c7a1 100755 --- a/scripts/recaser/truecase.perl +++ b/scripts/recaser/truecase.perl @@ -8,9 +8,11 @@ binmode(STDIN, ":utf8"); binmode(STDOUT, ":utf8"); # apply switches -my $MODEL; -die("truecase.perl --model truecaser < in > out") - unless &GetOptions('model=s' => \$MODEL); +my ($MODEL, $UNBUFFERED); +die("truecase.perl --model MODEL [-b] < in > out") + unless &GetOptions('model=s' => \$MODEL,'b|unbuffered' => \$UNBUFFERED) + && defined($MODEL); +if (defined($UNBUFFERED) && $UNBUFFERED) { $|=1; } my (%BEST,%KNOWN); open(MODEL,$MODEL) || die("ERROR: could not open '$MODEL'"); From ec69acf3d4a2a0561ae4bed147e12562fd449280 Mon Sep 17 00:00:00 2001 From: Lane Schwartz Date: Mon, 4 Mar 2013 12:07:37 -0500 Subject: [PATCH 3/3] Don't open all hypergraph output files at once. --- moses-cmd/IOWrapper.cpp | 18 ------------- moses-cmd/IOWrapper.h | 2 -- moses-cmd/Main.cpp | 56 +++++++++++++++++++---------------------- 3 files changed, 26 insertions(+), 50 deletions(-) diff --git a/moses-cmd/IOWrapper.cpp b/moses-cmd/IOWrapper.cpp index 6fffead46..f7fed9998 100644 --- a/moses-cmd/IOWrapper.cpp +++ b/moses-cmd/IOWrapper.cpp @@ -189,24 +189,6 @@ InputType*IOWrapper::GetInput(InputType* inputType) } } - ofstream* IOWrapper::GetOutputSearchGraphSLFStream(size_t sentenceNumber) { - const StaticData &staticData = StaticData::Instance(); - stringstream fileName; - fileName << staticData.GetParam("output-search-graph-slf")[0] << "/" << sentenceNumber << ".slf"; - std::ofstream *file = new std::ofstream; - file->open(fileName.str().c_str()); - return file; - } - - ofstream* IOWrapper::GetOutputSearchGraphHypergraphStream(size_t sentenceNumber) { - const StaticData &staticData = StaticData::Instance(); - stringstream fileName; - fileName << staticData.GetParam("output-search-graph-hypergraph")[0] << "/" << sentenceNumber; - std::ofstream *file = new std::ofstream; - file->open(fileName.str().c_str()); - return file; - } - ofstream* IOWrapper::GetOutputSearchGraphHypergraphWeightsStream() { const StaticData &staticData = StaticData::Instance(); stringstream fileName; diff --git a/moses-cmd/IOWrapper.h b/moses-cmd/IOWrapper.h index 0376eff6f..5decaa122 100644 --- a/moses-cmd/IOWrapper.h +++ b/moses-cmd/IOWrapper.h @@ -117,8 +117,6 @@ public: return *m_outputSearchGraphStream; } - std::ofstream *GetOutputSearchGraphSLFStream(size_t sentenceNumber); - std::ofstream *GetOutputSearchGraphHypergraphStream(size_t sentenceNumber); std::ofstream *GetOutputSearchGraphHypergraphWeightsStream(); std::ostream &GetDetailedTranslationReportingStream() { diff --git a/moses-cmd/Main.cpp b/moses-cmd/Main.cpp index f93bc4fce..5a33c214c 100644 --- a/moses-cmd/Main.cpp +++ b/moses-cmd/Main.cpp @@ -84,8 +84,8 @@ public: OutputCollector* detailedTranslationCollector, OutputCollector* alignmentInfoCollector, OutputCollector* unknownsCollector, - std::ofstream* searchGraphSLFStream, - std::ofstream* searchGraphHypergraphStream) : + bool outputSearchGraphSLF, + bool outputSearchGraphHypergraph) : m_source(source), m_lineNumber(lineNumber), m_outputCollector(outputCollector), m_nbestCollector(nbestCollector), m_latticeSamplesCollector(latticeSamplesCollector), @@ -93,8 +93,8 @@ public: m_detailedTranslationCollector(detailedTranslationCollector), m_alignmentInfoCollector(alignmentInfoCollector), m_unknownsCollector(unknownsCollector), - m_searchGraphSLFStream(searchGraphSLFStream), - m_searchGraphHypergraphStream(searchGraphHypergraphStream) {} + m_outputSearchGraphSLF(outputSearchGraphSLF), + m_outputSearchGraphHypergraph(outputSearchGraphHypergraph) {} /** Translate one sentence * gets called by main function implemented at end of this source file */ @@ -148,29 +148,39 @@ public: } // Output search graph in HTK standard lattice format (SLF) - if (m_searchGraphSLFStream) { - if (m_searchGraphSLFStream->is_open() && m_searchGraphSLFStream->good()) { + if (m_outputSearchGraphSLF) { + stringstream fileName; + fileName << staticData.GetParam("output-search-graph-slf")[0] << "/" << m_lineNumber << ".slf"; + std::ofstream *file = new std::ofstream; + file->open(fileName.str().c_str()); + if (file->is_open() && file->good()) { ostringstream out; fix(out,PRECISION); manager.OutputSearchGraphAsSLF(m_lineNumber, out); - *m_searchGraphSLFStream << out.str(); - m_searchGraphSLFStream -> flush(); + *file << out.str(); + file -> flush(); } else { TRACE_ERR("Cannot output HTK standard lattice for line " << m_lineNumber << " because the output file is not open or not ready for writing" << std::endl); } } // Output search graph in hypergraph format for Kenneth Heafield's lazy hypergraph decoder - if (m_searchGraphHypergraphStream) { - if (m_searchGraphHypergraphStream->is_open() && m_searchGraphHypergraphStream->good()) { + if (m_outputSearchGraphHypergraph) { + stringstream fileName; + fileName << staticData.GetParam("output-search-graph-hypergraph")[0] << "/" << m_lineNumber; + std::ofstream *file = new std::ofstream; + file->open(fileName.str().c_str()); + if (file->is_open() && file->good()) { ostringstream out; fix(out,PRECISION); manager.OutputSearchGraphAsHypergraph(m_lineNumber, out); - *m_searchGraphHypergraphStream << out.str(); - m_searchGraphHypergraphStream -> flush(); + *file << out.str(); + file -> flush(); } else { TRACE_ERR("Cannot output hypergraph for line " << m_lineNumber << " because the output file is not open or not ready for writing" << std::endl); } + file -> close(); + delete file; } // apply decision rule and output best translation(s) @@ -327,19 +337,7 @@ public: } ~TranslationTask() { - - if (m_searchGraphSLFStream) { - m_searchGraphSLFStream->close(); - } - - if (m_searchGraphHypergraphStream) { - m_searchGraphHypergraphStream->close(); - } - - delete m_searchGraphSLFStream; - delete m_searchGraphHypergraphStream; delete m_source; - } private: @@ -353,8 +351,8 @@ private: OutputCollector* m_detailedTranslationCollector; OutputCollector* m_alignmentInfoCollector; OutputCollector* m_unknownsCollector; - std::ofstream *m_searchGraphSLFStream; - std::ofstream *m_searchGraphHypergraphStream; + bool m_outputSearchGraphSLF; + bool m_outputSearchGraphHypergraph; std::ofstream *m_alignmentStream; @@ -643,10 +641,8 @@ int main(int argc, char** argv) detailedTranslationCollector.get(), alignmentInfoCollector.get(), unknownsCollector.get(), - staticData.GetOutputSearchGraphSLF() ? - ioWrapper->GetOutputSearchGraphSLFStream(lineCount) : NULL, - staticData.GetOutputSearchGraphHypergraph() ? - ioWrapper->GetOutputSearchGraphHypergraphStream(lineCount) : NULL); + staticData.GetOutputSearchGraphSLF(), + staticData.GetOutputSearchGraphHypergraph()); // execute task #ifdef WITH_THREADS pool.Submit(task);