diff --git a/contrib/other-builds/moses/moses.project b/contrib/other-builds/moses/moses.project
index 0fbd942c6..0ceb40723 100644
--- a/contrib/other-builds/moses/moses.project
+++ b/contrib/other-builds/moses/moses.project
@@ -775,6 +775,8 @@
+
+
diff --git a/contrib/other-builds/pruneGeneration/pruneGeneration.project b/contrib/other-builds/pruneGeneration/pruneGeneration.project
index 39109197a..6f8a6adf5 100644
--- a/contrib/other-builds/pruneGeneration/pruneGeneration.project
+++ b/contrib/other-builds/pruneGeneration/pruneGeneration.project
@@ -1,5 +1,22 @@
+
+
+
+
+
+
+
+
@@ -20,8 +37,16 @@
+
-
+
+
+
+
+
+
+
+
diff --git a/misc/pruneGeneration.cpp b/misc/pruneGeneration.cpp
index 98b21530c..e436263e9 100644
--- a/misc/pruneGeneration.cpp
+++ b/misc/pruneGeneration.cpp
@@ -3,8 +3,10 @@
#include
#include
#include
+#include
#include "pruneGeneration.h"
#include "moses/InputFileStream.h"
+#include "moses/OutputFileStream.h"
using namespace std;
@@ -12,8 +14,46 @@ int main(int argc, char **argv)
{
cerr << "Starting" << endl;
+  // Usage: pruneGeneration LIMIT IN_PATH_STEM OUT_PATH_STEM
+  //
+  // Every regular file in IN_PATH_STEM's directory whose name starts with
+  // IN_PATH_STEM's filename is pruned to OUT_PATH_STEM + <rest of the name>.
+  if (argc < 4) {
+    // Guard the argv[1..3] reads below.
+    cerr << "Usage: pruneGeneration limit in-path-stem out-path-stem" << endl;
+    return 1;
+  }
int limit = atoi(argv[1]);
+  string inPathStem = argv[2];
+  string outPathStem = argv[3];
- Process(limit, cin, cout);
+  namespace fs = boost::filesystem;
+
+  fs::path p(inPathStem);
+  fs::path dir = p.parent_path();
+  if (dir.empty()) {
+    // A bare stem such as "gen" has no parent component, and an empty path
+    // cannot be iterated; scan the current directory instead.
+    dir = ".";
+  }
+
+  // string() rather than native(): native() is a wide string on Windows and
+  // would not convert to std::string there.
+  const string fileStemStr = p.filename().string();
+  const size_t fileStemStrSize = fileStemStr.size();
+
+  // Visit each regular file in the directory.
+  fs::directory_iterator end_iter;
+  for (fs::directory_iterator dir_iter(dir); dir_iter != end_iter; ++dir_iter) {
+    if (!fs::is_regular_file(dir_iter->status())) {
+      continue;
+    }
+
+    const fs::path currPath = dir_iter->path();
+    const string currPathStr = currPath.string();
+    const string currFileStr = currPath.filename().string();
+
+    // Prefix test: only files whose name begins with the stem are processed.
+    // compare() looks at the first fileStemStrSize characters only, whereas
+    // find() == 0 would keep scanning the whole name on a mismatch.
+    if (currFileStr.compare(0, fileStemStrSize, fileStemStr) != 0) {
+      continue;
+    }
+
+    // Whatever follows the stem in the input name is appended to the output stem.
+    const string suffix = currFileStr.substr(fileStemStrSize);
+    const string outPath = outPathStem + suffix;
+    cerr << "PRUNING " << currPathStr << " TO " << outPath << endl;
+
+    Moses::InputFileStream inStrme(currPathStr);
+    Moses::OutputFileStream outStrme(outPath);
+    Process(limit, inStrme, outStrme);
+  }
cerr << "Finished" << endl;
}
@@ -52,7 +92,7 @@ void Output(ostream &outStrme, vector &records, int limit)
for (size_t i = 0; i < limit && i < records.size(); ++i) {
const Rec &rec = records[i];
- cout << rec.line << endl;
+ outStrme << rec.line << endl;
}
}
diff --git a/moses/OutputFileStream.cpp b/moses/OutputFileStream.cpp
new file mode 100644
index 000000000..d7874b06f
--- /dev/null
+++ b/moses/OutputFileStream.cpp
@@ -0,0 +1,90 @@
+// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#include
+#include
+#include
+#include "OutputFileStream.h"
+#include "gzfilebuf.h"
+
+using namespace std;
+using namespace boost::algorithm;
+
+namespace Moses
+{
+/// Build an unopened stream: no file is held (m_outFile is NULL) and m_open is false.
+OutputFileStream::OutputFileStream()
+  : boost::iostreams::filtering_ostream(),
+    m_outFile(NULL),
+    m_open(false)
+{}
+
+/// Build a stream and immediately try to open filePath via Open().
+/// The boolean result of Open() is ignored here.
+OutputFileStream::OutputFileStream(const std::string &filePath)
+  : m_outFile(NULL),
+    m_open(false)
+{
+  this->Open(filePath);
+}
+
+/// Destructor: all cleanup is delegated to Close().
+OutputFileStream::~OutputFileStream()
+{
+  this->Close();
+}
+
+/** Open the stream on filePath.
+ *
+ * "-" attaches the stream to std::cout and leaves m_outFile NULL.  Any other
+ * path is opened as a binary file; when the path ends in ".gz" a gzip
+ * compressor is pushed in front of the file.
+ *
+ * Must not be called while the stream is already open (asserted).
+ *
+ * @param filePath  "-" for standard output, otherwise the file to write.
+ * @return true on success; false if the file could not be opened, in which
+ *         case the stream stays closed and holds no file.
+ */
+bool OutputFileStream::Open(const std::string &filePath)
+{
+  assert(!m_open);
+  if (filePath == "-") {
+    // Write to standard output.  Leave m_outFile null.
+    this->push(std::cout);
+  } else {
+    m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
+    if (m_outFile->fail()) {
+      // Release the failed ofstream here: Close() returns early while m_open
+      // is false, so nothing else would delete it.
+      delete m_outFile;
+      m_outFile = NULL;
+      return false;
+    }
+
+    if (ends_with(filePath, ".gz")) {
+      this->push(boost::iostreams::gzip_compressor());
+    }
+    this->push(*m_outFile);
+  }
+
+  m_open = true;
+  return true;
+}
+
+/** Flush and close the stream; does nothing when the stream is not open.
+ *
+ * The whole chain is cleared rather than only its final device, so that a
+ * gzip filter or the std::cout device pushed by Open() does not stay behind
+ * and end up underneath whatever a later Open() pushes.
+ */
+void OutputFileStream::Close()
+{
+  if (!m_open) return;
+  this->flush();
+  // Clear the chain before the ofstream is deleted: the chain still refers
+  // to *m_outFile while it is being closed.
+  this->reset();
+  if (m_outFile) {
+    m_outFile->close();
+    delete m_outFile;
+    m_outFile = NULL;
+  }
+  m_open = false;
+}
+
+
+}
+
diff --git a/moses/OutputFileStream.h b/moses/OutputFileStream.h
new file mode 100644
index 000000000..b77741a73
--- /dev/null
+++ b/moses/OutputFileStream.h
@@ -0,0 +1,81 @@
+// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+namespace Moses
+{
+
+/** Version of std::ostream with transparent compression.
+ *
+ * Transparently compresses output when writing to a file whose name ends in
+ * ".gz", or writes to standard output instead of a file when given a
+ * filename consisting of just a dash ("-").
+ *
+ * A stream is opened either by the constructor taking a path or by an
+ * explicit call to Open(); the destructor calls Close().
+ */
+class OutputFileStream : public boost::iostreams::filtering_ostream
+{
+private:
+ /** File that needs flushing & closing when we close this stream.
+ *
+ * Is NULL when no file is opened, e.g. when writing to standard output.
+ * Allocated with new in Open() and deleted in Close().
+ */
+ std::ofstream *m_outFile;
+
+ /// Is this stream open? Set by a successful Open(), cleared by Close().
+ bool m_open;
+
+public:
+ /** Create an unopened OutputFileStream.
+ *
+ * Until it's been opened, nothing can be done with this stream.
+ */
+ OutputFileStream();
+
+ /** Create an OutputFileStream, and open it by calling Open().
+ *
+ * The boolean result of Open() is ignored by this constructor.
+ */
+ OutputFileStream(const std::string &filePath);
+ virtual ~OutputFileStream(); ///< Calls Close().
+
+ // TODO: Can we please just always throw an exception when this fails?
+ /** Open stream.
+ *
+ * If filePath is "-" (just a dash), this opens the stream for writing to
+ * standard output. Otherwise, it opens the given file in binary mode. If
+ * the filename has the ".gz" suffix, output will be transparently
+ * compressed.
+ *
+ * Must not be called while the stream is already open; this is checked
+ * with an assert.
+ *
+ * Call Close() to close the file.
+ *
+ * Returns whether opening the file was successful. It may also throw an
+ * exception on failure.
+ */
+ bool Open(const std::string &filePath);
+
+ /** Flush and close stream. After this, the stream can be opened again.
+ *
+ * Does nothing if the stream is not open.
+ */
+ void Close();
+};
+
+}
+