diff --git a/contrib/other-builds/moses/moses.project b/contrib/other-builds/moses/moses.project
index 0fbd942c6..0ceb40723 100644
--- a/contrib/other-builds/moses/moses.project
+++ b/contrib/other-builds/moses/moses.project
@@ -775,6 +775,8 @@
+
+
diff --git a/contrib/other-builds/pruneGeneration/pruneGeneration.project b/contrib/other-builds/pruneGeneration/pruneGeneration.project
index 39109197a..6f8a6adf5 100644
--- a/contrib/other-builds/pruneGeneration/pruneGeneration.project
+++ b/contrib/other-builds/pruneGeneration/pruneGeneration.project
@@ -1,5 +1,22 @@
+
+
+
+
+
+
+
+
@@ -20,8 +37,16 @@
+
-
+
+
+
+
+
+
+
+
diff --git a/misc/pruneGeneration.cpp b/misc/pruneGeneration.cpp
index 98b21530c..e436263e9 100644
--- a/misc/pruneGeneration.cpp
+++ b/misc/pruneGeneration.cpp
@@ -3,8 +3,10 @@
 #include
 #include
 #include
+#include
 #include "pruneGeneration.h"
 #include "moses/InputFileStream.h"
+#include "moses/OutputFileStream.h"
 
 using namespace std;
 
@@ -12,8 +14,61 @@ int main(int argc, char **argv)
 {
   cerr << "Starting" << endl;
+  // All three positional arguments are mandatory; stop here rather than
+  // dereferencing argv entries that do not exist.
+  if (argc < 4) {
+    cerr << "Usage: " << argv[0] << " LIMIT IN-PATH-STEM OUT-PATH-STEM" << endl;
+    return 1;
+  }
+
   int limit = atoi(argv[1]);
+  string inPathStem = argv[2];
+  string outPathStem = argv[3];
 
-  Process(limit, cin, cout);
+  // Prune every regular file in the input stem's directory whose name starts
+  // with the stem's file name; each result goes to outPathStem + the rest of that name.
+  namespace fs = boost::filesystem;
+
+  //cerr << "inPathStem=" << inPathStem << endl;
+  fs::path p(inPathStem);
+  fs::path dir = p.parent_path();
+  if (dir.empty()) {
+    // A bare file-name stem has no parent path; scan the current directory
+    // instead of constructing a directory_iterator from an empty path.
+    dir = ".";
+  }
+  //cerr << "dir=" << dir << endl;
+
+  fs::path fileStem = p.filename();
+  // string() rather than native(): native() is a wide string on Windows.
+  string fileStemStr = fileStem.string();
+  size_t fileStemStrSize = fileStemStr.size();
+  //cerr << "fileStem=" << fileStemStr << endl;
+
+  // loop through each file in directory
+  fs::directory_iterator end_iter;
+  for( fs::directory_iterator dir_iter(dir) ; dir_iter != end_iter ; ++dir_iter) {
+    if (fs::is_regular_file(dir_iter->status())) {
+      fs::path currPath = *dir_iter;
+      string currPathStr = currPath.string();
+      //cerr << "currPathStr=" << currPathStr << endl;
+
+      fs::path currFile = currPath.filename();
+      string currFileStr = currFile.string();
+
+      if (currFileStr.find(fileStemStr) == 0) {
+        // found gen table we need
+        //cerr << "found=" << currPathStr << endl;
+        string suffix = currFileStr.substr(fileStemStrSize, currFileStr.size() - fileStemStrSize);
+        string outPath = outPathStem + suffix;
+        cerr << "PRUNING " << currPathStr << " TO " << outPath << endl;
+
+        Moses::InputFileStream inStrme(currPathStr);
+        Moses::OutputFileStream outStrme(outPath);
+        Process(limit, inStrme, outStrme);
+
+      }
+    }
+  }
 
   cerr << "Finished" << endl;
 }
@@ -52,7 +107,7 @@ void Output(ostream &outStrme, vector &records, int limit)
 
   for (size_t i = 0; i < limit && i < records.size(); ++i) {
     const Rec &rec = records[i];
-    cout << rec.line << endl;
+    outStrme << rec.line << endl;
   }
 }
 
diff --git a/moses/OutputFileStream.cpp b/moses/OutputFileStream.cpp
new file mode 100644
index 000000000..d7874b06f
--- /dev/null
+++ b/moses/OutputFileStream.cpp
@@ -0,0 +1,110 @@
+// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#include
+#include
+#include
+#include "OutputFileStream.h"
+#include "gzfilebuf.h"
+
+using namespace std;
+using namespace boost::algorithm;
+
+namespace Moses
+{
+/// Construct a stream that is not attached to any output yet.
+OutputFileStream::OutputFileStream()
+  :boost::iostreams::filtering_ostream()
+  ,m_outFile(NULL)
+  ,m_open(false)
+{
+}
+
+/// Construct a stream and immediately Open() filePath; the result of Open() is not checked.
+OutputFileStream::OutputFileStream(const std::string &filePath)
+  :m_outFile(NULL)
+  ,m_open(false)
+{
+  Open(filePath);
+}
+
+/// Close the stream (if it is open) on destruction.
+OutputFileStream::~OutputFileStream()
+{
+  Close();
+}
+
+/** Attach the stream to filePath.
+ *
+ * "-" selects standard output; any other path is opened as a binary file,
+ * with a gzip compressor in front of it when the path ends in ".gz".
+ *
+ * @return true on success, false if the file could not be opened.
+ */
+bool OutputFileStream::Open(const std::string &filePath)
+{
+  assert(!m_open);
+  if (filePath == std::string("-")) {
+    // Write to standard output. Leave m_outFile null.
+    this->push(std::cout);
+  } else {
+    m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
+    if (m_outFile->fail()) {
+      // Close() does nothing while m_open is false, so release the ofstream
+      // here or it is leaked.
+      delete m_outFile;
+      m_outFile = NULL;
+      return false;
+    }
+
+    if (ends_with(filePath, ".gz")) {
+      this->push(boost::iostreams::gzip_compressor());
+    }
+    this->push(*m_outFile);
+  }
+
+  m_open = true;
+  return true;
+}
+
+/** Flush pending output and detach from the current destination.
+ *
+ * Does nothing if the stream is not open.
+ */
+void OutputFileStream::Close()
+{
+  if (!m_open) return;
+  this->flush();
+  // reset() closes and removes every element of the chain (gzip filter and
+  // std::cout included), so a later Open() starts from an empty chain
+  // instead of stacking filters or pushing onto a complete chain.
+  this->reset();
+  if (m_outFile) {
+    m_outFile->close();
+    delete m_outFile;
+    m_outFile = NULL;
+  }
+  m_open = false;
+}
+
+
+}
+
diff --git a/moses/OutputFileStream.h b/moses/OutputFileStream.h
new file mode 100644
index 000000000..b77741a73
--- /dev/null
+++ b/moses/OutputFileStream.h
@@ -0,0 +1,81 @@
+// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
+
+/***********************************************************************
+ Moses - factored phrase-based language decoder
+ Copyright (C) 2006 University of Edinburgh
+
+ This library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser
General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ This library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with this library; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ ***********************************************************************/
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+
+namespace Moses
+{
+
+/** Output stream (a boost::iostreams::filtering_ostream) with transparent compression.
+ *
+ * Output is gzip-compressed when writing to a file whose name ends in
+ * ".gz". A file name consisting of just a dash ("-") selects standard
+ * output instead of a file.
+ */
+class OutputFileStream : public boost::iostreams::filtering_ostream
+{
+private:
+  /** File that needs flushing & closing when we close this stream.
+   *
+   * Allocated by Open(); NULL when no file is opened, e.g. when writing to standard output.
+   */
+  std::ofstream *m_outFile;
+
+  /// True between a successful Open() and the next Close().
+  bool m_open;
+
+public:
+  /** Create an unopened OutputFileStream.
+   *
+   * Call Open() before writing; no output destination is attached until then.
+   */
+  OutputFileStream();
+
+  /// Create an OutputFileStream and open it by calling Open(); Open()'s result is not checked.
+  OutputFileStream(const std::string &filePath);
+  virtual ~OutputFileStream();
+
+  // TODO: Can we please just always throw an exception when this fails?
+  /** Open stream. Must not be called while the stream is already open (asserted).
+   *
+   * If filePath is "-" (just a dash), this opens the stream for writing to
+   * standard output. Otherwise, it opens the given file. If the filename
+   * has the ".gz" suffix, output will be transparently compressed.
+   *
+   * Call Close() to close the file.
+   *
+   * Returns false when the file cannot be opened, true otherwise. It may
+   * also throw an exception on failure.
+   */
+  bool Open(const std::string &filePath);
+
+  /// Flush and close stream. After this, the stream can be opened again.
+  void Close();
+};
+
+}
+