Prune multiple files at once. Make up for the failure of EMS to give the full path of the gen table.

This commit is contained in:
Hieu Hoang 2015-06-25 13:02:29 +04:00
parent c80df1212e
commit 930dce10bf
5 changed files with 241 additions and 3 deletions

View File

@ -775,6 +775,8 @@
<File Name="../../../moses/WordsRange.h"/>
<File Name="../../../moses/XmlOption.cpp"/>
<File Name="../../../moses/XmlOption.h"/>
<File Name="../../../moses/OutputFileStream.cpp"/>
<File Name="../../../moses/OutputFileStream.h"/>
</VirtualDirectory>
<VirtualDirectory Name="PP">
<File Name="../../../moses/PP/CountsPhraseProperty.cpp"/>

View File

@ -1,5 +1,22 @@
<?xml version="1.0" encoding="UTF-8"?>
<CodeLite_Project Name="pruneGeneration" InternalType="Console">
<Plugins>
<Plugin Name="CMakePlugin">
<![CDATA[[{
"name": "Debug",
"enabled": false,
"buildDirectory": "build",
"sourceDirectory": "$(ProjectPath)",
"generator": "",
"buildType": "",
"arguments": [],
"parentProject": ""
}]]]>
</Plugin>
<Plugin Name="qmake">
<![CDATA[00010001N0005Debug000000000000]]>
</Plugin>
</Plugins>
<Description/>
<Dependencies/>
<VirtualDirectory Name="src">
@ -20,8 +37,16 @@
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
<IncludePath Value="."/>
<IncludePath Value="../../.."/>
<IncludePath Value="../../../boost/include"/>
</Compiler>
<Linker Options="" Required="yes"/>
<Linker Options="" Required="yes">
<LibraryPath Value="../../../boost/lib64"/>
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
<Library Value="boost_filesystem"/>
<Library Value="boost_system"/>
<Library Value="moses"/>
<Library Value="z"/>
</Linker>
<ResourceCompiler Options="" Required="no"/>
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
<Environment EnvVarSetName="&lt;Use Defaults&gt;" DbgSetName="&lt;Use Defaults&gt;">

View File

@ -3,8 +3,10 @@
#include <cassert>
#include <algorithm>
#include <functional>
#include <boost/filesystem.hpp>
#include "pruneGeneration.h"
#include "moses/InputFileStream.h"
#include "moses/OutputFileStream.h"
using namespace std;
@ -12,8 +14,46 @@ int main(int argc, char **argv)
{
// Entry point. Reads three positional arguments:
//   argv[1]  limit, parsed with atoi and handed to Process()
//   argv[2]  input path stem  (directory + filename prefix)
//   argv[3]  output path stem (prefix for every file written)
// Every regular file in the input stem's directory whose name starts with the
// stem's filename part is run through Process() and written to
// <output stem><rest of the matched filename>.
// NOTE(review): argc is never checked, so argv[1..3] are used even when
// fewer than three arguments were supplied.
cerr << "Starting" << endl;
int limit = atoi(argv[1]);
string inPathStem = argv[2];
string outPathStem = argv[3];
// NOTE(review): this text is a rendered diff; the stdin/stdout call below
// looks like the pre-change line that the directory loop replaces. Confirm
// against the real file before relying on it.
Process(limit, cin, cout);
namespace fs = boost::filesystem;
//cerr << "inPathStem=" << inPathStem << endl;
// Split the input stem into the directory to scan and the filename prefix
// that candidate files must start with.
fs::path p(inPathStem);
// NOTE(review): parent_path() is presumably empty when inPathStem has no
// directory component; verify how directory_iterator behaves in that case.
fs::path dir = p.parent_path();
//cerr << "dir=" << dir << endl;
fs::path fileStem = p.filename();
// NOTE(review): assigning native() to std::string assumes the platform's
// path::string_type is std::string - TODO confirm for non-POSIX builds.
string fileStemStr = fileStem.native();
size_t fileStemStrSize = fileStemStr.size();
//cerr << "fileStem=" << fileStemStr << endl;
// loop thru each file in directory
// A default-constructed directory_iterator serves as the end marker.
fs::directory_iterator end_iter;
for( fs::directory_iterator dir_iter(dir) ; dir_iter != end_iter ; ++dir_iter) {
// Only regular files are considered; other directory entries are skipped.
if (fs::is_regular_file(dir_iter->status())) {
fs::path currPath = *dir_iter;
string currPathStr = currPath.native();
//cerr << "currPathStr=" << currPathStr << endl;
fs::path currFile = currPath.filename();
string currFileStr = currFile.native();
// find(...) == 0 is a prefix test: the filename must begin with the stem.
if (currFileStr.find(fileStemStr) == 0) {
// found gen table we need
//cerr << "found=" << currPathStr << endl;
// Whatever follows the stem in the matched filename is carried over to the
// output name, so each input file gets its own output file.
string suffix = currFileStr.substr(fileStemStrSize, currFileStr.size() - fileStemStrSize);
string outPath = outPathStem + suffix;
cerr << "PRUNING " << currPathStr << " TO " << outPath << endl;
// Both streams are scoped to this iteration and destroyed at its end.
// NOTE(review): neither stream is checked for a successful open before use.
Moses::InputFileStream inStrme(currPathStr);
Moses::OutputFileStream outStrme(outPath);
Process(limit, inStrme, outStrme);
}
}
}
cerr << "Finished" << endl;
}
@ -52,7 +92,7 @@ void Output(ostream &outStrme, vector<Rec> &records, int limit)
for (size_t i = 0; i < limit && i < records.size(); ++i) {
const Rec &rec = records[i];
cout << rec.line << endl;
outStrme << rec.line << endl;
}
}

View File

@ -0,0 +1,90 @@
// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#include <cassert>
#include <iostream>
#include <boost/algorithm/string/predicate.hpp>
#include <boost/iostreams/filter/gzip.hpp>
#include "OutputFileStream.h"
#include "gzfilebuf.h"
using namespace std;
using namespace boost::algorithm;
namespace Moses
{
/// Build a stream that is not attached to any file yet: no owned file
/// object and the open flag cleared. The filtering_ostream base is
/// default-constructed implicitly.
OutputFileStream::OutputFileStream()
  : m_outFile(NULL),
    m_open(false)
{
}
/// Build a stream and immediately try to open it on filePath.
///
/// The members start out in the "not open" state and Open() then does the
/// real work. Open()'s boolean result is not inspected here, so a failed
/// open is not reported by this constructor.
OutputFileStream::OutputFileStream(const std::string &filePath)
  : m_outFile(NULL),
    m_open(false)
{
  Open(filePath);
}
/// Destroying the stream closes it; Close() returns immediately when the
/// stream is not open.
OutputFileStream::~OutputFileStream()
{
  this->Close();
}
bool OutputFileStream::Open(const std::string &filePath)
{
assert(!m_open);
if (filePath == std::string("-")) {
// Write to standard output. Leave m_outFile null.
this->push(std::cout);
} else {
m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
if (m_outFile->fail()) {
return false;
}
if (ends_with(filePath, ".gz")) {
this->push(boost::iostreams::gzip_compressor());
}
this->push(*m_outFile);
}
m_open = true;
return true;
}
/** Flush and detach the stream from its current target.
 *
 * Does nothing when the stream is not open. Otherwise the stream is flushed,
 * every element that Open() pushed is removed from the filter chain, and the
 * owned file object (if any) is closed and freed.
 */
void OutputFileStream::Close()
{
  if (!m_open) return;
  this->flush();
  // Empty the whole chain, not just the final device. Open() may have pushed
  // a gzip compressor in front of the file, or std::cout with no owned file.
  // Leaving either behind would make a later Open() push onto a chain that
  // still holds the previous target's elements.
  while (!this->empty()) {
    this->pop();
  }
  if (m_outFile) {
    // The chain no longer refers to the file, so it is safe to destroy it.
    m_outFile->close();
    delete m_outFile;
    m_outFile = NULL;
  }
  m_open = false;
}
}

81
moses/OutputFileStream.h Normal file
View File

@ -0,0 +1,81 @@
// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
***********************************************************************/
#pragma once
#include <cstdlib>
#include <fstream>
#include <string>
#include <iostream>
#include <boost/iostreams/filtering_stream.hpp>
namespace Moses
{
/** Version of std::ostream with transparent compression.
*
* Transparently compresses output when writing to a file whose name ends in
* ".gz". Or, writes to stdout instead of a file when given a filename
* consisting of just a dash ("-").
*/
class OutputFileStream : public boost::iostreams::filtering_ostream
{
private:
/** File that needs flushing & closing when we close this stream.
*
* Is NULL when no file is opened, e.g. when writing to standard output.
*/
std::ofstream *m_outFile;
/// Is this stream open?
bool m_open;
public:
/** Create an unopened OutputFileStream.
*
* Until it's been opened, nothing can be done with this stream.
*/
OutputFileStream();
/// Create an OutputFileStream, and open it by calling Open().
OutputFileStream(const std::string &filePath);
virtual ~OutputFileStream();
// TODO: Can we please just always throw an exception when this fails?
/** Open stream.
*
* If filePath is "-" (just a dash), this opens the stream for writing to
* standard output. Otherwise, it opens the given file. If the filename
* has the ".gz" suffix, output will be transparently compressed.
*
* Call Close() to close the file.
*
* Returns whether opening the file was successful. It may also throw an
* exception on failure.
*/
bool Open(const std::string &filePath);
/// Flush and close stream. After this, the stream can be opened again.
void Close();
};
}