mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-08-16 06:50:32 +03:00
prune multiple files at once. Make up for failure in ems to give the full path of the gen table
This commit is contained in:
parent
c80df1212e
commit
930dce10bf
@ -775,6 +775,8 @@
|
||||
<File Name="../../../moses/WordsRange.h"/>
|
||||
<File Name="../../../moses/XmlOption.cpp"/>
|
||||
<File Name="../../../moses/XmlOption.h"/>
|
||||
<File Name="../../../moses/OutputFileStream.cpp"/>
|
||||
<File Name="../../../moses/OutputFileStream.h"/>
|
||||
</VirtualDirectory>
|
||||
<VirtualDirectory Name="PP">
|
||||
<File Name="../../../moses/PP/CountsPhraseProperty.cpp"/>
|
||||
|
@ -1,5 +1,22 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<CodeLite_Project Name="pruneGeneration" InternalType="Console">
|
||||
<Plugins>
|
||||
<Plugin Name="CMakePlugin">
|
||||
<![CDATA[[{
|
||||
"name": "Debug",
|
||||
"enabled": false,
|
||||
"buildDirectory": "build",
|
||||
"sourceDirectory": "$(ProjectPath)",
|
||||
"generator": "",
|
||||
"buildType": "",
|
||||
"arguments": [],
|
||||
"parentProject": ""
|
||||
}]]]>
|
||||
</Plugin>
|
||||
<Plugin Name="qmake">
|
||||
<![CDATA[00010001N0005Debug000000000000]]>
|
||||
</Plugin>
|
||||
</Plugins>
|
||||
<Description/>
|
||||
<Dependencies/>
|
||||
<VirtualDirectory Name="src">
|
||||
@ -20,8 +37,16 @@
|
||||
<Compiler Options="-g;-O0;-Wall" C_Options="-g;-O0;-Wall" Assembler="" Required="yes" PreCompiledHeader="" PCHInCommandLine="no" PCHFlags="" PCHFlagsPolicy="0">
|
||||
<IncludePath Value="."/>
|
||||
<IncludePath Value="../../.."/>
|
||||
<IncludePath Value="../../../boost/include"/>
|
||||
</Compiler>
|
||||
<Linker Options="" Required="yes"/>
|
||||
<Linker Options="" Required="yes">
|
||||
<LibraryPath Value="../../../boost/lib64"/>
|
||||
<LibraryPath Value="../../../contrib/other-builds/moses/Debug"/>
|
||||
<Library Value="boost_filesystem"/>
|
||||
<Library Value="boost_system"/>
|
||||
<Library Value="moses"/>
|
||||
<Library Value="z"/>
|
||||
</Linker>
|
||||
<ResourceCompiler Options="" Required="no"/>
|
||||
<General OutputFile="$(IntermediateDirectory)/$(ProjectName)" IntermediateDirectory="./Debug" Command="./$(ProjectName)" CommandArguments="" UseSeparateDebugArgs="no" DebugArguments="" WorkingDirectory="$(IntermediateDirectory)" PauseExecWhenProcTerminates="yes" IsGUIProgram="no" IsEnabled="yes"/>
|
||||
<Environment EnvVarSetName="<Use Defaults>" DbgSetName="<Use Defaults>">
|
||||
|
@ -3,8 +3,10 @@
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <boost/filesystem.hpp>
|
||||
#include "pruneGeneration.h"
|
||||
#include "moses/InputFileStream.h"
|
||||
#include "moses/OutputFileStream.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
@ -12,8 +14,46 @@ int main(int argc, char **argv)
|
||||
{
  // Expected command line: <limit> <input path stem> <output path stem>.
  // Every regular file in the input stem's directory whose name starts with
  // the stem's file name is pruned into <output path stem> + <rest of name>.
  if (argc < 4) {
    cerr << "Usage: " << (argc > 0 ? argv[0] : "pruneGeneration")
         << " LIMIT IN-PATH-STEM OUT-PATH-STEM" << endl;
    return 1;
  }

  cerr << "Starting" << endl;
  int limit = atoi(argv[1]);
  string inPathStem = argv[2];
  string outPathStem = argv[3];

  namespace fs = boost::filesystem;

  fs::path p(inPathStem);

  // A stem without a directory component has an empty parent path; iterate
  // the current directory in that case instead of an empty path.
  fs::path dir = p.parent_path();
  if (dir.empty()) {
    dir = fs::path(".");
  }

  // string() yields a std::string on every platform.
  const string fileStemStr = p.filename().string();
  const size_t fileStemStrSize = fileStemStr.size();

  // loop thru each file in directory
  fs::directory_iterator end_iter;
  for (fs::directory_iterator dir_iter(dir); dir_iter != end_iter; ++dir_iter) {
    if (!fs::is_regular_file(dir_iter->status())) {
      continue;
    }

    fs::path currPath = *dir_iter;
    const string currPathStr = currPath.string();
    const string currFileStr = currPath.filename().string();

    // Prefix test only: does the file name begin with the stem?
    if (currFileStr.compare(0, fileStemStrSize, fileStemStr) != 0) {
      continue;
    }

    // found gen table we need; whatever follows the stem is carried over
    // onto the output stem.
    const string suffix = currFileStr.substr(fileStemStrSize);
    const string outPath = outPathStem + suffix;
    cerr << "PRUNING " << currPathStr << " TO " << outPath << endl;

    Moses::InputFileStream inStrme(currPathStr);
    Moses::OutputFileStream outStrme(outPath);
    Process(limit, inStrme, outStrme);
  }

  cerr << "Finished" << endl;
}
|
||||
@ -52,7 +92,7 @@ void Output(ostream &outStrme, vector<Rec> &records, int limit)
|
||||
|
||||
for (size_t i = 0; i < limit && i < records.size(); ++i) {
|
||||
const Rec &rec = records[i];
|
||||
cout << rec.line << endl;
|
||||
outStrme << rec.line << endl;
|
||||
}
|
||||
}
|
||||
|
||||
|
90
moses/OutputFileStream.cpp
Normal file
90
moses/OutputFileStream.cpp
Normal file
@ -0,0 +1,90 @@
|
||||
// $Id: OutputFileStream.cpp 2780 2010-01-29 17:11:17Z bojar $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <boost/algorithm/string/predicate.hpp>
|
||||
#include <boost/iostreams/filter/gzip.hpp>
|
||||
#include "OutputFileStream.h"
|
||||
#include "gzfilebuf.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace boost::algorithm;
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
// Build a stream that is not attached to anything yet: no file object is
// owned and the stream is marked as not open.
OutputFileStream::OutputFileStream()
  : boost::iostreams::filtering_ostream(),
    m_outFile(NULL),
    m_open(false)
{}
|
||||
|
||||
// Build a stream and immediately try to open filePath via Open().
// The boolean result of Open() is not inspected here.
OutputFileStream::OutputFileStream(const std::string &filePath)
  : m_outFile(NULL),
    m_open(false)
{
  this->Open(filePath);
}
|
||||
|
||||
// Destruction closes the stream; Close() does nothing if it is not open.
OutputFileStream::~OutputFileStream()
{
  this->Close();
}
|
||||
|
||||
bool OutputFileStream::Open(const std::string &filePath)
|
||||
{
|
||||
assert(!m_open);
|
||||
if (filePath == std::string("-")) {
|
||||
// Write to standard output. Leave m_outFile null.
|
||||
this->push(std::cout);
|
||||
} else {
|
||||
m_outFile = new ofstream(filePath.c_str(), ios_base::out | ios_base::binary);
|
||||
if (m_outFile->fail()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (ends_with(filePath, ".gz")) {
|
||||
this->push(boost::iostreams::gzip_compressor());
|
||||
}
|
||||
this->push(*m_outFile);
|
||||
}
|
||||
|
||||
m_open = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
void OutputFileStream::Close()
|
||||
{
|
||||
if (!m_open) return;
|
||||
this->flush();
|
||||
if (m_outFile) {
|
||||
this->pop(); // file
|
||||
|
||||
m_outFile->close();
|
||||
delete m_outFile;
|
||||
m_outFile = NULL;
|
||||
}
|
||||
m_open = false;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
81
moses/OutputFileStream.h
Normal file
81
moses/OutputFileStream.h
Normal file
@ -0,0 +1,81 @@
|
||||
// $Id: InputFileStream.h 2939 2010-02-24 11:15:44Z jfouet $
|
||||
|
||||
/***********************************************************************
|
||||
Moses - factored phrase-based language decoder
|
||||
Copyright (C) 2006 University of Edinburgh
|
||||
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License as published by the Free Software Foundation; either
|
||||
version 2.1 of the License, or (at your option) any later version.
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; if not, write to the Free Software
|
||||
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
***********************************************************************/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdlib>
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <boost/iostreams/filtering_stream.hpp>
|
||||
|
||||
namespace Moses
|
||||
{
|
||||
|
||||
/** Version of std::ostream with transparent compression.
|
||||
*
|
||||
* Transparently compresses output when writing to a file whose name ends in
|
||||
* ".gz". Or, writes to stdout instead of a file when given a filename
|
||||
* consisting of just a dash ("-").
|
||||
*/
|
||||
class OutputFileStream : public boost::iostreams::filtering_ostream
|
||||
{
|
||||
private:
|
||||
/** File that needs flushing & closing when we close this stream.
|
||||
*
|
||||
* Is NULL when no file is opened, e.g. when writing to standard output.
|
||||
*/
|
||||
std::ofstream *m_outFile;
|
||||
|
||||
/// Is this stream open?
|
||||
bool m_open;
|
||||
|
||||
public:
|
||||
/** Create an unopened OutputFileStream.
|
||||
*
|
||||
* Until it's been opened, nothing can be done with this stream.
|
||||
*/
|
||||
OutputFileStream();
|
||||
|
||||
/// Create an OutputFileStream, and open it by calling Open().
|
||||
OutputFileStream(const std::string &filePath);
|
||||
virtual ~OutputFileStream();
|
||||
|
||||
// TODO: Can we please just always throw an exception when this fails?
|
||||
/** Open stream.
|
||||
*
|
||||
* If filePath is "-" (just a dash), this opens the stream for writing to
|
||||
* standard output. Otherwise, it opens the given file. If the filename
|
||||
* has the ".gz" suffix, output will be transparently compressed.
|
||||
*
|
||||
* Call Close() to close the file.
|
||||
*
|
||||
* Returns whether opening the file was successful. It may also throw an
|
||||
* exception on failure.
|
||||
*/
|
||||
bool Open(const std::string &filePath);
|
||||
|
||||
/// Flush and close stream. After this, the stream can be opened again.
|
||||
void Close();
|
||||
};
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user