2013-09-10 15:06:29 +04:00
|
|
|
// This program takes gzipped sorted files and merges them in sorted order
|
|
|
|
// to stdout. Written by Ulrich Germann
|
|
|
|
#include <boost/iostreams/filtering_stream.hpp>
|
|
|
|
#include <boost/shared_ptr.hpp>
|
|
|
|
#include <algorithm>
|
|
|
|
#include <string>
|
|
|
|
#include <vector>
|
2014-05-18 18:44:04 +04:00
|
|
|
#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"
|
2013-09-10 15:06:29 +04:00
|
|
|
using namespace std;
|
|
|
|
using namespace ugdiss;
|
|
|
|
using namespace boost::iostreams;
|
|
|
|
|
|
|
|
typedef boost::shared_ptr<filtering_istream> fptr;
|
|
|
|
|
|
|
|
class Part
|
|
|
|
{
|
|
|
|
string fname;
|
|
|
|
fptr f;
|
|
|
|
string my_lines[2];
|
|
|
|
size_t ctr;
|
|
|
|
public:
|
2015-05-15 20:09:38 +03:00
|
|
|
string const& line() const {
|
2013-09-10 15:06:29 +04:00
|
|
|
static string empty_line;
|
2015-04-30 08:05:11 +03:00
|
|
|
return f ? my_lines[ctr%2] : empty_line;
|
2013-09-10 15:06:29 +04:00
|
|
|
}
|
|
|
|
|
2015-05-15 20:09:38 +03:00
|
|
|
Part(string _fname) : ctr(0) {
|
2013-09-10 15:06:29 +04:00
|
|
|
fname = _fname;
|
|
|
|
f.reset(open_input_stream(fname));
|
|
|
|
if (!getline(*f, my_lines[0])) f.reset();
|
|
|
|
}
|
|
|
|
|
2015-05-15 20:09:38 +03:00
|
|
|
bool next() {
|
2013-09-10 15:06:29 +04:00
|
|
|
if (!f) return false;
|
2015-05-15 20:09:38 +03:00
|
|
|
if (!getline(*f, my_lines[++ctr%2])) {
|
|
|
|
f.reset();
|
|
|
|
--ctr;
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
assert(my_lines[(ctr-1)%2] <= my_lines[ctr%2]);
|
2013-09-10 15:06:29 +04:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2015-05-15 20:09:38 +03:00
|
|
|
bool operator <(Part const& other) const {
|
|
|
|
return line() < other.line();
|
|
|
|
}
|
2013-09-10 15:06:29 +04:00
|
|
|
|
2015-05-15 20:09:38 +03:00
|
|
|
bool operator <=(Part const& other) const {
|
|
|
|
return line() <= other.line();
|
|
|
|
}
|
2013-09-10 15:06:29 +04:00
|
|
|
|
2015-05-15 20:09:38 +03:00
|
|
|
bool operator >(Part const& other) const {
|
|
|
|
return line() > other.line();
|
|
|
|
}
|
2013-09-10 15:06:29 +04:00
|
|
|
|
2015-05-15 20:09:38 +03:00
|
|
|
bool operator >=(Part const& other) const {
|
|
|
|
return line() >= other.line();
|
|
|
|
}
|
2013-09-10 15:06:29 +04:00
|
|
|
|
2015-05-15 20:09:38 +03:00
|
|
|
bool go(ostream& out) {
|
2013-09-10 15:06:29 +04:00
|
|
|
if (!f) return false;
|
|
|
|
#if 0
|
2015-05-15 20:09:38 +03:00
|
|
|
if (ctr) {
|
|
|
|
out << fname << "-" << ctr - 1 << "-";
|
|
|
|
out << my_lines[(ctr - 1)%2] << endl;
|
|
|
|
}
|
|
|
|
do {
|
|
|
|
out << fname << " " << ctr << " ";
|
|
|
|
out << line() << "\n";
|
|
|
|
} while (next() && my_lines[0] == my_lines[1]);
|
2013-09-10 15:06:29 +04:00
|
|
|
#else
|
2015-05-15 20:09:38 +03:00
|
|
|
do {
|
|
|
|
out << line() << "\n";
|
|
|
|
} while (next() && my_lines[0] == my_lines[1]);
|
2013-09-10 15:06:29 +04:00
|
|
|
out.flush();
|
|
|
|
#endif
|
|
|
|
return f != NULL;
|
|
|
|
}
|
2015-04-30 08:05:11 +03:00
|
|
|
|
2013-09-10 15:06:29 +04:00
|
|
|
};
|
|
|
|
|
|
|
|
|
|
|
|
int main(int argc, char* argv[])
|
|
|
|
{
|
|
|
|
vector<Part> parts;
|
|
|
|
for (int i = 1; i < argc; ++i)
|
|
|
|
parts.push_back(Part(argv[i]));
|
|
|
|
make_heap(parts.begin(), parts.end(), greater<Part>());
|
2015-05-15 20:09:38 +03:00
|
|
|
while (parts.size()) {
|
|
|
|
pop_heap(parts.begin(), parts.end(), greater<Part>());
|
|
|
|
if (parts.back().go(cout))
|
|
|
|
push_heap(parts.begin(), parts.end(), greater<Part>());
|
|
|
|
else parts.pop_back();
|
|
|
|
}
|
2013-09-10 15:06:29 +04:00
|
|
|
}
|