mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-12-26 05:14:36 +03:00
Modernize symal source code.
Part of symal was still written in K&R C (though with an anachronistic C99 version comment!). Updating it to ANSI C, adding const in many places for clarity. Reordering functions to eliminate forward declarations. Moving definition of Cmd_T into cmd.c, since it's local to that file. Adding some wrappers to reduce the casts around Cmd_T.p. Narrowing the scope of some variables. Also updating the C++: use anonymous namespace for "static" definitions, use enums and constants instead of preprocessor macros, use false/true as boolean constants, throw exceptions instead of print-and-exit, avoid use of "final" as an identifier since it now has special meaning in C++11 (it is a contextual keyword, not a fully reserved word).
This commit is contained in:
parent
85c1af4d72
commit
91e699f90d
969
symal/cmd.c
969
symal/cmd.c
File diff suppressed because it is too large
Load Diff
43
symal/cmd.h
43
symal/cmd.h
@ -5,47 +5,32 @@
|
|||||||
|
|
||||||
#define CMD_H
|
#define CMD_H
|
||||||
|
|
||||||
#define CMDDOUBLETYPE 1
|
enum CommandType
|
||||||
#define CMDENUMTYPE 2
|
{
|
||||||
#define CMDINTTYPE 3
|
CMDDOUBLETYPE = 1,
|
||||||
#define CMDSTRINGTYPE 4
|
CMDENUMTYPE,
|
||||||
#define CMDSUBRANGETYPE 5
|
CMDINTTYPE,
|
||||||
#define CMDGTETYPE 6
|
CMDSTRINGTYPE,
|
||||||
#define CMDLTETYPE 7
|
CMDSUBRANGETYPE,
|
||||||
#define CMDSTRARRAYTYPE 8
|
CMDGTETYPE,
|
||||||
#define CMDBOOLTYPE 9
|
CMDLTETYPE,
|
||||||
|
CMDSTRARRAYTYPE,
|
||||||
|
CMDBOOLTYPE
|
||||||
|
};
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
const char *Name;
|
const char *Name;
|
||||||
int Idx;
|
int Idx;
|
||||||
} Enum_T;
|
} Enum_T;
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
int Type;
|
|
||||||
char *Name,
|
|
||||||
*ArgStr;
|
|
||||||
void *Val,
|
|
||||||
*p;
|
|
||||||
} Cmd_T;
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(__STDC__)
|
int DeclareParams(const char *, ...);
|
||||||
int DeclareParams(char *, ...);
|
int GetParams(int *n, char ***a, const char *CmdFileName);
|
||||||
#else
|
|
||||||
int DeclareParams();
|
|
||||||
#endif
|
|
||||||
|
|
||||||
int GetParams(int *n, char ***a,char *CmdFileName),
|
|
||||||
SPrintParams(),
|
|
||||||
PrintParams();
|
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
227
symal/symal.cpp
227
symal/symal.cpp
@ -5,6 +5,7 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
|
#include <stdexcept>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <list>
|
#include <list>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
@ -15,21 +16,24 @@
|
|||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
||||||
#define MAX_WORD 10000 // maximum length of source/target strings
|
const int MAX_WORD = 10000; // maximum length of source/target strings
|
||||||
#define MAX_M 400 // maximum length of source strings
|
const int MAX_M = 400; // maximum length of source strings
|
||||||
#define MAX_N 400 // maximum length of target strings
|
const int MAX_N = 400; // maximum length of target strings
|
||||||
|
|
||||||
#define UNION 1
|
enum Alignment
|
||||||
#define INTERSECT 2
|
{
|
||||||
#define GROW 3
|
UNION = 1,
|
||||||
#define SRCTOTGT 4
|
INTERSECT,
|
||||||
#define TGTTOSRC 5
|
GROW,
|
||||||
#define BOOL_YES 1
|
SRCTOTGT,
|
||||||
#define BOOL_NO 0
|
TGTTOSRC,
|
||||||
|
};
|
||||||
|
|
||||||
#define END_ENUM { (char*)0, 0 }
|
const Enum_T END_ENUM = {'\0', 0};
|
||||||
|
|
||||||
static Enum_T AlignEnum [] = {
|
namespace
|
||||||
|
{
|
||||||
|
Enum_T AlignEnum [] = {
|
||||||
{ "union", UNION },
|
{ "union", UNION },
|
||||||
{ "u", UNION },
|
{ "u", UNION },
|
||||||
{ "intersect", INTERSECT},
|
{ "intersect", INTERSECT},
|
||||||
@ -43,18 +47,16 @@ static Enum_T AlignEnum [] = {
|
|||||||
END_ENUM
|
END_ENUM
|
||||||
};
|
};
|
||||||
|
|
||||||
static Enum_T BoolEnum [] = {
|
Enum_T BoolEnum [] = {
|
||||||
{ "true", BOOL_YES },
|
{ "true", true },
|
||||||
{ "yes", BOOL_YES },
|
{ "yes", true },
|
||||||
{ "y", BOOL_YES },
|
{ "y", true },
|
||||||
{ "false", BOOL_NO },
|
{ "false", false },
|
||||||
{ "no", BOOL_NO },
|
{ "no", false },
|
||||||
{ "n", BOOL_NO },
|
{ "n", false },
|
||||||
END_ENUM
|
END_ENUM
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// global variables and constants
|
// global variables and constants
|
||||||
|
|
||||||
int* fa; //counters of covered foreign positions
|
int* fa; //counters of covered foreign positions
|
||||||
@ -117,7 +119,7 @@ int getals(istream& inp,int& m, int *a,int& n, int *b)
|
|||||||
|
|
||||||
} else
|
} else
|
||||||
return 0;
|
return 0;
|
||||||
};
|
}
|
||||||
|
|
||||||
|
|
||||||
//compute union alignment
|
//compute union alignment
|
||||||
@ -226,7 +228,7 @@ int printsrctotgt(ostream& out,int m,int *a,int n,int* b)
|
|||||||
//to represent the grow alignment as the unionalignment of a
|
//to represent the grow alignment as the unionalignment of a
|
||||||
//directed and inverted alignment
|
//directed and inverted alignment
|
||||||
|
|
||||||
int printgrow(ostream& out,int m,int *a,int n,int* b, bool diagonal=false,bool final=false,bool bothuncovered=false)
|
int printgrow(ostream& out,int m,int *a,int n,int* b, bool diagonal=false,bool isfinal=false,bool bothuncovered=false)
|
||||||
{
|
{
|
||||||
|
|
||||||
ostringstream sout;
|
ostringstream sout;
|
||||||
@ -322,7 +324,7 @@ int printgrow(ostream& out,int m,int *a,int n,int* b, bool diagonal=false,bool f
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (final) {
|
if (isfinal) {
|
||||||
for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
|
for (k=unionalignment.begin(); k!=unionalignment.end(); k++)
|
||||||
if (A[k->first][k->second]==1) {
|
if (A[k->first][k->second]==1) {
|
||||||
point.first=k->first;
|
point.first=k->first;
|
||||||
@ -383,6 +385,7 @@ int printgrow(ostream& out,int m,int *a,int n,int* b, bool diagonal=false,bool f
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} // namespace
|
||||||
|
|
||||||
|
|
||||||
//Main file here
|
//Main file here
|
||||||
@ -395,7 +398,7 @@ int main(int argc, char** argv)
|
|||||||
char* input= NULL;
|
char* input= NULL;
|
||||||
char* output= NULL;
|
char* output= NULL;
|
||||||
int diagonal=false;
|
int diagonal=false;
|
||||||
int final=false;
|
int isfinal=false;
|
||||||
int bothuncovered=false;
|
int bothuncovered=false;
|
||||||
|
|
||||||
|
|
||||||
@ -403,8 +406,8 @@ int main(int argc, char** argv)
|
|||||||
"alignment", CMDENUMTYPE, &alignment, AlignEnum,
|
"alignment", CMDENUMTYPE, &alignment, AlignEnum,
|
||||||
"d", CMDENUMTYPE, &diagonal, BoolEnum,
|
"d", CMDENUMTYPE, &diagonal, BoolEnum,
|
||||||
"diagonal", CMDENUMTYPE, &diagonal, BoolEnum,
|
"diagonal", CMDENUMTYPE, &diagonal, BoolEnum,
|
||||||
"f", CMDENUMTYPE, &final, BoolEnum,
|
"f", CMDENUMTYPE, &isfinal, BoolEnum,
|
||||||
"final", CMDENUMTYPE, &final, BoolEnum,
|
"final", CMDENUMTYPE, &isfinal, BoolEnum,
|
||||||
"b", CMDENUMTYPE, &bothuncovered, BoolEnum,
|
"b", CMDENUMTYPE, &bothuncovered, BoolEnum,
|
||||||
"both", CMDENUMTYPE, &bothuncovered, BoolEnum,
|
"both", CMDENUMTYPE, &bothuncovered, BoolEnum,
|
||||||
"i", CMDSTRINGTYPE, &input,
|
"i", CMDSTRINGTYPE, &input,
|
||||||
@ -412,9 +415,9 @@ int main(int argc, char** argv)
|
|||||||
"v", CMDENUMTYPE, &verbose, BoolEnum,
|
"v", CMDENUMTYPE, &verbose, BoolEnum,
|
||||||
"verbose", CMDENUMTYPE, &verbose, BoolEnum,
|
"verbose", CMDENUMTYPE, &verbose, BoolEnum,
|
||||||
|
|
||||||
(char*)NULL);
|
NULL);
|
||||||
|
|
||||||
GetParams(&argc, &argv, (char*)NULL);
|
GetParams(&argc, &argv, NULL);
|
||||||
|
|
||||||
if (alignment==0) {
|
if (alignment==0) {
|
||||||
cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
|
cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
|
||||||
@ -426,92 +429,94 @@ int main(int argc, char** argv)
|
|||||||
istream *inp = &std::cin;
|
istream *inp = &std::cin;
|
||||||
ostream *out = &std::cout;
|
ostream *out = &std::cout;
|
||||||
|
|
||||||
if (input) {
|
try
|
||||||
fstream *fin = new fstream(input,ios::in);
|
{
|
||||||
if (!fin->is_open()) {
|
if (input) {
|
||||||
cerr << "cannot open " << input << "\n";
|
fstream *fin = new fstream(input,ios::in);
|
||||||
exit(1);
|
if (!fin->is_open()) throw runtime_error("cannot open " + string(input));
|
||||||
|
inp = fin;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (output) {
|
||||||
|
fstream *fout = new fstream(output,ios::out);
|
||||||
|
if (!fout->is_open()) throw runtime_error("cannot open " + string(output));
|
||||||
|
out = fout;
|
||||||
|
}
|
||||||
|
|
||||||
|
int a[MAX_M],b[MAX_N],m,n;
|
||||||
|
fa=new int[MAX_M+1];
|
||||||
|
ea=new int[MAX_N+1];
|
||||||
|
|
||||||
|
|
||||||
|
int sents = 0;
|
||||||
|
A=new int *[MAX_N+1];
|
||||||
|
for (int i=1; i<=MAX_N; i++) A[i]=new int[MAX_M+1];
|
||||||
|
|
||||||
|
switch (alignment) {
|
||||||
|
case UNION:
|
||||||
|
cerr << "symal: computing union alignment\n";
|
||||||
|
while(getals(*inp,m,a,n,b)) {
|
||||||
|
prunionalignment(*out,m,a,n,b);
|
||||||
|
sents++;
|
||||||
|
}
|
||||||
|
cerr << "Sents: " << sents << endl;
|
||||||
|
break;
|
||||||
|
case INTERSECT:
|
||||||
|
cerr << "symal: computing intersect alignment\n";
|
||||||
|
while(getals(*inp,m,a,n,b)) {
|
||||||
|
printersect(*out,m,a,n,b);
|
||||||
|
sents++;
|
||||||
|
}
|
||||||
|
cerr << "Sents: " << sents << endl;
|
||||||
|
break;
|
||||||
|
case GROW:
|
||||||
|
cerr << "symal: computing grow alignment: diagonal ("
|
||||||
|
<< diagonal << ") final ("<< isfinal << ")"
|
||||||
|
<< "both-uncovered (" << bothuncovered <<")\n";
|
||||||
|
|
||||||
|
while(getals(*inp,m,a,n,b))
|
||||||
|
printgrow(*out,m,a,n,b,diagonal,isfinal,bothuncovered);
|
||||||
|
|
||||||
|
break;
|
||||||
|
case TGTTOSRC:
|
||||||
|
cerr << "symal: computing target-to-source alignment\n";
|
||||||
|
|
||||||
|
while(getals(*inp,m,a,n,b)) {
|
||||||
|
printtgttosrc(*out,m,a,n,b);
|
||||||
|
sents++;
|
||||||
|
}
|
||||||
|
cerr << "Sents: " << sents << endl;
|
||||||
|
break;
|
||||||
|
case SRCTOTGT:
|
||||||
|
cerr << "symal: computing source-to-target alignment\n";
|
||||||
|
|
||||||
|
while(getals(*inp,m,a,n,b)) {
|
||||||
|
printsrctotgt(*out,m,a,n,b);
|
||||||
|
sents++;
|
||||||
|
}
|
||||||
|
cerr << "Sents: " << sents << endl;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw runtime_error("Unknown alignment");
|
||||||
|
}
|
||||||
|
|
||||||
|
delete [] fa;
|
||||||
|
delete [] ea;
|
||||||
|
for (int i=1; i<=MAX_N; i++) delete [] A[i];
|
||||||
|
delete [] A;
|
||||||
|
|
||||||
|
if (inp != &std::cin) {
|
||||||
|
delete inp;
|
||||||
|
}
|
||||||
|
if (out != &std::cout) {
|
||||||
|
delete inp;
|
||||||
}
|
}
|
||||||
inp = fin;
|
|
||||||
}
|
}
|
||||||
|
catch (const std::exception &e)
|
||||||
if (output) {
|
{
|
||||||
fstream *fout = new fstream(output,ios::out);
|
cerr << e.what() << std::endl;
|
||||||
if (!fout->is_open()) {
|
|
||||||
cerr << "cannot open " << output << "\n";
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
out = fout;
|
|
||||||
}
|
|
||||||
|
|
||||||
int a[MAX_M],b[MAX_N],m,n;
|
|
||||||
fa=new int[MAX_M+1];
|
|
||||||
ea=new int[MAX_N+1];
|
|
||||||
|
|
||||||
|
|
||||||
int sents = 0;
|
|
||||||
A=new int *[MAX_N+1];
|
|
||||||
for (int i=1; i<=MAX_N; i++) A[i]=new int[MAX_M+1];
|
|
||||||
|
|
||||||
switch (alignment) {
|
|
||||||
case UNION:
|
|
||||||
cerr << "symal: computing union alignment\n";
|
|
||||||
while(getals(*inp,m,a,n,b)) {
|
|
||||||
prunionalignment(*out,m,a,n,b);
|
|
||||||
sents++;
|
|
||||||
}
|
|
||||||
cerr << "Sents: " << sents << endl;
|
|
||||||
break;
|
|
||||||
case INTERSECT:
|
|
||||||
cerr << "symal: computing intersect alignment\n";
|
|
||||||
while(getals(*inp,m,a,n,b)) {
|
|
||||||
printersect(*out,m,a,n,b);
|
|
||||||
sents++;
|
|
||||||
}
|
|
||||||
cerr << "Sents: " << sents << endl;
|
|
||||||
break;
|
|
||||||
case GROW:
|
|
||||||
cerr << "symal: computing grow alignment: diagonal ("
|
|
||||||
<< diagonal << ") final ("<< final << ")"
|
|
||||||
<< "both-uncovered (" << bothuncovered <<")\n";
|
|
||||||
|
|
||||||
while(getals(*inp,m,a,n,b))
|
|
||||||
printgrow(*out,m,a,n,b,diagonal,final,bothuncovered);
|
|
||||||
|
|
||||||
break;
|
|
||||||
case TGTTOSRC:
|
|
||||||
cerr << "symal: computing target-to-source alignment\n";
|
|
||||||
|
|
||||||
while(getals(*inp,m,a,n,b)) {
|
|
||||||
printtgttosrc(*out,m,a,n,b);
|
|
||||||
sents++;
|
|
||||||
}
|
|
||||||
cerr << "Sents: " << sents << endl;
|
|
||||||
break;
|
|
||||||
case SRCTOTGT:
|
|
||||||
cerr << "symal: computing source-to-target alignment\n";
|
|
||||||
|
|
||||||
while(getals(*inp,m,a,n,b)) {
|
|
||||||
printsrctotgt(*out,m,a,n,b);
|
|
||||||
sents++;
|
|
||||||
}
|
|
||||||
cerr << "Sents: " << sents << endl;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
exit(1);
|
exit(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
delete [] fa;
|
|
||||||
delete [] ea;
|
|
||||||
for (int i=1; i<=MAX_N; i++) delete [] A[i];
|
|
||||||
delete [] A;
|
|
||||||
|
|
||||||
if (inp != &std::cin) {
|
|
||||||
delete inp;
|
|
||||||
}
|
|
||||||
if (out != &std::cout) {
|
|
||||||
delete inp;
|
|
||||||
}
|
|
||||||
|
|
||||||
exit(0);
|
exit(0);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user