A tool to compute symmetric alignments from GIZA++ alignments.

git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@716 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
mfederico 2006-08-14 15:03:49 +00:00
parent 032e0688ce
commit e72010d6ce
4 changed files with 1098 additions and 0 deletions

View File

@ -0,0 +1,13 @@
# Compilers: g++ for the C++ driver, gcc for the C option parser; -g keeps debug symbols.
CPP=g++ -g
CC=gcc -g
# Default target: build the symal executable.
all: symal
# Remove object files (the symal binary itself is not removed).
clean:
rm -f *.o
# Command-line / command-file parser, compiled as plain C.
cmd.o: cmd.c cmd.h
$(CC) -c -o cmd.o cmd.c
# $@ expands to the target name, so this compiles symal.cpp and links it with cmd.o.
symal: symal.cpp cmd.o
$(CPP) -o $@ $(@).cpp cmd.o

View File

@ -0,0 +1,642 @@
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#include "cmd.h"
/* Name/value table installed for parameters declared as CMDBOOLTYPE. */
static Enum_T BoolEnum[] = {
{ "FALSE", 0 },
{ "TRUE", 1 },
{ 0, 0 }
};
/* Declare strdup() by hand on platforms whose headers do not provide it. */
#ifdef NEEDSTRDUP
char *strdup();
#endif
#define FALSE 0
#define TRUE 1
/* Size in bytes of the line buffer allocated by GetParams(). */
#define LINSIZ 10240
/* Maximum number of parameters that DeclareParams() will register. */
#define MAXPARAM 256
/* Forward declarations (old-style, without prototypes) of the file-local helpers. */
static char *GetLine(),
**str2array();
static int Scan(),
SetParam(),
SetEnum(),
SetSubrange(),
SetStrArray(),
SetGte(),
SetLte(),
CmdError(),
EnumError(),
SubrangeError(),
GteError(),
LteError(),
PrintParam(),
PrintEnum(),
PrintStrArray();
/* Table of declared parameters; DeclareParams() keeps it sorted by name and
   terminates it with an entry whose Name is NULL. */
static Cmd_T cmds[MAXPARAM+1];
/* Default token separators used by Scan() and str2array(). */
static char *SepString = " \t\n";
/*
 * DeclareParams -- register a list of parameters in the global table.
 *
 * The argument list is a sequence of groups terminated by a NULL name:
 *     name (char *), type (int), address of the variable (pointer),
 * followed by type-specific extras:
 *     CMDENUMTYPE      pointer to an Enum_T table
 *     CMDSUBRANGETYPE  two ints (lower and upper bound)
 *     CMDGTETYPE       one int (lower bound)
 *     CMDLTETYPE       one int (upper bound)
 *     CMDSTRARRAYTYPE  separator string, or NULL for the default separators
 *     all other types  nothing
 *
 * Entries are inserted so that the table stays sorted by name; Scan()
 * relies on that order.  The function may be called more than once.
 * Always returns 0; an unknown type or an allocation failure terminates
 * the program with status 1.
 */
#if defined(__STDC__)
#include <stdarg.h>
int DeclareParams(char *ParName, ...)
#else
#include <varargs.h>
int DeclareParams(ParName, va_alist)
char *ParName;
va_dcl
#endif
{
    va_list args;
    static int ParamN = 0;      /* number of entries registered so far */
    int j,
        c;
    char *s;

#if defined(__STDC__)
    va_start(args, ParName);
#else
    va_start(args);
#endif
    for(;ParName;) {
        if(ParamN==MAXPARAM) {
            fprintf(stderr, "Too many parameters !!\n");
            break;
        }
        /* Find the insertion point that keeps the table sorted. */
        for(j=0,c=1; j<ParamN&&(c=strcmp(cmds[j].Name,ParName))<0; j++)
            ;
        if(!c) {
            fprintf(stderr,
                    "Warning: parameter \"%s\" declared twice.\n",
                    ParName);
        }
        /* Shift the tail up by one slot to make room at index j. */
        for(c=ParamN; c>j; c--) {
            cmds[c] = cmds[c-1];
        }
        cmds[j].Name = ParName;
        /* Slot j still holds a copy of the entry that was just shifted up:
           clear the fields that are not assigned unconditionally below so
           the new parameter does not inherit them. */
        cmds[j].ArgStr = NULL;
        cmds[j].p = NULL;
        cmds[j].Type = va_arg(args, int);
        cmds[j].Val = va_arg(args, void *);
        switch(cmds[j].Type) {
        case CMDENUMTYPE:       /* get the pointer to Enum_T struct */
            cmds[j].p = va_arg(args, void *);
            break;
        case CMDSUBRANGETYPE:   /* get the two extremes */
            cmds[j].p = (void*) calloc(2, sizeof(int));
            if(!cmds[j].p) {
                fprintf(stderr, "DeclareParams(): Unable to alloc memory\n");
                exit(1);
            }
            ((int*)cmds[j].p)[0] = va_arg(args, int);
            ((int*)cmds[j].p)[1] = va_arg(args, int);
            break;
        case CMDGTETYPE:        /* get lower or upper bound */
        case CMDLTETYPE:
            cmds[j].p = (void*) calloc(1, sizeof(int));
            if(!cmds[j].p) {
                fprintf(stderr, "DeclareParams(): Unable to alloc memory\n");
                exit(1);
            }
            ((int*)cmds[j].p)[0] = va_arg(args, int);
            break;
        case CMDSTRARRAYTYPE:   /* get the separators string */
            s = va_arg(args, char*);
            if(s) {
                cmds[j].p = (void*)strdup(s);
                if(!cmds[j].p) {
                    fprintf(stderr,
                            "DeclareParams(): Unable to alloc memory\n");
                    exit(1);
                }
            }
            break;
        case CMDBOOLTYPE:       /* booleans are stored as a TRUE/FALSE enum */
            cmds[j].Type = CMDENUMTYPE;
            cmds[j].p = BoolEnum;
            break;
        case CMDDOUBLETYPE:     /* nothing else is needed */
        case CMDINTTYPE:
        case CMDSTRINGTYPE:
            break;
        default:
            fprintf(stderr, "%s: %s %d %s \"%s\"\n",
                    "DeclareParam()", "Unknown Type",
                    cmds[j].Type, "for parameter", cmds[j].Name);
            exit(1);
        }
        ParamN++;
        ParName = va_arg(args, char *);
    }
    cmds[ParamN].Name = NULL;   /* table terminator */
    va_end(args);
    return 0;
}
/*
 * GetParams -- read parameter values from command files and the command line.
 *
 * n, a         addresses of the caller's argc/argv; on return they describe
 *              the arguments that were not consumed (the program name and
 *              every processed option are skipped).
 * CmdFileName  default command file, or NULL for none.
 *
 * A leading argument of the form "-=file" selects a command file (several
 * may follow one another).  A name starting with "@@" is run through
 * popen(); "-" means stdin.  Lines are read until end of file or a line
 * equal to "\End".  After that, every leading argument of the form
 * "-name=value" is rewritten as "<prog>/name value" and handed to Scan().
 *
 * Returns 0.  Invalid options are reported through CmdError(); an
 * unreadable command file, an allocation failure or an over-long option
 * terminates the program with status 1.
 */
int GetParams(n, a, CmdFileName)
int *n;
char ***a;
char *CmdFileName;
{
    char *Line,
         *ProgName;
    int argc = *n;
    char **argv = *a,
         *s;
    FILE *fp;
    int IsPipe;
#ifdef MSDOS
#define PATHSEP '\\'
    char *dot = NULL;
#else
#define PATHSEP '/'
#endif

    if(!(Line=malloc(LINSIZ))) {
        fprintf(stderr, "GetParams(): Unable to alloc %d bytes\n",
                LINSIZ);
        exit(1);
    }
    /* The program name is argv[0] without its directory part. */
    if((ProgName=strrchr(*argv, PATHSEP))) {
        ++ProgName;
    } else {
        ProgName = *argv;
    }
#ifdef MSDOS
    /* Temporarily cut the extension off; it is restored before returning. */
    if((dot=strchr(ProgName, '.'))) *dot = 0;
#endif
    --argc;
    ++argv;
    for(;;) {
        if(argc && argv[0][0]=='-' && argv[0][1]=='=') {
            CmdFileName = argv[0]+2;
            ++argv;
            --argc;
        }
        if(!CmdFileName) {
            break;
        }
        IsPipe = !strncmp(CmdFileName, "@@", 2);
        fp = IsPipe
             ? popen(CmdFileName+2, "r")
             : strcmp(CmdFileName, "-")
               ? fopen(CmdFileName, "r")
               : stdin;
        if(!fp) {
            fprintf(stderr, "Unable to open command file %s\n",
                    CmdFileName);
            exit(1);
        }
        while(GetLine(fp, LINSIZ, Line) && strcmp(Line, "\\End")) {
            if(Scan(ProgName, cmds, Line)) {
                CmdError(Line);
            }
        }
        if(fp!=stdin) {
            if(IsPipe) pclose(fp); else fclose(fp);
        }
        CmdFileName = NULL;
    }
    while(argc && **argv=='-' && (s=strchr(*argv, '='))) {
        /* "<prog>/<option>" plus the terminating NUL must fit in Line. */
        if(strlen(ProgName)+strlen(*argv+1)+2 > LINSIZ) {
            fprintf(stderr, "Option \"%s\" is too long\n", *argv);
            exit(1);
        }
        *s = ' ';               /* "-name=value" -> "name value" */
        sprintf(Line, "%s/%s", ProgName, *argv+1);
        *s = '=';               /* leave argv as the caller passed it */
        if(Scan(ProgName, cmds, Line)) CmdError(*argv);
        --argc;
        ++argv;
    }
    *n = argc;
    *a = argv;
#ifdef MSDOS
    if(dot) *dot = '.';
#endif
    free(Line);
    return 0;
}
int PrintParams(ValFlag, fp)
int ValFlag;
FILE *fp;
{
int i;
fflush(fp);
if(ValFlag) {
fprintf(fp, "Parameters Values:\n");
} else {
fprintf(fp, "Parameters:\n");
}
for(i=0; cmds[i].Name; i++) PrintParam(cmds+i, ValFlag, fp);
fprintf(fp, "\n");
fflush(fp);
return 0;
}
/*
 * SPrintParams -- build "<pfx><name>=<value>" strings for every parameter
 * that has been given a value (i.e. whose ArgStr is set).
 *
 * a    receives a newly allocated array of newly allocated strings;
 *      the caller owns both the array and the strings.
 * pfx  prefix put in front of each name; NULL is treated as "".
 *
 * Returns the number of strings stored in *a.  An allocation failure
 * terminates the program with status 1, like the other errors in this file.
 */
int SPrintParams(a, pfx)
char ***a,
     *pfx;
{
    int l,
        n;
    Cmd_T *cmd;

    if(!pfx) pfx="";
    l = strlen(pfx);
    /* First pass: count the parameters that carry a value. */
    for(n=0, cmd=cmds; cmd->Name; cmd++) n += !!cmd->ArgStr;
    a[0] = calloc(n, sizeof(char*));
    /* calloc(0, ...) may legitimately return NULL, so only n>0 is an error. */
    if(n && !a[0]) {
        fprintf(stderr, "SPrintParams(): Unable to alloc memory\n");
        exit(1);
    }
    for(n=0, cmd=cmds; cmd->Name; cmd++) {
        if(!cmd->ArgStr) continue;
        /* +2: one byte for '=' and one for the terminating NUL */
        a[0][n] = malloc(strlen(cmd->Name)+strlen(cmd->ArgStr)+l+2);
        if(!a[0][n]) {
            fprintf(stderr, "SPrintParams(): Unable to alloc memory\n");
            exit(1);
        }
        sprintf(a[0][n], "%s%s=%s", pfx, cmd->Name, cmd->ArgStr);
        ++n;
    }
    return n;
}
/*
 * CmdError -- report an invalid option, list the declared parameters on
 * stderr and terminate the program.  Does not return.
 * The exit status is 1, like every other error path in this file, so
 * that callers of the program can detect the failure.
 */
static int CmdError(opt)
char *opt;
{
    fprintf(stderr, "Invalid option \"%s\"\n", opt);
    fprintf(stderr, "This program expects the following parameters:\n");
    PrintParams(FALSE, stderr);
    exit(1);
}
/*
 * PrintParam -- print one parameter on its own line, indented by four
 * spaces; with ValFlag set the current value follows the name.
 * Enum and string-array parameters are delegated to their own printers.
 * An unknown type is reported on stderr and ends the program (status 1).
 * Returns 0.
 */
static int PrintParam(cmd, ValFlag, fp)
Cmd_T *cmd;
int ValFlag;
FILE *fp;
{
    int type = cmd->Type;

    fprintf(fp, "%4s", "");

    /* Types that print their own name, value and newline. */
    if(type==CMDENUMTYPE) {
        PrintEnum(cmd, ValFlag, fp);
        return 0;
    }
    if(type==CMDSTRARRAYTYPE) {
        PrintStrArray(cmd, ValFlag, fp);
        return 0;
    }

    if(type!=CMDDOUBLETYPE && type!=CMDINTTYPE && type!=CMDSUBRANGETYPE
       && type!=CMDGTETYPE && type!=CMDLTETYPE && type!=CMDSTRINGTYPE) {
        fprintf(stderr, "%s: %s %d %s \"%s\"\n",
                "PrintParam",
                "Unknown Type",
                cmd->Type,
                "for parameter",
                cmd->Name);
        exit(1);
    }

    /* Scalar types share the layout "name[: value]\n". */
    fprintf(fp, "%s", cmd->Name);
    if(ValFlag) {
        if(type==CMDDOUBLETYPE) {
            fprintf(fp, ": %22.15e", *(double *)cmd->Val);
        } else if(type==CMDSTRINGTYPE) {
            char *text = *(char **)cmd->Val;

            if(text) {
                fprintf(fp, ": \"%s\"", text);
            } else {
                fprintf(fp, ": %s", "NULL");
            }
        } else {
            /* the integer-valued types: int, subrange, gte, lte */
            fprintf(fp, ": %d", *(int *)cmd->Val);
        }
    }
    fprintf(fp, "\n");
    return 0;
}
/*
 * GetLine -- read the next logical line of a command file into Line.
 *
 * fp    stream to read from
 * n     size of the Line buffer in bytes
 * Line  destination buffer
 *
 * Lines whose first character is '#' and lines that are empty or blank
 * are skipped.  Leading white space and the trailing newline are removed.
 * A line ending in a backslash is joined with the following line, the
 * backslash being replaced by a single space.
 *
 * Returns Line, or NULL when the stream is exhausted.
 */
static char *GetLine(fp, n, Line)
FILE *fp;
int n;
char *Line;
{
    int j,
        l,
        offs=0;     /* where the next physical line is stored */

    for(;;) {
        /* No room left for even one character plus the terminator:
           return what was joined so far instead of spinning on fgets(). */
        if(n-offs < 2) {
            return offs ? Line : NULL;
        }
        if(!fgets(Line+offs, n-offs, fp)) {
            return NULL;
        }
        if(Line[offs]=='#') continue;
        l = strlen(Line+offs);
        /* Strip the newline only if there is one: the last line of a file
           or an over-long line has none, and must keep its last character. */
        if(l>0 && Line[offs+l-1]=='\n') {
            Line[offs + --l] = 0;
        }
        /* <ctype.h> functions need an unsigned char value. */
        for(j=offs; Line[j] && isspace((unsigned char)Line[j]); j++, l--)
            ;
        if(l<1) continue;
        if(j > offs) {
            /* drop the leading white space (l characters plus the NUL) */
            memmove(Line+offs, Line+j, l+1);
        }
        if(Line[offs+l-1]=='\\') {
            /* continuation: overwrite the backslash and keep reading */
            offs += l;
            Line[offs-1] = ' ';
        } else {
            break;
        }
    }
    return Line;
}
/*
 * Scan -- interpret one line of the form "[prog/]name value".
 *
 * ProgName  name of the running program
 * cmds      parameter table, sorted by name and terminated by a NULL Name
 * Line      text to interpret; it is modified temporarily and restored
 *
 * Returns the result of SetParam() when the name is found.  Otherwise
 * returns non-zero only if the name was qualified with this program's
 * name (so it had to match); blank lines, lines addressed to another
 * program and unqualified unknown names yield 0.
 */
static int Scan(ProgName, cmds, Line)
char *ProgName,
*Line;
Cmd_T *cmds;
{
char *q,
*p;
int i,
hl,
HasToMatch = FALSE,
c0,
c;
/* skip leading separators; hl is the length of the first token */
p = Line+strspn(Line, SepString);
if(!(hl=strcspn(p, SepString))) {
return 0;
}
/* a '/' inside the first token separates a program name from the parameter name */
if((q=strchr(p, '/')) && q-p<hl) {
*q = 0;
if(strcmp(p, ProgName)) {
/* addressed to a different program: ignore the line */
*q = '/';
return 0;
}
*q = '/';
HasToMatch=TRUE;
p = q+1;
}
if(!(hl = strcspn(p, SepString))) {
return 0;
}
/* terminate the parameter name in place for the lookup, then restore the byte */
c0 = p[hl];
p[hl] = 0;
/* linear search; it stops at the first name that does not compare lower, so it needs the sorted order */
for(i=0, c=1; cmds[i].Name&&(c=strcmp(cmds[i].Name, p))<0; i++)
;
p[hl] = c0;
/* found: the value is whatever follows the separators after the name */
if(!c) return SetParam(cmds+i, p+hl+strspn(p+hl, SepString));
return HasToMatch && c;
}
/*
 * SetParam -- convert the text s according to the type of cmd and store
 * the result in the variable cmd->Val points to.
 *
 * An empty s is accepted for string parameters only; for any other type
 * a warning is printed and nothing is stored.  After a successful
 * conversion a copy of s is kept in cmd->ArgStr.  Conversion errors and
 * unknown types end the program with status 1.  Returns 0.
 */
static int SetParam(cmd, s)
Cmd_T *cmd;
char *s;
{
    int type = cmd->Type;

    if(type!=CMDSTRINGTYPE && *s=='\0') {
        fprintf(stderr,
                "WARNING: No value specified for parameter \"%s\"\n",
                cmd->Name);
        return 0;
    }

    if(type==CMDDOUBLETYPE) {
        if(sscanf(s, "%lf", (double*)cmd->Val)!=1) {
            fprintf(stderr,
                    "Float value required for parameter \"%s\"\n",
                    cmd->Name);
            exit(1);
        }
    } else if(type==CMDENUMTYPE) {
        SetEnum(cmd, s);
    } else if(type==CMDINTTYPE) {
        if(sscanf(s, "%d", (int*)cmd->Val)!=1) {
            fprintf(stderr,
                    "Integer value required for parameter \"%s\"\n",
                    cmd->Name);
            exit(1);
        }
    } else if(type==CMDSTRINGTYPE) {
        /* the words "<NULL>" and "NULL" stand for a null pointer */
        if(!strcmp(s, "<NULL>") || !strcmp(s, "NULL")) {
            *(char **)cmd->Val = 0;
        } else {
            *(char **)cmd->Val = strdup(s);
        }
    } else if(type==CMDSTRARRAYTYPE) {
        SetStrArray(cmd, s);
    } else if(type==CMDGTETYPE) {
        SetGte(cmd, s);
    } else if(type==CMDLTETYPE) {
        SetLte(cmd, s);
    } else if(type==CMDSUBRANGETYPE) {
        SetSubrange(cmd, s);
    } else {
        fprintf(stderr, "%s: %s %d %s \"%s\"\n",
                "SetParam",
                "Unknown Type",
                cmd->Type,
                "for parameter",
                cmd->Name);
        exit(1);
    }

    /* remember the textual form of the value */
    cmd->ArgStr = strdup(s);
    return 0;
}
/*
 * SetEnum -- look s up in the enum table attached to cmd and store the
 * matching index in the parameter's int variable.  Entries with an empty
 * name never match.  Returns 0 on success; an unknown name is handed to
 * EnumError().
 */
static int SetEnum(cmd, s)
Cmd_T *cmd;
char *s;
{
    Enum_T *entry = (Enum_T *)cmd->p;

    while(entry->Name) {
        if(entry->Name[0]!='\0' && strcmp(s, entry->Name)==0) {
            *(int *)cmd->Val = entry->Idx;
            return 0;
        }
        ++entry;
    }
    return EnumError(cmd, s);
}
/*
 * SetSubrange -- parse s as an integer and store it if it lies within
 * the inclusive bounds kept in cmd->p (lower bound first, then upper).
 * A non-numeric s ends the program with status 1; an out-of-range value
 * is handed to SubrangeError().  Returns 0 on success.
 */
static int SetSubrange(cmd, s)
Cmd_T *cmd;
char *s;
{
    int value;
    int *bounds = (int *)cmd->p;

    if(sscanf(s, "%d", &value)!=1) {
        fprintf(stderr,
                "Integer value required for parameter \"%s\"\n",
                cmd->Name);
        exit(1);
    }
    if(value>=bounds[0] && value<=bounds[1]) {
        *(int *)cmd->Val = value;
        return 0;
    }
    return SubrangeError(cmd, value);
}
/*
 * SetGte -- parse s as an integer and store it if it is not smaller
 * than the lower bound kept in cmd->p.  A non-numeric s ends the program
 * with status 1; a value below the bound is handed to GteError().
 * Returns 0 on success.
 */
static int SetGte(cmd, s)
Cmd_T *cmd;
char *s;
{
    int value;
    int *lower = (int *)cmd->p;

    if(sscanf(s, "%d", &value)!=1) {
        fprintf(stderr,
                "Integer value required for parameter \"%s\"\n",
                cmd->Name);
        exit(1);
    }
    if(value>=*lower) {
        *(int *)cmd->Val = value;
        return 0;
    }
    return GteError(cmd, value);
}
/*
 * SetStrArray -- split s with str2array(), using the separators stored
 * in cmd->p, and store the resulting array in the parameter's char **
 * variable.  Returns 0.
 */
static int SetStrArray(cmd, s)
Cmd_T *cmd;
char *s;
{
    char ***target = (char ***)cmd->Val;
    char *separators = (char *)cmd->p;

    *target = str2array(s, separators);
    return 0;
}
/*
 * SetLte -- parse s as an integer and store it if it does not exceed
 * the upper bound kept in cmd->p.  A non-numeric s ends the program with
 * status 1; a value above the bound is handed to LteError().
 * Returns 0 on success.
 */
static int SetLte(cmd, s)
Cmd_T *cmd;
char *s;
{
    int value;
    int *upper = (int *)cmd->p;

    if(sscanf(s, "%d", &value)!=1) {
        fprintf(stderr,
                "Integer value required for parameter \"%s\"\n",
                cmd->Name);
        exit(1);
    }
    if(value<=*upper) {
        *(int *)cmd->Val = value;
        return 0;
    }
    return LteError(cmd, value);
}
/*
 * EnumError -- report that s is not a valid name for the enum parameter
 * cmd, list the accepted names on stderr (entries with an empty name are
 * not shown) and end the program with status 1.  Does not return.
 */
static int EnumError(cmd, s)
Cmd_T *cmd;
char *s;
{
    Enum_T *entry = (Enum_T *)cmd->p;

    fprintf(stderr,
            "Invalid value \"%s\" for parameter \"%s\"\n", s, cmd->Name);
    fputs("Valid values are:\n", stderr);
    while(entry->Name) {
        if(entry->Name[0]!='\0') {
            fprintf(stderr, " %s\n", entry->Name);
        }
        ++entry;
    }
    fputs("\n", stderr);
    exit(1);
}
/*
 * GteError -- report that n is below the lower bound of parameter cmd
 * and end the program with status 1.  Does not return.
 */
static int GteError(cmd, n)
Cmd_T *cmd;
int n;
{
    int *lower = (int *)cmd->p;

    fprintf(stderr,
            "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
    fprintf(stderr,
            "Valid values must be greater than or equal to %d\n", lower[0]);
    exit(1);
}
/*
 * LteError -- report that n is above the upper bound of parameter cmd
 * and end the program with status 1.  Does not return.
 */
static int LteError(cmd, n)
Cmd_T *cmd;
int n;
{
    int *upper = (int *)cmd->p;

    fprintf(stderr,
            "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
    fprintf(stderr,
            "Valid values must be less than or equal to %d\n", upper[0]);
    exit(1);
}
/*
 * SubrangeError -- report that n lies outside the range allowed for
 * parameter cmd and end the program with status 1.  Does not return.
 */
static int SubrangeError(cmd, n)
Cmd_T *cmd;
int n;
{
    int *bounds = (int *)cmd->p;    /* bounds[0] = lowest, bounds[1] = highest */

    fprintf(stderr,
            "Value %d out of range for parameter \"%s\"\n", n, cmd->Name);
    fprintf(stderr,
            "Valid values range from %d to %d\n", bounds[0], bounds[1]);
    exit(1);
}
/*
 * PrintEnum -- print the name of an enum parameter and, when ValFlag is
 * set, ": <name>" for every named table entry whose index equals the
 * current value.  The line is ended with a newline.  Returns 0.
 */
static int PrintEnum(cmd, ValFlag, fp)
Cmd_T *cmd;
int ValFlag;
FILE *fp;
{
    Enum_T *entry;

    fputs(cmd->Name, fp);
    if(ValFlag) {
        entry = (Enum_T *)cmd->p;
        while(entry->Name) {
            if(entry->Name[0]!='\0' && *(int *)cmd->Val==entry->Idx) {
                fprintf(fp, ": %s", entry->Name);
            }
            ++entry;
        }
    }
    fputs("\n", fp);
    return 0;
}
/*
 * PrintStrArray -- print the name of a string-array parameter and, when
 * ValFlag is set, its elements: the first one after ": " on the same
 * line, the others on lines of their own, aligned under the first.
 * A null array prints nothing after ": "; an array with no elements
 * prints "NULL".  Returns 0.
 *
 * The alignment is produced with a "%*s" field width instead of a
 * heap-allocated string of blanks, so there is no allocation that could
 * fail; the output is unchanged.
 */
static int PrintStrArray(cmd, ValFlag, fp)
Cmd_T *cmd;
int ValFlag;
FILE *fp;
{
    char **s = *(char***)cmd->Val;
    /* 4 is the indentation printed by PrintParam() in front of the name */
    int l = 4+strlen(cmd->Name);

    fprintf(fp, "%s", cmd->Name);
    if(ValFlag) {
        fprintf(fp, ": %s", s ? (*s ? *s++ : "NULL") : "");
        if(s) while(*s) {
            /* l+1 blanks, then one more blank, then the element */
            fprintf(fp, "\n%*s %s", l+1, "", *s++);
        }
    }
    fprintf(fp, "\n");
    return 0;
}
/*
 * str2array -- split s into tokens separated by any character of sep.
 *
 * s    text to split; it is not modified
 * sep  separator characters; NULL selects the default SepString
 *
 * Returns a newly allocated, NULL-terminated array of newly allocated
 * strings (only the terminator when s holds no token).  The caller owns
 * the result.  An allocation failure ends the program with status 1,
 * like the other errors in this file.
 */
static char **str2array(s, sep)
char *s,
     *sep;
{
    char *p,
         **a;
    int n = 0,
        l;

    if(!sep) sep = SepString;
    /* skip leading separators, then count the tokens */
    p = s += strspn(s, sep);
    while(*p) {
        p += strcspn(p, sep);
        p += strspn(p, sep);
        ++n;
    }
    a = calloc(n+1, sizeof(char *));    /* +1 for the NULL terminator */
    if(!a) {
        fprintf(stderr, "str2array(): Unable to alloc memory\n");
        exit(1);
    }
    /* second pass: copy each token */
    p = s;
    n = 0;
    while(*p) {
        l = strcspn(p, sep);
        a[n] = malloc(l+1);
        if(!a[n]) {
            fprintf(stderr, "str2array(): Unable to alloc memory\n");
            exit(1);
        }
        memcpy(a[n], p, l);
        a[n][l] = 0;
        ++n;
        p += l;
        p += strspn(p, sep);
    }
    return a;
}

View File

@ -0,0 +1,49 @@
#if !defined(CMD_H)
#define CMD_H
/*
 * cmd.h -- declaration of program parameters and parsing of their values
 * from command files and from the command line.
 */
#include <stdio.h>  /* FILE, needed by the PrintParams() prototype */

/*
 * Parameter types accepted by DeclareParams().  Each parameter is given
 * as: name, type, address of the variable, plus the extras listed here.
 */
#define CMDDOUBLETYPE 1     /* double *; no extra argument */
#define CMDENUMTYPE 2       /* int *; extra: Enum_T table */
#define CMDINTTYPE 3        /* int *; no extra argument */
#define CMDSTRINGTYPE 4     /* char **; no extra argument */
#define CMDSUBRANGETYPE 5   /* int *; extra: lower and upper bound (ints) */
#define CMDGTETYPE 6        /* int *; extra: lower bound (int) */
#define CMDLTETYPE 7        /* int *; extra: upper bound (int) */
#define CMDSTRARRAYTYPE 8   /* char ***; extra: separator string or NULL */
#define CMDBOOLTYPE 9       /* int *; parsed as the enum FALSE/TRUE */

/* One entry of an enum table; the table ends with a NULL Name. */
typedef struct {
    char *Name;
    int Idx;
} Enum_T;

/* One declared parameter. */
typedef struct {
    int Type;           /* one of the CMD...TYPE codes */
    char *Name,         /* parameter name */
         *ArgStr;       /* text of the value it was last set from, or NULL */
    void *Val,          /* address of the variable that receives the value */
         *p;            /* type-specific data (enum table, bounds, separators) */
} Cmd_T;

#ifdef __cplusplus
extern "C" {
#endif
#if defined(__STDC__)
int DeclareParams(char *, ...);
#else
int DeclareParams();
#endif
/*
 * Full prototypes: in C++ an empty parameter list declares a function
 * taking no arguments, which would make these two uncallable from C++.
 */
int GetParams(int *n, char ***a,char *CmdFileName),
    SPrintParams(char ***a, char *pfx),
    PrintParams(int ValFlag, FILE *fp);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,394 @@
#include <cassert>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <list>
#include <vector>
#include <set>
#include <algorithm>
#include "cmd.h"
using namespace std;
#define MAX_WORD 100 //size of the buffer for one word of a source/target sentence
#define MAX_M 200 //upper bound (exclusive) on the length of source sentences
#define MAX_N 200 //upper bound (exclusive) on the length of target sentences
//codes of the symmetrization methods (0 means "not selected")
#define UNION 1
#define INTERSECT 2
#define GROW 3
//values stored for yes/no options
#define BOOL_YES 1
#define BOOL_NO 0
//terminator entry of an Enum_T table
#define END_ENUM { (char*)0, 0 }
//accepted spellings of the alignment method (option -a / -alignment)
static Enum_T AlignEnum [] = {
{ "union", UNION },
{ "u", UNION },
{ "intersect", INTERSECT},
{ "i", INTERSECT},
{ "grow", GROW },
{ "g", GROW },
END_ENUM
};
//accepted spellings of the yes/no options
static Enum_T BoolEnum [] = {
{ "true", BOOL_YES },
{ "yes", BOOL_YES },
{ "y", BOOL_YES },
{ "false", BOOL_NO },
{ "no", BOOL_NO },
{ "n", BOOL_NO },
END_ENUM
};
// global variables and constants
// fa, ea and A are allocated in main() and used as scratch space by printgrow()
int* fa; //flags of covered foreign (source) positions
int* ea; //flags of covered english (target) positions
int** A; //alignment matrix indexed [target][source]: 2 = point of the symmetric alignment,
         //-1 = point of the direct alignment only, 1 = point of the inverse alignment only
int verbose=0; //set by the -v / -verbose option
//Read one alignment pair from the input stream.
//Record layout: a frequency, then for the target sentence its length n,
//n words, a separator token and n source positions (inverse alignment b),
//then for the source sentence its length m, m words, a separator token
//and m target positions (direct alignment a). Positions are 1-based and
//0 means "not aligned"; a[1..m] and b[1..n] are filled.
//Returns 1 when a complete record was read, 0 at end of input or when
//the record is truncated or malformed.
int getals(fstream& inp,int& m, int *a,int& n, int *b)
{
  char w[MAX_WORD];
  string dummy;          //separator token; a string cannot be overrun
  int i,j,freq;

  if (!(inp >> freq)) return 0;

  //target sentence
  if (!(inp >> n)) return 0;
  assert(n<MAX_N);
  for (i=1;i<=n;i++){
    inp >> setw(MAX_WORD) >> w;
    assert(strlen(w)<MAX_WORD-1);
  }
  inp >> dummy; //# separator

  // inverse alignment
  for (i=1;i<=n;i++) inp >> b[i];

  //source sentence
  if (!(inp >> m)) return 0;
  assert(m<MAX_M);
  for (j=1;j<=m;j++){
    inp >> setw(MAX_WORD) >> w;
    assert(strlen(w)<MAX_WORD-1);
  }
  inp >> dummy; //# separator

  // direct alignment
  for (j=1;j<=m;j++) inp >> a[j];

  //a failed read leaves stale values behind: do not report them as a record
  if (!inp) return 0;

  //check direct alignment
  for (j=1;j<=m;j++)
    assert(0<=a[j] && a[j]<=n);

  //check inverse alignment
  for (i=1;i<=n;i++)
    assert(0<=b[i] && b[i]<=m);

  return 1;
}
//Compute and print the union alignment.
//a[1..m] is the direct alignment (target position of each source word),
//b[1..n] the inverse one (source position of each target word); 0 means
//"not aligned". Points are written as 0-based "source-target" pairs
//separated by blanks, one sentence per line. A sentence pair without
//any point produces an empty line. Returns 1.
int prunionalignment(fstream& out,int m,int *a,int n,int* b){

  ostringstream sout;

  for (int j=1;j<=m;j++)
    if (a[j])
      sout << j-1 << "-" << a[j]-1 << " ";

  //points of the inverse alignment that were not printed above
  for (int i=1;i<=n;i++)
    if (b[i] && a[b[i]]!=i)
      sout << b[i]-1 << "-" << i-1 << " ";

  //turn the last " " into the end of line; with no point at all there
  //is no blank to replace, so just emit the end of line
  string str = sout.str();
  if (str.empty())
    str = "\n";
  else
    str[str.length()-1] = '\n';

  out << str;

  return 1;
}
//Compute and print the intersection alignment.
//a[1..m] is the direct alignment, b[1..n] the inverse one; 0 means
//"not aligned". Only the points present in both directions are written,
//as 0-based "source-target" pairs separated by blanks, one sentence per
//line. An empty intersection produces an empty line. Returns 1.
int printersect(fstream& out,int m,int *a,int n,int* b){

  ostringstream sout;

  for (int j=1;j<=m;j++)
    if (a[j] && b[a[j]]==j)
      sout << j-1 << "-" << a[j]-1 << " ";

  //turn the last " " into the end of line; with no point at all there
  //is no blank to replace, so just emit the end of line
  string str = sout.str();
  if (str.empty())
    str = "\n";
  else
    str[str.length()-1] = '\n';

  out << str;

  return 1;
}
//Compute and print the grow (optionally diagonal / final) alignment.
//Nice property: you will never introduce more points than the union
//alignment. Hence, you will always be able to represent the grow
//alignment as the union of a directed and an inverted alignment.
//
//a[1..m] is the direct alignment, b[1..n] the inverse one; 0 means
//"not aligned". Starting from the intersection, neighbouring points of
//the union are added as long as they connect a word that is not covered
//yet; with diagonal set, the four diagonal neighbours are considered too.
//With final set, remaining union points are added afterwards, first those
//of the inverse alignment and then those of the direct one: if
//bothuncovered is set only points whose two words are both uncovered,
//otherwise points with at least one uncovered word.
//Uses the global scratch arrays fa, ea and A, which must be allocated.
//Points are written as 0-based "source-target" pairs, one sentence per
//line; a sentence pair without points produces an empty line. Returns 1.
int printgrow(fstream& out,int m,int *a,int n,int* b, bool diagonal=false,bool final=false,bool bothuncovered=false){

  ostringstream sout;

  //offsets (target,source) of the neighbours of a point
  vector <pair <int,int> > neighbors;

  neighbors.push_back(make_pair(-1,0));
  neighbors.push_back(make_pair(0,-1));
  neighbors.push_back(make_pair(1,0));
  neighbors.push_back(make_pair(0,1));

  if (diagonal){
    neighbors.push_back(make_pair(-1,-1));
    neighbors.push_back(make_pair(-1,1));
    neighbors.push_back(make_pair(1,-1));
    neighbors.push_back(make_pair(1,1));
  }

  int i,j;

  //covered foreign and english positions
  memset(fa,0,(m+1)*sizeof(int));
  memset(ea,0,(n+1)*sizeof(int));

  //matrix to quickly check the origin of a point: symmetric alignment
  //(value 2), direct alignment only (-1), inverse alignment only (1)
  for (i=1;i<=n;i++) memset(A[i],0,(m+1)*sizeof(int));

  //points are stored as (target position, source position)
  set <pair <int,int> > currentpoints;    //symmetric alignment
  set <pair <int,int> > unionalignment;   //union alignment

  pair <int,int> point;                   //variable to store points
  set<pair <int,int> >::const_iterator k; //iterator over sets

  //fill in the alignments
  for (j=1;j<=m;j++){
    if (a[j]){
      unionalignment.insert(make_pair(a[j],j));
      if (b[a[j]]==j){
        //point of the intersection: the starting alignment
        fa[j]=1;ea[a[j]]=1;
        A[a[j]][j]=2;
        currentpoints.insert(make_pair(a[j],j));
      }
      else
        A[a[j]][j]=-1;
    }
  }

  for (i=1;i<=n;i++)
    if (b[i] && a[b[i]]!=i){ //not intersection
      unionalignment.insert(make_pair(i,b[i]));
      A[i][b[i]]=1;
    }

  //grow until a full scan adds nothing
  int added=1;

  while (added){
    added=0;
    //scan the current alignment; inserting into a set does not
    //invalidate the iterator
    for (k=currentpoints.begin();k!=currentpoints.end();k++){
      for (size_t o=0;o<neighbors.size();o++){
        point.first=k->first+neighbors[o].first;
        point.second=k->second+neighbors[o].second;
        //check if neighbor is inside 'matrix'
        if (point.first>0 && point.first <=n && point.second>0 && point.second<=m)
          //check if neighbor is in the union alignment
          if (b[point.first]==point.second || a[point.second]==point.first){
            //check if it connects at least one uncovered word
            if (!(ea[point.first] && fa[point.second]))
            {
              //insert point in currentpoints!
              currentpoints.insert(point);
              A[point.first][point.second]=2;
              ea[point.first]=1; fa[point.second]=1;
              added=1;
            }
          }
      }
    }
  }

  if (final){
    //two passes over the union: inverse-only points (1) first, then
    //direct-only points (-1)
    const int origin[2]={1,-1};

    for (int pass=0;pass<2;pass++)
      for (k=unionalignment.begin();k!=unionalignment.end();k++)
        if (A[k->first][k->second]==origin[pass])
        {
          point.first=k->first;point.second=k->second;
          //both words uncovered, or at least one of them, depending
          //on bothuncovered
          if ((bothuncovered && !ea[point.first] && !fa[point.second]) ||
              (!bothuncovered && !(ea[point.first] && fa[point.second])))
          {
            //add it!
            currentpoints.insert(point);
            A[point.first][point.second]=2;
            //keep track of new covered positions
            ea[point.first]=1;fa[point.second]=1;
          }
        }
  }

  for (k=currentpoints.begin();k!=currentpoints.end();k++)
    sout << k->second-1 << "-" << k->first-1 << " ";

  //turn the last " " into the end of line; with no point at all there
  //is no blank to replace, so just emit the end of line
  string str = sout.str();
  if (str.empty())
    str = "\n";
  else
    str[str.length()-1] = '\n';

  out << str;
  out.flush();

  return 1;
}
//Program entry point.
//Options (see the usage message): -a selects the symmetrization method,
//-d, -f and -b tune the grow method, -i and -o name the input and output
//files (standard input and output by default), -v sets verbose.
//Exit status: 0 on success, 1 when the method is missing or unknown or a
//file cannot be opened.
int main(int argc, char** argv){

  int alignment=0;
  char* input="/dev/stdin";
  char* output="/dev/stdout";
  int diagonal=false;
  int final=false;
  int bothuncovered=false;

  DeclareParams("a", CMDENUMTYPE,  &alignment, AlignEnum,
                "alignment", CMDENUMTYPE,  &alignment, AlignEnum,
                "d", CMDENUMTYPE,  &diagonal, BoolEnum,
                "diagonal", CMDENUMTYPE,  &diagonal, BoolEnum,
                "f", CMDENUMTYPE,  &final, BoolEnum,
                "final", CMDENUMTYPE,  &final, BoolEnum,
                "b", CMDENUMTYPE,  &bothuncovered, BoolEnum,
                "both", CMDENUMTYPE,  &bothuncovered, BoolEnum,
                "i", CMDSTRINGTYPE, &input,
                "o", CMDSTRINGTYPE, &output,
                "v", CMDENUMTYPE,  &verbose, BoolEnum,
                "verbose", CMDENUMTYPE,  &verbose, BoolEnum,
                (char *)NULL);

  GetParams(&argc, &argv, (char*) NULL);

  //the method is mandatory: without it print the usage and fail
  if (alignment==0){
    cerr << "usage: symal [-i=<inputfile>] [-o=<outputfile>] -a=[u|i|g] -d=[yes|no] -b=[yes|no] -f=[yes|no] \n"
         << "Input file or std must be in .bal format (see script giza2bal.pl).\n";
    exit(1);
  }

  fstream inp(input,ios::in);
  fstream out(output,ios::out);

  if (!inp.is_open()){
    cerr << "cannot open " << input << "\n";
    exit(1);
  }

  if (!out.is_open()){
    cerr << "cannot open " << output << "\n";
    exit(1);
  }

  int a[MAX_M],b[MAX_N],m,n;

  //scratch space used by printgrow(); index 0 of A is never used
  fa=new int[MAX_M+1];
  ea=new int[MAX_N+1];
  A=new int *[MAX_N+1];
  for (int i=1;i<=MAX_N;i++) A[i]=new int[MAX_M+1];

  switch (alignment){
  case UNION:
    cerr << "symal: computing union alignment\n";
    while(getals(inp,m,a,n,b)) prunionalignment(out,m,a,n,b);
    break;
  case INTERSECT:
    cerr << "symal: computing intersect alignment\n";
    while(getals(inp,m,a,n,b)) printersect(out,m,a,n,b);
    break;
  case GROW:
    cerr << "symal: computing grow alignment: diagonal ("
         << diagonal << ") final ("<< final << ")"
         <<  "both-uncovered (" << bothuncovered <<")\n";
    while(getals(inp,m,a,n,b))
      printgrow(out,m,a,n,b,diagonal,final,bothuncovered);
    break;
  default:
    exit(1);
  }

  delete [] fa; delete [] ea;
  for (int i=1;i<=MAX_N;i++) delete [] A[i];
  delete [] A;

  //exit() does not destroy local objects, so flush the output explicitly
  //and leave through return, which runs the stream destructors
  out.flush();
  return 0;
}