mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-11-13 10:40:52 +03:00
*** empty log message ***
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@677 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
5973058403
commit
4d0922afab
@ -3,6 +3,7 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
// create sentences with all features combined from files with individual tags
|
||||
class CombineTags
|
||||
{
|
||||
public static void main(String[] args) throws Exception
|
||||
@ -12,17 +13,18 @@ class CombineTags
|
||||
Vector vecInstream = new Vector();
|
||||
for (int i = 0 ; i < args.length ; i++)
|
||||
{
|
||||
BufferedReader inStream = new BufferedReader(new FileReader(args[i]));
|
||||
InputStreamReader temp = new InputStreamReader(new FileInputStream(args[i]), "Latin1");
|
||||
BufferedReader inStream = new BufferedReader(temp);
|
||||
vecInstream.add(inStream);
|
||||
}
|
||||
PrintStream outStream = System.out;
|
||||
OutputStreamWriter outStream = new OutputStreamWriter((OutputStream)System.out, "Latin1");
|
||||
|
||||
new CombineTags(vecInstream, outStream);
|
||||
|
||||
System.err.println("End...");
|
||||
}
|
||||
|
||||
public CombineTags(Vector vecInstream , PrintStream outStream) throws Exception
|
||||
public CombineTags(Vector vecInstream , OutputStreamWriter outStream) throws Exception
|
||||
{
|
||||
BufferedReader inFile = (BufferedReader) vecInstream.get(0);
|
||||
String inLine;
|
||||
@ -74,10 +76,14 @@ class CombineTags
|
||||
outLine += otherTag + "|";
|
||||
}
|
||||
outLine = outLine.substring(0, outLine.length() - 1) + " ";
|
||||
outStream.print(outLine);
|
||||
outStream.write(outLine);
|
||||
}
|
||||
outStream.println();
|
||||
outStream.write("\n");
|
||||
}
|
||||
// close stream
|
||||
outStream.flush();
|
||||
outStream.close();
|
||||
outStream = null;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -4,14 +4,18 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
//input is the sentences with all features combined
|
||||
//output shrunken sentences with only those words we are interested in
|
||||
public class ProcessShallowParse
|
||||
{
|
||||
public static void main(String[] args) throws Exception
|
||||
{
|
||||
System.err.println("Starting...");
|
||||
|
||||
InputStreamReader inStream = args.length > 0 ? new FileReader(args[0]) : new InputStreamReader(System.in);
|
||||
OutputStreamWriter outStream = args.length > 1 ? new FileWriter(args[1]) : new OutputStreamWriter(System.out);
|
||||
InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
|
||||
, "Latin1");
|
||||
OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
|
||||
, "Latin1");
|
||||
|
||||
new ProcessShallowParse2(inStream, outStream);
|
||||
|
||||
@ -59,7 +63,7 @@ class ProcessShallowParse2
|
||||
String factoredWord = st.nextToken();
|
||||
ret += Output(factoredWord);
|
||||
}
|
||||
outFile.write(i++ + " " + ret);
|
||||
outFile.write(ret);
|
||||
if (ret.length() > 0)
|
||||
outFile.write("\n");
|
||||
}
|
||||
@ -78,7 +82,7 @@ class ProcessShallowParse2
|
||||
if (posImproved.indexOf("ART-SB") == 0
|
||||
|| posImproved.indexOf("NN-NK_NP-SB") == 0)
|
||||
{
|
||||
ret = posImproved + "|" + morph + " ";
|
||||
ret = posImproved + "_" + morph + " ";
|
||||
}
|
||||
else if (posImproved.indexOf("VAFIN-HD") == 0
|
||||
|| posImproved.indexOf("VVFIN-HD") == 0
|
||||
@ -90,7 +94,7 @@ class ProcessShallowParse2
|
||||
|| posImproved.indexOf("PPER-EP") == 0
|
||||
)
|
||||
{
|
||||
ret = posImproved + "|" + surface + " ";
|
||||
ret = surface + " ";
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
@ -3,23 +3,28 @@
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
// create pos-tag sentences from LISP-like input tree.
|
||||
// NN-NK tag augmented with NP-SB if parent is NP-SB
|
||||
class TagHierarchy
|
||||
{
|
||||
public static void main(String[] args) throws Exception
|
||||
{
|
||||
System.err.println("Starting...");
|
||||
|
||||
InputStreamReader inStream = args.length > 0 ? new FileReader(args[0]) : new InputStreamReader(System.in);
|
||||
PrintStream outStream = args.length > 1 ? new PrintStream(new File(args[1])) : System.out;
|
||||
InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
|
||||
, "Latin1");
|
||||
OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
|
||||
, "Latin1");
|
||||
|
||||
new TagHierarchy(inStream, outStream);
|
||||
|
||||
System.err.println("End...");
|
||||
}
|
||||
|
||||
public TagHierarchy(Reader inStream, PrintStream outStream) throws Exception
|
||||
public TagHierarchy(Reader inStream, OutputStreamWriter outStream) throws Exception
|
||||
{
|
||||
BufferedReader inFile = new BufferedReader(inStream);
|
||||
BufferedReader inFile = new BufferedReader(inStream);
|
||||
BufferedWriter outFile = new BufferedWriter(outStream);
|
||||
|
||||
// tokenise
|
||||
String inLine;
|
||||
@ -29,13 +34,16 @@ class TagHierarchy
|
||||
if (inLine.equals("null"))
|
||||
{
|
||||
nullLines++;
|
||||
outStream.println("null");
|
||||
outFile.write("null\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
OutputHierarchy2(inLine, outStream);
|
||||
OutputHierarchy2(inLine, outFile);
|
||||
}
|
||||
}
|
||||
outFile.flush();
|
||||
outFile.close();
|
||||
outFile = null;
|
||||
System.err.println(nullLines + " null lines\n");
|
||||
}
|
||||
|
||||
@ -69,7 +77,7 @@ class TagHierarchy
|
||||
outFile.write('\n');
|
||||
}
|
||||
|
||||
public void OutputHierarchy2(String inLine, PrintStream outFile) throws Exception
|
||||
public void OutputHierarchy2(String inLine, BufferedWriter outFile) throws Exception
|
||||
{
|
||||
int level = 0;
|
||||
Stack prevTags = new Stack();
|
||||
@ -96,7 +104,7 @@ class TagHierarchy
|
||||
int firstBracket = parsed.indexOf(')');
|
||||
int noBracket = parsed.length() - firstBracket;
|
||||
String word = parsed.substring(0, firstBracket);
|
||||
outFile.print(currTag + " ");
|
||||
outFile.write(currTag + " ");
|
||||
|
||||
level -= noBracket;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user