mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-10-26 19:37:58 +03:00
delete eclipse build
git-svn-id: https://mosesdecoder.svn.sourceforge.net/svnroot/mosesdecoder/trunk@3423 1f5c12ca-751b-0410-a591-d2e778427230
This commit is contained in:
parent
bdfa2beab8
commit
c73dddfb5b
@ -1,6 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry kind="src" path=""/>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
|
||||
<classpathentry kind="output" path=""/>
|
||||
</classpath>
|
@ -1 +0,0 @@
|
||||
*.class
|
@ -1,17 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>java-utils</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
@ -1,89 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
// create sentences with all features combined from files with individual tags
|
||||
// Combines per-feature tag files into factored sentences: token i of every
// input file is glued together with '|' separators ("word|pos|morph ...").
class CombineTags
{
	// Usage: java CombineTags tagFile1 tagFile2 ... > combined
	// Opens one Latin1 reader per argument and writes the combined
	// factored sentences to stdout (also Latin1).
	public static void main(String[] args) throws Exception
	{
		System.err.println("Starting...");

		Vector vecInstream = new Vector();
		for (int i = 0 ; i < args.length ; i++)
		{
			InputStreamReader temp = new InputStreamReader(new FileInputStream(args[i]), "Latin1");
			BufferedReader inStream = new BufferedReader(temp);
			vecInstream.add(inStream);
		}
		OutputStreamWriter outStream = new OutputStreamWriter((OutputStream)System.out, "Latin1");

		new CombineTags(vecInstream, outStream);

		// fix: the input readers were never closed (resource leak)
		for (int i = 0 ; i < vecInstream.size() ; i++)
		{
			((BufferedReader) vecInstream.get(i)).close();
		}

		System.err.println("End...");
	}

	// Reads the parallel streams line by line; the first stream drives the
	// iteration and the others are assumed to have the same number of lines.
	// Flushes and closes outStream when done; the input readers are left
	// open for the caller to close.
	public CombineTags(Vector vecInstream , OutputStreamWriter outStream) throws Exception
	{
		BufferedReader inFile = (BufferedReader) vecInstream.get(0);
		String inLine;
		while ((inLine = inFile.readLine()) != null)
		{
			Vector phrases = new Vector();

			// tokenise the line from the 1st stream
			Vector phrase = new Vector();
			StringTokenizer st = new StringTokenizer(inLine);
			while (st.hasMoreTokens())
			{
				String tag = st.nextToken();
				phrase.add(tag);
			}
			phrases.add(phrase);

			// read the corresponding line from every other stream
			for (int i = 1 ; i < vecInstream.size() ; i++)
			{
				BufferedReader otherFile = (BufferedReader) vecInstream.get(i);
				String otherLine = otherFile.readLine();
				StringTokenizer otherSt = new StringTokenizer(otherLine);
				Vector otherPhrase = new Vector();

				while (otherSt.hasMoreTokens())
				{
					String tag = otherSt.nextToken();
					otherPhrase.add(tag);
				}
				phrases.add(otherPhrase);
			}

			// combine: position pos -> "tag0|tag1|...|tagN "
			phrase = (Vector) phrases.get(0);

			for (int pos = 0 ; pos < phrase.size() ; pos++)
			{
				String outLine = (String) phrase.get(pos) + "|";

				for (int stream = 1 ; stream < phrases.size() ; stream++)
				{
					Vector otherPhrase = (Vector) phrases.get(stream);
					String otherTag;
					if (otherPhrase.size() <= pos)
						// other stream's line is shorter: fall back to its
						// first tag. NOTE(review): throws if that line is
						// empty -- confirm inputs are never empty lines
						otherTag = (String) otherPhrase.get(0);
					else
						otherTag = (String) otherPhrase.get(pos);
					outLine += otherTag + "|";
				}
				// drop the trailing '|' and separate tokens with a space
				outLine = outLine.substring(0, outLine.length() - 1) + " ";
				outStream.write(outLine);
			}
			outStream.write("\n");
		}
		// close stream
		outStream.flush();
		outStream.close();
		outStream = null;
	}
}
|
||||
|
@ -1,82 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
//input is the sentences with all features combined
|
||||
//output sentences combination of morphology, lopar tags and parsed tags
|
||||
// used to create generation table
|
||||
public class ProcessShallowParse
|
||||
{
|
||||
public static void main(String[] args) throws Exception
|
||||
{
|
||||
System.err.println("Starting...");
|
||||
|
||||
InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
|
||||
, "Latin1");
|
||||
OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
|
||||
, "Latin1");
|
||||
|
||||
new ProcessShallowParse2(inStream, outStream);
|
||||
|
||||
System.err.println("End...");
|
||||
}
|
||||
}
|
||||
|
||||
// Converts factored sentences ("surface|pos|morph|improvedPos" per token)
// into the tag sequence used to create the generation table.
class ProcessShallowParse2
{ // factored sentence

	// Reads one factored sentence per line from inStream, converts each
	// token via Output(), and writes one line per sentence to outStream.
	// Flushes and closes outStream when done.
	public ProcessShallowParse2(Reader inStream, Writer outStream) throws Exception
	{
		BufferedReader inFile = new BufferedReader(inStream);
		BufferedWriter outFile = new BufferedWriter(outStream);

		// tokenise
		String inLine;
		int i = 0; // fix: was initialised to 1, so the count below was one too many
		while ((inLine = inFile.readLine()) != null)
		{
			StringTokenizer st = new StringTokenizer(inLine);
			String ret = "";
			while (st.hasMoreTokens())
			{
				String factoredWord = st.nextToken();
				ret += Output(factoredWord);
			}
			outFile.write(ret + "\n");
			i++;
		}
		outFile.flush();
		outFile.close();
		outFile = null;
		System.err.print("no of lines = " + i);
	}

	// Maps one factored word "surface|pos|morph|improvedPos" to its output
	// token (always suffixed with a space):
	//   improvedPos ART-SB or NN-NK_NP-SB -> improvedPos_morph
	//   improvedPos ???                   -> ???
	//   anything else                     -> the surface form
	// NOTE(review): assumes exactly 4 '|'-separated factors; fewer factors
	// make nextToken() throw NoSuchElementException -- confirm inputs.
	protected String Output(String factoredWord) throws Exception
	{
		StringTokenizer st = new StringTokenizer(factoredWord, "|");

		String surface = st.nextToken();
		String posNormal = st.nextToken(); // consumed to reach the next factor, otherwise unused
		String morph = st.nextToken();
		String posImproved = st.nextToken();
		String ret = "";

		if (posImproved.equals("ART-SB")
				|| posImproved.equals("NN-NK_NP-SB"))
		{
			ret = posImproved + "_" + morph + " ";
		}
		else if (posImproved.equals("???"))
		{
			ret = "??? ";
		}
		else
		{
			ret = surface + " ";
		}

		return ret;
	}
}
|
@ -1,48 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
//used to create language model
|
||||
// Drops every "???" token from the input; used to create the language model.
public class ShrinkSentence
{
	// Usage: java ShrinkSentence [inFile [outFile]]; defaults to
	// stdin/stdout. Both sides use the Latin1 encoding.
	public static void main(String[] args) throws Exception
	{
		System.err.println("Starting...");

		InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
									, "Latin1");
		OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
									, "Latin1");

		new ShrinkSentence(inStream, outStream);

		System.err.println("End...");
	}

	// Copies inStream to outStream token by token, skipping "???" tokens.
	// Every surviving token is followed by a single space, and each input
	// line yields one output line. Flushes and closes outStream when done.
	public ShrinkSentence(Reader inStream, Writer outStream) throws Exception
	{
		BufferedReader inFile = new BufferedReader(inStream);
		BufferedWriter outFile = new BufferedWriter(outStream);

		// tokenise
		String inLine;
		int i = 0; // fix: was initialised to 1, reporting one line too many
		while ((inLine = inFile.readLine()) != null)
		{
			StringTokenizer st = new StringTokenizer(inLine);
			while (st.hasMoreTokens())
			{
				String word = st.nextToken();
				if (!word.equals("???"))
					outFile.write(word + " ");
			}
			outFile.write("\n");
			i++;
		}
		outFile.flush();
		outFile.close();
		outFile = null;
		System.err.print("no of lines = " + i);
	}
}
|
@ -1,135 +0,0 @@
|
||||
// $Id$
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
// create pos-tag sentences from LISP-like input tree.
|
||||
// NN-NK tag augmented with NP-SP if parent is NP-SB
|
||||
// create pos-tag sentences from LISP-like input tree.
// NN-NK tag augmented with NP-SP if parent is NP-SB
class TagHierarchy
{
	// Usage: java TagHierarchy [inFile [outFile]]; defaults to
	// stdin/stdout. Both sides use the Latin1 encoding.
	public static void main(String[] args) throws Exception
	{
		System.err.println("Starting...");

		InputStreamReader inStream = new InputStreamReader(args.length > 0 ? new FileInputStream(args[0]) : System.in
									, "Latin1");
		OutputStreamWriter outStream = new OutputStreamWriter(args.length > 1 ? new FileOutputStream(args[1]) : (OutputStream) System.out
									, "Latin1");

		new TagHierarchy(inStream, outStream);

		System.err.println("End...");
	}

	// Drives the conversion: lines that are literally "null" (unparsable
	// sentences) are copied through and counted; every other line goes
	// through OutputHierarchy2(). Flushes and closes outStream when done.
	public TagHierarchy(Reader inStream, OutputStreamWriter outStream) throws Exception
	{
		BufferedReader inFile = new BufferedReader(inStream);
		BufferedWriter outFile = new BufferedWriter(outStream);

		// tokenise
		String inLine;
		int nullLines = 0;
		while ((inLine = inFile.readLine()) != null)
		{
			if (inLine.equals("null"))
			{
				nullLines++;
				outFile.write("null\n");
			}
			else
			{
				OutputHierarchy2(inLine, outFile);
			}
		}
		outFile.flush();
		outFile.close();
		outFile = null;
		System.err.println(nullLines + " null lines\n");
	}

	// indent parsed tree to make it easier to look at
	// (debug helper; not called by the constructor above)
	public void OutputHierarchy(String inLine, BufferedWriter outFile) throws Exception
	{
		int level = 0; // current tree depth, used as the indent width
		StringTokenizer st = new StringTokenizer(inLine);
		while (st.hasMoreTokens())
		{
			String parsed = st.nextToken();
			if (parsed.substring(0, 1).compareTo("(") == 0)
			{ // start of new node: "(TAG" -> newline, indent, then TAG
				outFile.write('\n');
				for (int currLevel = 0 ; currLevel < level ; currLevel++)
				{
					outFile.write(' ');
				}
				String tag = parsed.substring(1, parsed.length());
				outFile.write(tag);
				level++;
			}
			else
			{ // closing nodes: "word)))" -> the word, dedent once per ')'
				// NOTE(review): noBracket counts every char from the first
				// ')' onward -- assumes the token ends in only ')' chars
				int firstBracket = parsed.indexOf(')');
				int noBracket = parsed.length() - firstBracket;
				String tag = parsed.substring(0, firstBracket);
				outFile.write(" == " + tag);
				level -= noBracket;
			}
		}
		outFile.write('\n');
	}

	// Writes one output token per leaf word: the leaf's tag (suffixed with
	// the parent tag when NN-NK sits under NP-SB) if it is in the
	// whitelist below, otherwise "???". Ends the sentence with '\n'.
	public void OutputHierarchy2(String inLine, BufferedWriter outFile) throws Exception
	{
		int level = 0; // maintained but never read in this method
		Stack prevTags = new Stack(); // tags of currently-open nodes, root at the bottom

		StringTokenizer st = new StringTokenizer(inLine);

		while (st.hasMoreTokens())
		{
			String parsed = st.nextToken();
			if (parsed.substring(0, 1).compareTo("(") == 0)
			{ // start of new node
				String tag = parsed.substring(1, parsed.length());
				prevTags.push(tag);
				level++;
			}
			else
			{ // closing nodes

				// top of stack = the leaf's own tag, next = its parent.
				// NOTE(review): requires at least 2 open nodes -- a leaf
				// directly under the root would throw here; confirm input
				// trees are always at least two levels deep
				String parentTag = (String) prevTags.get(prevTags.size() - 2)
						, currTag = (String) prevTags.get(prevTags.size() - 1);
				if (currTag.equals("NN-NK") && parentTag.equals("NP-SB"))
					currTag += "_" + parentTag;

				// NOTE(review): as in OutputHierarchy, assumes every char
				// after the first ')' is also a ')'
				int firstBracket = parsed.indexOf(')');
				int noBracket = parsed.length() - firstBracket;
				String word = parsed.substring(0, firstBracket); // the leaf word itself (unused)

				// whitelist of tags kept verbatim; everything else -> "???"
				if (currTag.equals("ART-SB")
					|| currTag.equals("NN-NK_NP-SB")
					|| currTag.equals("VAFIN-HD")
					|| currTag.equals("VVFIN-HD")
					|| currTag.equals("VMFIN-HD")
					|| currTag.equals("PPER-SB")
					|| currTag.equals("PRELS-SB")
					|| currTag.equals("PDS-SB")
					|| currTag.equals("PPER-PH")
					|| currTag.equals("PPER-EP")
					)
					outFile.write(currTag + " ");
				else
					outFile.write("??? ");

				level -= noBracket;

				// pop the rest
				// (one pop per ')': the leaf's node plus every enclosing
				// node that closes on this token)
				for (int i = 0 ; i < noBracket ; ++i)
				{
					prevTags.pop();
				}
			}
		}
		outFile.write('\n');
	}
}
|
Loading…
Reference in New Issue
Block a user