mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 07:42:21 +03:00
Initial check-in of moses.py
This commit is contained in:
parent
b136e292af
commit
5c90a09d5a
228
scripts/server/moses.py
Normal file
228
scripts/server/moses.py
Normal file
@ -0,0 +1,228 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Python utilities for moses
|
||||
#
|
||||
# This package mostly wraps standard Moses utilities into pipes.
|
||||
#
|
||||
# Written by Ulrich Germann
|
||||
#
|
||||
# This package borrows from scripts written by Christian Buck
|
||||
#
|
||||
# The package assumes that there is a complete moses installation
|
||||
# (including scripts) under one root directory,
|
||||
# e.g., via
|
||||
# bjam --with-xmlrpc-c=... [...] --install-scripts --prefix=${HOME}/moses
|
||||
# By default, this root directory is "${HOME}/moses".
|
||||
|
||||
import xmlrpclib,datetime,argparse,time,os,sys
|
||||
from subprocess import *
|
||||
from unicodedata import normalize
|
||||
|
||||
# Root directory of the Moses installation: $MOSES_ROOT if set, otherwise
# "<home>/moses".  The fallback is computed only when it is needed and goes
# through expanduser(), so an unset $HOME no longer raises a TypeError
# (None + str) while this module is being imported.
moses_root = os.environ.get('MOSES_ROOT')
if moses_root is None:
    moses_root = os.path.expanduser("~") + "/moses"
|
||||
|
||||
class ProcessWrapper:
    """
    Thin wrapper around a child process launched with subprocess.Popen.

    Attributes:
      cmd      argument list handed to Popen by start()
      process  the Popen object once start() has run, otherwise None
    """

    # Class-level default so that subclasses whose __init__ only assigns
    # self.cmd can still test `self.process` before the first start().
    process = None

    def __init__(self, cmd=None):
        """
        Remember the command line; the process itself is not launched here.

        cmd -- list with the program name and its arguments
               (default: a new empty list)
        """
        self.process = None
        # A fresh list per instance; a shared `[]` default would be
        # visible to (and modifiable through) every instance using it.
        self.cmd = [] if cmd is None else cmd
        return

    def start(self, stdin=PIPE, stdout=PIPE):
        """
        Launch self.cmd as a child process.

        stdin, stdout -- passed through to Popen (default: new pipes)

        Raises Exception if a process has already been started.
        """
        if self.process:
            raise Exception("Process is already running")
        # Was `Popen(cmd, ...)`; the bare name `cmd` is not defined here.
        self.process = Popen(self.cmd, stdin=stdin, stdout=stdout)
        return

    def __del__(self):
        """Terminate the child process, if one was started."""
        if self.process:
            try:
                self.process.terminate()
            except OSError:
                # The child is already gone; nothing left to clean up.
                pass
        return
|
||||
|
||||
class LineProcessor(ProcessWrapper):
    """
    Process wrapper that is called with one line of text and returns
    one line read back from the child process.
    """

    def __call__(self, input):
        """
        Send /input/ (stripped, newline-terminated) to the child process
        and return the next line from its stdout, stripped.

        The process is started on first use.
        """
        if not self.process:
            self.start()
        to_child = self.process.stdin
        from_child = self.process.stdout
        to_child.write("%s\n" % input.strip())
        to_child.flush()
        reply = from_child.readline()
        return reply.strip()
|
||||
|
||||
class SentenceSplitter(ProcessWrapper):
    """
    Wrapper for standard Moses sentence splitter
    """

    def __init__(self, lang):
        """
        lang -- language code passed to split-sentences.perl via -l
        """
        ssplit_cmd = moses_root + "/scripts/ems/support/split-sentences.perl"
        # __call__ tests self.process before the first start(), so the
        # attribute has to exist from construction on.
        self.process = None
        self.cmd = [ssplit_cmd, "-b", "-q", "-l", lang]
        return

    def __call__(self, input):
        """
        Split /input/ into sentences.

        Writes the stripped text followed by a "<P>" line to the splitter,
        then collects output lines until "<P>" or an empty line is read.
        Returns the collected lines (stripped) as a list.
        """
        if not self.process:
            self.start()
        self.process.stdin.write(input.strip() + "\n<P>\n")
        self.process.stdin.flush()
        ret = []
        x = self.process.stdout.readline().strip()
        # An empty string also covers end-of-file on the pipe.
        while x != '<P>' and x != '':
            ret.append(x)
            x = self.process.stdout.readline().strip()
        return ret
|
||||
|
||||
class Pretokenizer(LineProcessor):
    """
    Line-by-line wrapper around the Moses pre-tokenizer script, which
    fixes known issues with the input.
    """

    def __init__(self, lang):
        """
        lang -- language code handed to pre-tokenizer.perl via -l
        """
        self.process = None
        script = moses_root + "/scripts/tokenizer/pre-tokenizer.perl"
        options = ["-b", "-q", "-l", lang]
        self.cmd = [script] + options
        return
|
||||
|
||||
class Tokenizer(LineProcessor):
    """
    Tokenizer wrapper: runs the Moses tokenizer.perl script as a line
    processor.
    """

    def __init__(self, lang, args=["-a", "-no-escape"]):
        """
        lang -- language code passed to tokenizer.perl via -l
        args -- extra command line arguments appended after the
                language option; the default list is never modified
        """
        tok_cmd = moses_root + "/scripts/tokenizer/tokenizer.perl"
        # LineProcessor.__call__ tests self.process before the first
        # start(), so it must exist from construction on.
        self.process = None
        self.cmd = [tok_cmd, "-b", "-q", "-l", lang] + list(args)
        return
|
||||
|
||||
class TrueCaser(LineProcessor):
    """
    Truecaser wrapper.
    """

    def __init__(self, model):
        """
        model -- value passed to truecase.perl via --model
        """
        # The variable was assigned as `trucase_cmd` but read back as
        # `truecase_cmd`, which raised a NameError on construction.
        truecase_cmd = moses_root + "/scripts/recaser/truecase.perl"
        # LineProcessor.__call__ tests self.process before the first
        # start(), so it must exist from construction on.
        self.process = None
        self.cmd = [truecase_cmd, "-b", "--model", model]
        return
|
||||
|
||||
class LineProcessorPipeline:
    """
    Line processor: one line in, one line out

    Chains several line processors so that the stdout of each stage is
    the stdin of the next one.
    """

    def __init__(self, parts=()):
        """
        parts -- sequence of objects with a `cmd` attribute; a new
                 LineProcessor is created from each command line
        """
        self.chain = [LineProcessor(p.cmd) for p in parts]
        return

    def start(self):
        """
        Launch all stages, wiring each stage's stdin to the stdout of
        its predecessor.  Does nothing for an empty chain or when the
        first stage is already running.
        """
        if not self.chain or self.chain[0].process:
            return
        self.chain[0].start()
        for upstream, stage in zip(self.chain, self.chain[1:]):
            stage.start(stdin=upstream.process.stdout)
        return

    def __call__(self, input):
        """
        Feed /input/ (stripped) into the first stage and return the
        stripped line produced by the last stage.  With an empty chain
        /input/ is returned unchanged.
        """
        if not self.chain:
            return input
        self.start()
        head = self.chain[0].process
        tail = self.chain[-1].process
        head.stdin.write("%s\n" % input.strip())
        head.stdin.flush()
        # The reply has to be read from the LAST stage; the first
        # stage's stdout is consumed by the second stage.
        # NOTE(review): this presumably relies on every stage flushing
        # its output per line -- confirm for the commands used.
        return tail.stdout.readline().strip()
|
||||
|
||||
def find_free_port(p, max_tries=20):
    """
    Find a free port, starting at /p/.
    Return the free port, or False if none found.

    p         -- first port number to try
    max_tries -- how many consecutive ports to probe (default 20)

    A port counts as free when a TCP socket can be bound to it on all
    local interfaces.  This replaces parsing the output of
    `netstat -tnp`, and fixes the return value: the port that was
    actually probed is returned, not the start port /p/.
    """
    import socket  # local import: only this function needs the module

    for candidate in range(p, p + max_tries):
        if candidate > 65535:
            # No valid port numbers beyond this point.
            break
        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            probe.bind(("", candidate))
            return candidate
        except socket.error:
            # In use (or not permitted); try the next one.
            pass
        finally:
            probe.close()
    return False
|
||||
|
||||
class MosesServer(ProcessWrapper):
    """
    Launches a mosesserver process and sends it translation requests
    through an XML-RPC proxy.

    Attributes:
      cmd    command line used to launch the server
      port   port chosen by start()
      url    URL of the XML-RPC endpoint (None until connected)
      proxy  xmlrpclib.ServerProxy for /url/ (None until connected)
    """

    def __init__(self, args=["-fd", "\n"]):
        """
        args -- arguments placed right after the mosesserver binary;
                the default list is never modified
        """
        self.process = None
        mserver_cmd = moses_root + "/bin/mosesserver"
        self.cmd = [mserver_cmd] + list(args)
        self.url = None
        self.proxy = None
        return

    def start(self, config=None, args=[], port=7447, debug=False):
        """
        Launch the server and connect the XML-RPC proxy to it.

        config -- config file, passed to the server via -f
        args   -- additional command line arguments
        port   -- first port to try; see find_free_port()
        debug  -- if true, report to stderr and leave the server's own
                  output attached; otherwise it goes to os.devnull

        Returns True.  Raises Exception if the config file is given
        twice, no free port is found, or the server has already exited
        with a non-zero status right after launch.
        """
        # Validate first, so a rejected call leaves self.cmd untouched.
        if config and "-f" in args:
            raise Exception("Config file specified twice")
        self.cmd.extend(args)
        if config:
            self.cmd.extend(["-f", config])

        self.port = find_free_port(port)
        if not self.port:
            # Was `raise Excpetion(...)`, which itself raised NameError.
            raise Exception("Cannot find free port for moses server!")
        self.cmd.extend(["--server-port", "%d" % self.port])

        if debug:
            sys.stderr.write("%s\n" % (self.cmd,))
            self.process = Popen(self.cmd)
        else:
            devnull = open(os.devnull, "w")
            try:
                self.process = Popen(self.cmd, stderr=devnull, stdout=devnull)
            finally:
                # The child holds its own copy of the descriptor.
                devnull.close()

        if self.process.poll():
            raise Exception("FATAL ERROR: Could not launch moses server!")
        if debug:
            sys.stderr.write("MOSES port is %d.\n" % self.port)
            sys.stderr.write("Moses poll status is %s\n"
                             % (self.process.poll(),))

        self.url = "http://localhost:%d/RPC2" % self.port
        self.connect(self.url)
        return True

    def connect(self, url):
        """
        Point the XML-RPC proxy at /url/.  "http://" is prepended and
        "/RPC2" appended when missing.
        """
        if not url.startswith("http"):
            url = "http://%s" % url
        if not url.endswith("/RPC2"):
            url += "/RPC2"
        self.url = url
        self.proxy = xmlrpclib.ServerProxy(self.url)
        return

    def translate(self, input):
        """
        Translate /input/ via the server.

        unicode -- sent UTF-8 encoded; the 'text' field of the reply is
                   returned decoded from UTF-8
        str     -- sent stripped; the reply's 'text' field is returned
        list    -- each element is translated; a list is returned
        dict    -- sent as is; the complete reply is returned

        A failing request is retried once per second, up to 100 times,
        before Exception("Translation request failed") is raised.
        Input of any other type raises Exception immediately instead of
        being retried.
        """
        if isinstance(input, list):
            # Each element runs its own retry loop; no nested retries.
            return [self.translate(x) for x in input]

        is_unicode = isinstance(input, unicode)
        if is_unicode:
            # if the server does not expect unicode, provide a
            # properly encoded string!
            param = {'text': input.strip().encode('utf8')}
        elif isinstance(input, str):
            param = {'text': input.strip()}
        elif isinstance(input, dict):
            param = input
        else:
            raise Exception("Can't handle input of this type!")

        attempts = 0
        while attempts < 100:
            try:
                reply = self.proxy.translate(param)
            except Exception:
                # `except Exception` instead of a bare `except`, so that
                # KeyboardInterrupt / SystemExit are not swallowed.
                attempts += 1
                sys.stderr.write("WAITING %d\n" % attempts)
                time.sleep(1)
                continue
            if isinstance(input, dict):
                return reply
            if is_unicode:
                return reply['text'].decode('utf8')
            return reply['text']
        raise Exception("Translation request failed")
|
||||
|
Loading…
Reference in New Issue
Block a user