Code refactoring of scripts/server/sim-pe.py.

This commit is contained in:
Ulrich Germann 2014-05-18 21:48:17 +01:00
parent 5c90a09d5a
commit a79ba985e9

View File

@ -6,76 +6,20 @@
# updates the dynamic phrase tables in the moses server.
import xmlrpclib,datetime,argparse,sys,os,time
import moses
from moses import MosesServer
from subprocess import *
mserver = moses.MosesServer()
# We must perform some custom argument processing, as moses parameter
# specifications do not comply with the standards used in standard
# argument parsing packages; an isolated double dash separates script
# arguments from moses arguments
MosesProcess = None
NBestFile = None
def shutdown():
if MosesProcess:
if args.debug:
print >>sys.stderr,"shutting down moses server"
pass
MosesProcess.terminate()
pass
return
def find_free_port(p):
"""
Find a free port, starting at /p/.
Return the free port, or False if none found.
"""
ret = p
while ret - p < 20:
devnull = open(os.devnull,"w")
n = Popen(["netstat","-tnp"],stdout=PIPE,stderr=devnull)
if n.communicate()[0].find(":%d "%ret) < 0:
return p
ret += 1
pass
return False
def launch_moses(mo_args):
"""
Spawn a moses server process. Return URL of said process.
"""
global MosesProcess
try:
port_index = mo_args.index("--server-port") + 1
except:
mo_args.extend(["--server-port","7777"])
port_index = len(mo_args) - 1
pass
port = find_free_port(int(mo_args[port_index]))
if not port:
print >>sys.stderr, "FATAL ERROR: No available port for moses server!"
sys.exit(1)
pass
if args.debug:
MosesProcess = Popen([args.servercmd] + mo_args)
else:
devnull = open(os.devnull,"w")
MosesProcess = Popen([args.servercmd] + mo_args,
stderr=devnull, stdout=devnull)
if MosesProcess.poll():
print >>sys.stderr, "FATAL ERROR: Could not launch moses server!"
sys.exit(1)
pass
if args.debug:
print >>sys.stderr,"MOSES port is %d."%port
print >>sys.stderr,"Moses poll status is", MosesProcess.poll()
pass
return "http://localhost:%d"%port
def split_args(all_args):
"""
Split argument list all_args into script-specific
and moses-specific arguments.
Split argument list all_args into arguments specific to this script and
arguments relating to the moses server. An isolated double dash acts as
the separator between the two types of arguments.
"""
my_args = []
mo_args = []
@ -94,30 +38,27 @@ def split_args(all_args):
i = 0
while i < len(mo_args):
if mo_args[i] == "-i" or mo_args[i] == "-input-file":
my_args.extend(["--src",m_args[i+1]])
my_args.extend(["-i",mo_args[i+1]])
mo_args[i:i+2] = []
elif mo_args[i] == "-inputtype":
if mo_args[i+1] != "0":
# not yet supported! Therefore:
errmsg = "FATAL ERROR: "
errmsg += "%s only supports plain text input at this point."
print >>sys.stderr,errmsg%sys.argv[0]
sys.exit(1)
pass
errmsg = "FATAL ERROR: %s "%sys.argv[0]
errmsg += "only supports plain text input at this point."
raise Exception(errsmg)
my_args.extend(["--input-type",mo_args[i+1]])
mo_args[i:i+2] = []
elif mo_args[i] == "-lattice-samples":
my_args.extend(["--lattice-sample",mo_args[i+2]])
my_args.extend(["--lattice-sample-file",mo_args[i+1]])
mo_args[i:i+3] = []
# not yet supported! Therefore:
errmsg = "FATAL ERROR: "
errmsg += "%s does not yet support lattice sampling."
print >>sys.stderr,errmsg%sys.argv[0]
sys.exit(1)
# my_args.extend(["--lattice-sample",mo_args[i+2]])
# my_args.extend(["--lattice-sample-file",mo_args[i+1]])
# mo_args[i:i+3] = []
# This is not yet supported! Therefore:
errmsg = "FATAL ERROR: %s "%sys.argv[0]
errmsg += "does not yet support lattice sampling."
raise Exception(errsmg)
elif mo_args[i] == "-n-best-list":
my_args.extend(["--nbest",mo_args[i+2]])
my_args.extend(["--nbest-file",mo_args[i+1]])
@ -139,12 +80,9 @@ def interpret_args(my_args):
"""
aparser = argparse.ArgumentParser()
# interfacing with moses
# aparser.add_argument("-m","--moses-cmd",default="moses",dest="mosescmd",
# help="path to standard moses command")
aparser.add_argument("-s","--server-cmd",default="mosesserver",
dest="servercmd", help="path to moses server command")
aparser.add_argument("-u","--url",help="URL of external moses server.")
aparser.add_argument("--url",help="URL of external moses server.")
# input / output
aparser.add_argument("-i","--input",help="source file",default="-")
@ -166,28 +104,40 @@ def interpret_args(my_args):
help="size of nbest list")
aparser.add_argument("-N","--nbest-file",dest="nbestFile",default=0,
help="output file for nbest list")
aparser.add_argument("-U","--nbest-distinct",type=bool,dest="U",default=False,
aparser.add_argument("-u","--nbest-distinct",type=bool,dest="U",default=False,
help="report all factors")
return aparser.parse_args(my_args)
def translate(proxy,args,s):
param = {'text':s.strip()}
if args.A: param['align'] = True
if args.T: param['topt'] = True
if args.F: param['report-all-factors'] = True
def translate(proxy, args, line):
if type(line) is unicode:
param = { 'text' : line.strip().encode('utf8') }
elif type(line) is str:
param = { 'text' : line.strip().encode('utf8') }
else:
raise Exception("Can't handle input")
if args.A: param['align'] = True
if args.T: param['topt'] = True
if args.F: param['report-all-factors'] = True
if args.nbest:
param['nbest'] = int(args.nbest)
param['add-score-breakdown'] = True
pass
if args.U: param['nbest-distinct'] = True
try:
ret = proxy.translate(param)
except:
return None
if args.U:
param['nbest-distinct'] = True
pass
return ret
attempts = 0
while attempts < 120:
try:
return proxy.translate(param)
except:
print >>sys.stderr, "Waiting", proxy
attempts += 1
time.sleep(5)
pass
pass
raise Exception("Exception: could not reach translation server.")
def read_data(fname):
"""
@ -205,10 +155,6 @@ def read_data(fname):
def repack_result(id,result):
global args
if args.nbest:
if not NBestFile:
shutdown()
assert NBestFile
sys.exit(1)
for h in result['nbest']:
fields = (id,h['hyp'],h['fvals'],h['totalScore'])
print >>NBestFile,"%d ||| %s ||| %s ||| %f"%fields
@ -221,14 +167,14 @@ def repack_result(id,result):
k = 0
for a in result['align']:
k = a['tgt-start']
if k: print " ".join(t[i:k]),span,
if k: print " ".join(t[i:k]).encode('utf8'),span,
i = k
span = "|%d %d|"%(a['src-start'],a['src-end'])
pass
print " ".join(t[k:]),span
print " ".join(t[k:]).encode('utf8'),span
pass
else:
print result['text']
print result['text'].encode('utf8')
pass
return
@ -237,9 +183,12 @@ if __name__ == "__main__":
global args
args = interpret_args(my_args)
if "-show-weights" in mo_args:
# this is for use during tuning, where moses is called to get a list of
# feature names
devnull = open(os.devnull,"w")
mo = Popen([args.servercmd] + mo_args,stdout=PIPE,stderr=devnull)
mo = Popen(mserver.cmd + mo_args,stdout=PIPE,stderr=devnull)
print mo.communicate()[0].strip()
sys.exit(0)
pass
@ -251,16 +200,14 @@ if __name__ == "__main__":
NBestFile = sys.stdout
pass
pass
if "url" not in args or not args.url:
url = launch_moses(mo_args)
else:
url = args.url
pass
if url[:4] != "http": url = "http://%s"%url
if url[-5:] != "/RPC2": url += "/RPC2"
proxy = xmlrpclib.ServerProxy(url)
ret = None
if args.url:
mserver.connect(args.url)
else:
mserver.start(args=mo_args,debug=args.debug)
pass
ref = None
aln = None
if args.ref: ref = read_data(args.ref)
if args.aln: aln = read_data(args.aln)
@ -269,23 +216,22 @@ if __name__ == "__main__":
line = sys.stdin.readline()
id = 0
while line:
result = translate(proxy,args,line)
result = translate(mserver.proxy,args,line)
repack_result(id,result)
line = sys.stdin.readline()
id += 1
pass
pass
else:
src = read_data(args.src)
src = read_data(args.input)
for i in xrange(len(src)):
result = translate(mserver.proxy,args,src[i])
repack_result(id,result)
if ref and aln:
result = proxy.updater({'source' : src[i],
'target' : ref[i],
'alignment' : aln[i]})
repack_result(i,result)
result = mserver.proxy.updater({'source' : src[i],
'target' : ref[i],
'alignment' : aln[i]})
pass
pass
pass
pass
shutdown()