mirror of
https://github.com/moses-smt/mosesdecoder.git
synced 2024-09-20 07:42:21 +03:00
Code refactoring of scripts/server/sim-pe.py.
This commit is contained in:
parent
5c90a09d5a
commit
a79ba985e9
@ -6,76 +6,20 @@
|
||||
# updates the dynamic phrase tables in the moses server.
|
||||
|
||||
import xmlrpclib,datetime,argparse,sys,os,time
|
||||
import moses
|
||||
from moses import MosesServer
|
||||
from subprocess import *
|
||||
mserver = moses.MosesServer()
|
||||
|
||||
# We must perform some custom argument processing, as moses parameter
|
||||
# specifications do not comply with the standards used in standard
|
||||
# argument parsing packages; an isolated double dash separates script
|
||||
# arguments from moses arguments
|
||||
|
||||
MosesProcess = None
|
||||
NBestFile = None
|
||||
|
||||
def shutdown():
|
||||
if MosesProcess:
|
||||
if args.debug:
|
||||
print >>sys.stderr,"shutting down moses server"
|
||||
pass
|
||||
MosesProcess.terminate()
|
||||
pass
|
||||
return
|
||||
|
||||
def find_free_port(p):
|
||||
"""
|
||||
Find a free port, starting at /p/.
|
||||
Return the free port, or False if none found.
|
||||
"""
|
||||
ret = p
|
||||
while ret - p < 20:
|
||||
devnull = open(os.devnull,"w")
|
||||
n = Popen(["netstat","-tnp"],stdout=PIPE,stderr=devnull)
|
||||
if n.communicate()[0].find(":%d "%ret) < 0:
|
||||
return p
|
||||
ret += 1
|
||||
pass
|
||||
return False
|
||||
|
||||
def launch_moses(mo_args):
|
||||
"""
|
||||
Spawn a moses server process. Return URL of said process.
|
||||
"""
|
||||
global MosesProcess
|
||||
try:
|
||||
port_index = mo_args.index("--server-port") + 1
|
||||
except:
|
||||
mo_args.extend(["--server-port","7777"])
|
||||
port_index = len(mo_args) - 1
|
||||
pass
|
||||
port = find_free_port(int(mo_args[port_index]))
|
||||
if not port:
|
||||
print >>sys.stderr, "FATAL ERROR: No available port for moses server!"
|
||||
sys.exit(1)
|
||||
pass
|
||||
if args.debug:
|
||||
MosesProcess = Popen([args.servercmd] + mo_args)
|
||||
else:
|
||||
devnull = open(os.devnull,"w")
|
||||
MosesProcess = Popen([args.servercmd] + mo_args,
|
||||
stderr=devnull, stdout=devnull)
|
||||
if MosesProcess.poll():
|
||||
print >>sys.stderr, "FATAL ERROR: Could not launch moses server!"
|
||||
sys.exit(1)
|
||||
pass
|
||||
if args.debug:
|
||||
print >>sys.stderr,"MOSES port is %d."%port
|
||||
print >>sys.stderr,"Moses poll status is", MosesProcess.poll()
|
||||
pass
|
||||
return "http://localhost:%d"%port
|
||||
|
||||
def split_args(all_args):
|
||||
"""
|
||||
Split argument list all_args into script-specific
|
||||
and moses-specific arguments.
|
||||
Split argument list all_args into arguments specific to this script and
|
||||
arguments relating to the moses server. An isolated double dash acts as
|
||||
the separator between the two types of arguments.
|
||||
"""
|
||||
my_args = []
|
||||
mo_args = []
|
||||
@ -94,30 +38,27 @@ def split_args(all_args):
|
||||
i = 0
|
||||
while i < len(mo_args):
|
||||
if mo_args[i] == "-i" or mo_args[i] == "-input-file":
|
||||
my_args.extend(["--src",m_args[i+1]])
|
||||
my_args.extend(["-i",mo_args[i+1]])
|
||||
mo_args[i:i+2] = []
|
||||
|
||||
elif mo_args[i] == "-inputtype":
|
||||
if mo_args[i+1] != "0":
|
||||
# not yet supported! Therefore:
|
||||
errmsg = "FATAL ERROR: "
|
||||
errmsg += "%s only supports plain text input at this point."
|
||||
print >>sys.stderr,errmsg%sys.argv[0]
|
||||
sys.exit(1)
|
||||
pass
|
||||
errmsg = "FATAL ERROR: %s "%sys.argv[0]
|
||||
errmsg += "only supports plain text input at this point."
|
||||
raise Exception(errsmg)
|
||||
my_args.extend(["--input-type",mo_args[i+1]])
|
||||
mo_args[i:i+2] = []
|
||||
|
||||
elif mo_args[i] == "-lattice-samples":
|
||||
my_args.extend(["--lattice-sample",mo_args[i+2]])
|
||||
my_args.extend(["--lattice-sample-file",mo_args[i+1]])
|
||||
mo_args[i:i+3] = []
|
||||
# not yet supported! Therefore:
|
||||
errmsg = "FATAL ERROR: "
|
||||
errmsg += "%s does not yet support lattice sampling."
|
||||
print >>sys.stderr,errmsg%sys.argv[0]
|
||||
sys.exit(1)
|
||||
|
||||
# my_args.extend(["--lattice-sample",mo_args[i+2]])
|
||||
# my_args.extend(["--lattice-sample-file",mo_args[i+1]])
|
||||
# mo_args[i:i+3] = []
|
||||
# This is not yet supported! Therefore:
|
||||
errmsg = "FATAL ERROR: %s "%sys.argv[0]
|
||||
errmsg += "does not yet support lattice sampling."
|
||||
raise Exception(errsmg)
|
||||
|
||||
elif mo_args[i] == "-n-best-list":
|
||||
my_args.extend(["--nbest",mo_args[i+2]])
|
||||
my_args.extend(["--nbest-file",mo_args[i+1]])
|
||||
@ -139,12 +80,9 @@ def interpret_args(my_args):
|
||||
"""
|
||||
aparser = argparse.ArgumentParser()
|
||||
|
||||
# interfacing with moses
|
||||
# aparser.add_argument("-m","--moses-cmd",default="moses",dest="mosescmd",
|
||||
# help="path to standard moses command")
|
||||
aparser.add_argument("-s","--server-cmd",default="mosesserver",
|
||||
dest="servercmd", help="path to moses server command")
|
||||
aparser.add_argument("-u","--url",help="URL of external moses server.")
|
||||
aparser.add_argument("--url",help="URL of external moses server.")
|
||||
|
||||
# input / output
|
||||
aparser.add_argument("-i","--input",help="source file",default="-")
|
||||
@ -166,28 +104,40 @@ def interpret_args(my_args):
|
||||
help="size of nbest list")
|
||||
aparser.add_argument("-N","--nbest-file",dest="nbestFile",default=0,
|
||||
help="output file for nbest list")
|
||||
aparser.add_argument("-U","--nbest-distinct",type=bool,dest="U",default=False,
|
||||
aparser.add_argument("-u","--nbest-distinct",type=bool,dest="U",default=False,
|
||||
help="report all factors")
|
||||
|
||||
return aparser.parse_args(my_args)
|
||||
|
||||
def translate(proxy,args,s):
|
||||
param = {'text':s.strip()}
|
||||
if args.A: param['align'] = True
|
||||
if args.T: param['topt'] = True
|
||||
if args.F: param['report-all-factors'] = True
|
||||
def translate(proxy, args, line):
|
||||
if type(line) is unicode:
|
||||
param = { 'text' : line.strip().encode('utf8') }
|
||||
elif type(line) is str:
|
||||
param = { 'text' : line.strip().encode('utf8') }
|
||||
else:
|
||||
raise Exception("Can't handle input")
|
||||
if args.A: param['align'] = True
|
||||
if args.T: param['topt'] = True
|
||||
if args.F: param['report-all-factors'] = True
|
||||
if args.nbest:
|
||||
param['nbest'] = int(args.nbest)
|
||||
param['add-score-breakdown'] = True
|
||||
pass
|
||||
if args.U: param['nbest-distinct'] = True
|
||||
|
||||
try:
|
||||
ret = proxy.translate(param)
|
||||
except:
|
||||
return None
|
||||
if args.U:
|
||||
param['nbest-distinct'] = True
|
||||
pass
|
||||
return ret
|
||||
attempts = 0
|
||||
while attempts < 120:
|
||||
try:
|
||||
return proxy.translate(param)
|
||||
except:
|
||||
print >>sys.stderr, "Waiting", proxy
|
||||
attempts += 1
|
||||
time.sleep(5)
|
||||
pass
|
||||
pass
|
||||
raise Exception("Exception: could not reach translation server.")
|
||||
|
||||
|
||||
def read_data(fname):
|
||||
"""
|
||||
@ -205,10 +155,6 @@ def read_data(fname):
|
||||
def repack_result(id,result):
|
||||
global args
|
||||
if args.nbest:
|
||||
if not NBestFile:
|
||||
shutdown()
|
||||
assert NBestFile
|
||||
sys.exit(1)
|
||||
for h in result['nbest']:
|
||||
fields = (id,h['hyp'],h['fvals'],h['totalScore'])
|
||||
print >>NBestFile,"%d ||| %s ||| %s ||| %f"%fields
|
||||
@ -221,14 +167,14 @@ def repack_result(id,result):
|
||||
k = 0
|
||||
for a in result['align']:
|
||||
k = a['tgt-start']
|
||||
if k: print " ".join(t[i:k]),span,
|
||||
if k: print " ".join(t[i:k]).encode('utf8'),span,
|
||||
i = k
|
||||
span = "|%d %d|"%(a['src-start'],a['src-end'])
|
||||
pass
|
||||
print " ".join(t[k:]),span
|
||||
print " ".join(t[k:]).encode('utf8'),span
|
||||
pass
|
||||
else:
|
||||
print result['text']
|
||||
print result['text'].encode('utf8')
|
||||
pass
|
||||
return
|
||||
|
||||
@ -237,9 +183,12 @@ if __name__ == "__main__":
|
||||
|
||||
global args
|
||||
args = interpret_args(my_args)
|
||||
|
||||
if "-show-weights" in mo_args:
|
||||
# this is for use during tuning, where moses is called to get a list of
|
||||
# feature names
|
||||
devnull = open(os.devnull,"w")
|
||||
mo = Popen([args.servercmd] + mo_args,stdout=PIPE,stderr=devnull)
|
||||
mo = Popen(mserver.cmd + mo_args,stdout=PIPE,stderr=devnull)
|
||||
print mo.communicate()[0].strip()
|
||||
sys.exit(0)
|
||||
pass
|
||||
@ -251,16 +200,14 @@ if __name__ == "__main__":
|
||||
NBestFile = sys.stdout
|
||||
pass
|
||||
pass
|
||||
if "url" not in args or not args.url:
|
||||
url = launch_moses(mo_args)
|
||||
else:
|
||||
url = args.url
|
||||
pass
|
||||
if url[:4] != "http": url = "http://%s"%url
|
||||
if url[-5:] != "/RPC2": url += "/RPC2"
|
||||
proxy = xmlrpclib.ServerProxy(url)
|
||||
|
||||
ret = None
|
||||
if args.url:
|
||||
mserver.connect(args.url)
|
||||
else:
|
||||
mserver.start(args=mo_args,debug=args.debug)
|
||||
pass
|
||||
|
||||
ref = None
|
||||
aln = None
|
||||
if args.ref: ref = read_data(args.ref)
|
||||
if args.aln: aln = read_data(args.aln)
|
||||
@ -269,23 +216,22 @@ if __name__ == "__main__":
|
||||
line = sys.stdin.readline()
|
||||
id = 0
|
||||
while line:
|
||||
result = translate(proxy,args,line)
|
||||
result = translate(mserver.proxy,args,line)
|
||||
repack_result(id,result)
|
||||
line = sys.stdin.readline()
|
||||
id += 1
|
||||
pass
|
||||
pass
|
||||
else:
|
||||
src = read_data(args.src)
|
||||
src = read_data(args.input)
|
||||
for i in xrange(len(src)):
|
||||
result = translate(mserver.proxy,args,src[i])
|
||||
repack_result(id,result)
|
||||
if ref and aln:
|
||||
result = proxy.updater({'source' : src[i],
|
||||
'target' : ref[i],
|
||||
'alignment' : aln[i]})
|
||||
repack_result(i,result)
|
||||
result = mserver.proxy.updater({'source' : src[i],
|
||||
'target' : ref[i],
|
||||
'alignment' : aln[i]})
|
||||
pass
|
||||
pass
|
||||
pass
|
||||
pass
|
||||
|
||||
shutdown()
|
||||
|
Loading…
Reference in New Issue
Block a user