From 5c90a09d5a41c61c8cfe0405cc69f92df5fc37a4 Mon Sep 17 00:00:00 2001 From: Ulrich Germann Date: Sun, 18 May 2014 21:45:42 +0100 Subject: [PATCH] Initial check-in of moses.py --- scripts/server/moses.py | 228 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 228 insertions(+) create mode 100644 scripts/server/moses.py diff --git a/scripts/server/moses.py b/scripts/server/moses.py new file mode 100644 index 000000000..e87e48017 --- /dev/null +++ b/scripts/server/moses.py @@ -0,0 +1,228 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +# Python utilities for moses +# +# This package mostly wraps standard Moses utilities into pipes. +# +# Written by Ulrich Germann +# +# This package borrows from scripts written by Christian Buck +# +# The package assumes that there is a complete moses installation +# (including scripts) under one root directory, +# e.g., via +# bjam --with-xmlrpc-c=... [...] --install-scripts --prefix=${HOME}/moses +# By default, this root directory is "${HOME}/moses". + +import xmlrpclib,datetime,argparse,time,os,sys +from subprocess import * +from unicodedata import normalize + +moses_root = os.environ.get('MOSES_ROOT',os.environ.get('HOME')+"/moses") + +class ProcessWrapper: + + def __init__(self,cmd=[]): + self.process = None + self.cmd = cmd + return + + def start(self, stdin=PIPE, stdout=PIPE): + if self.process: + raise Exception("Process is already running") + self.process = Popen(cmd, stdin = stdin, stdout = stdout) + return + + def __del__(self): + if self.process: + self.process.terminate() + pass + return + pass + +class LineProcessor(ProcessWrapper): + + def __call__(self,input): + if not self.process: self.start() + self.process.stdin.write("%s\n"%input.strip()) + self.process.stdin.flush() + return self.process.stdout.readline().strip() + pass + +class SentenceSplitter(ProcessWrapper): + """ + Wrapper for standard Moses sentence splitter + """ + def __init__(self,lang): + ssplit_cmd = moses_root+"/scripts/ems/support/split-sentences.perl" + self.cmd = [ssplit_cmd, "-b", "-q", "-l",lang] + return + + def __call__(self,input): + if not self.process: + self.start() + pass + self.process.stdin.write(input.strip() + "\n

\n") + self.process.stdin.flush() + x = self.process.stdout.readline().strip() + ret = [] + while x != '

' and x != '': + ret.append(x) + x = self.process.stdout.readline().strip() + pass + return ret + +class Pretokenizer(LineProcessor): + """ + Pretokenizer wrapper; the pretokenizer fixes known issues with the input. + """ + def __init__(self,lang): + pretok_cmd = moses_root+"/scripts/tokenizer/pre-tokenizer.perl" + self.cmd = [pretok_cmd,"-b", "-q", "-l",lang] + self.process = None + return + pass + +class Tokenizer(LineProcessor): + """ + Tokenizer wrapper; the pretokenizer fixes known issues with the input. + """ + def __init__(self,lang,args=["-a","-no-escape"]): + tok_cmd = moses_root+"/scripts/tokenizer/tokenizer.perl" + self.cmd = [tok_cmd,"-b", "-q", "-l", lang] + args + return + +class TrueCaser(LineProcessor): + """ + Truecaser wrapper. + """ + def __init__(self,model): + trucase_cmd = moses_root+"/scripts/recaser/truecase.perl" + self.cmd = [truecase_cmd,"-b", "--model",model] + return + pass + +class LineProcessorPipeline: + """ + Line processor: one line in, one line out + """ + def __init__(self,parts=[]): + self.chain = [LineProcessor(p.cmd) for p in parts] + return + + def start(self): + if len(self.chain) == 0: + return + if self.chain[0].process: + return + self.chain[0].start() + for i in xrange(1,len(self.chain)): + self.chain[i].start(stdin = self.chain[i-1].process.stdout) + pass + return + + def __call__(self,input): + if len(self.chain) == 0: + return input + self.start() + self.chain[0].process.stdin.write("%s\n"%input.strip()) + self.chain[0].process.stdin.flush() + return self.chain[0].process.stdout.readline().strip() + + pass + +def find_free_port(p): + """ + Find a free port, starting at /p/. + Return the free port, or False if none found. + """ + ret = p + while ret - p < 20: + devnull = open(os.devnull,"w") + n = Popen(["netstat","-tnp"],stdout=PIPE,stderr=devnull) + if n.communicate()[0].find(":%d "%ret) < 0: + return p + ret += 1 + pass + return False + +class MosesServer(ProcessWrapper): + + def __init__(self,args=["-fd", "\n"]): + self.process = None + mserver_cmd = moses_root+"/bin/mosesserver" + self.cmd = [mserver_cmd] + args + self.url = None + self.proxy = None + return + + def start(self,config=None,args=[],port=7447,debug=False): + self.cmd.extend(args) + if config: + if "-f" in args: + raise Exception("Config file specified twice") + else: + self.cmd.extend(["-f",config]) + pass + pass + self.port = find_free_port(port) + if not self.port: + raise Excpetion("Cannot find free port for moses server!") + self.cmd.extend(["--server-port", "%d"%self.port]) + if debug: + print >>sys.stderr,self.cmd + self.process = Popen(self.cmd) + else: + devnull = open(os.devnull,"w") + self.process = Popen(self.cmd, stderr=devnull, stdout=devnull) + pass + + if self.process.poll(): + raise Exception("FATAL ERROR: Could not launch moses server!") + if debug: + print >>sys.stderr,"MOSES port is %d."%self.port + print >>sys.stderr,"Moses poll status is", self.process.poll() + pass + + self.url = "http://localhost:%d/RPC2"%self.port + self.connect(self.url) + + return True + + def connect(self,url): + if url[:4] != "http": url = "http://%s"%url + if url[-5:] != "/RPC2": url += "/RPC2" + self.url = url + self.proxy = xmlrpclib.ServerProxy(self.url) + return + + def translate(self,input): + attempts = 0 + while attempts < 100: + try: + if type(input) is unicode: + # if the server does not expect unicode, provide a + # properly encoded string! + param = {'text': input.strip().encode('utf8')} + return self.proxy.translate(param)['text'].decode('utf8') + + elif type(input) is str: + param = {'text': input.strip()} + return self.proxy.translate(param)['text'] + + elif type(input) is list: + return [self.translate(x) for x in input] + elif type(input) is dict: + return self.proxy.translate(input) + else: + raise Exception("Can't handle input of this type!") + except: + attempts += 1 + print >>sys.stderr, "WAITING", attempts + time.sleep(1) + pass + pass + raise Exception("Translation request failed") + pass +