2013-04-22 15:21:59 +04:00
# -*- coding: utf-8 -*-
#
# Sample Python client. In addition to the basic functionality, it shows how translation model weights can be provided to the multimodel phrase table type,
# and how translation model weights can be optimized on a tuning set of phrase pairs.
# translate_concurrent() shows how to use multiple Moses server threads.
#
import sys
import gzip
from multiprocessing import Pool
if sys . version_info < ( 3 , 0 ) :
import xmlrpclib
else :
import xmlrpc . client as xmlrpclib
2014-01-02 16:38:39 +04:00
def translate(input_object, server, weights=None, model_name=None):
    """Translate each sentence of input_object and print every server response.

    input_object is any iterable of sentences (list, file-like object, ...).
    server is an xmlrpclib.ServerProxy (any object exposing a compatible
    translate(params) method works).
    weights is a list of floats (one float per model, or one float per model
    per feature); when it is empty or None no weights are sent.
    model_name is the name of the PhraseDictionaryMultiModel(Counts) feature
    function that the weights should be applied to. It is defined in the
    moses.ini and is required whenever weights are given.

    Calls sys.exit(1) if weights are given without a model_name.
    """
    for line in input_object:
        # Request keys must be exact: they are sent to the server verbatim.
        params = {'text': line}
        if weights:
            if not model_name:
                sys.stderr.write(" Error: if you define weights, you need to specify the feature to which the weights are to be applied (e.g. PhraseDictionaryMultiModel0) \n ")
                sys.exit(1)
            params['model_name'] = model_name
            params['lambda'] = weights
        # Function-call form of print parses on Python 2 and Python 3 alike,
        # matching the version switch in the import section.
        print(server.translate(params))
2013-07-30 15:54:50 +04:00
def optimize(phrase_pairs, server, model_name):
    """Ask the server to optimize the translation model weights.

    phrase_pairs is the tuning set, e.g. the list returned by
    read_phrase_pairs().
    server is an xmlrpclib.ServerProxy (any object exposing a compatible
    optimize(params) method works).
    model_name is the name of the feature function whose weights are
    optimized.

    The resulting weight vector is reported on stderr and returned unchanged.
    """
    # Request keys must be exact: they are sent to the server verbatim.
    params = {
        'phrase_pairs': phrase_pairs,
        'model_name': model_name,
    }
    weights = server.optimize(params)

    sys.stderr.write(' weight vector (set lambda in moses.ini to this value to set as default): ')
    sys.stderr.write(' , '.join(map(str, weights)) + ' \n ')
    return weights
def read_phrase_pairs(input_object):
    """Read (source, target) phrase pairs from an iterable of extract lines.

    Each line holds fields separated by ' ||| '; the first two fields are
    returned as a tuple and any further fields are ignored.

    Lines may be text or bytes: a gzip file opened in its default binary mode
    yields bytes on Python 3, which are decoded as UTF-8 before splitting.
    Blank lines are skipped. A non-blank line with fewer than two fields
    raises IndexError.
    """
    pairs = []
    for line in input_object:
        # On Python 2 bytes is str, so only genuine Python 3 bytes are decoded.
        if isinstance(line, bytes) and not isinstance(line, str):
            line = line.decode('utf-8')
        if not line.strip():
            continue
        fields = line.split(' ||| ')
        pairs.append((fields[0], fields[1]))
    return pairs
# Same functionality as translate(), but uses multiple concurrent connections to the server.
def translate_concurrent(input_object, url, weights=None, num_processes=8):
    """Translate each sentence of input_object over concurrent connections.

    input_object is any iterable of sentences.
    url is the address of the XML-RPC server; it is handed to
    translate_single_line() for every sentence.
    weights is an optional list of floats forwarded with every request.
    num_processes is the size of the worker pool.

    Translations are printed in input order.
    """
    pool = Pool(processes=num_processes)
    try:
        text_args = [(line, weights, url) for line in input_object]
        # imap yields results in submission order, so output order is stable.
        for translated_line in pool.imap(translate_single_line, text_args):
            # Function-call form of print parses on Python 2 and 3 alike.
            print(translated_line)
    finally:
        # Always shut the workers down so no processes are leaked, even when
        # a request fails part-way through.
        pool.terminate()
        pool.join()
def translate_single_line(args):
    """Translate one sentence over its own server connection.

    args is a (line, weights, url) tuple, packed into a single argument so
    the function can be used with Pool.imap(). A new xmlrpclib.ServerProxy is
    created for url on every call.

    Returns the 'text' entry of the server response.
    """
    line, weights, url = args
    server = xmlrpclib.ServerProxy(url)

    # Request and response keys must be exact: they cross the wire verbatim.
    params = {'text': line}
    if weights:
        params['lambda'] = weights
    return server.translate(params)['text']
if __name__ == '__main__':
    # Example run: optimize the multimodel weights on a tuning set, then
    # translate stdin with the optimized weights.
    url = "http://localhost:8111/RPC2"
    server = xmlrpclib.ServerProxy(url)
    feature_name = 'PhraseDictionaryMultiModelCounts0'

    extract_path = '/path/to/moses-regression-tests/models/multimodel/extract.sorted.gz'
    # On Python 3 open the archive in text mode so lines arrive as str;
    # Python 2's gzip.open has no text mode and already yields str.
    if sys.version_info < (3, 0):
        extract_file = gzip.open(extract_path)
    else:
        extract_file = gzip.open(extract_path, 'rt', encoding='utf-8')
    try:
        phrase_pairs = read_phrase_pairs(extract_file)
    finally:
        # Close the archive even if reading fails.
        extract_file.close()

    weights = optimize(phrase_pairs, server, feature_name)

    translate(sys.stdin, server, weights, feature_name)