From d2bc6a2584f18c7037bea65df8566e21ee53c190 Mon Sep 17 00:00:00 2001 From: Nadir Durrani Date: Thu, 4 Jul 2013 19:58:19 +0100 Subject: [PATCH] In EMS --- scripts/OSM/OSM-Train | Bin 26391 -> 0 bytes scripts/OSM/OSM-Train.sh | 45 +++++++++++++++--------------- scripts/ems/experiment.meta | 6 ++-- scripts/ems/experiment.perl | 4 ++- scripts/training/train-model.perl | 12 +++++++- 5 files changed, 39 insertions(+), 28 deletions(-) delete mode 100755 scripts/OSM/OSM-Train diff --git a/scripts/OSM/OSM-Train b/scripts/OSM/OSM-Train deleted file mode 100755 index 5e9b607aa9776733fa6b8bd4ac70927040e313a2..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 26391 zcmeHvdw5jUx%ZlxY!V=sfrwm{0m7Y35(FAA;BW~}Fi0f9T5ppf8Nz6o%r`T^V9`b~ zYK+koi?#N&##XJ?ewrSwN8(qEVjHAYqt%MFN267mh?jWBddc~{>s@PR&rarP>mTQN zzUN`(+527Z@4c?I*WP>0-r4JWC1q>vHk;7JE>;NQ2D3d9;=K?VXG&IK#bUJZh=pRF z7zM<@=8+PjT+{TZa;<5G(sO{OVJo&XP;B?eil);T7Mkh|iBh~o@u(`(iABt6Duhb{ zWJkI7Etf24m8@tgC?S-2l$0zkxu^8V&bdOepjEP>X|GC5Q{6stLtCY4U!~mxj(86d zae>??s195HR;-5VjSE?!aiK?6G_7PMinnrzqnNXnyTiCg1X@5 zWlIX`YVzxXp@zo%#^Po9%a*vK_3p*8-A*Jzx@VlVNv5Rxk@BVEN3^1$qJ%b5u!%8t z!j8j6hl1=DHe9~<#+t@h5ZJ_?nR@ih!(?O~2Ctw?oZ4$N#*mAH@ z(fvpDDe#lv$A!&Wo(C{eKaWe1PjL$WSEkTkm_q;CDfD+B zpP3ltJLh_2s&~r05Pwtn)$aG5WFBb6!W6{#;k_ta|B^8@XD}9xPzRDsW%qvdvZ9KDL84^10 ztcvKGA`Vm$D-PC2eVhH!z>?BXFc!z7%lzSRAXGzvVe+p>l0hnpRh-pDkiRY%+u_?@$o8hhs7ESdORDP|VzO2j zEgjw&B^!aAgcb&?ef97j@NX-vuJn~uEcNNOEkQ#^?8uR9WuKPlo}?BeI!(8wqP{L# zQoeR$MSX=&H5;OMv!maN1fqe+_CSd|rq+WH_Yyw{(h}or8xJwmYO66^h$|AlO`%I^ zsB8>G8@2^#I99V78e10#p)0E!{XTyz&=`z~=nf1->RwV)gLIoe7!qf!UhO-{T`0~d zD_ynPx7fYdy;Q6|_uQ4MN_|D{lNdhtT&OI;xnP%nbXIW9j=PHkb=veEr>~M4lG6;oT$? zZk{)}OgP2RWu6Jwb0?A&nsAz9=u&LLX+ENh$Ap{b6J8Te^AlalO*qY6bUD|A(;PvU zDif~fCM4Tx!u4E)@URJ|W3Ef138y)hE=?xfeE+%5gzLE(*|eK*ng{7}s|lxhkS-l2 zoaRuv++)JcbGJ?t?$XMjkCkK4-cIHR89D*+AbA%IsefPeXK|SMon2o`&W?x8!dl-a&k) z3NDh=selzhj z6bC$#KaY4Cf&+z;-$*I8bSko zl3zeP4V{5r$qXuRNyBf z%x(kgz4FM><=`Iw!r|R@#O6KuP&98#&h#|`gQ(l)Z7~|4G<4kWq*ALZx++APss-SfPy8Q!)%QrOnqEp?vpw)FJRCbfjs z9A^KZoP0aUNg+Ft!nWqlgp${WfoP zFG6>apSGWHPS4Tw5Sp)&^1WXU4Ur}~QfjuFGBuIl(ZY{*%JC=5WHvTO3+j9cK0!7!nWqBgp-bB zt2>x|Rylc|*m!d$u@fn5Yc5YXIX~Iz0%kqR$$Fw~%|fDkr~&)lP{-sPoP!VzN{u$s z=o*kBIY9o((mpBO)4@tow?)cbIAdYm%K}pCI+C#dskGk1(q>NP&vb0_IXx)=8BWp8jp7}fY1c=IOp3T2P23Ldv^J^B?Py?mn5{{zD`$ZRXPZ2bALYtE zs$y%^3sRTm-(u@>wkEZ%+=TUSr1c(_ntNdlw?qoil4fp!i~@E$vmBV7EO)XsshP+26S&vGS|nLZUA7*@)`Q$(q-GvASF?4Wlt?Qyh z=4)Bnon$R_+4_2xcd|99b!|;pXRx$`t=oCPOI@}ulkz<*Hy_MPsmpQ6CPVb@BcDl@ zEpfeOmZFjLHfA|1jDKePlyoPvgNoh8tn5tKUc@Z-W>5bHX6Xo!XAiS{q`nAPLr zs!v(#AgCM&JT5MQ*1pF0FnLEy$)WP^ly^TKCbuTK>t#%O=s-)UK2%bdyXyo@f*>E1 z1EG!7%){izY~7bMOr$PbzsB-jwk9?6Fu9PW-AUF`m#yz$c_&+wn)$4p&eDz~YpKiD z)huskYf>{0lh-l%L%W*TS`8Da%ht2mI?UFjX1<>MoUNaz8{Qoe`f<{|pRCy0Q%fDF{rKY>}c?CBrHEJdT^{@KS;S+)RdKeJ6r z`ZBZaiap3|hhiUPwo|dcVzyhcKVi04u{Sc?r`Yc?JE+(&vvPnT4L`Fy!g~5QGOLI8 z&p+bakAUtw5FgLepcTLDDgWI(Vw-S+?;DhT+tW`oe)LZ#4|US$axV1fo4|W4?O;Q7 z(4;PR`(pe)fOR`tlbZRUb+UC6Tk8Qs99w_P@-SPITGt7Q*b$aiv9%sB3G0VhUe4B} z)-@|(y@aJ6wx%X~+uEcq$8KbKAzPDL7f(v%&F@qEE<(FpY^^evx@>)lwBEyV^Kc8$ z(NKmsba#I!S+?xy{~@z7DM+tlmb2>V-@$AZ1%>otX2Xg-pV=nGmNDC|*yYT2DAvtv zr(&lw+pSn9vmWXc8ebnUy%%&}A-a$+kbfao-#FQSNfSD;6J}k@6Fq%!tn4fLQHm)s zAE)(A>JX3Pj-+3xFeM%HLAr@e%h^;NB&o~Y{3n)s*qYSL2kC5<7P7TENK%)r_psc> z)}&^h<>j)}$<~~9o783NZ?jymHL001e-S^dahwOKN89$O%%v_{PiE^rDQ_dSuJS~e z{gADD*;-{Tb=mqADc{3#^Y9u)hYGW6GQ^QPV^Fee+0$RlEJY*fd}cYTp8hG!I;laB zj%Aix+|&Q%`_fgRlD^BVN3pLmTdvq=nXOW6H?v{I{#vpaD~NIzi#%$mefk~;U%@IJ zYQXF}NA?Zo>>1RpCGFm}^&MU3B|2=&Xu>xEH}}R-_R>y)PSYMyFH8=4r3(5K(YBIq zYtR4(?L^S-t_u@EKalt);O3yaI8Y}A?a>}le^l0$W>0^n3R=l3eb%6Zc#?x;dJ(j* zt11z6CkF*?4w}tDdnxEf?Gg30$w7bkE_d&1p!;g!l+R=*wBl!SG}$DL*>ZJ&J|PeK z$7oOgVP?Zh`VzBpxFh-jX65(=yO&vcDuMkavz^NJ$INys_6D&0G~>!A>A3|Sdw5&+ zBeZ^CaUbco9HJHr%25jSBZV6w9LU4NF^2VT_DczV4$sgS*1y852);WG>tExg1b;UU z>)+Rl2`-Pry(-~X-WeJ?;$X*p3hOapg9>}lgb6j9y&c%Vi)baAI~9DLN!6vWmut9}gY{G<^8J%OooC;fI!dwb-o3KKKO#wDgjt8)O5IhQY zm{iLZ_Ws+6o~TmTD<&+gupSfEq_77~Si8b*H(?zLYc*k=3cJ*Vbt|meg!L+Hg9+o#HK3j37_t5Vpl zCM>M5t4&yw!onu3U11lPunvWJO<1SGmYA?^g}HzY(2HYkUax{jn^gN0_DO%D$%6`e z-Gu3z>C-06sqTAwO_;u|-etn{z4iMhOy6KHH(~lN8#H11R=dfB>HF=<1jaYrLIQ%a zvAC7u0y^EK(Lq2q0cWs=ypw(P_jtQ;gM`LA1k7d)dE0r#q|rwj2NOX2uJ^DBD_0l9 zpP8^Kh23bv!V0_0gf%H_iwSF2*hUl9p|BMutW#kNOjx(Vjt8c0D)*sE56Ft5b+wz6Cr2%)hO})^Ocey9!Fe>TS`Mrfy zTQNa^`D;{fdBW@1#8Ualv`d1AysZFR*TaSUm&9YmtZW z7pO|~5Z*kv&R=-}XFo!YxDmH*LJaA#eOQcaON{%Wp<=B5W-%5Gi!sF#V_hN!_Ycxb zv_rqM@wU|E9V%<7$-@jR|JCDMqO5K9OGJS8m$k(52FqG@<{kC6T#$#iJ?|8k&3|iU z%U7i>pL!qv%2C?#QvZ>^p^uv1jZG_hh2Hy>wfv*3<?yufm`|*zFNaSsA z>jimuCs3E?ENiXFn^cCHtE@GaH?OR9XI>#Xw|IZf^d@?j@e$tFmXvRsHqUN%VG--i`BHaJQ%nx$>hf;SvP_ zY+JBJt0Td1ELyOkVtv7ux?p%^U9dK^Ef9)9YX#)^&yL!VOO!O?UstMQ!BDNM0=v3E ztUeT_oT(%^39x2h+PDKm;9--w?XQm5+bRpx(%@+a^P)1k2V7*Q#L19|>MYkwn4fV5neoL$I#KhnhdK z!zJ9PxJ87_wxGvVXA0Mv`VhK5QEXEkr&fZ+4;gX6n?pmLpnGstIu}=t7eO0A-v$i> zPs0qT5A+_;4B(l#ofm>Og5C<6fg4jdC>1<~m&JD3Mqz8j{}E=UXS72`|4T?)1U}{a z?UmWi)z{dwMs0$El98e9vhp%%neL!}sd8xhMUdrTyA#9zWW04vtIYQZ?Zdar;rO|8 z3_PvG?_60(m)bWC@$LjoJ6xF1U$-mm28VqQMJBzS*lvYh4e5P0OiyM=dgSK?t(RV8 z^(XbHjcI7_QoL92pdQchyNPz@LB0v{31)dY%PGzp$PXb-3(3#E!MJ9WFWa+rSNcu% z8y#(F_Kn%-FzV+z=y`BV_)H^ZE<;1Mw)AyI&kS*s&3JQ$*lSySom<>wH$Ki6zq1=( z%n;W*jNLQEvkv3lnc@d&M)yqdpJ_&WzIZy#_;Qgrl4jgIOSBrs&`fc!VH}w$o-mA; zW{T%zy-(ur8>I8K@h#{-Gs-wTOT0JAcz%|+A>G(FOWc)i{Bo9fNY?kK6aIWU>AaqP z3iKb&FrLm6FJu^d^Tfv)#$9=0_h{q$dE&m&##ggMm#jZEn(#xTN$2mQN&nN)=fL*w zV~ktph~}}z)pNvMV~xStV*gmaqH{+MI5&J%Cv7@y4**E@~B%@y}Jjc4bI15V@7x#FaGvcqF5c1DDDH3D2Ll78_kQvBa@8}7K%fYjsIRK zKAmhlwNTtL#rVxa@z50Gj)mfdDMs5u@!=HX^95q}RO77$qI0V8+yc=%)%fiKF)-D* zcY!!6>(@>r{FZ5?bJsM|e`p%n?w>|}o}WfOU!O+)2d7c2p=lKN#uF%q+fG=5d|p4n z_{Ade*$Kuii^Q%IseaRmg#Ye)V+1|1jM+0;}o9UlxgXW&QK% z#lRnR(K&L^W!#xBUUnIeE3Gt8{ zKizRgW-S6OUbR!S+Zf241-jHYP0(+9C?xUvR6a(J^}w+nIMxHldf-?O9P5E&J#efC zj`hH?9yrzm|KIcg9wN)Key%4eXN@O;VDLyo7Ap;^ha4KGZ-l65p1uj9lF3B4@Q9oW z{xL?D1M@wSrZFoz?aY7j^-w)QO%CSqTTGTiqnOv<=5#AL{u4r$4^=$_9x&r)knjZ*3xHmr?}=-Y^o@6 zmC@(w^Ic2aOWlR8#f3$Mh06+;xX#Cyd-+RzOY=8J@b%|bS0u1K7^O$jMav2opS<|O zxW`4P@zCLspw~%Q5Z6equ)ZCqiPE3vkqWz$)#BU?)a>F?>|3$bWAl}6z!!lv!H};Z z8mIvv#4>F7Xb(=;VOtMQeK}YiZSV#0!KipX8N)}bK_EiV^Lv#(EECH-Ov&*OE8Ok^ ze2Z8U2uBO>MZXVU_y_8?6v(e;-4&vsAsQ)&Myd#js6NaW(amqAlYmMC|Ea zB5CP#O|WOkGtq7vb-R=q=Yu%Xj9@4RnQer#JJUbqu`kzjhh|D96BOA8g0kZ{d+IWXD1}Z;g z9@UMCEw~nYI!vP`Bf2d}U$N{W_EGGZ-HTfKN5o$CH1=|^j1CIhcb9;B9~+i6$mo{) z-(#;DI+moFw$bEumyNz;qM}=1zceE&2cpk`Ws4je)t!`zlM;|SI3?-0KJ09#zDIl8 zxT8dtD<-8x*v1LxsByOOHb=(PjM*8vbeL@8Y_qd-$2zj7W|PD@3aRFav13;N!tqj7 zv@VUSI95x9Bm0!Gr;aVnJw4No%loJ~qBQrPyE;B@7I)@=U&C!XJ(C^I&7>4Oqipat z%80AmkyCmiDEfM=Z4nW_nFac_-ht4=A$0A>4ANBGYUXGY8sKF#6W z;s~#EWS@3E&TspubLGC4_C34p3novo<0E2QT|M5hHH4yg85yW?(TPr%mDT_Dr8~N{ zJ`ziI#1Of(TwVX=D_lG;`o>?#RLLDJ0^o~@Kxlhd_#?IWNS{CHj$fEf9p;mlYM(z6 z@$cYozNI-#s7Do!Z{W>7Sb{#aTByyYE_%4)Yb3BtuC@C$62jv6KCt{)p0AjJ|~qD#g!>)7lpU`Reg)1 zHc_E!ae|8uH5mI=g-w=-zIW28sE!%QaZ1agXLCBLj&QP)FIQA|_(_T%M%>XI+hYFB z!i}%~nQqlJ43|J8EZm{`Sip@B`txJ{TE)plaad3rYH-Up%=y6@;U;dYKe|=8Yj%X- znrSS;lI>WR0dtll#s^s>Q0FHDRSVa}gqwz#aMNpU;g*BS9jUMJ$Na(_*y`JYcjN)z z)*9Gq4o zwXPodFbZF>9pQv86=~RE`}YA_uZOPD`u9b#-2*PMz7@TfHWzt4Hvx3vrkRDdue5u> zCDxB}NiMNVOH^eIZ@QA~V|I_kW6Gf((GtYYYJVCqbNn0Z9w0*8pzQM`m)KeD>C7{* z>HItG9&iG`^CS_yOk@nI>PwLJvjG@BxvJOpO%4yK;dh}Vg7TvxXJ4S&UV_+t2isU| z+P+)acPslzijp1my|&l=;s-`5+Ft+uJcOwL75#gYIrgFJL}_|WzxA}e{(X0FG;6J5 z63<`jX-XG#`t7KB{rl0WE=gf!PkB?GI)4R=W>lyzwSChVj}$d2dwr#{_MdM5O%{9o z`?piaSB}Y^E~+}dw*L_@>Q~iW@!$WhEPyClkGbLF?*|Z=?TfQKQngswcaqRl^tf>< z>~>JIy`CryQnDzArZCleZKwIWP%+!<=M@L$vxah`)rXxw_bGeb|N2MAA!XlT$zS)M zcKD#hUa$AoxR5m}wW6ZB{ThB86$+u__b&8EQLnPU`k%!AgT;Cns{?lmmTXTH9u1z2PXhhNa%q4h@e+_}zUO%tuSN8qd zP*H8CX*!I}_Pw4esoCpc1Dz*xiTIk&f-(8h?Wb?0sq}dy5%b8osklsas*YFj3oW>| z*VhFt*6uW&ox=XGhb>lVLq)Znrfy|#Q4@tL8L6@eto^Y(g?+oSw@yf*o>bPSuzz3K zm+2EzSy}DBlfwSgHEeK_#lXUf+7$L7HHFQ$7+6>lRrc27_v5u3|3r&{W;MM+*(Y@h zn7wB(GDj;a1q&-$QrN%dWrG4`r-{}6M=9*HOWC4a8!BqG$2Z#Xv2RwrNZJ3wVqjtA zxo0`V(#?TQ?CpTMk1hba8k;_E^>uMHFn#W^1@GIdm|kl+fvoY(HrBcW*%jM8Rv7W@ z@%2c}LRvLU&mWWVG|UG*W=Pf@nE7>%h>ybjZ6y74(LExbA@sb<9K?aA?0P4$@uZOe^!|xS$AOdQ+HA(XK3VAdSu#FF z==)SMK2_-ZPcoiZzcCp1ZlQ{Zz_;B$xJbUvqv4~DD)x=xIQ=K{B5y-oe#T-#CRBk8+=+prR?o|kET z`dwwiO0?E-vQFZ|#|b?{r?{RR4{C|B4OJWd!+daMl?3(mX{W-EDqLUpt_MzjIxT)u z<$sIxGeHbm#`Di5o-4}Ld=;!LzsBA+!Z`meaEjYL-y`+(E>%DO2?`_i=j*_!y<%0B zbfMcj2#)+`&Gkr}_d>X!K)8MntKU=7|MACKI`yqp3blz5KN?s4Nb3L}B-}n#&{uip@{+`l7qlp!xx6X;V;rh}Xi>tCfje;> z*1vz^HU{UpQ{{Q55_}8zNd0p*a97+oQK?Fye+6)go3W}Yp^VzD6#924oEGFTmjUI+ zn*U%5KVvYjr2Oe)A9Go#a5F~r)hX}`Q{X!#o+D^oMJn{|5S1$>KEk|9y-gxQ&!zFg zs@h*yXIi~kR7YabSOeBXq_-=pD>rQPm6cXh(xT43HA&pM)h1C*y|1>eezU*MC!bgQ z{0)r)&!WTltSwOEUV8FLCkqnzf;ElcmJ|to2wu~$ZQBkVr{t_PGR)eIE7zC!G)G}{ zJrPAd9G6k>nO%M*7J&}AtHWWrB63+V-tpASNASM$mK6Eu?N1B~ArlhB*LRo6&*6N2 zEFWAGz?19-e1(U_s|$lc;X9*j!>W~KK3}O%E)eWH@4T`ixT{r*SJMZ3+XKGv_L9Yg z!%;jGH@|>VzBa5~TTy}qlvl1QE1_PbG?HFFnE?4dQux+<=d6|MOIHheu&>@-$rY^4 z%RqZPa@AtBJh2qh8p;+p@qSFchKM`+W@}qx3G2ji)z(P=bOrF_CBH4^v_N-ag=mYA zbC)ZItA)TV3b3{;>YcJLWEab$t9+<+pLvn!WK8!eEk$jKXj)%-xQ!(jS}WUPjwQUm z!NT3co&2k7p`!)Mu=F<;M7O?^m2DiJAwcWqb%Ip?D0r^sp4z6Br-l8zVTJs){Y4P*q e71m*_)=HP-MXhIVaY^T}Zv605hA*h{viv_OLH9}k diff --git a/scripts/OSM/OSM-Train.sh b/scripts/OSM/OSM-Train.sh index cb20f5f1e..019976ab5 100755 --- a/scripts/OSM/OSM-Train.sh +++ b/scripts/OSM/OSM-Train.sh @@ -1,35 +1,34 @@ #!/bin/sh -PATH=$PATH:/fs/hel1/nadir/SRILM/bin/i686-m64/ - echo 'Training OSM - Start' date +mkdir $5 +ln -s $1 $5/e +ln -s $2 $5/f + +$6/scripts/OSM/flipAlignment $3 > $5/align + +echo 'Extracting Singletons' + +$6/scripts/OSM/extract-singletons.perl $5/e $5/f $5/align > $5/Singletons + +echo 'Converting Bilingual Sentence Pair into Operation Corpus' + +$6/scripts/OSM/generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus + +echo 'Learning Operation Sequence Translation Model' + +ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM + +echo 'Binarizing' + +$6/bin/build_binary $5/operationLM$4 $5/operationLM.bin + \rm $5/e \rm $5/f \rm $5/align -ln -s $1 $5/e -ln -s $2 $5/f - -./flipAlignment $3 > $5/align - -echo 'Extracting Singletons' - -./extract-singletons.perl $5/e $5/f $5/align > $5/Singletons - -echo 'Converting Bilingual Sentence Pair into Operation Corpus' - -./generateSequences $5/e $5/f $5/align $5/Singletons > $5/opCorpus # Generates Operation Corpus - -echo 'Learning Operation Sequence Translation Model' - -ngram-count -kndiscount -order $4 -unk -text $5/opCorpus -lm $5/operationLM$4 - -echo 'Binarizing' - -../../bin/build_binary $5/operationLM$4 $5/operationLM$4.bin - echo 'Training OSM - End' date diff --git a/scripts/ems/experiment.meta b/scripts/ems/experiment.meta index cf79d580b..0ead260bb 100644 --- a/scripts/ems/experiment.meta +++ b/scripts/ems/experiment.meta @@ -516,8 +516,8 @@ build-osm out: osm-model ignore-unless: operation-sequence-model rerun-on-change: operation-sequence-model training-options script giza-settings - template: $moses-script-dir/OSM/OSM-Train IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir - default-name: model/OSM/ + template: $moses-script-dir/OSM/OSM-Train.sh IN0.$output-extension IN0.$input-extension IN1.$alignment-symmetrization-method $operation-sequence-model-order OUT $moses-src-dir + default-name: model/OSM extract-phrases in: corpus-mml-postfilter=OR=word-alignment scored-corpus out: extracted-phrases @@ -586,7 +586,7 @@ build-sparse default-name: model/sparse-features template: $moses-script-dir/ems/support/build-sparse-features.perl IN $input-extension $output-extension OUT "$sparse-features" create-config - in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains INTERPOLATED-LM:binlm LM:binlm osm-model + in: sigtest-filter-reordering-table sigtest-filter-phrase-translation-table generation-table sparse corpus-mml-prefilter=OR=corpus-mml-postfilter=OR=domains osm-model INTERPOLATED-LM:binlm LM:binlm out: config ignore-if: use-hiero rerun-on-change: decoding-steps alignment-factors translation-factors reordering-factors generation-factors lexicalized-reordering training-options script decoding-graph-backoff score-settings additional-ini diff --git a/scripts/ems/experiment.perl b/scripts/ems/experiment.perl index e22638d79..d3a4f9788 100755 --- a/scripts/ems/experiment.perl +++ b/scripts/ems/experiment.perl @@ -2164,11 +2164,13 @@ sub get_config_tables { sub define_training_create_config { my ($step_id) = @_; - my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,@LM) + my ($config,$reordering_table,$phrase_translation_table,$generation_table,$sparse_lexical_features,$domains,$osm, @LM) = &get_output_and_input($step_id); my $cmd = &get_config_tables($config,$reordering_table,$phrase_translation_table,$generation_table,$domains); + $cmd .= "-osm-model $osm/operationLM.bin " if $osm; + # sparse lexical features provide additional content for config file $cmd .= "-additional-ini-file $sparse_lexical_features.ini " if $sparse_lexical_features; diff --git a/scripts/training/train-model.perl b/scripts/training/train-model.perl index 8f5580541..332eb78bb 100755 --- a/scripts/training/train-model.perl +++ b/scripts/training/train-model.perl @@ -31,7 +31,7 @@ my($_EXTERNAL_BINDIR, $_ROOT_DIR, $_CORPUS_DIR, $_GIZA_E2F, $_GIZA_F2E, $_MODEL_ $_DECODING_GRAPH_BACKOFF, $_DECODING_STEPS, $_PARALLEL, $_FACTOR_DELIMITER, @_PHRASE_TABLE, @_REORDERING_TABLE, @_GENERATION_TABLE, @_GENERATION_TYPE, $_GENERATION_CORPUS, - $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, + $_DONT_ZIP, $_MGIZA, $_MGIZA_CPUS, $_SNT2COOC, $_HMM_ALIGN, $_CONFIG, $_OSM, $_HIERARCHICAL,$_XML,$_SOURCE_SYNTAX,$_TARGET_SYNTAX,$_GLUE_GRAMMAR,$_GLUE_GRAMMAR_FILE,$_UNKNOWN_WORD_LABEL_FILE,$_GHKM,$_PCFG,@_EXTRACT_OPTIONS,@_SCORE_OPTIONS, $_ALT_DIRECT_RULE_SCORE_1, $_ALT_DIRECT_RULE_SCORE_2, $_OMIT_WORD_ALIGNMENT,$_FORCE_FACTORED_FILENAMES, @@ -119,6 +119,7 @@ $_HELP = 1 'xml' => \$_XML, 'no-word-alignment' => \$_OMIT_WORD_ALIGNMENT, 'config=s' => \$_CONFIG, + 'osm-model=s' => \$_OSM, 'max-lexical-reordering' => \$_MAX_LEXICAL_REORDERING, 'do-steps=s' => \$_DO_STEPS, 'memscore:s' => \$_MEMSCORE, @@ -1992,6 +1993,15 @@ sub create_ini { } } + # operation sequence model + + if($_OSM) + { + + $feature_spec .= "OpSequenceModel num-features=5 path=". $_OSM . " \n"; + $weight_spec .= "OpSequenceModel0= 0.08 -0.02 0.02 -0.001 0.03\n"; + } + # distance-based reordering if (!$_HIERARCHICAL) { $feature_spec .= "Distortion\n";