mirror of
https://github.com/marian-nmt/marian.git
synced 2025-01-06 00:13:33 +03:00
updated notebook
This commit is contained in:
parent
03acd8c3bd
commit
fc08eea244
@ -133,12 +133,12 @@
|
||||
"\n",
|
||||
"$$\n",
|
||||
"\\begin{eqnarray}\n",
|
||||
"\\ora{\\underline{h}}_i &=& \\tanh\\left(\\overline{E}_i\\ora{W_x} + \\ora{b_x} + \\ora{r}_i \\circ (\\ora{h}_{i-1}\\ora{U_x})\\right)\\\\\n",
|
||||
"\\left[\n",
|
||||
"\\begin{array}{c}\n",
|
||||
"\\ora{r}_i \\\\\n",
|
||||
"\\ora{u}_i\\end{array}\n",
|
||||
"\\right] &=& \\sigma\\left(\\overline{E}_i\\ora{W} + \\ora{b} + \\ora{h}_{i-1}\\ora{U}\\right)\\\\\n",
|
||||
"\\right] &=& \\sigma\\left(\\ora{h}_{i-1}\\ora{U} + \\overline{E}_i\\ora{W} + \\ora{b} \\right)\\\\\n",
|
||||
"\\ora{\\underline{h}}_i &=& \\tanh\\left((\\ora{h}_{i-1}\\ora{U_x}) \\circ \\ora{r}_i+ \\overline{E}_i\\ora{W_x} + \\ora{b_x} \\right)\\\\\n",
|
||||
"\\end{eqnarray}\n",
|
||||
"$$\n",
|
||||
"\n",
|
||||
@ -321,9 +321,9 @@
|
||||
"\\ora{u}_i^h\\end{array}\n",
|
||||
"\\right] &=& \\sigma \\left( s_{i-1}U + E_{i-1}W + b \\right) \\\\\n",
|
||||
"\\\\\n",
|
||||
"\\overline{s}_m &=& \\tanh \\left( (s_{i-1}U_x) \\circ r_i^h + E_{i-1}W_x + b_x \\right) \\\\\n",
|
||||
"\\overline{s}_i &=& \\tanh \\left( (s_{i-1}U_x) \\circ r_i^h + E_{i-1}W_x + b_x \\right) \\\\\n",
|
||||
"\\\\\n",
|
||||
"s_m &=& u_i^h \\circ s_{i-1} + (1- u_i^h) \\circ \\overline{s}_m \\\\\n",
|
||||
"s_i &=& u_i^h \\circ s_{i-1} + (1- u_i^h) \\circ \\overline{s}_i \\\\\n",
|
||||
"\\end{eqnarray}\n",
|
||||
"$$\n"
|
||||
]
|
||||
@ -384,7 +384,7 @@
|
||||
"\n",
|
||||
"$$\n",
|
||||
"\\begin{eqnarray}\n",
|
||||
"e_{ij} &=& v_\\alpha^T \\tanh\\left(s_m W_{\\alpha} + b_{\\alpha} + h_jU_{\\alpha}\\right) + c_{\\alpha}\n",
|
||||
"e_{ij} &=& v_\\alpha^T \\tanh\\left(s_{i} W_{\\alpha} + b_{\\alpha} + h_jU_{\\alpha}\\right) + c_{\\alpha}\n",
|
||||
"\\end{eqnarray}\n",
|
||||
"$$\n",
|
||||
"\n",
|
||||
@ -425,17 +425,19 @@
|
||||
}
|
||||
},
|
||||
"source": [
|
||||
"Note the different placement of the bias $b_x$ in the computation of the intermediate state $\\tilde{z}_i$: here it is added before the element-wise product with the reset gate $r_i^f$. This is an oddity in the way Nematus implements GRUs.\n",
|
||||
"\n",
|
||||
"$$\n",
|
||||
"\\begin{eqnarray}\n",
|
||||
"\\left[\n",
|
||||
"\\begin{array}{c}\n",
|
||||
"\\ora{r}_i^f \\\\\n",
|
||||
"\\ora{u}_i^f\\end{array}\n",
|
||||
"\\right] &=& \\sigma \\left( s_mU + c_iW + b \\right) \\\\\n",
|
||||
"\\right] &=& \\sigma \\left( s_iU + c_iW + b \\right) \\\\\n",
|
||||
"\\\\\n",
|
||||
"\\tilde{s_i} &=& \\tanh\\left( (s_mU_x + b_x) \\circ r_i^f + c_iW_x \\right) \\\\\n",
|
||||
"\\tilde{z}_i &=& \\tanh\\left( (s_iU_x + b_x) \\circ r_i^f + c_iW_x \\right) \\\\\n",
|
||||
"\\\\\n",
|
||||
"s_i &=& u_i^f \\circ s_m + (1 - u_i^f) \\circ \\tilde{s_i}\n",
|
||||
"z_i &=& u_i^f \\circ s_i + (1 - u_i^f) \\circ \\tilde{z}_i\n",
|
||||
"\\end{eqnarray}\n",
|
||||
"$$"
|
||||
]
|
||||
@ -480,9 +482,9 @@
|
||||
"source": [
|
||||
"$$\n",
|
||||
"\\begin{eqnarray}\n",
|
||||
" t_i &=&\\tanh \\left( \\left( s_iW_1 + b_1 \\right) + \\left( E_{i-1} W_2 + b_2 \\right) + \\left( c_iW_3 + b_3 \\right) \\right) \\\\\n",
|
||||
" t_i &=&\\tanh \\left( \\left( z_iW_1 + b_1 \\right) + \\left( E_{i-1} W_2 + b_2 \\right) + \\left( c_iW_3 + b_3 \\right) \\right) \\\\\n",
|
||||
"\\\\\n",
|
||||
"p(y_i|s_{i-1},y_{i-1},c_i) &=& \\textrm{softmax} \\left( t_iW_4 + b_4 \\right) \\\\\n",
|
||||
"p(y_i|z_{i},y_{i-1},c_i) &=& \\textrm{softmax} \\left( t_iW_4 + b_4 \\right) \\\\\n",
|
||||
"\\end{eqnarray}\n",
|
||||
"$$\n",
|
||||
"\n",
|
||||
|
Loading…
Reference in New Issue
Block a user