From cbc29a0ab182b8aed49784d3ee9dfbb6a75740b0 Mon Sep 17 00:00:00 2001 From: Andre Martins Date: Fri, 16 Sep 2016 18:41:26 +0100 Subject: [PATCH] Added ref for the backprop of the fast softmax. --- src/node_operators.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/node_operators.h b/src/node_operators.h index 8620a645..c63c9333 100644 --- a/src/node_operators.h +++ b/src/node_operators.h @@ -163,6 +163,13 @@ struct SoftmaxNodeOp : public UnaryNodeOp { // For each row, the Jacobian times vector is given by: // J * dy = p .* (dy - avg*1) // where avg = p'*dy and p is the softmax output (probabilities). + // + // For more information, see sec. 2.5 of the following reference: + // André F. T. Martins and Ramon Astudillo. + // "From Softmax to Sparsemax: A Sparse Model of Attention and Multi-Label + // Classification." ICML 2016. + // http://jmlr.org/proceedings/papers/v48/martins16.pdf + Tensor result(adj_.shape()); thrust::copy(adj_.begin(), adj_.end(), result.begin()); SubtractMean(&result, val_);