From 7d52af64c03e71bcd23112a7086dc8aab1b37ed2 Mon Sep 17 00:00:00 2001
From: Philipp Gross
Date: Sat, 22 Apr 2017 20:49:33 +0200
Subject: [PATCH] Added logsumexp to backend. (#6346)

---
 keras/backend/tensorflow_backend.py | 22 ++++++++++++++++++
 keras/backend/theano_backend.py     | 23 +++++++++++++++++++
 tests/keras/backend/backend_test.py | 35 +++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+)

diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py
index dde257ce96bd..d1f187d81370 100644
--- a/keras/backend/tensorflow_backend.py
+++ b/keras/backend/tensorflow_backend.py
@@ -1304,6 +1304,28 @@ def log(x):
     return tf.log(x)
 
 
+def logsumexp(x, axis=None, keepdims=False):
+    """Computes log(sum(exp(elements across dimensions of a tensor))).
+
+    This function is more numerically stable than log(sum(exp(x))).
+    It avoids overflows caused by taking the exp of large inputs and
+    underflows caused by taking the log of small inputs.
+
+    # Arguments
+        x: A tensor or variable.
+        axis: An integer, the axis to reduce over.
+        keepdims: A boolean, whether to keep the dimensions or not.
+            If `keepdims` is `False`, the rank of the tensor is reduced
+            by 1. If `keepdims` is `True`, the reduced dimension is
+            retained with length 1.
+
+    # Returns
+        The reduced tensor.
+    """
+    axis = _normalize_axis(axis, ndim(x))
+    return tf.reduce_logsumexp(x, reduction_indices=axis, keep_dims=keepdims)
+
+
 def round(x):
     """Element-wise rounding to the closest integer.
 
diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py
index 3a9ed3217b7b..4388ba4e7f23 100644
--- a/keras/backend/theano_backend.py
+++ b/keras/backend/theano_backend.py
@@ -528,6 +528,29 @@ def log(x):
     return T.log(x)
 
 
+def logsumexp(x, axis=None, keepdims=False):
+    """Computes log(sum(exp(elements across dimensions of a tensor))).
+
+    This function is more numerically stable than log(sum(exp(x))).
+    It avoids overflows caused by taking the exp of large inputs and
+    underflows caused by taking the log of small inputs.
+
+    # Arguments
+        x: A tensor or variable.
+        axis: An integer, the axis to reduce over.
+        keepdims: A boolean, whether to keep the dimensions or not.
+            If `keepdims` is `False`, the rank of the tensor is reduced
+            by 1. If `keepdims` is `True`, the reduced dimension is
+            retained with length 1.
+
+    # Returns
+        The reduced tensor.
+    """
+    # Theano has a built-in optimization for logsumexp (see https://github.com/Theano/Theano/pull/4736)
+    # so we can just write the expression directly:
+    return T.log(T.sum(T.exp(x), axis=axis, keepdims=keepdims))
+
+
 def round(x):
     return T.round(x, mode='half_to_even')
 
diff --git a/tests/keras/backend/backend_test.py b/tests/keras/backend/backend_test.py
index c48d3918468b..e0406049a7ce 100644
--- a/tests/keras/backend/backend_test.py
+++ b/tests/keras/backend/backend_test.py
@@ -580,6 +580,41 @@ def step_function(x, states):
         assert_allclose(tf_last_output, th_last_output, atol=1e-04)
         assert_allclose(tf_outputs, th_outputs, atol=1e-04)
 
+    @pytest.mark.parametrize('x_np,axis,keepdims', [
+        (np.array([1.1, 0.8, 0.9]), 0, False),
+        (np.array([[1.1, 0.8, 0.9]]), 0, False),
+        (np.array([[1.1, 0.8, 0.9]]), 1, False),
+        (np.array([[1.1, 0.8, 0.9]]), -1, False),
+        (np.array([[1.1, 0.8, 0.9]]), 1, True),
+        (np.array([[1.1], [1.2]]), 0, False),
+        (np.array([[1.1], [1.2]]), 1, False),
+        (np.array([[1.1], [1.2]]), -1, False),
+        (np.array([[1.1], [1.2]]), -1, True),
+        (np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), None, False),
+        (np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), 0, False),
+        (np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), 1, False),
+        (np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), -1, False),
+    ])
+    @pytest.mark.parametrize('K', [KTH, KTF], ids=["KTH", "KTF"])
+    def test_logsumexp(self, x_np, axis, keepdims, K):
+        '''
+        Check if K.logsumexp works properly for values close to one.
+        '''
+        x = K.variable(x_np)
+        assert_allclose(K.eval(K.logsumexp(x, axis=axis, keepdims=keepdims)),
+                        np.log(np.sum(np.exp(x_np), axis=axis, keepdims=keepdims)),
+                        rtol=1e-5)
+
+    @pytest.mark.parametrize('K', [KTH, KTF], ids=["KTH", "KTF"])
+    def test_logsumexp_optim(self, K):
+        '''
+        Check if optimization works.
+        '''
+        x_np = np.array([1e+4, 1e-4])
+        assert_allclose(K.eval(K.logsumexp(K.variable(x_np), axis=0)),
+                        1e4,
+                        rtol=1e-5)
+
     def test_switch(self):
         val = np.random.random()
         xth = KTH.variable(val)