From 7d52af64c03e71bcd23112a7086dc8aab1b37ed2 Mon Sep 17 00:00:00 2001
From: Philipp Gross
Date: Sat, 22 Apr 2017 20:49:33 +0200
Subject: [PATCH] Added logsumexp to backend. (#6346)

---
 keras/backend/tensorflow_backend.py | 22 ++++++++++++++++++
 keras/backend/theano_backend.py     | 23 +++++++++++++++++++
 tests/keras/backend/backend_test.py | 35 +++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+)

diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py
index dde257ce96bd..d1f187d81370 100644
--- a/keras/backend/tensorflow_backend.py
+++ b/keras/backend/tensorflow_backend.py
@@ -1304,6 +1304,28 @@ def log(x):
     return tf.log(x)
 
 
+def logsumexp(x, axis=None, keepdims=False):
+    """Computes log(sum(exp(elements across dimensions of a tensor))).
+
+    This function is more numerically stable than log(sum(exp(x))).
+    It avoids overflows caused by taking the exp of large inputs and
+    underflows caused by taking the log of small inputs.
+
+    # Arguments
+        x: A tensor or variable.
+        axis: An integer, the axis to reduce over.
+        keepdims: A boolean, whether to keep the dimensions or not.
+            If `keepdims` is `False`, the rank of the tensor is reduced
+            by 1. If `keepdims` is `True`, the reduced dimension is
+            retained with length 1.
+
+    # Returns
+        The reduced tensor.
+    """
+    axis = _normalize_axis(axis, ndim(x))
+    return tf.reduce_logsumexp(x, reduction_indices=axis, keep_dims=keepdims)
+
+
 def round(x):
     """Element-wise rounding to the closest integer.
 
diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py
index 3a9ed3217b7b..4388ba4e7f23 100644
--- a/keras/backend/theano_backend.py
+++ b/keras/backend/theano_backend.py
@@ -528,6 +528,29 @@ def log(x):
     return T.log(x)
 
 
+def logsumexp(x, axis=None, keepdims=False):
+    """Computes log(sum(exp(elements across dimensions of a tensor))).
+
+    This function is more numerically stable than log(sum(exp(x))).
+    It avoids overflows caused by taking the exp of large inputs and
+    underflows caused by taking the log of small inputs.
+
+    # Arguments
+        x: A tensor or variable.
+        axis: An integer, the axis to reduce over.
+        keepdims: A boolean, whether to keep the dimensions or not.
+            If `keepdims` is `False`, the rank of the tensor is reduced
+            by 1. If `keepdims` is `True`, the reduced dimension is
+            retained with length 1.
+
+    # Returns
+        The reduced tensor.
+    """
+    # Theano has a built-in optimization for logsumexp (see https://github.com/Theano/Theano/pull/4736)
+    # so we can just write the expression directly:
+    return T.log(T.sum(T.exp(x), axis=axis, keepdims=keepdims))
+
+
 def round(x):
     return T.round(x, mode='half_to_even')
 
diff --git a/tests/keras/backend/backend_test.py b/tests/keras/backend/backend_test.py
index c48d3918468b..e0406049a7ce 100644
--- a/tests/keras/backend/backend_test.py
+++ b/tests/keras/backend/backend_test.py
@@ -580,6 +580,41 @@ def step_function(x, states):
         assert_allclose(tf_last_output, th_last_output, atol=1e-04)
         assert_allclose(tf_outputs, th_outputs, atol=1e-04)
 
+    @pytest.mark.parametrize('x_np,axis,keepdims', [
+        (np.array([1.1, 0.8, 0.9]), 0, False),
+        (np.array([[1.1, 0.8, 0.9]]), 0, False),
+        (np.array([[1.1, 0.8, 0.9]]), 1, False),
+        (np.array([[1.1, 0.8, 0.9]]), -1, False),
+        (np.array([[1.1, 0.8, 0.9]]), 1, True),
+        (np.array([[1.1], [1.2]]), 0, False),
+        (np.array([[1.1], [1.2]]), 1, False),
+        (np.array([[1.1], [1.2]]), -1, False),
+        (np.array([[1.1], [1.2]]), -1, True),
+        (np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), None, False),
+        (np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), 0, False),
+        (np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), 1, False),
+        (np.array([[1.1, 1.2, 1.3], [0.9, 0.7, 1.4]]), -1, False),
+    ])
+    @pytest.mark.parametrize('K', [KTH, KTF], ids=["KTH", "KTF"])
+    def test_logsumexp(self, x_np, axis, keepdims, K):
+        '''
+        Check if K.logsumexp works properly for values close to one.
+        '''
+        x = K.variable(x_np)
+        assert_allclose(K.eval(K.logsumexp(x, axis=axis, keepdims=keepdims)),
+                        np.log(np.sum(np.exp(x_np), axis=axis, keepdims=keepdims)),
+                        rtol=1e-5)
+
+    @pytest.mark.parametrize('K', [KTH, KTF], ids=["KTH", "KTF"])
+    def test_logsumexp_optim(self, K):
+        '''
+        Check if optimization works.
+        '''
+        x_np = np.array([1e+4, 1e-4])
+        assert_allclose(K.eval(K.logsumexp(K.variable(x_np), axis=0)),
+                        1e4,
+                        rtol=1e-5)
+
     def test_switch(self):
         val = np.random.random()
         xth = KTH.variable(val)