Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Fix CUDA version detection in CUB
Browse files Browse the repository at this point in the history
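This fixes the problem with CUB using deprecated shfl/vote instructions when CUB
is compiled with clang (e.g. some TensorFlow builds): the CUDA 9+ checks now also
accept CUDA_VERSION >= 9000 instead of relying on __CUDACC_VER_MAJOR__ alone.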
  • Loading branch information
Artem-B committed Sep 23, 2019
1 parent 135da60 commit fd6e7a6
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 3 deletions.
3 changes: 2 additions & 1 deletion cub/util_arch.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ namespace cub {

#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document

#if (__CUDACC_VER_MAJOR__ >= 9) && !defined(CUB_USE_COOPERATIVE_GROUPS)
#if !defined(CUB_USE_COOPERATIVE_GROUPS) && \
(__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
#define CUB_USE_COOPERATIVE_GROUPS
#endif

Expand Down
4 changes: 2 additions & 2 deletions cub/util_type.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
#include <limits>
#include <cfloat>

#if (__CUDACC_VER_MAJOR__ >= 9)
#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
#include <cuda_fp16.h>
#endif

Expand Down Expand Up @@ -1063,7 +1063,7 @@ struct FpLimits<double>
};


#if (__CUDACC_VER_MAJOR__ >= 9)
#if (__CUDACC_VER_MAJOR__ >= 9 || CUDA_VERSION >= 9000)
template <>
struct FpLimits<__half>
{
Expand Down

0 comments on commit fd6e7a6

Please sign in to comment.