diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7dd411ba39b..d317e931e78 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -17,7 +17,7 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v18.1.8 + rev: v19.1.6 hooks: - id: clang-format types_or: [file] @@ -39,7 +39,7 @@ repos: # TODO/REMINDER: add the Ruff vscode extension to the devcontainers # Ruff, the Python auto-correcting linter/formatter written in Rust - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.3 + rev: v0.8.6 hooks: - id: ruff # linter - id: ruff-format # formatter @@ -57,7 +57,7 @@ repos: - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.13.0' + rev: 'v1.14.1' hooks: - id: mypy additional_dependencies: [types-cachetools, numpy] diff --git a/cub/cub/agent/agent_histogram.cuh b/cub/cub/agent/agent_histogram.cuh index 21a487828ca..e454dc837b1 100644 --- a/cub/cub/agent/agent_histogram.cuh +++ b/cub/cub/agent/agent_histogram.cuh @@ -629,7 +629,7 @@ struct AgentHistogram // Set valid flags MarkValid( - is_valid, valid_samples, Int2Type{}); + is_valid, valid_samples, Int2Type < AgentHistogramPolicyT::LOAD_ALGORITHM == BLOCK_LOAD_STRIPED > {}); // Accumulate samples if (prefer_smem) diff --git a/cub/cub/agent/agent_reduce.cuh b/cub/cub/agent/agent_reduce.cuh index 2e0d94b219c..d5e3514f369 100644 --- a/cub/cub/agent/agent_reduce.cuh +++ b/cub/cub/agent/agent_reduce.cuh @@ -382,8 +382,8 @@ struct AgentReduce even_share.template BlockInit(block_offset, block_end); return (IsAligned(d_in + block_offset, Int2Type())) - ? ConsumeRange(even_share, Int2Type < true && ATTEMPT_VECTORIZATION > ()) - : ConsumeRange(even_share, Int2Type < false && ATTEMPT_VECTORIZATION > ()); + ? ConsumeRange(even_share, Int2Type()) + : ConsumeRange(even_share, Int2Type()); } /** @@ -396,8 +396,8 @@ struct AgentReduce even_share.template BlockInit(); return (IsAligned(d_in, Int2Type())) - ? ConsumeRange(even_share, Int2Type < true && ATTEMPT_VECTORIZATION > ()) - : ConsumeRange(even_share, Int2Type < false && ATTEMPT_VECTORIZATION > ()); + ? ConsumeRange(even_share, Int2Type()) + : ConsumeRange(even_share, Int2Type()); } private: diff --git a/cub/cub/block/block_radix_rank.cuh b/cub/cub/block/block_radix_rank.cuh index 490abb86bda..92605b5168d 100644 --- a/cub/cub/block/block_radix_rank.cuh +++ b/cub/cub/block/block_radix_rank.cuh @@ -606,8 +606,7 @@ private: { volatile DigitCounterT warp_digit_counters[RADIX_DIGITS][PADDED_WARPS]; DigitCounterT raking_grid[BLOCK_THREADS][PADDED_RAKING_SEGMENT]; - } - aliasable; + } aliasable; }; #endif // !_CCCL_DOXYGEN_INVOKED diff --git a/cub/cub/detail/strong_load.cuh b/cub/cub/detail/strong_load.cuh index 61693d808e2..b6ba4bb5fc8 100644 --- a/cub/cub/detail/strong_load.cuh +++ b/cub/cub/detail/strong_load.cuh @@ -59,14 +59,14 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint4 load_relaxed(uint4 const* ptr) uint4 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.v4.u32 {%0, %1, %2, %3}, [%4];" - : "=r"(retval.x), "=r"(retval.y), "=r"(retval.z), "=r"(retval.w) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v4.u32 {%0, %1, %2, %3}, [%4];" - : "=r"(retval.x), "=r"(retval.y), "=r"(retval.z), "=r"(retval.w) - : "l"(ptr) - : "memory");)); + (asm volatile("ld.relaxed.gpu.v4.u32 {%0, %1, %2, %3}, [%4];" : "=r"(retval.x), + "=r"(retval.y), + "=r"(retval.z), + "=r"(retval.w) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v4.u32 {%0, %1, %2, %3}, [%4];" : "=r"(retval.x), + "=r"(retval.y), + "=r"(retval.z), + "=r"(retval.w) : "l"(ptr) : "memory");)); return retval; } @@ -75,14 +75,8 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE ulonglong2 load_relaxed(ulonglong2 const* ulonglong2 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.v2.u64 {%0, %1}, [%2];" - : "=l"(retval.x), "=l"(retval.y) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];" - : "=l"(retval.x), "=l"(retval.y) - : "l"(ptr) - : "memory");)); + (asm volatile("ld.relaxed.gpu.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory");)); return retval; } @@ -91,14 +85,14 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE ushort4 load_relaxed(ushort4 const* ptr) ushort4 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.v4.u16 {%0, %1, %2, %3}, [%4];" - : "=h"(retval.x), "=h"(retval.y), "=h"(retval.z), "=h"(retval.w) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v4.u16 {%0, %1, %2, %3}, [%4];" - : "=h"(retval.x), "=h"(retval.y), "=h"(retval.z), "=h"(retval.w) - : "l"(ptr) - : "memory");)); + (asm volatile("ld.relaxed.gpu.v4.u16 {%0, %1, %2, %3}, [%4];" : "=h"(retval.x), + "=h"(retval.y), + "=h"(retval.z), + "=h"(retval.w) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v4.u16 {%0, %1, %2, %3}, [%4];" : "=h"(retval.x), + "=h"(retval.y), + "=h"(retval.z), + "=h"(retval.w) : "l"(ptr) : "memory");)); return retval; } @@ -107,46 +101,26 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint2 load_relaxed(uint2 const* ptr) uint2 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.v2.u32 {%0, %1}, [%2];" - : "=r"(retval.x), "=r"(retval.y) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];" - : "=r"(retval.x), "=r"(retval.y) - : "l"(ptr) - : "memory");)); + (asm volatile("ld.relaxed.gpu.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory");)); return retval; } static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned long long load_relaxed(unsigned long long const* ptr) { unsigned long long retval; - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.u64 %0, [%1];" - : "=l"(retval) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.u64 %0, [%1];" - : "=l"(retval) - : "l"(ptr) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("ld.relaxed.gpu.u64 %0, [%1];" : "=l"(retval) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.u64 %0, [%1];" : "=l"(retval) : "l"(ptr) : "memory");)); return retval; } static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int load_relaxed(unsigned int const* ptr) { unsigned int retval; - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.u32 %0, [%1];" - : "=r"(retval) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.u32 %0, [%1];" - : "=r"(retval) - : "l"(ptr) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("ld.relaxed.gpu.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory");)); return retval; } @@ -154,16 +128,9 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int load_relaxed(unsigned int con static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned short load_relaxed(unsigned short const* ptr) { unsigned short retval; - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("ld.relaxed.gpu.u16 %0, [%1];" - : "=h"(retval) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.u16 %0, [%1];" - : "=h"(retval) - : "l"(ptr) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("ld.relaxed.gpu.u16 %0, [%1];" : "=h"(retval) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.u16 %0, [%1];" : "=h"(retval) : "l"(ptr) : "memory");)); return retval; } @@ -172,24 +139,16 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned char load_relaxed(unsigned char c unsigned short retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile( - "{" - " .reg .u8 datum;" - " ld.relaxed.gpu.u8 datum, [%1];" - " cvt.u16.u8 %0, datum;" - "}" - : "=h"(retval) - : "l"(ptr) - : "memory");), - (asm volatile( - "{" - " .reg .u8 datum;" - " ld.cg.u8 datum, [%1];" - " cvt.u16.u8 %0, datum;" - "}" - : "=h"(retval) - : "l"(ptr) - : "memory");)); + (asm volatile("{" + " .reg .u8 datum;" + " ld.relaxed.gpu.u8 datum, [%1];" + " cvt.u16.u8 %0, datum;" + "}" : "=h"(retval) : "l"(ptr) : "memory");), + (asm volatile("{" + " .reg .u8 datum;" + " ld.cg.u8 datum, [%1];" + " cvt.u16.u8 %0, datum;" + "}" : "=h"(retval) : "l"(ptr) : "memory");)); return (unsigned char) retval; } @@ -198,14 +157,8 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE ulonglong2 load_acquire(ulonglong2 const* ulonglong2 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.acquire.gpu.v2.u64 {%0, %1}, [%2];" - : "=l"(retval.x), "=l"(retval.y) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];" - : "=l"(retval.x), "=l"(retval.y) - : "l"(ptr) - : "memory"); + (asm volatile("ld.acquire.gpu.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory"); __threadfence();)); return retval; } @@ -215,14 +168,8 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint2 load_acquire(uint2 const* ptr) uint2 retval; NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("ld.acquire.gpu.v2.u32 {%0, %1}, [%2];" - : "=r"(retval.x), "=r"(retval.y) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];" - : "=r"(retval.x), "=r"(retval.y) - : "l"(ptr) - : "memory"); + (asm volatile("ld.acquire.gpu.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory"); __threadfence();)); return retval; } @@ -230,17 +177,9 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint2 load_acquire(uint2 const* ptr) static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int load_acquire(unsigned int const* ptr) { unsigned int retval; - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("ld.acquire.gpu.u32 %0, [%1];" - : "=r"(retval) - : "l"(ptr) - : "memory");), - (asm volatile("ld.cg.u32 %0, [%1];" - : "=r"(retval) - : "l"(ptr) - : "memory"); - __threadfence();)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("ld.acquire.gpu.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory");), + (asm volatile("ld.cg.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory"); __threadfence();)); return retval; } diff --git a/cub/cub/detail/strong_store.cuh b/cub/cub/detail/strong_store.cuh index 9b8091738db..cc0e8f60e71 100644 --- a/cub/cub/detail/strong_store.cuh +++ b/cub/cub/detail/strong_store.cuh @@ -56,98 +56,61 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(uint4* ptr, uint4 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.v4.u32 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) - : "memory");), - (asm volatile("st.cg.v4.u32 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) - : "memory");)); + (asm volatile("st.relaxed.gpu.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr), + "r"(val.x), + "r"(val.y), + "r"(val.z), + "r"(val.w) : "memory");), + (asm volatile( + "st.cg.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(ulonglong2* ptr, ulonglong2 val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.v2.u64 [%0], {%1, %2};" - : - : "l"(ptr), "l"(val.x), "l"(val.y) - : "memory");), - (asm volatile("st.cg.v2.u64 [%0], {%1, %2};" - : - : "l"(ptr), "l"(val.x), "l"(val.y) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.relaxed.gpu.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");), + (asm volatile("st.cg.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(ushort4* ptr, ushort4 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.v4.u16 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) - : "memory");), - (asm volatile("st.cg.v4.u16 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) - : "memory");)); + (asm volatile("st.relaxed.gpu.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr), + "h"(val.x), + "h"(val.y), + "h"(val.z), + "h"(val.w) : "memory");), + (asm volatile( + "st.cg.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(uint2* ptr, uint2 val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.v2.u32 [%0], {%1, %2};" - : - : "l"(ptr), "r"(val.x), "r"(val.y) - : "memory");), - (asm volatile("st.cg.v2.u32 [%0], {%1, %2};" - : - : "l"(ptr), "r"(val.x), "r"(val.y) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.relaxed.gpu.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");), + (asm volatile("st.cg.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned long long* ptr, unsigned long long val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.u64 [%0], %1;" - : - : "l"(ptr), "l"(val) - : "memory");), - (asm volatile("st.cg.u64 [%0], %1;" - : - : "l"(ptr), "l"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.relaxed.gpu.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");), + (asm volatile("st.cg.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned int* ptr, unsigned int val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.u32 [%0], %1;" - : - : "l"(ptr), "r"(val) - : "memory");), - (asm volatile("st.cg.u32 [%0], %1;" - : - : "l"(ptr), "r"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.relaxed.gpu.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");), + (asm volatile("st.cg.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned short* ptr, unsigned short val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.relaxed.gpu.u16 [%0], %1;" - : - : "l"(ptr), "h"(val) - : "memory");), - (asm volatile("st.cg.u16 [%0], %1;" - : - : "l"(ptr), "h"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.relaxed.gpu.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");), + (asm volatile("st.cg.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");)); } static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned char* ptr, unsigned char val) @@ -158,123 +121,77 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned char* ptr, uns " .reg .u8 datum;" " cvt.u8.u16 datum, %1;" " st.relaxed.gpu.u8 [%0], datum;" - "}" - : - : "l"(ptr), "h"((unsigned short) val) - : "memory");), + "}" : : "l"(ptr), + "h"((unsigned short) val) : "memory");), (asm volatile("{" " .reg .u8 datum;" " cvt.u8.u16 datum, %1;" " st.cg.u8 [%0], datum;" - "}" - : - : "l"(ptr), "h"((unsigned short) val) - : "memory");)); + "}" : : "l"(ptr), + "h"((unsigned short) val) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(uint4* ptr, uint4 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.v4.u32 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) - : "memory");), - (__threadfence(); - asm volatile("st.cg.v4.u32 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) - : "memory");)); + (asm volatile("st.release.gpu.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr), + "r"(val.x), + "r"(val.y), + "r"(val.z), + "r"(val.w) : "memory");), + (__threadfence(); asm volatile( + "st.cg.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(ulonglong2* ptr, ulonglong2 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.v2.u64 [%0], {%1, %2};" - : - : "l"(ptr), "l"(val.x), "l"(val.y) - : "memory");), - (__threadfence(); - asm volatile("st.cg.v2.u64 [%0], {%1, %2};" - : - : "l"(ptr), "l"(val.x), "l"(val.y) - : "memory");)); + (asm volatile("st.release.gpu.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");), + (__threadfence(); asm volatile("st.cg.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(ushort4* ptr, ushort4 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.v4.u16 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) - : "memory");), - (__threadfence(); - asm volatile("st.cg.v4.u16 [%0], {%1, %2, %3, %4};" - : - : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) - : "memory");)); + (asm volatile("st.release.gpu.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr), + "h"(val.x), + "h"(val.y), + "h"(val.z), + "h"(val.w) : "memory");), + (__threadfence(); asm volatile( + "st.cg.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(uint2* ptr, uint2 val) { NV_IF_TARGET( NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.v2.u32 [%0], {%1, %2};" - : - : "l"(ptr), "r"(val.x), "r"(val.y) - : "memory");), - (__threadfence(); - asm volatile("st.cg.v2.u32 [%0], {%1, %2};" - : - : "l"(ptr), "r"(val.x), "r"(val.y) - : "memory");)); + (asm volatile("st.release.gpu.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");), + (__threadfence(); asm volatile("st.cg.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned long long* ptr, unsigned long long val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.u64 [%0], %1;" - : - : "l"(ptr), "l"(val) - : "memory");), - (__threadfence(); - asm volatile("st.cg.u64 [%0], %1;" - : - : "l"(ptr), "l"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.release.gpu.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");), + (__threadfence(); asm volatile("st.cg.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned int* ptr, unsigned int val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.u32 [%0], %1;" - : - : "l"(ptr), "r"(val) - : "memory");), - (__threadfence(); - asm volatile("st.cg.u32 [%0], %1;" - : - : "l"(ptr), "r"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.release.gpu.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");), + (__threadfence(); asm volatile("st.cg.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned short* ptr, unsigned short val) { - NV_IF_TARGET( - NV_PROVIDES_SM_70, - (asm volatile("st.release.gpu.u16 [%0], %1;" - : - : "l"(ptr), "h"(val) - : "memory");), - (__threadfence(); - asm volatile("st.cg.u16 [%0], %1;" - : - : "l"(ptr), "h"(val) - : "memory");)); + NV_IF_TARGET(NV_PROVIDES_SM_70, + (asm volatile("st.release.gpu.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");), + (__threadfence(); asm volatile("st.cg.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");)); } _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned char* ptr, unsigned char val) @@ -285,19 +202,15 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned char* ptr, unsigned c " .reg .u8 datum;" " cvt.u8.u16 datum, %1;" " st.release.gpu.u8 [%0], datum;" - "}" - : - : "l"(ptr), "h"((unsigned short) val) - : "memory");), + "}" : : "l"(ptr), + "h"((unsigned short) val) : "memory");), (__threadfence(); asm volatile( "{" " .reg .u8 datum;" " cvt.u8.u16 datum, %1;" " st.cg.u8 [%0], datum;" - "}" - : - : "l"(ptr), "h"((unsigned short) val) - : "memory");)); + "}" : : "l"(ptr), + "h"((unsigned short) val) : "memory");)); } } // namespace detail diff --git a/cub/cub/device/dispatch/dispatch_transform.cuh b/cub/cub/device/dispatch/dispatch_transform.cuh index 386a6276dfa..fa4fa80d0ef 100644 --- a/cub/cub/device/dispatch/dispatch_transform.cuh +++ b/cub/cub/device/dispatch/dispatch_transform.cuh @@ -169,11 +169,10 @@ _CCCL_DEVICE _CCCL_FORCEINLINE auto poor_apply_impl(F&& f, Tuple&& t, ::cuda::st } template -_CCCL_DEVICE _CCCL_FORCEINLINE auto poor_apply(F&& f, Tuple&& t) - -> decltype(poor_apply_impl( - ::cuda::std::forward(f), - ::cuda::std::forward(t), - ::cuda::std::make_index_sequence<::cuda::std::tuple_size<::cuda::std::remove_reference_t>::value>{})) +_CCCL_DEVICE _CCCL_FORCEINLINE auto poor_apply(F&& f, Tuple&& t) -> decltype(poor_apply_impl( + ::cuda::std::forward(f), + ::cuda::std::forward(t), + ::cuda::std::make_index_sequence<::cuda::std::tuple_size<::cuda::std::remove_reference_t>::value>{})) { return poor_apply_impl( ::cuda::std::forward(f), @@ -473,8 +472,9 @@ using needs_aligned_ptr_t = #ifdef _CUB_HAS_TRANSFORM_UBLKCP template ::value, int> = 0> -_CCCL_DEVICE _CCCL_FORCEINLINE auto select_kernel_arg( - ::cuda::std::integral_constant, kernel_arg&& arg) -> aligned_base_ptr>&& +_CCCL_DEVICE _CCCL_FORCEINLINE auto +select_kernel_arg(::cuda::std::integral_constant, kernel_arg&& arg) + -> aligned_base_ptr>&& { return ::cuda::std::move(arg.aligned_ptr); } @@ -660,10 +660,9 @@ struct dispatch_t - CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE auto configure_ublkcp_kernel() - -> PoorExpected< - ::cuda::std:: - tuple> + CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE auto configure_ublkcp_kernel() -> PoorExpected< + ::cuda::std:: + tuple> { using policy_t = typename ActivePolicy::algo_policy; constexpr int block_dim = policy_t::block_threads; diff --git a/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh b/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh index c6894ccbc86..3645e4b9ed7 100644 --- a/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh +++ b/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh @@ -255,9 +255,8 @@ struct policy_hub typename Tuning::delay_constructor>; template - static auto select_agent_policy(long) -> - typename DefaultPolicy< - default_delay_constructor_t::pack_t>>::ThreeWayPartitionPolicy; + static auto select_agent_policy(long) -> typename DefaultPolicy< + default_delay_constructor_t::pack_t>>::ThreeWayPartitionPolicy; struct Policy800 : ChainedPolicy<800, Policy800, Policy350> { diff --git a/cub/cub/thread/thread_operators.cuh b/cub/cub/thread/thread_operators.cuh index 7af32df392c..feef89776a9 100644 --- a/cub/cub/thread/thread_operators.cuh +++ b/cub/cub/thread/thread_operators.cuh @@ -391,8 +391,8 @@ struct CCCL_DEPRECATED BinaryFlip {} template - _CCCL_DEVICE auto - operator()(T&& t, U&& u) -> decltype(binary_op(::cuda::std::forward(u), ::cuda::std::forward(t))) + _CCCL_DEVICE auto operator()(T&& t, U&& u) + -> decltype(binary_op(::cuda::std::forward(u), ::cuda::std::forward(t))) { return binary_op(::cuda::std::forward(u), ::cuda::std::forward(t)); } diff --git a/cub/cub/thread/thread_reduce.cuh b/cub/cub/thread/thread_reduce.cuh index 294bc449e31..d3850051ca7 100644 --- a/cub/cub/thread/thread_reduce.cuh +++ b/cub/cub/thread/thread_reduce.cuh @@ -543,8 +543,8 @@ ThreadReduceTernaryTree(const Input& input, ReductionOp reduction_op) // never reached. Protect instantion of ThreadReduceSimd with arbitrary types and operators _CCCL_TEMPLATE(typename Input, typename ReductionOp) _CCCL_REQUIRES((!cub::internal::enable_generic_simd_reduction())) -_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto -ThreadReduceSimd(const Input& input, ReductionOp) -> ::cuda::std::remove_cvref_t +_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto ThreadReduceSimd(const Input& input, ReductionOp) + -> ::cuda::std::remove_cvref_t { assert(false); return input[0]; @@ -552,8 +552,8 @@ ThreadReduceSimd(const Input& input, ReductionOp) -> ::cuda::std::remove_cvref_t _CCCL_TEMPLATE(typename Input, typename ReductionOp) _CCCL_REQUIRES((cub::internal::enable_generic_simd_reduction())) -_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto -ThreadReduceSimd(const Input& input, ReductionOp reduction_op) -> ::cuda::std::remove_cvref_t +_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto ThreadReduceSimd(const Input& input, ReductionOp reduction_op) + -> ::cuda::std::remove_cvref_t { using cub::detail::unsafe_bitcast; using T = ::cuda::std::remove_cvref_t; diff --git a/cub/test/catch2_test_device_for_each_in_extents.cu b/cub/test/catch2_test_device_for_each_in_extents.cu index 8ad75a1d0cb..3e5a6c6689a 100644 --- a/cub/test/catch2_test_device_for_each_in_extents.cu +++ b/cub/test/catch2_test_device_for_each_in_extents.cu @@ -135,8 +135,8 @@ using dimensions = cuda::std::index_sequence<3, 2, 5, 4>>; template -auto build_static_extents(IndexType, - cuda::std::index_sequence) -> cuda::std::extents +auto build_static_extents(IndexType, cuda::std::index_sequence) + -> cuda::std::extents { return {}; } diff --git a/cub/test/catch2_test_device_transform.cu b/cub/test/catch2_test_device_transform.cu index 06f2b7c31a7..95c4794b8cf 100644 --- a/cub/test/catch2_test_device_transform.cu +++ b/cub/test/catch2_test_device_transform.cu @@ -166,8 +166,8 @@ struct alignas(Alignment) overaligned_addable_t return a.value == b.value; } - _CCCL_HOST_DEVICE friend auto - operator+(const overaligned_addable_t& a, const overaligned_addable_t& b) -> overaligned_addable_t + _CCCL_HOST_DEVICE friend auto operator+(const overaligned_addable_t& a, const overaligned_addable_t& b) + -> overaligned_addable_t { check(a); check(b); diff --git a/cub/test/test_block_radix_rank.cu b/cub/test/test_block_radix_rank.cu index 8c1df1a80c7..c53c6b179e3 100644 --- a/cub/test/test_block_radix_rank.cu +++ b/cub/test/test_block_radix_rank.cu @@ -310,7 +310,7 @@ void Test() Test(); Test(); - Test(cub::Int2Type<(BlockThreads % 32) == 0>{}); + Test(cub::Int2Type < (BlockThreads % 32) == 0 > {}); } int main(int argc, char** argv) diff --git a/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh b/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh index 459beddee22..ae8ad239d46 100644 --- a/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh +++ b/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh @@ -60,8 +60,8 @@ struct receiver_defaults } template - _CUDAX_TRIVIAL_API static auto - set_stopped(__ignore, _Rcvr& __rcvr) noexcept -> __async::completion_signatures<__async::set_stopped_t()> + _CUDAX_TRIVIAL_API static auto set_stopped(__ignore, _Rcvr& __rcvr) noexcept + -> __async::completion_signatures<__async::set_stopped_t()> { __async::set_stopped(static_cast<_Rcvr&&>(__rcvr)); return {}; @@ -198,15 +198,15 @@ _CUDAX_TRIVIAL_API auto __make_opstate(_Sndr __sndr, _Rcvr __rcvr) } template -_CUDAX_TRIVIAL_API auto -__get_attrs(int, const _Data& __data, const _Sndrs&... __sndrs) noexcept -> decltype(__data.get_attrs(__sndrs...)) +_CUDAX_TRIVIAL_API auto __get_attrs(int, const _Data& __data, const _Sndrs&... __sndrs) noexcept + -> decltype(__data.get_attrs(__sndrs...)) { return __data.get_attrs(__sndrs...); } template -_CUDAX_TRIVIAL_API auto -__get_attrs(long, const _Data&, const _Sndrs&... __sndrs) noexcept -> decltype(__async::get_env(__sndrs...)) +_CUDAX_TRIVIAL_API auto __get_attrs(long, const _Data&, const _Sndrs&... __sndrs) noexcept + -> decltype(__async::get_env(__sndrs...)) { return __async::get_env(__sndrs...); } diff --git a/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh b/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh index 25d5ef04d76..868c911b1da 100644 --- a/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh +++ b/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh @@ -76,48 +76,36 @@ template class _Vy, template class _ using __transform_sig_t = decltype(__transform_sig<_Sig, _Vy, _Ey, _Sy>()); template - class _Vy, - template - class _Ey, + template class _Vy, + template class _Ey, class _Sy, - template - class _Variant, + template class _Variant, class... _More> extern _DIAGNOSTIC<_Sigs> __transform_completion_signatures_v; template - class _Vy, - template - class _Ey, + template class _Vy, + template class _Ey, class _Sy, - template - class _Variant, + template class _Variant, class... _More> extern __fn_t<_ERROR<_What...>>* __transform_completion_signatures_v<_ERROR<_What...>, _Vy, _Ey, _Sy, _Variant, _More...>; template - class _Vy, - template - class _Ey, + template class _Vy, + template class _Ey, class _Sy, - template - class _Variant, + template class _Variant, class... _More> extern __fn_t<_Variant<__transform_sig_t<_Sigs, _Vy, _Ey, _Sy>..., _More...>>* __transform_completion_signatures_v, _Vy, _Ey, _Sy, _Variant, _More...>; template - class _Vy, - template - class _Ey, + template class _Vy, + template class _Ey, class _Sy, - template - class _Variant, + template class _Variant, class... _More> using __transform_completion_signatures = decltype(__transform_completion_signatures_v<_Sigs, _Vy, _Ey, _Sy, _Variant, _More...>()); @@ -129,12 +117,9 @@ template <> struct __gather_sigs_fn { template - class _Then, - template - class _Else, - template - class _Variant, + template class _Then, + template class _Else, + template class _Variant, class... _More> using __call = __transform_completion_signatures< _Sigs, @@ -149,12 +134,9 @@ template <> struct __gather_sigs_fn { template - class _Then, - template - class _Else, - template - class _Variant, + template class _Then, + template class _Else, + template class _Variant, class... _More> using __call = __transform_completion_signatures< _Sigs, @@ -169,12 +151,9 @@ template <> struct __gather_sigs_fn { template - class _Then, - template - class _Else, - template - class _Variant, + template class _Then, + template class _Else, + template class _Variant, class... _More> using __call = __transform_completion_signatures< _Sigs, @@ -187,12 +166,9 @@ struct __gather_sigs_fn template - class _Then, - template - class _Else, - template - class _Variant, + template class _Then, + template class _Else, + template class _Variant, class... _More> using __gather_completion_signatures = typename __gather_sigs_fn<_WantedTag>::template __call<_Sigs, _Then, _Else, _Variant, _More...>; @@ -404,13 +380,12 @@ template auto completion(_Tag, _Args&&...) -> __csig::__sigs<_Tag(_Args...)>&; template -auto completions_of(_Sndr&&, - _Rcvr = {}) -> decltype(__csig::__to_sigs(__declval&>())); +auto completions_of(_Sndr&&, _Rcvr = {}) + -> decltype(__csig::__to_sigs(__declval&>())); template -auto eptr_completion_if() - -> _CUDA_VSTD:: - conditional_t<_PotentiallyThrowing, __csig::__sigs, __csig::__sigs<>>&; +auto eptr_completion_if() -> _CUDA_VSTD:: + conditional_t<_PotentiallyThrowing, __csig::__sigs, __csig::__sigs<>>&; } // namespace meta } // namespace cuda::experimental::__async diff --git a/cudax/include/cuda/experimental/__async/sender/continue_on.cuh b/cudax/include/cuda/experimental/__async/sender/continue_on.cuh index 9a0c142e21c..8da87a443a3 100644 --- a/cudax/include/cuda/experimental/__async/sender/continue_on.cuh +++ b/cudax/include/cuda/experimental/__async/sender/continue_on.cuh @@ -267,8 +267,8 @@ struct continue_on_t::__sndr_t }; template -_CUDAX_API auto -continue_on_t::operator()(_Sndr __sndr, _Sch __sch) const noexcept -> continue_on_t::__sndr_t<_Sndr, _Sch> +_CUDAX_API auto continue_on_t::operator()(_Sndr __sndr, _Sch __sch) const noexcept + -> continue_on_t::__sndr_t<_Sndr, _Sch> { return __sndr_t<_Sndr, _Sch>{{}, __sch, static_cast<_Sndr&&>(__sndr)}; } diff --git a/cudax/include/cuda/experimental/__async/sender/cpos.cuh b/cudax/include/cuda/experimental/__async/sender/cpos.cuh index 7f1fb383a71..dab62e7ac10 100644 --- a/cudax/include/cuda/experimental/__async/sender/cpos.cuh +++ b/cudax/include/cuda/experimental/__async/sender/cpos.cuh @@ -110,8 +110,8 @@ _CCCL_GLOBAL_CONSTANT struct set_error_t _CCCL_GLOBAL_CONSTANT struct set_stopped_t { template - _CUDAX_TRIVIAL_API auto - operator()(_Rcvr&& __rcvr) const noexcept -> decltype(static_cast<_Rcvr&&>(__rcvr).set_stopped()) + _CUDAX_TRIVIAL_API auto operator()(_Rcvr&& __rcvr) const noexcept + -> decltype(static_cast<_Rcvr&&>(__rcvr).set_stopped()) { static_assert(_CUDA_VSTD::is_same_v(__rcvr).set_stopped()), void>); static_assert(noexcept(static_cast<_Rcvr&&>(__rcvr).set_stopped())); @@ -119,8 +119,8 @@ _CCCL_GLOBAL_CONSTANT struct set_stopped_t } template - _CUDAX_TRIVIAL_API auto - operator()(_Rcvr* __rcvr) const noexcept -> decltype(static_cast<_Rcvr&&>(*__rcvr).set_stopped()) + _CUDAX_TRIVIAL_API auto operator()(_Rcvr* __rcvr) const noexcept + -> decltype(static_cast<_Rcvr&&>(*__rcvr).set_stopped()) { static_assert(_CUDA_VSTD::is_same_v(*__rcvr).set_stopped()), void>); static_assert(noexcept(static_cast<_Rcvr&&>(*__rcvr).set_stopped())); diff --git a/cudax/include/cuda/experimental/__async/sender/let_value.cuh b/cudax/include/cuda/experimental/__async/sender/let_value.cuh index 7d06e071fe0..6742a1c1d6c 100644 --- a/cudax/include/cuda/experimental/__async/sender/let_value.cuh +++ b/cudax/include/cuda/experimental/__async/sender/let_value.cuh @@ -243,8 +243,9 @@ private: _Sndr __sndr_; template - _CUDAX_API auto connect(_Rcvr __rcvr) && noexcept( - __nothrow_constructible<__opstate_t<_Rcvr, _Sndr, _Fn>, _Sndr, _Fn, _Rcvr>) -> __opstate_t<_Rcvr, _Sndr, _Fn> + _CUDAX_API auto + connect(_Rcvr __rcvr) && noexcept(__nothrow_constructible<__opstate_t<_Rcvr, _Sndr, _Fn>, _Sndr, _Fn, _Rcvr>) + -> __opstate_t<_Rcvr, _Sndr, _Fn> { return __opstate_t<_Rcvr, _Sndr, _Fn>( static_cast<_Sndr&&>(__sndr_), static_cast<_Fn&&>(__fn_), static_cast<_Rcvr&&>(__rcvr)); diff --git a/cudax/include/cuda/experimental/__async/sender/stop_token.cuh b/cudax/include/cuda/experimental/__async/sender/stop_token.cuh index 35e6d4d164a..693816dbb45 100644 --- a/cudax/include/cuda/experimental/__async/sender/stop_token.cuh +++ b/cudax/include/cuda/experimental/__async/sender/stop_token.cuh @@ -369,8 +369,8 @@ _CUDAX_API inline void inplace_stop_source::__unlock(uint8_t __old_state) const (void) __state_.store(__old_state, _CUDA_VSTD::memory_order_release); } -_CUDAX_API inline auto -inplace_stop_source::__try_lock_unless_stop_requested(bool __set_stop_requested) const noexcept -> bool +_CUDAX_API inline auto inplace_stop_source::__try_lock_unless_stop_requested(bool __set_stop_requested) const noexcept + -> bool { __stok::__spin_wait __spin; auto __old_state = __state_.load(_CUDA_VSTD::memory_order_relaxed); diff --git a/cudax/include/cuda/experimental/__async/sender/tuple.cuh b/cudax/include/cuda/experimental/__async/sender/tuple.cuh index 98a1d0997f1..0229ed8b9c7 100644 --- a/cudax/include/cuda/experimental/__async/sender/tuple.cuh +++ b/cudax/include/cuda/experimental/__async/sender/tuple.cuh @@ -65,8 +65,8 @@ struct __tupl<_CUDA_VSTD::index_sequence<_Idx...>, _Ts...> : __box<_Idx, _Ts>... template _CUDAX_TRIVIAL_API static auto __for_each(_Fn&& __fn, _Self&& __self, _Us&&... __us) // - noexcept((__nothrow_callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> - && ...)) -> _CUDA_VSTD::enable_if_t<(__callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> && ...)> + noexcept((__nothrow_callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> && ...)) + -> _CUDA_VSTD::enable_if_t<(__callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> && ...)> { return ( static_cast<_Fn&&>(__fn)(static_cast<_Us&&>(__us)..., static_cast<_Self&&>(__self).__box<_Idx, _Ts>::__value_), diff --git a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh index 8a42bab40ca..0e1dceff19b 100644 --- a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh +++ b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh @@ -80,8 +80,8 @@ struct __with_property template struct __iproperty : interface<__iproperty> { - _CUDAX_HOST_API friend auto - get_property([[maybe_unused]] const __iproperty& __obj, _Property) -> __property_result_t<_Property> + _CUDAX_HOST_API friend auto get_property([[maybe_unused]] const __iproperty& __obj, _Property) + -> __property_result_t<_Property> { if constexpr (!_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>) { @@ -268,8 +268,8 @@ template struct __with_try_get_property { template - _CUDAX_HOST_API _CCCL_NODISCARD_FRIEND auto - try_get_property(const _Derived& __self, _Property) noexcept -> __try_property_result_t<_Property> + _CUDAX_HOST_API _CCCL_NODISCARD_FRIEND auto try_get_property(const _Derived& __self, _Property) noexcept + -> __try_property_result_t<_Property> { auto __prop = __cudax::dynamic_any_cast*>(&__self); if constexpr (_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>) diff --git a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh index 5b64dbc531d..bd481b3dea2 100644 --- a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh +++ b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh @@ -50,8 +50,8 @@ _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto basic_any_from(_Interface<_Super>& } template