diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 7dd411ba39b..d317e931e78 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
       - id: mixed-line-ending
       - id: trailing-whitespace
   - repo: https://github.com/pre-commit/mirrors-clang-format
-    rev: v18.1.8
+    rev: v19.1.6
     hooks:
       - id: clang-format
         types_or: [file]
@@ -39,7 +39,7 @@ repos:
   # TODO/REMINDER: add the Ruff vscode extension to the devcontainers
   # Ruff, the Python auto-correcting linter/formatter written in Rust
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.8.3
+    rev: v0.8.6
     hooks:
     - id: ruff  # linter
     - id: ruff-format  # formatter
@@ -57,7 +57,7 @@ repos:
 
 
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: 'v1.13.0'
+    rev: 'v1.14.1'
     hooks:
       - id: mypy
         additional_dependencies: [types-cachetools, numpy]
diff --git a/cub/cub/agent/agent_histogram.cuh b/cub/cub/agent/agent_histogram.cuh
index 21a487828ca..e454dc837b1 100644
--- a/cub/cub/agent/agent_histogram.cuh
+++ b/cub/cub/agent/agent_histogram.cuh
@@ -629,7 +629,7 @@ struct AgentHistogram
 
     // Set valid flags
     MarkValid<IS_FULL_TILE>(
-      is_valid, valid_samples, Int2Type<AgentHistogramPolicyT::LOAD_ALGORITHM == BLOCK_LOAD_STRIPED>{});
+      is_valid, valid_samples, Int2Type < AgentHistogramPolicyT::LOAD_ALGORITHM == BLOCK_LOAD_STRIPED > {});
 
     // Accumulate samples
     if (prefer_smem)
diff --git a/cub/cub/agent/agent_reduce.cuh b/cub/cub/agent/agent_reduce.cuh
index 2e0d94b219c..d5e3514f369 100644
--- a/cub/cub/agent/agent_reduce.cuh
+++ b/cub/cub/agent/agent_reduce.cuh
@@ -382,8 +382,8 @@ struct AgentReduce
     even_share.template BlockInit<TILE_ITEMS>(block_offset, block_end);
 
     return (IsAligned(d_in + block_offset, Int2Type<ATTEMPT_VECTORIZATION>()))
-           ? ConsumeRange(even_share, Int2Type < true && ATTEMPT_VECTORIZATION > ())
-           : ConsumeRange(even_share, Int2Type < false && ATTEMPT_VECTORIZATION > ());
+           ? ConsumeRange(even_share, Int2Type<true && ATTEMPT_VECTORIZATION>())
+           : ConsumeRange(even_share, Int2Type<false && ATTEMPT_VECTORIZATION>());
   }
 
   /**
@@ -396,8 +396,8 @@ struct AgentReduce
     even_share.template BlockInit<TILE_ITEMS, GRID_MAPPING_STRIP_MINE>();
 
     return (IsAligned(d_in, Int2Type<ATTEMPT_VECTORIZATION>()))
-           ? ConsumeRange(even_share, Int2Type < true && ATTEMPT_VECTORIZATION > ())
-           : ConsumeRange(even_share, Int2Type < false && ATTEMPT_VECTORIZATION > ());
+           ? ConsumeRange(even_share, Int2Type<true && ATTEMPT_VECTORIZATION>())
+           : ConsumeRange(even_share, Int2Type<false && ATTEMPT_VECTORIZATION>());
   }
 
 private:
diff --git a/cub/cub/block/block_radix_rank.cuh b/cub/cub/block/block_radix_rank.cuh
index 490abb86bda..92605b5168d 100644
--- a/cub/cub/block/block_radix_rank.cuh
+++ b/cub/cub/block/block_radix_rank.cuh
@@ -606,8 +606,7 @@ private:
     {
       volatile DigitCounterT warp_digit_counters[RADIX_DIGITS][PADDED_WARPS];
       DigitCounterT raking_grid[BLOCK_THREADS][PADDED_RAKING_SEGMENT];
-    }
-    aliasable;
+    } aliasable;
   };
 #endif // !_CCCL_DOXYGEN_INVOKED
 
diff --git a/cub/cub/detail/strong_load.cuh b/cub/cub/detail/strong_load.cuh
index 61693d808e2..b6ba4bb5fc8 100644
--- a/cub/cub/detail/strong_load.cuh
+++ b/cub/cub/detail/strong_load.cuh
@@ -59,14 +59,14 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint4 load_relaxed(uint4 const* ptr)
   uint4 retval;
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("ld.relaxed.gpu.v4.u32 {%0, %1, %2, %3}, [%4];"
-                  : "=r"(retval.x), "=r"(retval.y), "=r"(retval.z), "=r"(retval.w)
-                  : "l"(ptr)
-                  : "memory");),
-    (asm volatile("ld.cg.v4.u32 {%0, %1, %2, %3}, [%4];"
-                  : "=r"(retval.x), "=r"(retval.y), "=r"(retval.z), "=r"(retval.w)
-                  : "l"(ptr)
-                  : "memory");));
+    (asm volatile("ld.relaxed.gpu.v4.u32 {%0, %1, %2, %3}, [%4];" : "=r"(retval.x),
+                  "=r"(retval.y),
+                  "=r"(retval.z),
+                  "=r"(retval.w) : "l"(ptr) : "memory");),
+    (asm volatile("ld.cg.v4.u32 {%0, %1, %2, %3}, [%4];" : "=r"(retval.x),
+                  "=r"(retval.y),
+                  "=r"(retval.z),
+                  "=r"(retval.w) : "l"(ptr) : "memory");));
   return retval;
 }
 
@@ -75,14 +75,8 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE ulonglong2 load_relaxed(ulonglong2 const*
   ulonglong2 retval;
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("ld.relaxed.gpu.v2.u64 {%0, %1}, [%2];"
-                  : "=l"(retval.x), "=l"(retval.y)
-                  : "l"(ptr)
-                  : "memory");),
-    (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];"
-                  : "=l"(retval.x), "=l"(retval.y)
-                  : "l"(ptr)
-                  : "memory");));
+    (asm volatile("ld.relaxed.gpu.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory");),
+    (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory");));
   return retval;
 }
 
@@ -91,14 +85,14 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE ushort4 load_relaxed(ushort4 const* ptr)
   ushort4 retval;
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("ld.relaxed.gpu.v4.u16 {%0, %1, %2, %3}, [%4];"
-                  : "=h"(retval.x), "=h"(retval.y), "=h"(retval.z), "=h"(retval.w)
-                  : "l"(ptr)
-                  : "memory");),
-    (asm volatile("ld.cg.v4.u16 {%0, %1, %2, %3}, [%4];"
-                  : "=h"(retval.x), "=h"(retval.y), "=h"(retval.z), "=h"(retval.w)
-                  : "l"(ptr)
-                  : "memory");));
+    (asm volatile("ld.relaxed.gpu.v4.u16 {%0, %1, %2, %3}, [%4];" : "=h"(retval.x),
+                  "=h"(retval.y),
+                  "=h"(retval.z),
+                  "=h"(retval.w) : "l"(ptr) : "memory");),
+    (asm volatile("ld.cg.v4.u16 {%0, %1, %2, %3}, [%4];" : "=h"(retval.x),
+                  "=h"(retval.y),
+                  "=h"(retval.z),
+                  "=h"(retval.w) : "l"(ptr) : "memory");));
   return retval;
 }
 
@@ -107,46 +101,26 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint2 load_relaxed(uint2 const* ptr)
   uint2 retval;
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("ld.relaxed.gpu.v2.u32 {%0, %1}, [%2];"
-                  : "=r"(retval.x), "=r"(retval.y)
-                  : "l"(ptr)
-                  : "memory");),
-    (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];"
-                  : "=r"(retval.x), "=r"(retval.y)
-                  : "l"(ptr)
-                  : "memory");));
+    (asm volatile("ld.relaxed.gpu.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory");),
+    (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory");));
   return retval;
 }
 
 static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned long long load_relaxed(unsigned long long const* ptr)
 {
   unsigned long long retval;
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("ld.relaxed.gpu.u64 %0, [%1];"
-                  : "=l"(retval)
-                  : "l"(ptr)
-                  : "memory");),
-    (asm volatile("ld.cg.u64 %0, [%1];"
-                  : "=l"(retval)
-                  : "l"(ptr)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("ld.relaxed.gpu.u64 %0, [%1];" : "=l"(retval) : "l"(ptr) : "memory");),
+               (asm volatile("ld.cg.u64 %0, [%1];" : "=l"(retval) : "l"(ptr) : "memory");));
   return retval;
 }
 
 static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int load_relaxed(unsigned int const* ptr)
 {
   unsigned int retval;
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("ld.relaxed.gpu.u32 %0, [%1];"
-                  : "=r"(retval)
-                  : "l"(ptr)
-                  : "memory");),
-    (asm volatile("ld.cg.u32 %0, [%1];"
-                  : "=r"(retval)
-                  : "l"(ptr)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("ld.relaxed.gpu.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory");),
+               (asm volatile("ld.cg.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory");));
 
   return retval;
 }
@@ -154,16 +128,9 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int load_relaxed(unsigned int con
 static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned short load_relaxed(unsigned short const* ptr)
 {
   unsigned short retval;
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("ld.relaxed.gpu.u16 %0, [%1];"
-                  : "=h"(retval)
-                  : "l"(ptr)
-                  : "memory");),
-    (asm volatile("ld.cg.u16 %0, [%1];"
-                  : "=h"(retval)
-                  : "l"(ptr)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("ld.relaxed.gpu.u16 %0, [%1];" : "=h"(retval) : "l"(ptr) : "memory");),
+               (asm volatile("ld.cg.u16 %0, [%1];" : "=h"(retval) : "l"(ptr) : "memory");));
   return retval;
 }
 
@@ -172,24 +139,16 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned char load_relaxed(unsigned char c
   unsigned short retval;
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile(
-       "{"
-       "  .reg .u8 datum;"
-       "  ld.relaxed.gpu.u8 datum, [%1];"
-       "  cvt.u16.u8 %0, datum;"
-       "}"
-       : "=h"(retval)
-       : "l"(ptr)
-       : "memory");),
-    (asm volatile(
-       "{"
-       "  .reg .u8 datum;"
-       "  ld.cg.u8 datum, [%1];"
-       "  cvt.u16.u8 %0, datum;"
-       "}"
-       : "=h"(retval)
-       : "l"(ptr)
-       : "memory");));
+    (asm volatile("{"
+                  "  .reg .u8 datum;"
+                  "  ld.relaxed.gpu.u8 datum, [%1];"
+                  "  cvt.u16.u8 %0, datum;"
+                  "}" : "=h"(retval) : "l"(ptr) : "memory");),
+    (asm volatile("{"
+                  "  .reg .u8 datum;"
+                  "  ld.cg.u8 datum, [%1];"
+                  "  cvt.u16.u8 %0, datum;"
+                  "}" : "=h"(retval) : "l"(ptr) : "memory");));
   return (unsigned char) retval;
 }
 
@@ -198,14 +157,8 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE ulonglong2 load_acquire(ulonglong2 const*
   ulonglong2 retval;
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("ld.acquire.gpu.v2.u64 {%0, %1}, [%2];"
-                  : "=l"(retval.x), "=l"(retval.y)
-                  : "l"(ptr)
-                  : "memory");),
-    (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];"
-                  : "=l"(retval.x), "=l"(retval.y)
-                  : "l"(ptr)
-                  : "memory");
+    (asm volatile("ld.acquire.gpu.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory");),
+    (asm volatile("ld.cg.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l"(ptr) : "memory");
      __threadfence();));
   return retval;
 }
@@ -215,14 +168,8 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint2 load_acquire(uint2 const* ptr)
   uint2 retval;
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("ld.acquire.gpu.v2.u32 {%0, %1}, [%2];"
-                  : "=r"(retval.x), "=r"(retval.y)
-                  : "l"(ptr)
-                  : "memory");),
-    (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];"
-                  : "=r"(retval.x), "=r"(retval.y)
-                  : "l"(ptr)
-                  : "memory");
+    (asm volatile("ld.acquire.gpu.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory");),
+    (asm volatile("ld.cg.v2.u32 {%0, %1}, [%2];" : "=r"(retval.x), "=r"(retval.y) : "l"(ptr) : "memory");
      __threadfence();));
   return retval;
 }
@@ -230,17 +177,9 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE uint2 load_acquire(uint2 const* ptr)
 static _CCCL_DEVICE _CCCL_FORCEINLINE unsigned int load_acquire(unsigned int const* ptr)
 {
   unsigned int retval;
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("ld.acquire.gpu.u32 %0, [%1];"
-                  : "=r"(retval)
-                  : "l"(ptr)
-                  : "memory");),
-    (asm volatile("ld.cg.u32 %0, [%1];"
-                  : "=r"(retval)
-                  : "l"(ptr)
-                  : "memory");
-     __threadfence();));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("ld.acquire.gpu.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory");),
+               (asm volatile("ld.cg.u32 %0, [%1];" : "=r"(retval) : "l"(ptr) : "memory"); __threadfence();));
 
   return retval;
 }
diff --git a/cub/cub/detail/strong_store.cuh b/cub/cub/detail/strong_store.cuh
index 9b8091738db..cc0e8f60e71 100644
--- a/cub/cub/detail/strong_store.cuh
+++ b/cub/cub/detail/strong_store.cuh
@@ -56,98 +56,61 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(uint4* ptr, uint4 val)
 {
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("st.relaxed.gpu.v4.u32 [%0], {%1, %2, %3, %4};"
-                  :
-                  : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w)
-                  : "memory");),
-    (asm volatile("st.cg.v4.u32 [%0], {%1, %2, %3, %4};"
-                  :
-                  : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w)
-                  : "memory");));
+    (asm volatile("st.relaxed.gpu.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr),
+                  "r"(val.x),
+                  "r"(val.y),
+                  "r"(val.z),
+                  "r"(val.w) : "memory");),
+    (asm volatile(
+       "st.cg.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) : "memory");));
 }
 
 static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(ulonglong2* ptr, ulonglong2 val)
 {
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("st.relaxed.gpu.v2.u64 [%0], {%1, %2};"
-                  :
-                  : "l"(ptr), "l"(val.x), "l"(val.y)
-                  : "memory");),
-    (asm volatile("st.cg.v2.u64 [%0], {%1, %2};"
-                  :
-                  : "l"(ptr), "l"(val.x), "l"(val.y)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("st.relaxed.gpu.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");),
+               (asm volatile("st.cg.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");));
 }
 
 static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(ushort4* ptr, ushort4 val)
 {
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("st.relaxed.gpu.v4.u16 [%0], {%1, %2, %3, %4};"
-                  :
-                  : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w)
-                  : "memory");),
-    (asm volatile("st.cg.v4.u16 [%0], {%1, %2, %3, %4};"
-                  :
-                  : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w)
-                  : "memory");));
+    (asm volatile("st.relaxed.gpu.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr),
+                  "h"(val.x),
+                  "h"(val.y),
+                  "h"(val.z),
+                  "h"(val.w) : "memory");),
+    (asm volatile(
+       "st.cg.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) : "memory");));
 }
 
 static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(uint2* ptr, uint2 val)
 {
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("st.relaxed.gpu.v2.u32 [%0], {%1, %2};"
-                  :
-                  : "l"(ptr), "r"(val.x), "r"(val.y)
-                  : "memory");),
-    (asm volatile("st.cg.v2.u32 [%0], {%1, %2};"
-                  :
-                  : "l"(ptr), "r"(val.x), "r"(val.y)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("st.relaxed.gpu.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");),
+               (asm volatile("st.cg.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");));
 }
 
 static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned long long* ptr, unsigned long long val)
 {
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("st.relaxed.gpu.u64 [%0], %1;"
-                  :
-                  : "l"(ptr), "l"(val)
-                  : "memory");),
-    (asm volatile("st.cg.u64 [%0], %1;"
-                  :
-                  : "l"(ptr), "l"(val)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("st.relaxed.gpu.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");),
+               (asm volatile("st.cg.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");));
 }
 
 static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned int* ptr, unsigned int val)
 {
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("st.relaxed.gpu.u32 [%0], %1;"
-                  :
-                  : "l"(ptr), "r"(val)
-                  : "memory");),
-    (asm volatile("st.cg.u32 [%0], %1;"
-                  :
-                  : "l"(ptr), "r"(val)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("st.relaxed.gpu.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");),
+               (asm volatile("st.cg.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");));
 }
 
 static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned short* ptr, unsigned short val)
 {
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("st.relaxed.gpu.u16 [%0], %1;"
-                  :
-                  : "l"(ptr), "h"(val)
-                  : "memory");),
-    (asm volatile("st.cg.u16 [%0], %1;"
-                  :
-                  : "l"(ptr), "h"(val)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("st.relaxed.gpu.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");),
+               (asm volatile("st.cg.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");));
 }
 
 static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned char* ptr, unsigned char val)
@@ -158,123 +121,77 @@ static _CCCL_DEVICE _CCCL_FORCEINLINE void store_relaxed(unsigned char* ptr, uns
                   "  .reg .u8 datum;"
                   "  cvt.u8.u16 datum, %1;"
                   "  st.relaxed.gpu.u8 [%0], datum;"
-                  "}"
-                  :
-                  : "l"(ptr), "h"((unsigned short) val)
-                  : "memory");),
+                  "}" : : "l"(ptr),
+                  "h"((unsigned short) val) : "memory");),
     (asm volatile("{"
                   "  .reg .u8 datum;"
                   "  cvt.u8.u16 datum, %1;"
                   "  st.cg.u8 [%0], datum;"
-                  "}"
-                  :
-                  : "l"(ptr), "h"((unsigned short) val)
-                  : "memory");));
+                  "}" : : "l"(ptr),
+                  "h"((unsigned short) val) : "memory");));
 }
 
 _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(uint4* ptr, uint4 val)
 {
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("st.release.gpu.v4.u32 [%0], {%1, %2, %3, %4};"
-                  :
-                  : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w)
-                  : "memory");),
-    (__threadfence();
-     asm volatile("st.cg.v4.u32 [%0], {%1, %2, %3, %4};"
-                  :
-                  : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w)
-                  : "memory");));
+    (asm volatile("st.release.gpu.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr),
+                  "r"(val.x),
+                  "r"(val.y),
+                  "r"(val.z),
+                  "r"(val.w) : "memory");),
+    (__threadfence(); asm volatile(
+       "st.cg.v4.u32 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "r"(val.x), "r"(val.y), "r"(val.z), "r"(val.w) : "memory");));
 }
 
 _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(ulonglong2* ptr, ulonglong2 val)
 {
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("st.release.gpu.v2.u64 [%0], {%1, %2};"
-                  :
-                  : "l"(ptr), "l"(val.x), "l"(val.y)
-                  : "memory");),
-    (__threadfence();
-     asm volatile("st.cg.v2.u64 [%0], {%1, %2};"
-                  :
-                  : "l"(ptr), "l"(val.x), "l"(val.y)
-                  : "memory");));
+    (asm volatile("st.release.gpu.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");),
+    (__threadfence(); asm volatile("st.cg.v2.u64 [%0], {%1, %2};" : : "l"(ptr), "l"(val.x), "l"(val.y) : "memory");));
 }
 
 _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(ushort4* ptr, ushort4 val)
 {
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("st.release.gpu.v4.u16 [%0], {%1, %2, %3, %4};"
-                  :
-                  : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w)
-                  : "memory");),
-    (__threadfence();
-     asm volatile("st.cg.v4.u16 [%0], {%1, %2, %3, %4};"
-                  :
-                  : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w)
-                  : "memory");));
+    (asm volatile("st.release.gpu.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr),
+                  "h"(val.x),
+                  "h"(val.y),
+                  "h"(val.z),
+                  "h"(val.w) : "memory");),
+    (__threadfence(); asm volatile(
+       "st.cg.v4.u16 [%0], {%1, %2, %3, %4};" : : "l"(ptr), "h"(val.x), "h"(val.y), "h"(val.z), "h"(val.w) : "memory");));
 }
 
 _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(uint2* ptr, uint2 val)
 {
   NV_IF_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("st.release.gpu.v2.u32 [%0], {%1, %2};"
-                  :
-                  : "l"(ptr), "r"(val.x), "r"(val.y)
-                  : "memory");),
-    (__threadfence();
-     asm volatile("st.cg.v2.u32 [%0], {%1, %2};"
-                  :
-                  : "l"(ptr), "r"(val.x), "r"(val.y)
-                  : "memory");));
+    (asm volatile("st.release.gpu.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");),
+    (__threadfence(); asm volatile("st.cg.v2.u32 [%0], {%1, %2};" : : "l"(ptr), "r"(val.x), "r"(val.y) : "memory");));
 }
 
 _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned long long* ptr, unsigned long long val)
 {
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("st.release.gpu.u64 [%0], %1;"
-                  :
-                  : "l"(ptr), "l"(val)
-                  : "memory");),
-    (__threadfence();
-     asm volatile("st.cg.u64 [%0], %1;"
-                  :
-                  : "l"(ptr), "l"(val)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("st.release.gpu.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");),
+               (__threadfence(); asm volatile("st.cg.u64 [%0], %1;" : : "l"(ptr), "l"(val) : "memory");));
 }
 
 _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned int* ptr, unsigned int val)
 {
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("st.release.gpu.u32 [%0], %1;"
-                  :
-                  : "l"(ptr), "r"(val)
-                  : "memory");),
-    (__threadfence();
-     asm volatile("st.cg.u32 [%0], %1;"
-                  :
-                  : "l"(ptr), "r"(val)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("st.release.gpu.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");),
+               (__threadfence(); asm volatile("st.cg.u32 [%0], %1;" : : "l"(ptr), "r"(val) : "memory");));
 }
 
 _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned short* ptr, unsigned short val)
 {
-  NV_IF_TARGET(
-    NV_PROVIDES_SM_70,
-    (asm volatile("st.release.gpu.u16 [%0], %1;"
-                  :
-                  : "l"(ptr), "h"(val)
-                  : "memory");),
-    (__threadfence();
-     asm volatile("st.cg.u16 [%0], %1;"
-                  :
-                  : "l"(ptr), "h"(val)
-                  : "memory");));
+  NV_IF_TARGET(NV_PROVIDES_SM_70,
+               (asm volatile("st.release.gpu.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");),
+               (__threadfence(); asm volatile("st.cg.u16 [%0], %1;" : : "l"(ptr), "h"(val) : "memory");));
 }
 
 _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned char* ptr, unsigned char val)
@@ -285,19 +202,15 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void store_release(unsigned char* ptr, unsigned c
                   "  .reg .u8 datum;"
                   "  cvt.u8.u16 datum, %1;"
                   "  st.release.gpu.u8 [%0], datum;"
-                  "}"
-                  :
-                  : "l"(ptr), "h"((unsigned short) val)
-                  : "memory");),
+                  "}" : : "l"(ptr),
+                  "h"((unsigned short) val) : "memory");),
     (__threadfence(); asm volatile(
        "{"
        "  .reg .u8 datum;"
        "  cvt.u8.u16 datum, %1;"
        "  st.cg.u8 [%0], datum;"
-       "}"
-       :
-       : "l"(ptr), "h"((unsigned short) val)
-       : "memory");));
+       "}" : : "l"(ptr),
+       "h"((unsigned short) val) : "memory");));
 }
 
 } // namespace detail
diff --git a/cub/cub/device/dispatch/dispatch_transform.cuh b/cub/cub/device/dispatch/dispatch_transform.cuh
index 386a6276dfa..fa4fa80d0ef 100644
--- a/cub/cub/device/dispatch/dispatch_transform.cuh
+++ b/cub/cub/device/dispatch/dispatch_transform.cuh
@@ -169,11 +169,10 @@ _CCCL_DEVICE _CCCL_FORCEINLINE auto poor_apply_impl(F&& f, Tuple&& t, ::cuda::st
 }
 
 template <class F, class Tuple>
-_CCCL_DEVICE _CCCL_FORCEINLINE auto poor_apply(F&& f, Tuple&& t)
-  -> decltype(poor_apply_impl(
-    ::cuda::std::forward<F>(f),
-    ::cuda::std::forward<Tuple>(t),
-    ::cuda::std::make_index_sequence<::cuda::std::tuple_size<::cuda::std::remove_reference_t<Tuple>>::value>{}))
+_CCCL_DEVICE _CCCL_FORCEINLINE auto poor_apply(F&& f, Tuple&& t) -> decltype(poor_apply_impl(
+  ::cuda::std::forward<F>(f),
+  ::cuda::std::forward<Tuple>(t),
+  ::cuda::std::make_index_sequence<::cuda::std::tuple_size<::cuda::std::remove_reference_t<Tuple>>::value>{}))
 {
   return poor_apply_impl(
     ::cuda::std::forward<F>(f),
@@ -473,8 +472,9 @@ using needs_aligned_ptr_t =
 
 #ifdef _CUB_HAS_TRANSFORM_UBLKCP
 template <Algorithm Alg, typename It, ::cuda::std::enable_if_t<needs_aligned_ptr_t<Alg>::value, int> = 0>
-_CCCL_DEVICE _CCCL_FORCEINLINE auto select_kernel_arg(
-  ::cuda::std::integral_constant<Algorithm, Alg>, kernel_arg<It>&& arg) -> aligned_base_ptr<value_t<It>>&&
+_CCCL_DEVICE _CCCL_FORCEINLINE auto
+select_kernel_arg(::cuda::std::integral_constant<Algorithm, Alg>, kernel_arg<It>&& arg)
+  -> aligned_base_ptr<value_t<It>>&&
 {
   return ::cuda::std::move(arg.aligned_ptr);
 }
@@ -660,10 +660,9 @@ struct dispatch_t<RequiresStableAddress,
   // TODO(bgruber): I want to write tests for this but those are highly depending on the architecture we are running
   // on?
   template <typename ActivePolicy>
-  CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE auto configure_ublkcp_kernel()
-    -> PoorExpected<
-      ::cuda::std::
-        tuple<THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron, decltype(CUB_DETAIL_TRANSFORM_KERNEL_PTR), int>>
+  CUB_RUNTIME_FUNCTION _CCCL_VISIBILITY_HIDDEN _CCCL_FORCEINLINE auto configure_ublkcp_kernel() -> PoorExpected<
+    ::cuda::std::
+      tuple<THRUST_NS_QUALIFIER::cuda_cub::launcher::triple_chevron, decltype(CUB_DETAIL_TRANSFORM_KERNEL_PTR), int>>
   {
     using policy_t          = typename ActivePolicy::algo_policy;
     constexpr int block_dim = policy_t::block_threads;
diff --git a/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh b/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh
index c6894ccbc86..3645e4b9ed7 100644
--- a/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh
+++ b/cub/cub/device/dispatch/tuning/tuning_three_way_partition.cuh
@@ -255,9 +255,8 @@ struct policy_hub
                                     typename Tuning::delay_constructor>;
 
   template <typename Tuning>
-  static auto select_agent_policy(long) ->
-    typename DefaultPolicy<
-      default_delay_constructor_t<typename accumulator_pack_t<OffsetT>::pack_t>>::ThreeWayPartitionPolicy;
+  static auto select_agent_policy(long) -> typename DefaultPolicy<
+    default_delay_constructor_t<typename accumulator_pack_t<OffsetT>::pack_t>>::ThreeWayPartitionPolicy;
 
   struct Policy800 : ChainedPolicy<800, Policy800, Policy350>
   {
diff --git a/cub/cub/thread/thread_operators.cuh b/cub/cub/thread/thread_operators.cuh
index 7af32df392c..feef89776a9 100644
--- a/cub/cub/thread/thread_operators.cuh
+++ b/cub/cub/thread/thread_operators.cuh
@@ -391,8 +391,8 @@ struct CCCL_DEPRECATED BinaryFlip
   {}
 
   template <typename T, typename U>
-  _CCCL_DEVICE auto
-  operator()(T&& t, U&& u) -> decltype(binary_op(::cuda::std::forward<U>(u), ::cuda::std::forward<T>(t)))
+  _CCCL_DEVICE auto operator()(T&& t, U&& u)
+    -> decltype(binary_op(::cuda::std::forward<U>(u), ::cuda::std::forward<T>(t)))
   {
     return binary_op(::cuda::std::forward<U>(u), ::cuda::std::forward<T>(t));
   }
diff --git a/cub/cub/thread/thread_reduce.cuh b/cub/cub/thread/thread_reduce.cuh
index 294bc449e31..d3850051ca7 100644
--- a/cub/cub/thread/thread_reduce.cuh
+++ b/cub/cub/thread/thread_reduce.cuh
@@ -543,8 +543,8 @@ ThreadReduceTernaryTree(const Input& input, ReductionOp reduction_op)
 // never reached. Protect instantion of ThreadReduceSimd with arbitrary types and operators
 _CCCL_TEMPLATE(typename Input, typename ReductionOp)
 _CCCL_REQUIRES((!cub::internal::enable_generic_simd_reduction<Input, ReductionOp>()))
-_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto
-ThreadReduceSimd(const Input& input, ReductionOp) -> ::cuda::std::remove_cvref_t<decltype(input[0])>
+_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto ThreadReduceSimd(const Input& input, ReductionOp)
+  -> ::cuda::std::remove_cvref_t<decltype(input[0])>
 {
   assert(false);
   return input[0];
@@ -552,8 +552,8 @@ ThreadReduceSimd(const Input& input, ReductionOp) -> ::cuda::std::remove_cvref_t
 
 _CCCL_TEMPLATE(typename Input, typename ReductionOp)
 _CCCL_REQUIRES((cub::internal::enable_generic_simd_reduction<Input, ReductionOp>()))
-_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto
-ThreadReduceSimd(const Input& input, ReductionOp reduction_op) -> ::cuda::std::remove_cvref_t<decltype(input[0])>
+_CCCL_NODISCARD _CCCL_DEVICE _CCCL_FORCEINLINE auto ThreadReduceSimd(const Input& input, ReductionOp reduction_op)
+  -> ::cuda::std::remove_cvref_t<decltype(input[0])>
 {
   using cub::detail::unsafe_bitcast;
   using T                       = ::cuda::std::remove_cvref_t<decltype(input[0])>;
diff --git a/cub/test/catch2_test_device_for_each_in_extents.cu b/cub/test/catch2_test_device_for_each_in_extents.cu
index 8ad75a1d0cb..3e5a6c6689a 100644
--- a/cub/test/catch2_test_device_for_each_in_extents.cu
+++ b/cub/test/catch2_test_device_for_each_in_extents.cu
@@ -135,8 +135,8 @@ using dimensions =
                  cuda::std::index_sequence<3, 2, 5, 4>>;
 
 template <typename IndexType, size_t... Dimensions>
-auto build_static_extents(IndexType,
-                          cuda::std::index_sequence<Dimensions...>) -> cuda::std::extents<IndexType, Dimensions...>
+auto build_static_extents(IndexType, cuda::std::index_sequence<Dimensions...>)
+  -> cuda::std::extents<IndexType, Dimensions...>
 {
   return {};
 }
diff --git a/cub/test/catch2_test_device_transform.cu b/cub/test/catch2_test_device_transform.cu
index 06f2b7c31a7..95c4794b8cf 100644
--- a/cub/test/catch2_test_device_transform.cu
+++ b/cub/test/catch2_test_device_transform.cu
@@ -166,8 +166,8 @@ struct alignas(Alignment) overaligned_addable_t
     return a.value == b.value;
   }
 
-  _CCCL_HOST_DEVICE friend auto
-  operator+(const overaligned_addable_t& a, const overaligned_addable_t& b) -> overaligned_addable_t
+  _CCCL_HOST_DEVICE friend auto operator+(const overaligned_addable_t& a, const overaligned_addable_t& b)
+    -> overaligned_addable_t
   {
     check(a);
     check(b);
diff --git a/cub/test/test_block_radix_rank.cu b/cub/test/test_block_radix_rank.cu
index 8c1df1a80c7..c53c6b179e3 100644
--- a/cub/test/test_block_radix_rank.cu
+++ b/cub/test/test_block_radix_rank.cu
@@ -310,7 +310,7 @@ void Test()
   Test<cub::RadixRankAlgorithm::RADIX_RANK_BASIC, BlockThreads>();
   Test<cub::RadixRankAlgorithm::RADIX_RANK_MEMOIZE, BlockThreads>();
 
-  Test<BlockThreads>(cub::Int2Type<(BlockThreads % 32) == 0>{});
+  Test<BlockThreads>(cub::Int2Type < (BlockThreads % 32) == 0 > {});
 }
 
 int main(int argc, char** argv)
diff --git a/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh b/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh
index 459beddee22..ae8ad239d46 100644
--- a/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh
+++ b/cudax/include/cuda/experimental/__async/sender/basic_sender.cuh
@@ -60,8 +60,8 @@ struct receiver_defaults
   }
 
   template <class _Rcvr>
-  _CUDAX_TRIVIAL_API static auto
-  set_stopped(__ignore, _Rcvr& __rcvr) noexcept -> __async::completion_signatures<__async::set_stopped_t()>
+  _CUDAX_TRIVIAL_API static auto set_stopped(__ignore, _Rcvr& __rcvr) noexcept
+    -> __async::completion_signatures<__async::set_stopped_t()>
   {
     __async::set_stopped(static_cast<_Rcvr&&>(__rcvr));
     return {};
@@ -198,15 +198,15 @@ _CUDAX_TRIVIAL_API auto __make_opstate(_Sndr __sndr, _Rcvr __rcvr)
 }
 
 template <class _Data, class... _Sndrs>
-_CUDAX_TRIVIAL_API auto
-__get_attrs(int, const _Data& __data, const _Sndrs&... __sndrs) noexcept -> decltype(__data.get_attrs(__sndrs...))
+_CUDAX_TRIVIAL_API auto __get_attrs(int, const _Data& __data, const _Sndrs&... __sndrs) noexcept
+  -> decltype(__data.get_attrs(__sndrs...))
 {
   return __data.get_attrs(__sndrs...);
 }
 
 template <class _Data, class... _Sndrs>
-_CUDAX_TRIVIAL_API auto
-__get_attrs(long, const _Data&, const _Sndrs&... __sndrs) noexcept -> decltype(__async::get_env(__sndrs...))
+_CUDAX_TRIVIAL_API auto __get_attrs(long, const _Data&, const _Sndrs&... __sndrs) noexcept
+  -> decltype(__async::get_env(__sndrs...))
 {
   return __async::get_env(__sndrs...);
 }
diff --git a/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh b/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh
index 25d5ef04d76..868c911b1da 100644
--- a/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh
+++ b/cudax/include/cuda/experimental/__async/sender/completion_signatures.cuh
@@ -76,48 +76,36 @@ template <class _Sig, template <class...> class _Vy, template <class...> class _
 using __transform_sig_t = decltype(__transform_sig<_Sig, _Vy, _Ey, _Sy>());
 
 template <class _Sigs,
-          template <class...>
-          class _Vy,
-          template <class...>
-          class _Ey,
+          template <class...> class _Vy,
+          template <class...> class _Ey,
           class _Sy,
-          template <class...>
-          class _Variant,
+          template <class...> class _Variant,
           class... _More>
 extern _DIAGNOSTIC<_Sigs> __transform_completion_signatures_v;
 
 template <class... _What,
-          template <class...>
-          class _Vy,
-          template <class...>
-          class _Ey,
+          template <class...> class _Vy,
+          template <class...> class _Ey,
           class _Sy,
-          template <class...>
-          class _Variant,
+          template <class...> class _Variant,
           class... _More>
 extern __fn_t<_ERROR<_What...>>*
   __transform_completion_signatures_v<_ERROR<_What...>, _Vy, _Ey, _Sy, _Variant, _More...>;
 
 template <class... _Sigs,
-          template <class...>
-          class _Vy,
-          template <class...>
-          class _Ey,
+          template <class...> class _Vy,
+          template <class...> class _Ey,
           class _Sy,
-          template <class...>
-          class _Variant,
+          template <class...> class _Variant,
           class... _More>
 extern __fn_t<_Variant<__transform_sig_t<_Sigs, _Vy, _Ey, _Sy>..., _More...>>*
   __transform_completion_signatures_v<completion_signatures<_Sigs...>, _Vy, _Ey, _Sy, _Variant, _More...>;
 
 template <class _Sigs,
-          template <class...>
-          class _Vy,
-          template <class...>
-          class _Ey,
+          template <class...> class _Vy,
+          template <class...> class _Ey,
           class _Sy,
-          template <class...>
-          class _Variant,
+          template <class...> class _Variant,
           class... _More>
 using __transform_completion_signatures =
   decltype(__transform_completion_signatures_v<_Sigs, _Vy, _Ey, _Sy, _Variant, _More...>());
@@ -129,12 +117,9 @@ template <>
 struct __gather_sigs_fn<set_value_t>
 {
   template <class _Sigs,
-            template <class...>
-            class _Then,
-            template <class...>
-            class _Else,
-            template <class...>
-            class _Variant,
+            template <class...> class _Then,
+            template <class...> class _Else,
+            template <class...> class _Variant,
             class... _More>
   using __call = __transform_completion_signatures<
     _Sigs,
@@ -149,12 +134,9 @@ template <>
 struct __gather_sigs_fn<set_error_t>
 {
   template <class _Sigs,
-            template <class...>
-            class _Then,
-            template <class...>
-            class _Else,
-            template <class...>
-            class _Variant,
+            template <class...> class _Then,
+            template <class...> class _Else,
+            template <class...> class _Variant,
             class... _More>
   using __call = __transform_completion_signatures<
     _Sigs,
@@ -169,12 +151,9 @@ template <>
 struct __gather_sigs_fn<set_stopped_t>
 {
   template <class _Sigs,
-            template <class...>
-            class _Then,
-            template <class...>
-            class _Else,
-            template <class...>
-            class _Variant,
+            template <class...> class _Then,
+            template <class...> class _Else,
+            template <class...> class _Variant,
             class... _More>
   using __call = __transform_completion_signatures<
     _Sigs,
@@ -187,12 +166,9 @@ struct __gather_sigs_fn<set_stopped_t>
 
 template <class _Sigs,
           class _WantedTag,
-          template <class...>
-          class _Then,
-          template <class...>
-          class _Else,
-          template <class...>
-          class _Variant,
+          template <class...> class _Then,
+          template <class...> class _Else,
+          template <class...> class _Variant,
           class... _More>
 using __gather_completion_signatures =
   typename __gather_sigs_fn<_WantedTag>::template __call<_Sigs, _Then, _Else, _Variant, _More...>;
@@ -404,13 +380,12 @@ template <class _Tag, class... _Args>
 auto completion(_Tag, _Args&&...) -> __csig::__sigs<_Tag(_Args...)>&;
 
 template <class _Sndr, class _Rcvr = receiver_archetype>
-auto completions_of(_Sndr&&,
-                    _Rcvr = {}) -> decltype(__csig::__to_sigs(__declval<completion_signatures_of_t<_Sndr, _Rcvr>&>()));
+auto completions_of(_Sndr&&, _Rcvr = {})
+  -> decltype(__csig::__to_sigs(__declval<completion_signatures_of_t<_Sndr, _Rcvr>&>()));
 
 template <bool _PotentiallyThrowing>
-auto eptr_completion_if()
-  -> _CUDA_VSTD::
-    conditional_t<_PotentiallyThrowing, __csig::__sigs<set_error_t(::std::exception_ptr)>, __csig::__sigs<>>&;
+auto eptr_completion_if() -> _CUDA_VSTD::
+  conditional_t<_PotentiallyThrowing, __csig::__sigs<set_error_t(::std::exception_ptr)>, __csig::__sigs<>>&;
 } // namespace meta
 } // namespace cuda::experimental::__async
 
diff --git a/cudax/include/cuda/experimental/__async/sender/continue_on.cuh b/cudax/include/cuda/experimental/__async/sender/continue_on.cuh
index 9a0c142e21c..8da87a443a3 100644
--- a/cudax/include/cuda/experimental/__async/sender/continue_on.cuh
+++ b/cudax/include/cuda/experimental/__async/sender/continue_on.cuh
@@ -267,8 +267,8 @@ struct continue_on_t::__sndr_t
 };
 
 template <class _Sndr, class _Sch>
-_CUDAX_API auto
-continue_on_t::operator()(_Sndr __sndr, _Sch __sch) const noexcept -> continue_on_t::__sndr_t<_Sndr, _Sch>
+_CUDAX_API auto continue_on_t::operator()(_Sndr __sndr, _Sch __sch) const noexcept
+  -> continue_on_t::__sndr_t<_Sndr, _Sch>
 {
   return __sndr_t<_Sndr, _Sch>{{}, __sch, static_cast<_Sndr&&>(__sndr)};
 }
diff --git a/cudax/include/cuda/experimental/__async/sender/cpos.cuh b/cudax/include/cuda/experimental/__async/sender/cpos.cuh
index 7f1fb383a71..dab62e7ac10 100644
--- a/cudax/include/cuda/experimental/__async/sender/cpos.cuh
+++ b/cudax/include/cuda/experimental/__async/sender/cpos.cuh
@@ -110,8 +110,8 @@ _CCCL_GLOBAL_CONSTANT struct set_error_t
 _CCCL_GLOBAL_CONSTANT struct set_stopped_t
 {
   template <class _Rcvr>
-  _CUDAX_TRIVIAL_API auto
-  operator()(_Rcvr&& __rcvr) const noexcept -> decltype(static_cast<_Rcvr&&>(__rcvr).set_stopped())
+  _CUDAX_TRIVIAL_API auto operator()(_Rcvr&& __rcvr) const noexcept
+    -> decltype(static_cast<_Rcvr&&>(__rcvr).set_stopped())
   {
     static_assert(_CUDA_VSTD::is_same_v<decltype(static_cast<_Rcvr&&>(__rcvr).set_stopped()), void>);
     static_assert(noexcept(static_cast<_Rcvr&&>(__rcvr).set_stopped()));
@@ -119,8 +119,8 @@ _CCCL_GLOBAL_CONSTANT struct set_stopped_t
   }
 
   template <class _Rcvr>
-  _CUDAX_TRIVIAL_API auto
-  operator()(_Rcvr* __rcvr) const noexcept -> decltype(static_cast<_Rcvr&&>(*__rcvr).set_stopped())
+  _CUDAX_TRIVIAL_API auto operator()(_Rcvr* __rcvr) const noexcept
+    -> decltype(static_cast<_Rcvr&&>(*__rcvr).set_stopped())
   {
     static_assert(_CUDA_VSTD::is_same_v<decltype(static_cast<_Rcvr&&>(*__rcvr).set_stopped()), void>);
     static_assert(noexcept(static_cast<_Rcvr&&>(*__rcvr).set_stopped()));
diff --git a/cudax/include/cuda/experimental/__async/sender/let_value.cuh b/cudax/include/cuda/experimental/__async/sender/let_value.cuh
index 7d06e071fe0..6742a1c1d6c 100644
--- a/cudax/include/cuda/experimental/__async/sender/let_value.cuh
+++ b/cudax/include/cuda/experimental/__async/sender/let_value.cuh
@@ -243,8 +243,9 @@ private:
     _Sndr __sndr_;
 
     template <class _Rcvr>
-    _CUDAX_API auto connect(_Rcvr __rcvr) && noexcept(
-      __nothrow_constructible<__opstate_t<_Rcvr, _Sndr, _Fn>, _Sndr, _Fn, _Rcvr>) -> __opstate_t<_Rcvr, _Sndr, _Fn>
+    _CUDAX_API auto
+    connect(_Rcvr __rcvr) && noexcept(__nothrow_constructible<__opstate_t<_Rcvr, _Sndr, _Fn>, _Sndr, _Fn, _Rcvr>)
+      -> __opstate_t<_Rcvr, _Sndr, _Fn>
     {
       return __opstate_t<_Rcvr, _Sndr, _Fn>(
         static_cast<_Sndr&&>(__sndr_), static_cast<_Fn&&>(__fn_), static_cast<_Rcvr&&>(__rcvr));
diff --git a/cudax/include/cuda/experimental/__async/sender/stop_token.cuh b/cudax/include/cuda/experimental/__async/sender/stop_token.cuh
index 35e6d4d164a..693816dbb45 100644
--- a/cudax/include/cuda/experimental/__async/sender/stop_token.cuh
+++ b/cudax/include/cuda/experimental/__async/sender/stop_token.cuh
@@ -369,8 +369,8 @@ _CUDAX_API inline void inplace_stop_source::__unlock(uint8_t __old_state) const
   (void) __state_.store(__old_state, _CUDA_VSTD::memory_order_release);
 }
 
-_CUDAX_API inline auto
-inplace_stop_source::__try_lock_unless_stop_requested(bool __set_stop_requested) const noexcept -> bool
+_CUDAX_API inline auto inplace_stop_source::__try_lock_unless_stop_requested(bool __set_stop_requested) const noexcept
+  -> bool
 {
   __stok::__spin_wait __spin;
   auto __old_state = __state_.load(_CUDA_VSTD::memory_order_relaxed);
diff --git a/cudax/include/cuda/experimental/__async/sender/tuple.cuh b/cudax/include/cuda/experimental/__async/sender/tuple.cuh
index 98a1d0997f1..0229ed8b9c7 100644
--- a/cudax/include/cuda/experimental/__async/sender/tuple.cuh
+++ b/cudax/include/cuda/experimental/__async/sender/tuple.cuh
@@ -65,8 +65,8 @@ struct __tupl<_CUDA_VSTD::index_sequence<_Idx...>, _Ts...> : __box<_Idx, _Ts>...
 
   template <class _Fn, class _Self, class... _Us>
   _CUDAX_TRIVIAL_API static auto __for_each(_Fn&& __fn, _Self&& __self, _Us&&... __us) //
-    noexcept((__nothrow_callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>>
-              && ...)) -> _CUDA_VSTD::enable_if_t<(__callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> && ...)>
+    noexcept((__nothrow_callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> && ...))
+      -> _CUDA_VSTD::enable_if_t<(__callable<_Fn, _Us..., __copy_cvref_t<_Self, _Ts>> && ...)>
   {
     return (
       static_cast<_Fn&&>(__fn)(static_cast<_Us&&>(__us)..., static_cast<_Self&&>(__self).__box<_Idx, _Ts>::__value_),
diff --git a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh
index 8a42bab40ca..0e1dceff19b 100644
--- a/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh
+++ b/cudax/include/cuda/experimental/__memory_resource/any_resource.cuh
@@ -80,8 +80,8 @@ struct __with_property
   template <class...>
   struct __iproperty : interface<__iproperty>
   {
-    _CUDAX_HOST_API friend auto
-    get_property([[maybe_unused]] const __iproperty& __obj, _Property) -> __property_result_t<_Property>
+    _CUDAX_HOST_API friend auto get_property([[maybe_unused]] const __iproperty& __obj, _Property)
+      -> __property_result_t<_Property>
     {
       if constexpr (!_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>)
       {
@@ -268,8 +268,8 @@ template <class _Derived>
 struct __with_try_get_property
 {
   template <class _Property>
-  _CUDAX_HOST_API _CCCL_NODISCARD_FRIEND auto
-  try_get_property(const _Derived& __self, _Property) noexcept -> __try_property_result_t<_Property>
+  _CUDAX_HOST_API _CCCL_NODISCARD_FRIEND auto try_get_property(const _Derived& __self, _Property) noexcept
+    -> __try_property_result_t<_Property>
   {
     auto __prop = __cudax::dynamic_any_cast<const __iproperty<_Property>*>(&__self);
     if constexpr (_CUDA_VSTD::is_same_v<__property_result_t<_Property>, void>)
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh
index 5b64dbc531d..bd481b3dea2 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_from.cuh
@@ -50,8 +50,8 @@ _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto basic_any_from(_Interface<_Super>&
 }
 
 template <template <class...> class _Interface, class _Super>
-_CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto
-basic_any_from(_Interface<_Super> const& __self) noexcept -> basic_any<_Super> const&
+_CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto basic_any_from(_Interface<_Super> const& __self) noexcept
+  -> basic_any<_Super> const&
 {
   return static_cast<basic_any<_Super> const&>(__self);
 }
@@ -72,8 +72,8 @@ _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto basic_any_from(_Interface<_Super>*
 }
 
 template <template <class...> class _Interface, class _Super>
-_CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto
-basic_any_from(_Interface<_Super> const* __self) noexcept -> basic_any<_Super> const*
+_CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto basic_any_from(_Interface<_Super> const* __self) noexcept
+  -> basic_any<_Super> const*
 {
   return static_cast<basic_any<_Super> const*>(__self);
 }
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_ptr.cuh b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_ptr.cuh
index 8c9e67e757d..03e05648dae 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/basic_any_ptr.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/basic_any_ptr.cuh
@@ -169,8 +169,8 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT basic_any<_Interface*>
   //!
   _CCCL_TEMPLATE(class _Tp, class _Up = _CUDA_VSTD::remove_pointer_t<_Tp>, class _Vp = _CUDA_VSTD::remove_const_t<_Up>)
   _CCCL_REQUIRES(__satisfies<_Vp, _Interface> _CCCL_AND(__is_const_ptr || !_CUDA_VSTD::is_const_v<_Up>))
-  _CUDAX_HOST_API auto
-  emplace(_CUDA_VSTD::type_identity_t<_Up>* __obj) noexcept -> _CUDA_VSTD::__maybe_const<__is_const_ptr, _Vp>*&
+  _CUDAX_HOST_API auto emplace(_CUDA_VSTD::type_identity_t<_Up>* __obj) noexcept
+    -> _CUDA_VSTD::__maybe_const<__is_const_ptr, _Vp>*&
   {
     __vptr_for<interface_type> __vptr = __cudax::__get_vtable_ptr_for<interface_type, _Vp>();
     __ref_.__set_ref(__obj ? __vptr : nullptr, __obj);
@@ -184,7 +184,8 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT basic_any<_Interface*>
     return *static_cast<__void_ptr_t>(__get_optr()) == *static_cast<__void_ptr_t>(__other.__get_optr());
   }
 #else // ^^^ !_CCCL_NO_THREE_WAY_COMPARISON ^^^ / vvv _CCCL_NO_THREE_WAY_COMPARISON vvv
-  _CCCL_NODISCARD_FRIEND _CUDAX_HOST_API auto operator==(basic_any const& __lhs, basic_any const& __rhs) noexcept -> bool
+  _CCCL_NODISCARD_FRIEND _CUDAX_HOST_API auto operator==(basic_any const& __lhs, basic_any const& __rhs) noexcept
+    -> bool
   {
     using __void_ptr_t _CCCL_NODEBUG_ALIAS = _CUDA_VSTD::__maybe_const<__is_const_ptr, void>* const*;
     return *static_cast<__void_ptr_t>(__lhs.__get_optr()) == *static_cast<__void_ptr_t>(__rhs.__get_optr());
@@ -271,8 +272,8 @@ private:
     return &__ref_.__optr_;
   }
 
-  _CCCL_NODISCARD _CUDAX_HOST_API auto
-  __get_optr() const noexcept -> _CUDA_VSTD::__maybe_const<__is_const_ptr, void>* const*
+  _CCCL_NODISCARD _CUDAX_HOST_API auto __get_optr() const noexcept
+    -> _CUDA_VSTD::__maybe_const<__is_const_ptr, void>* const*
   {
     return &__ref_.__optr_;
   }
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh b/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh
index 435e43ee699..fcd05a6600b 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/interfaces.cuh
@@ -318,22 +318,22 @@ template <template <class...> class _Interface>
 struct __interface_cast_fn<_Interface<>>
 {
   template <class _Super>
-  _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto
-  operator()(_Interface<_Super>&& __self) const noexcept -> _Interface<_Super>&&
+  _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto operator()(_Interface<_Super>&& __self) const noexcept
+    -> _Interface<_Super>&&
   {
     return _CUDA_VSTD::move(__self);
   }
 
   template <class _Super>
-  _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto
-  operator()(_Interface<_Super>& __self) const noexcept -> _Interface<_Super>&
+  _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto operator()(_Interface<_Super>& __self) const noexcept
+    -> _Interface<_Super>&
   {
     return __self;
   }
 
   template <class _Super>
-  _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto
-  operator()(_Interface<_Super> const& __self) noexcept -> _Interface<_Super> const&
+  _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API auto operator()(_Interface<_Super> const& __self) noexcept
+    -> _Interface<_Super> const&
   {
     return __self;
   }
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/iset.cuh b/cudax/include/cuda/experimental/__utility/basic_any/iset.cuh
index d16fdc43fd7..419be0e7660 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/iset.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/iset.cuh
@@ -102,8 +102,8 @@ struct __iset_vptr : __base_vptr
   }
 
   template <class _Interface>
-  _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API constexpr auto
-  __query_interface(_Interface) const noexcept -> __vptr_for<_Interface>
+  _CCCL_NODISCARD _CUDAX_TRIVIAL_HOST_API constexpr auto __query_interface(_Interface) const noexcept
+    -> __vptr_for<_Interface>
   {
     if (__vptr_->__kind_ == __vtable_kind::__normal)
     {
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/rtti.cuh b/cudax/include/cuda/experimental/__utility/basic_any/rtti.cuh
index cda9a72e789..470c7c84228 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/rtti.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/rtti.cuh
@@ -137,8 +137,8 @@ struct __rtti : __rtti_base
   {}
 
   template <class... _Interfaces>
-  _CCCL_NODISCARD _CUDAX_HOST_API auto
-  __query_interface(__iset<_Interfaces...>) const noexcept -> __vptr_for<__iset<_Interfaces...>>
+  _CCCL_NODISCARD _CUDAX_HOST_API auto __query_interface(__iset<_Interfaces...>) const noexcept
+    -> __vptr_for<__iset<_Interfaces...>>
   {
     // TODO: find a way to check at runtime that the requested __iset is a subset
     // of the interfaces in the vtable.
@@ -201,8 +201,8 @@ struct __rtti_ex : __rtti
 //! interfaces.
 //!
 template <class _SrcInterface, class _DstInterface>
-_CCCL_NODISCARD _CUDAX_HOST_API auto
-__try_vptr_cast(__vptr_for<_SrcInterface> __src_vptr) noexcept -> __vptr_for<_DstInterface>
+_CCCL_NODISCARD _CUDAX_HOST_API auto __try_vptr_cast(__vptr_for<_SrcInterface> __src_vptr) noexcept
+  -> __vptr_for<_DstInterface>
 {
   static_assert(_CUDA_VSTD::is_class_v<_SrcInterface> && _CUDA_VSTD::is_class_v<_DstInterface>, "expected class types");
   if (__src_vptr == nullptr)
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/virtual_ptrs.cuh b/cudax/include/cuda/experimental/__utility/basic_any/virtual_ptrs.cuh
index 1d41a05d05f..902477040ab 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/virtual_ptrs.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/virtual_ptrs.cuh
@@ -61,12 +61,14 @@ struct __base_vptr
 #if !defined(_CCCL_NO_THREE_WAY_COMPARISON)
   bool operator==(__base_vptr const& __other) const noexcept = default;
 #else // ^^^ !_CCCL_NO_THREE_WAY_COMPARISON ^^^ / vvv _CCCL_NO_THREE_WAY_COMPARISON vvv
-  _CCCL_NODISCARD_FRIEND _CUDAX_HOST_API constexpr auto operator==(__base_vptr __lhs, __base_vptr __rhs) noexcept -> bool
+  _CCCL_NODISCARD_FRIEND _CUDAX_HOST_API constexpr auto operator==(__base_vptr __lhs, __base_vptr __rhs) noexcept
+    -> bool
   {
     return __lhs.__vptr_ == __rhs.__vptr_;
   }
 
-  _CCCL_NODISCARD_FRIEND _CUDAX_HOST_API constexpr auto operator!=(__base_vptr __lhs, __base_vptr __rhs) noexcept -> bool
+  _CCCL_NODISCARD_FRIEND _CUDAX_HOST_API constexpr auto operator!=(__base_vptr __lhs, __base_vptr __rhs) noexcept
+    -> bool
   {
     return !(__lhs == __rhs);
   }
diff --git a/cudax/include/cuda/experimental/__utility/basic_any/virtual_tables.cuh b/cudax/include/cuda/experimental/__utility/basic_any/virtual_tables.cuh
index a673ecd7746..289be9d8112 100644
--- a/cudax/include/cuda/experimental/__utility/basic_any/virtual_tables.cuh
+++ b/cudax/include/cuda/experimental/__utility/basic_any/virtual_tables.cuh
@@ -71,8 +71,8 @@ struct _CCCL_DECLSPEC_EMPTY_BASES __basic_vtable
   }
 
   template <class... _Others>
-  _CCCL_NODISCARD _CUDAX_HOST_API auto
-  __query_interface(__iset<_Others...>) const noexcept -> __vptr_for<__iset<_Others...>>
+  _CCCL_NODISCARD _CUDAX_HOST_API auto __query_interface(__iset<_Others...>) const noexcept
+    -> __vptr_for<__iset<_Others...>>
   {
     using __remainder _CCCL_NODEBUG_ALIAS =
       _CUDA_VSTD::__type_list_size<_CUDA_VSTD::__type_find<__unique_interfaces<interface>, __iset<_Others...>>>;
diff --git a/cudax/test/stf/error_checks/ctx_mismatch.cu b/cudax/test/stf/error_checks/ctx_mismatch.cu
index c04d589c367..6e44900393c 100644
--- a/cudax/test/stf/error_checks/ctx_mismatch.cu
+++ b/cudax/test/stf/error_checks/ctx_mismatch.cu
@@ -56,8 +56,7 @@ int main()
 #if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/cudax/test/stf/error_checks/data_interface_mismatch.cu b/cudax/test/stf/error_checks/data_interface_mismatch.cu
index ea2ada7e633..400b913fa10 100644
--- a/cudax/test/stf/error_checks/data_interface_mismatch.cu
+++ b/cudax/test/stf/error_checks/data_interface_mismatch.cu
@@ -66,8 +66,7 @@ int main()
 #if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/cudax/test/stf/error_checks/double_finalize.cu b/cudax/test/stf/error_checks/double_finalize.cu
index 37913ca6e36..cae7cecfc50 100644
--- a/cudax/test/stf/error_checks/double_finalize.cu
+++ b/cudax/test/stf/error_checks/double_finalize.cu
@@ -42,8 +42,7 @@ int main()
 #  if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #  else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/cudax/test/stf/error_checks/erase_frozen.cu b/cudax/test/stf/error_checks/erase_frozen.cu
index 624dfb062f8..eaec786bf96 100644
--- a/cudax/test/stf/error_checks/erase_frozen.cu
+++ b/cudax/test/stf/error_checks/erase_frozen.cu
@@ -43,8 +43,7 @@ int main()
 #if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/cudax/test/stf/error_checks/misformed_tasks_dbl_end.cu b/cudax/test/stf/error_checks/misformed_tasks_dbl_end.cu
index fa28e5467e0..4b04ae3a182 100644
--- a/cudax/test/stf/error_checks/misformed_tasks_dbl_end.cu
+++ b/cudax/test/stf/error_checks/misformed_tasks_dbl_end.cu
@@ -42,8 +42,7 @@ int main()
 #  if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #  else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/cudax/test/stf/error_checks/misformed_tasks_dbl_start.cu b/cudax/test/stf/error_checks/misformed_tasks_dbl_start.cu
index b35cb99457f..84d3e33518f 100644
--- a/cudax/test/stf/error_checks/misformed_tasks_dbl_start.cu
+++ b/cudax/test/stf/error_checks/misformed_tasks_dbl_start.cu
@@ -40,8 +40,7 @@ int main()
 #if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/cudax/test/stf/error_checks/non_managed_data.cu b/cudax/test/stf/error_checks/non_managed_data.cu
index a1188c7750f..6ac0121470d 100644
--- a/cudax/test/stf/error_checks/non_managed_data.cu
+++ b/cudax/test/stf/error_checks/non_managed_data.cu
@@ -44,8 +44,7 @@ int main()
 #  if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #  else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/cudax/test/stf/error_checks/slice_check_bounds.cu b/cudax/test/stf/error_checks/slice_check_bounds.cu
index fecea9e7a55..a6b77c6bcc5 100644
--- a/cudax/test/stf/error_checks/slice_check_bounds.cu
+++ b/cudax/test/stf/error_checks/slice_check_bounds.cu
@@ -51,8 +51,7 @@ int main()
 #  if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #  else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/cudax/test/stf/error_checks/uninitialized_data.cu b/cudax/test/stf/error_checks/uninitialized_data.cu
index 6af57556ad5..cf30b023bfb 100644
--- a/cudax/test/stf/error_checks/uninitialized_data.cu
+++ b/cudax/test/stf/error_checks/uninitialized_data.cu
@@ -42,8 +42,7 @@ int main()
 #  if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #  else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/cudax/test/stf/error_checks/unsatisfiable_spec.cu b/cudax/test/stf/error_checks/unsatisfiable_spec.cu
index a0e4277979c..041b535fe61 100644
--- a/cudax/test/stf/error_checks/unsatisfiable_spec.cu
+++ b/cudax/test/stf/error_checks/unsatisfiable_spec.cu
@@ -42,8 +42,7 @@ int main()
 #  if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #  else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/cudax/test/stf/error_checks/write_frozen.cu b/cudax/test/stf/error_checks/write_frozen.cu
index b4e08642a5e..011f4afd88e 100644
--- a/cudax/test/stf/error_checks/write_frozen.cu
+++ b/cudax/test/stf/error_checks/write_frozen.cu
@@ -43,8 +43,7 @@ int main()
 #if _CCCL_COMPILER(MSVC)
   signal(SIGABRT, &cleanupRoutine);
 #else // ^^^ _CCCL_COMPILER(MSVC) ^^^ / vvv !_CCCL_COMPILER(MSVC)
-  struct sigaction sigabrt_action
-  {};
+  struct sigaction sigabrt_action{};
   memset(&sigabrt_action, 0, sizeof(sigabrt_action));
   sigabrt_action.sa_handler = &cleanupRoutine;
 
diff --git a/libcudacxx/include/cuda/__barrier/barrier_block_scope.h b/libcudacxx/include/cuda/__barrier/barrier_block_scope.h
index 13027dfc581..af38ab97c8b 100644
--- a/libcudacxx/include/cuda/__barrier/barrier_block_scope.h
+++ b/libcudacxx/include/cuda/__barrier/barrier_block_scope.h
@@ -157,8 +157,7 @@ class barrier<thread_scope_block, _CUDA_VSTD::__empty_completion> : public __blo
         int __inc              = __popc(__active) * __update;
 
         unsigned __laneid;
-        asm("mov.u32 %0, %%laneid;"
-            : "=r"(__laneid));
+        asm("mov.u32 %0, %%laneid;" : "=r"(__laneid));
         int __leader = __ffs(__active) - 1;
         // All threads in mask synchronize here, establishing cummulativity to the __leader:
         __syncwarp(__mask);
@@ -181,10 +180,8 @@ class barrier<thread_scope_block, _CUDA_VSTD::__empty_completion> : public __blo
                     ".reg .pred p;\n\t"
                     "mbarrier.test_wait.shared.b64 p, [%1], %2;\n\t"
                     "selp.b32 %0, 1, 0, p;\n\t"
-                    "}"
-                    : "=r"(__ready)
-                    : "r"(static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(&__barrier))), "l"(__token)
-                    : "memory");))
+                    "}" : "=r"(__ready) : "r"(static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(&__barrier))),
+                    "l"(__token) : "memory");))
     return __ready;
   }
 
@@ -203,10 +200,9 @@ class barrier<thread_scope_block, _CUDA_VSTD::__empty_completion> : public __blo
                        ".reg .pred p;\n\t"
                        "mbarrier.try_wait.shared.b64 p, [%1], %2;\n\t"
                        "selp.b32 %0, 1, 0, p;\n\t"
-                       "}"
-                       : "=r"(__ready)
-                       : "r"(static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(&__barrier))), "l"(__token)
-                       : "memory");
+                       "}" : "=r"(__ready) : "r"(
+                         static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(&__barrier))),
+                       "l"(__token) : "memory");
         return __ready;),
       NV_PROVIDES_SM_80,
       (if (!__isShared(&__barrier)) {
@@ -278,15 +274,12 @@ class barrier<thread_scope_block, _CUDA_VSTD::__empty_completion> : public __blo
     uint16_t __ready = 0;
     NV_DISPATCH_TARGET(
       NV_PROVIDES_SM_80,
-      (asm volatile(
-         "{"
-         ".reg .pred %%p;"
-         "mbarrier.test_wait.parity.shared.b64 %%p, [%1], %2;"
-         "selp.u16 %0, 1, 0, %%p;"
-         "}"
-         : "=h"(__ready)
-         : "r"(static_cast<uint32_t>(__cvta_generic_to_shared(&__barrier))), "r"(static_cast<uint32_t>(__phase_parity))
-         : "memory");))
+      (asm volatile("{"
+                    ".reg .pred %%p;"
+                    "mbarrier.test_wait.parity.shared.b64 %%p, [%1], %2;"
+                    "selp.u16 %0, 1, 0, %%p;"
+                    "}" : "=h"(__ready) : "r"(static_cast<uint32_t>(__cvta_generic_to_shared(&__barrier))),
+                    "r"(static_cast<uint32_t>(__phase_parity)) : "memory");))
     return __ready;
   }
 
@@ -299,16 +292,12 @@ class barrier<thread_scope_block, _CUDA_VSTD::__empty_completion> : public __blo
           return _CUDA_VSTD::__call_try_wait_parity(__barrier, __phase_parity);
         } else if (!__isShared(&__barrier)) { __trap(); } int32_t __ready = 0;
 
-        asm volatile(
-          "{\n\t"
-          ".reg .pred p;\n\t"
-          "mbarrier.try_wait.parity.shared.b64 p, [%1], %2;\n\t"
-          "selp.b32 %0, 1, 0, p;\n\t"
-          "}"
-          : "=r"(__ready)
-          : "r"(static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(&__barrier))),
-            "r"(static_cast<_CUDA_VSTD::uint32_t>(__phase_parity))
-          :);
+        asm volatile("{\n\t"
+                     ".reg .pred p;\n\t"
+                     "mbarrier.try_wait.parity.shared.b64 p, [%1], %2;\n\t"
+                     "selp.b32 %0, 1, 0, p;\n\t"
+                     "}" : "=r"(__ready) : "r"(static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(&__barrier))),
+                     "r"(static_cast<_CUDA_VSTD::uint32_t>(__phase_parity)) :);
 
         return __ready;),
       NV_PROVIDES_SM_80,
@@ -402,9 +391,8 @@ class barrier<thread_scope_block, _CUDA_VSTD::__empty_completion> : public __blo
           __trap();
         }
 
-        asm volatile("mbarrier.arrive_drop.shared.b64 _, [%0];" ::"r"(static_cast<_CUDA_VSTD::uint32_t>(
-          __cvta_generic_to_shared(&__barrier)))
-                     : "memory");),
+        asm volatile("mbarrier.arrive_drop.shared.b64 _, [%0];" ::"r"(
+          static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(&__barrier))) : "memory");),
       NV_PROVIDES_SM_80,
       (
         // Fallback to slowpath on device
@@ -413,9 +401,8 @@ class barrier<thread_scope_block, _CUDA_VSTD::__empty_completion> : public __blo
           return;
         }
 
-        asm volatile("mbarrier.arrive_drop.shared.b64 _, [%0];" ::"r"(static_cast<_CUDA_VSTD::uint32_t>(
-          __cvta_generic_to_shared(&__barrier)))
-                     : "memory");),
+        asm volatile("mbarrier.arrive_drop.shared.b64 _, [%0];" ::"r"(
+          static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(&__barrier))) : "memory");),
       NV_ANY_TARGET,
       (
         // Fallback to slowpath on device
diff --git a/libcudacxx/include/cuda/__barrier/barrier_expect_tx.h b/libcudacxx/include/cuda/__barrier/barrier_expect_tx.h
index 4d9f063512f..1b8fc49d400 100644
--- a/libcudacxx/include/cuda/__barrier/barrier_expect_tx.h
+++ b/libcudacxx/include/cuda/__barrier/barrier_expect_tx.h
@@ -54,11 +54,8 @@ barrier_expect_tx(barrier<thread_scope_block>& __b, _CUDA_VSTD::ptrdiff_t __tran
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (auto __bh = __cvta_generic_to_shared(barrier_native_handle(__b));
-     asm("mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;"
-         :
-         : "r"(static_cast<_CUDA_VSTD::uint32_t>(__bh)),
-           "r"(static_cast<_CUDA_VSTD::uint32_t>(__transaction_count_update))
-         : "memory");),
+     asm("mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" : : "r"(static_cast<_CUDA_VSTD::uint32_t>(__bh)),
+         "r"(static_cast<_CUDA_VSTD::uint32_t>(__transaction_count_update)) : "memory");),
     (__cuda_ptx_barrier_expect_tx_is_not_supported_before_SM_90__();));
 }
 
diff --git a/libcudacxx/include/cuda/__functional/address_stability.h b/libcudacxx/include/cuda/__functional/address_stability.h
index d3c88b8bfa4..3402c3cea0e 100644
--- a/libcudacxx/include/cuda/__functional/address_stability.h
+++ b/libcudacxx/include/cuda/__functional/address_stability.h
@@ -68,8 +68,8 @@ struct proclaims_copyable_arguments<__callable_permitting_copied_arguments<F>> :
 //! implementation.
 //! @see proclaims_copyable_arguments
 template <typename F>
-_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto
-proclaim_copyable_arguments(F&& f) -> __callable_permitting_copied_arguments<::cuda::std::decay_t<F>>
+_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto proclaim_copyable_arguments(F&& f)
+  -> __callable_permitting_copied_arguments<::cuda::std::decay_t<F>>
 {
   return {::cuda::std::forward<F>(f)};
 }
diff --git a/libcudacxx/include/cuda/__memcpy_async/cp_async_shared_global.h b/libcudacxx/include/cuda/__memcpy_async/cp_async_shared_global.h
index 867dad16111..07eb5c84c93 100644
--- a/libcudacxx/include/cuda/__memcpy_async/cp_async_shared_global.h
+++ b/libcudacxx/include/cuda/__memcpy_async/cp_async_shared_global.h
@@ -47,12 +47,10 @@ inline __device__ void __cp_async_shared_global(char* __dest, const char* __src)
 
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_80,
-    (asm volatile("cp.async.ca.shared.global [%0], [%1], %2, %2;"
-                  :
-                  : "r"(static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(__dest))),
-                    "l"(static_cast<_CUDA_VSTD::uint64_t>(__cvta_generic_to_global(__src))),
-                    "n"(_Copy_size)
-                  : "memory");),
+    (asm volatile("cp.async.ca.shared.global [%0], [%1], %2, %2;" : : "r"(
+                    static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(__dest))),
+                  "l"(static_cast<_CUDA_VSTD::uint64_t>(__cvta_generic_to_global(__src))),
+                  "n"(_Copy_size) : "memory");),
     (__cuda_ptx_cp_async_shared_global_is_not_supported_before_SM_80__();));
 }
 
@@ -63,12 +61,10 @@ inline __device__ void __cp_async_shared_global<16>(char* __dest, const char* __
   // When copying 16 bytes, it is possible to skip L1 cache (.cg).
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_80,
-    (asm volatile("cp.async.cg.shared.global [%0], [%1], %2, %2;"
-                  :
-                  : "r"(static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(__dest))),
-                    "l"(static_cast<_CUDA_VSTD::uint64_t>(__cvta_generic_to_global(__src))),
-                    "n"(16)
-                  : "memory");),
+    (asm volatile("cp.async.cg.shared.global [%0], [%1], %2, %2;" : : "r"(
+                    static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(__dest))),
+                  "l"(static_cast<_CUDA_VSTD::uint64_t>(__cvta_generic_to_global(__src))),
+                  "n"(16) : "memory");),
     (__cuda_ptx_cp_async_shared_global_is_not_supported_before_SM_80__();));
 }
 
diff --git a/libcudacxx/include/cuda/__memcpy_async/memcpy_completion.h b/libcudacxx/include/cuda/__memcpy_async/memcpy_completion.h
index 182357d591a..3dd3e91d125 100644
--- a/libcudacxx/include/cuda/__memcpy_async/memcpy_completion.h
+++ b/libcudacxx/include/cuda/__memcpy_async/memcpy_completion.h
@@ -76,9 +76,8 @@ struct __memcpy_completion_impl
             // have completed writing to shared memory.
             _CUDA_VSTD::uint64_t* __bh = __try_get_barrier_handle(__barrier);
 
-            asm volatile("cp.async.mbarrier.arrive.shared.b64 [%0];" ::"r"(static_cast<_CUDA_VSTD::uint32_t>(
-              __cvta_generic_to_shared(__bh)))
-                         : "memory");));
+            asm volatile("cp.async.mbarrier.arrive.shared.b64 [%0];" ::"r"(
+              static_cast<_CUDA_VSTD::uint32_t>(__cvta_generic_to_shared(__bh))) : "memory");));
         return async_contract_fulfillment::async;
       case __completion_mechanism::__async_bulk_group:
         // This completion mechanism should not be used with a shared
@@ -124,8 +123,7 @@ struct __memcpy_completion_impl
                      (
                        // Blocking: wait for all thread-local cp.async instructions to have
                        // completed writing to shared memory.
-                       asm volatile("cp.async.wait_all;" ::
-                                      : "memory");));
+                       asm volatile("cp.async.wait_all;" :: : "memory");));
         return async_contract_fulfillment::async;
       case __completion_mechanism::__mbarrier_complete_tx:
         // Non-smem barriers do not have an mbarrier_complete_tx mechanism..
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/barrier_cluster.h b/libcudacxx/include/cuda/__ptx/instructions/generated/barrier_cluster.h
index 10d55714c5b..c8ce41c0a20 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/barrier_cluster.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/barrier_cluster.h
@@ -16,10 +16,7 @@ _CCCL_DEVICE static inline void barrier_cluster_arrive()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm volatile("barrier.cluster.arrive;"
-                  :
-                  :
-                  : "memory");),
+    (asm volatile("barrier.cluster.arrive;" : : : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_barrier_cluster_arrive_is_not_supported_before_SM_90__();));
@@ -39,10 +36,7 @@ _CCCL_DEVICE static inline void barrier_cluster_wait()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm volatile("barrier.cluster.wait;"
-                  :
-                  :
-                  : "memory");),
+    (asm volatile("barrier.cluster.wait;" : : : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_barrier_cluster_wait_is_not_supported_before_SM_90__();));
@@ -65,10 +59,7 @@ _CCCL_DEVICE static inline void barrier_cluster_arrive(sem_release_t)
   // __sem == sem_release (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm volatile("barrier.cluster.arrive.release;"
-                  :
-                  :
-                  : "memory");),
+    (asm volatile("barrier.cluster.arrive.release;" : : : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_barrier_cluster_arrive_is_not_supported_before_SM_90__();));
@@ -91,10 +82,7 @@ _CCCL_DEVICE static inline void barrier_cluster_arrive(sem_relaxed_t)
   // __sem == sem_relaxed (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm volatile("barrier.cluster.arrive.relaxed;"
-                  :
-                  :
-                  :);),
+    (asm volatile("barrier.cluster.arrive.relaxed;" : : :);),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_barrier_cluster_arrive_is_not_supported_before_SM_90__();));
@@ -117,10 +105,7 @@ _CCCL_DEVICE static inline void barrier_cluster_wait(sem_acquire_t)
   // __sem == sem_acquire (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm volatile("barrier.cluster.wait.acquire;"
-                  :
-                  :
-                  : "memory");),
+    (asm volatile("barrier.cluster.wait.acquire;" : : : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_barrier_cluster_wait_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk.h b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk.h
index 8ba40d45f64..d2196402e7a 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk.h
@@ -32,10 +32,11 @@ _CCCL_DEVICE static inline void cp_async_bulk(
   // __space == space_global (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3]; // 1a. unicast"
-         :
-         : "r"(__as_ptr_smem(__dstMem)), "l"(__as_ptr_gmem(__srcMem)), "r"(__size), "r"(__as_ptr_smem(__smem_bar))
-         : "memory");),
+    (asm("cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3]; // "
+         "1a. unicast" : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__as_ptr_gmem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_smem(__smem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_is_not_supported_before_SM_90__();));
@@ -70,13 +71,11 @@ _CCCL_DEVICE static inline void cp_async_bulk(
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3]; // 2. "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+    (asm("cp.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3]; // 2. " : : "r"(
+           __as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_is_not_supported_before_SM_90__();));
@@ -105,10 +104,9 @@ cp_async_bulk(space_global_t, space_shared_t, void* __dstMem, const void* __srcM
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.async.bulk.global.shared::cta.bulk_group [%0], [%1], %2; // 3. "
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.async.bulk.global.shared::cta.bulk_group [%0], [%1], %2; // 3. " : : "l"(__as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h
index 7bb58675ddb..3c32743e977 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_commit_group.h
@@ -15,10 +15,7 @@ _CCCL_DEVICE static inline void cp_async_bulk_commit_group()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm volatile("cp.async.bulk.commit_group;"
-                  :
-                  :
-                  :);),
+    (asm volatile("cp.async.bulk.commit_group;" : : :);),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_commit_group_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h
index a5534ef0b48..f54bf8bbdeb 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_multicast.h
@@ -35,14 +35,11 @@ _CCCL_DEVICE static inline void cp_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
     (asm("cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes.multicast::cluster [%0], [%1], %2, [%3], "
-         "%4; // 1. "
-         :
-         : "r"(__as_ptr_smem(__dstMem)),
-           "l"(__as_ptr_gmem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_smem(__smem_bar)),
-           "h"(__ctaMask)
-         : "memory");),
+         "%4; // 1. " : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__as_ptr_gmem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_smem(__smem_bar)),
+         "h"(__ctaMask) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_is_not_supported_before_SM_90a__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h
index 3cbd26fda04..f7c60bb72f6 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor.h
@@ -33,10 +33,10 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%0], [%1, {%2}], [%3];// "
-         "1a."
-         :
-         : "r"(__as_ptr_smem(__dstMem)), "l"(__tensorMap), "r"(__tensorCoords[0]), "r"(__as_ptr_smem(__smem_bar))
-         : "memory");),
+         "1a." : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__as_ptr_smem(__smem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90__();));
@@ -69,10 +69,9 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.async.bulk.tensor.1d.global.shared::cta.tile.bulk_group [%0, {%1}], [%2]; // 3a."
-         :
-         : "l"(__tensorMap), "r"(__tensorCoords[0]), "r"(__as_ptr_smem(__srcMem))
-         : "memory");),
+    (asm("cp.async.bulk.tensor.1d.global.shared::cta.tile.bulk_group [%0, {%1}], [%2]; // 3a." : : "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__as_ptr_smem(__srcMem)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90__();));
@@ -109,14 +108,11 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%0], [%1, {%2, %3}], "
-         "[%4];// 1b."
-         :
-         : "r"(__as_ptr_smem(__dstMem)),
-           "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__as_ptr_smem(__smem_bar))
-         : "memory");),
+         "[%4];// 1b." : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__as_ptr_smem(__smem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90__();));
@@ -149,10 +145,10 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.async.bulk.tensor.2d.global.shared::cta.tile.bulk_group [%0, {%1, %2}], [%3]; // 3b."
-         :
-         : "l"(__tensorMap), "r"(__tensorCoords[0]), "r"(__tensorCoords[1]), "r"(__as_ptr_smem(__srcMem))
-         : "memory");),
+    (asm("cp.async.bulk.tensor.2d.global.shared::cta.tile.bulk_group [%0, {%1, %2}], [%3]; // 3b." : : "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__as_ptr_smem(__srcMem)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90__();));
@@ -189,15 +185,12 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%0], [%1, {%2, %3, %4}], "
-         "[%5];// 1c."
-         :
-         : "r"(__as_ptr_smem(__dstMem)),
-           "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__tensorCoords[2]),
-           "r"(__as_ptr_smem(__smem_bar))
-         : "memory");),
+         "[%5];// 1c." : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__tensorCoords[2]),
+         "r"(__as_ptr_smem(__smem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90__();));
@@ -230,14 +223,12 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.async.bulk.tensor.3d.global.shared::cta.tile.bulk_group [%0, {%1, %2, %3}], [%4]; // 3c."
-         :
-         : "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__tensorCoords[2]),
-           "r"(__as_ptr_smem(__srcMem))
-         : "memory");),
+    (asm("cp.async.bulk.tensor.3d.global.shared::cta.tile.bulk_group [%0, {%1, %2, %3}], [%4]; // 3c." : : "l"(
+           __tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__tensorCoords[2]),
+         "r"(__as_ptr_smem(__srcMem)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90__();));
@@ -274,16 +265,13 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%0], [%1, {%2, %3, %4, "
-         "%5}], [%6];// 1d."
-         :
-         : "r"(__as_ptr_smem(__dstMem)),
-           "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__tensorCoords[2]),
-           "r"(__tensorCoords[3]),
-           "r"(__as_ptr_smem(__smem_bar))
-         : "memory");),
+         "%5}], [%6];// 1d." : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__tensorCoords[2]),
+         "r"(__tensorCoords[3]),
+         "r"(__as_ptr_smem(__smem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90__();));
@@ -316,15 +304,13 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.async.bulk.tensor.4d.global.shared::cta.tile.bulk_group [%0, {%1, %2, %3, %4}], [%5]; // 3d."
-         :
-         : "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__tensorCoords[2]),
-           "r"(__tensorCoords[3]),
-           "r"(__as_ptr_smem(__srcMem))
-         : "memory");),
+    (asm("cp.async.bulk.tensor.4d.global.shared::cta.tile.bulk_group [%0, {%1, %2, %3, %4}], [%5]; // 3d." : : "l"(
+           __tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__tensorCoords[2]),
+         "r"(__tensorCoords[3]),
+         "r"(__as_ptr_smem(__srcMem)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90__();));
@@ -361,17 +347,14 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes [%0], [%1, {%2, %3, %4, %5, "
-         "%6}], [%7];// 1e."
-         :
-         : "r"(__as_ptr_smem(__dstMem)),
-           "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__tensorCoords[2]),
-           "r"(__tensorCoords[3]),
-           "r"(__tensorCoords[4]),
-           "r"(__as_ptr_smem(__smem_bar))
-         : "memory");),
+         "%6}], [%7];// 1e." : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__tensorCoords[2]),
+         "r"(__tensorCoords[3]),
+         "r"(__tensorCoords[4]),
+         "r"(__as_ptr_smem(__smem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90__();));
@@ -404,16 +387,14 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.async.bulk.tensor.5d.global.shared::cta.tile.bulk_group [%0, {%1, %2, %3, %4, %5}], [%6]; // 3e."
-         :
-         : "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__tensorCoords[2]),
-           "r"(__tensorCoords[3]),
-           "r"(__tensorCoords[4]),
-           "r"(__as_ptr_smem(__srcMem))
-         : "memory");),
+    (asm("cp.async.bulk.tensor.5d.global.shared::cta.tile.bulk_group [%0, {%1, %2, %3, %4, %5}], [%6]; // 3e." : : "l"(
+           __tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__tensorCoords[2]),
+         "r"(__tensorCoords[3]),
+         "r"(__tensorCoords[4]),
+         "r"(__as_ptr_smem(__srcMem)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h
index 915979d18f3..56c199d39ff 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_tensor_multicast.h
@@ -35,14 +35,11 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
     (asm("cp.async.bulk.tensor.1d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%0], "
-         "[%1, {%2}], [%3], %4; // 2a."
-         :
-         : "r"(__as_ptr_smem(__dstMem)),
-           "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__as_ptr_smem(__smem_bar)),
-           "h"(__ctaMask)
-         : "memory");),
+         "[%1, {%2}], [%3], %4; // 2a." : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__as_ptr_smem(__smem_bar)),
+         "h"(__ctaMask) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90a__();));
@@ -81,15 +78,12 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
     (asm("cp.async.bulk.tensor.2d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%0], "
-         "[%1, {%2, %3}], [%4], %5; // 2b."
-         :
-         : "r"(__as_ptr_smem(__dstMem)),
-           "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__as_ptr_smem(__smem_bar)),
-           "h"(__ctaMask)
-         : "memory");),
+         "[%1, {%2, %3}], [%4], %5; // 2b." : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__as_ptr_smem(__smem_bar)),
+         "h"(__ctaMask) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90a__();));
@@ -128,16 +122,13 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
     (asm("cp.async.bulk.tensor.3d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%0], "
-         "[%1, {%2, %3, %4}], [%5], %6; // 2c."
-         :
-         : "r"(__as_ptr_smem(__dstMem)),
-           "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__tensorCoords[2]),
-           "r"(__as_ptr_smem(__smem_bar)),
-           "h"(__ctaMask)
-         : "memory");),
+         "[%1, {%2, %3, %4}], [%5], %6; // 2c." : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__tensorCoords[2]),
+         "r"(__as_ptr_smem(__smem_bar)),
+         "h"(__ctaMask) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90a__();));
@@ -176,17 +167,14 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
     (asm("cp.async.bulk.tensor.4d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%0], "
-         "[%1, {%2, %3, %4, %5}], [%6], %7; // 2d."
-         :
-         : "r"(__as_ptr_smem(__dstMem)),
-           "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__tensorCoords[2]),
-           "r"(__tensorCoords[3]),
-           "r"(__as_ptr_smem(__smem_bar)),
-           "h"(__ctaMask)
-         : "memory");),
+         "[%1, {%2, %3, %4, %5}], [%6], %7; // 2d." : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__tensorCoords[2]),
+         "r"(__tensorCoords[3]),
+         "r"(__as_ptr_smem(__smem_bar)),
+         "h"(__ctaMask) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90a__();));
@@ -225,18 +213,15 @@ _CCCL_DEVICE static inline void cp_async_bulk_tensor(
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
     (asm("cp.async.bulk.tensor.5d.shared::cluster.global.tile.mbarrier::complete_tx::bytes.multicast::cluster [%0], "
-         "[%1, {%2, %3, %4, %5, %6}], [%7], %8; // 2e."
-         :
-         : "r"(__as_ptr_smem(__dstMem)),
-           "l"(__tensorMap),
-           "r"(__tensorCoords[0]),
-           "r"(__tensorCoords[1]),
-           "r"(__tensorCoords[2]),
-           "r"(__tensorCoords[3]),
-           "r"(__tensorCoords[4]),
-           "r"(__as_ptr_smem(__smem_bar)),
-           "h"(__ctaMask)
-         : "memory");),
+         "[%1, {%2, %3, %4, %5, %6}], [%7], %8; // 2e." : : "r"(__as_ptr_smem(__dstMem)),
+         "l"(__tensorMap),
+         "r"(__tensorCoords[0]),
+         "r"(__tensorCoords[1]),
+         "r"(__tensorCoords[2]),
+         "r"(__tensorCoords[3]),
+         "r"(__tensorCoords[4]),
+         "r"(__as_ptr_smem(__smem_bar)),
+         "h"(__ctaMask) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_tensor_is_not_supported_before_SM_90a__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h
index 2057323665a..85b1507f721 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_async_bulk_wait_group.h
@@ -16,10 +16,7 @@ _CCCL_DEVICE static inline void cp_async_bulk_wait_group(n32_t<_N32> __N)
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm volatile("cp.async.bulk.wait_group %0;"
-                  :
-                  : "n"(__N.value)
-                  : "memory");),
+    (asm volatile("cp.async.bulk.wait_group %0;" : : "n"(__N.value) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_wait_group_is_not_supported_before_SM_90__();));
@@ -39,10 +36,7 @@ _CCCL_DEVICE static inline void cp_async_bulk_wait_group_read(n32_t<_N32> __N)
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm volatile("cp.async.bulk.wait_group.read %0;"
-                  :
-                  : "n"(__N.value)
-                  : "memory");),
+    (asm volatile("cp.async.bulk.wait_group.read %0;" : : "n"(__N.value) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_async_bulk_wait_group_read_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h
index a35684c85e1..9b1bf35b290 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk.h
@@ -39,13 +39,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.and.b32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -88,13 +85,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.or.b32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -137,13 +131,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.xor.b32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -186,13 +177,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.min.u32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -235,13 +223,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.max.u32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -284,13 +269,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.add.u32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -333,13 +315,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.inc.u32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -382,13 +361,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.dec.u32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -431,13 +407,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.min.s32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -480,13 +453,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.max.s32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -529,13 +499,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.add.s32 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -578,13 +545,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.add.u64 [%0], [%1], %2, [%3]; "
-         "// 1."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 1." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -627,13 +591,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("cp.reduce.async.bulk.shared::cluster.shared::cta.mbarrier::complete_tx::bytes.add.u64 [%0], [%1], %2, [%3]; "
-         "// 2."
-         :
-         : "r"(__as_ptr_remote_dsmem(__dstMem)),
-           "r"(__as_ptr_smem(__srcMem)),
-           "r"(__size),
-           "r"(__as_ptr_remote_dsmem(__rdsmem_bar))
-         : "memory");),
+         "// 2." : : "r"(__as_ptr_remote_dsmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size),
+         "r"(__as_ptr_remote_dsmem(__rdsmem_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -807,10 +768,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_min (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.u32  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.u32  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -849,10 +810,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_max (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.u32  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.u32  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -891,10 +852,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.u32  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.u32  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -933,10 +894,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_inc (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.inc.u32  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.inc.u32  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -975,10 +936,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_dec (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.dec.u32  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.dec.u32  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1017,10 +978,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_min (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.s32  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.s32  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1059,10 +1020,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_max (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.s32  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.s32  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1101,10 +1062,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.s32  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.s32  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1143,10 +1104,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_min (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.u64  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.u64  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1185,10 +1146,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_max (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.u64  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.u64  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1227,10 +1188,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.u64  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.u64  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1269,10 +1230,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_min (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.s64  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.s64  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1311,10 +1272,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_max (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.s64  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.s64  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1348,10 +1309,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.f32  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.f32  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1385,10 +1346,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.f64  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.f64  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -1427,10 +1388,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.u64  [%0], [%1], %2; // 6."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.u64  [%0], [%1], %2; // 6." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h
index 1e13bb5f4f2..da5cdb6bc9b 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_bf16.h
@@ -35,10 +35,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_min (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.bf16  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.bf16  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -77,10 +77,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_max (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.bf16  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.bf16  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -119,10 +119,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.noftz.bf16  [%0], [%1], %2; // 5."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.noftz.bf16  [%0], [%1], %2; // 5." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h
index 0c4678c95bb..3d9d4520dcb 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/cp_reduce_async_bulk_f16.h
@@ -30,10 +30,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_min (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.f16  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.min.f16  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -67,10 +67,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_max (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.f16  [%0], [%1], %2; // 4."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.max.f16  [%0], [%1], %2; // 4." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
@@ -104,10 +104,10 @@ _CCCL_DEVICE static inline void cp_reduce_async_bulk(
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.noftz.f16  [%0], [%1], %2; // 5."
-         :
-         : "l"(__as_ptr_gmem(__dstMem)), "r"(__as_ptr_smem(__srcMem)), "r"(__size)
-         : "memory");),
+    (asm("cp.reduce.async.bulk.global.shared::cta.bulk_group.add.noftz.f16  [%0], [%1], %2; // 5." : : "l"(
+           __as_ptr_gmem(__dstMem)),
+         "r"(__as_ptr_smem(__srcMem)),
+         "r"(__size) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_cp_reduce_async_bulk_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h b/libcudacxx/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h
index e185913b3cd..f8c4e6cf476 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/fence_mbarrier_init.h
@@ -21,10 +21,7 @@ _CCCL_DEVICE static inline void fence_mbarrier_init(sem_release_t, scope_cluster
   // __scope == scope_cluster (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm volatile("fence.mbarrier_init.release.cluster; // 3."
-                  :
-                  :
-                  : "memory");),
+    (asm volatile("fence.mbarrier_init.release.cluster; // 3." : : : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_fence_mbarrier_init_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h b/libcudacxx/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h
index 40229b84a96..cc413a0f511 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/fence_proxy_alias.h
@@ -15,10 +15,7 @@ _CCCL_DEVICE static inline void fence_proxy_alias()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_70,
-    (asm volatile("fence.proxy.alias; // 4."
-                  :
-                  :
-                  : "memory");),
+    (asm volatile("fence.proxy.alias; // 4." : : : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_fence_proxy_alias_is_not_supported_before_SM_70__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/fence_proxy_async.h b/libcudacxx/include/cuda/__ptx/instructions/generated/fence_proxy_async.h
index f64b5faee5e..176d24ff73f 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/fence_proxy_async.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/fence_proxy_async.h
@@ -15,10 +15,7 @@ _CCCL_DEVICE static inline void fence_proxy_async()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm volatile("fence.proxy.async; // 5."
-                  :
-                  :
-                  : "memory");),
+    (asm volatile("fence.proxy.async; // 5." : : : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_fence_proxy_async_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/get_sreg.h b/libcudacxx/include/cuda/__ptx/instructions/generated/get_sreg.h
index 08128cc00a1..da802adb9db 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/get_sreg.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/get_sreg.h
@@ -135,11 +135,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_nwarpid()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value; asm volatile(
-       "mov.u32 %0, %%nwarpid;"
-       : "=r"(__sreg_value)
-       :
-       :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm volatile("mov.u32 %0, %%nwarpid;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -264,11 +260,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_nsmid()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value; asm volatile(
-       "mov.u32 %0, %%nsmid;"
-       : "=r"(__sreg_value)
-       :
-       :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm volatile("mov.u32 %0, %%nsmid;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -307,10 +299,7 @@ _CCCL_DEVICE static inline bool get_sreg_is_explicit_cluster()
      asm("{\n\t .reg .pred P_OUT; \n\t"
          "mov.pred P_OUT, %%is_explicit_cluster;\n\t"
          "selp.b32 %0, 1, 0, P_OUT; \n"
-         "}"
-         : "=r"(__sreg_value)
-         :
-         :);
+         "}" : "=r"(__sreg_value) : :);
      return static_cast<bool>(__sreg_value);),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -330,11 +319,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_clusterid_x()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%clusterid.x;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%clusterid.x;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -354,11 +339,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_clusterid_y()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%clusterid.y;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%clusterid.y;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -378,11 +359,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_clusterid_z()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%clusterid.z;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%clusterid.z;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -402,11 +379,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_nclusterid_x()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%nclusterid.x;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%nclusterid.x;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -426,11 +399,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_nclusterid_y()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%nclusterid.y;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%nclusterid.y;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -450,11 +419,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_nclusterid_z()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%nclusterid.z;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%nclusterid.z;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -474,11 +439,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_cluster_ctaid_x()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%cluster_ctaid.x;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%cluster_ctaid.x;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -498,11 +459,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_cluster_ctaid_y()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%cluster_ctaid.y;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%cluster_ctaid.y;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -522,11 +479,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_cluster_ctaid_z()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%cluster_ctaid.z;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%cluster_ctaid.z;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -546,11 +499,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_cluster_nctaid_x()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%cluster_nctaid.x;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%cluster_nctaid.x;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -570,11 +519,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_cluster_nctaid_y()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%cluster_nctaid.y;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%cluster_nctaid.y;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -594,11 +539,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_cluster_nctaid_z()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%cluster_nctaid.z;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%cluster_nctaid.z;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -618,11 +559,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_cluster_ctarank()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%cluster_ctarank;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%cluster_ctarank;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -642,11 +579,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_cluster_nctarank()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%cluster_nctarank;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%cluster_nctarank;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -666,11 +599,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_lanemask_eq()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%lanemask_eq;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%lanemask_eq;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -690,11 +619,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_lanemask_le()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%lanemask_le;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%lanemask_le;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -714,11 +639,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_lanemask_lt()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%lanemask_lt;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%lanemask_lt;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -738,11 +659,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_lanemask_ge()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%lanemask_ge;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%lanemask_ge;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -762,11 +679,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_lanemask_gt()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%lanemask_gt;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%lanemask_gt;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -801,11 +714,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_clock_hi()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value; asm volatile(
-       "mov.u32 %0, %%clock_hi;"
-       : "=r"(__sreg_value)
-       :
-       :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm volatile("mov.u32 %0, %%clock_hi;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -825,11 +734,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint64_t get_sreg_clock64()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint64_t __sreg_value; asm volatile(
-       "mov.u64 %0, %%clock64;"
-       : "=l"(__sreg_value)
-       :
-       :);
+    (_CUDA_VSTD::uint64_t __sreg_value; asm volatile("mov.u64 %0, %%clock64;" : "=l"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -849,11 +754,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint64_t get_sreg_globaltimer()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint64_t __sreg_value; asm volatile(
-       "mov.u64 %0, %%globaltimer;"
-       : "=l"(__sreg_value)
-       :
-       :);
+    (_CUDA_VSTD::uint64_t __sreg_value; asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -873,11 +774,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_globaltimer_lo()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value; asm volatile(
-       "mov.u32 %0, %%globaltimer_lo;"
-       : "=r"(__sreg_value)
-       :
-       :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm volatile("mov.u32 %0, %%globaltimer_lo;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -897,11 +794,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_globaltimer_hi()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value; asm volatile(
-       "mov.u32 %0, %%globaltimer_hi;"
-       : "=r"(__sreg_value)
-       :
-       :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm volatile("mov.u32 %0, %%globaltimer_hi;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -921,11 +814,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_total_smem_size()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%total_smem_size;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%total_smem_size;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -945,11 +834,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_aggr_smem_size()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%aggr_smem_size;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%aggr_smem_size;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -969,11 +854,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t get_sreg_dynamic_smem_size()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_35,
-    (_CUDA_VSTD::uint32_t __sreg_value;
-     asm("mov.u32 %0, %%dynamic_smem_size;"
-         : "=r"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint32_t __sreg_value; asm("mov.u32 %0, %%dynamic_smem_size;" : "=r"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -993,11 +874,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint64_t get_sreg_current_graph_exec()
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_50,
-    (_CUDA_VSTD::uint64_t __sreg_value;
-     asm("mov.u64 %0, %%current_graph_exec;"
-         : "=l"(__sreg_value)
-         :
-         :);
+    (_CUDA_VSTD::uint64_t __sreg_value; asm("mov.u64 %0, %%current_graph_exec;" : "=l"(__sreg_value) : :);
      return __sreg_value;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/getctarank.h b/libcudacxx/include/cuda/__ptx/instructions/generated/getctarank.h
index a769868f45c..22bb73180dc 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/getctarank.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/getctarank.h
@@ -20,10 +20,7 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint32_t getctarank(space_cluster_t, cons
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (_CUDA_VSTD::uint32_t __dest;
-     asm("getctarank.shared::cluster.u32 %0, %1;"
-         : "=r"(__dest)
-         : "r"(__as_ptr_smem(__addr))
-         :);
+     asm("getctarank.shared::cluster.u32 %0, %1;" : "=r"(__dest) : "r"(__as_ptr_smem(__addr)) :);
      return __dest;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h
index e1afe25d8c2..c7102ebfdb5 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive.h
@@ -16,11 +16,8 @@ _CCCL_DEVICE static inline _CUDA_VSTD::uint64_t mbarrier_arrive(_CUDA_VSTD::uint
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_80,
-    (_CUDA_VSTD::uint64_t __state;
-     asm("mbarrier.arrive.shared.b64                                  %0,  [%1];           // 1. "
-         : "=l"(__state)
-         : "r"(__as_ptr_smem(__addr))
-         : "memory");
+    (_CUDA_VSTD::uint64_t __state; asm("mbarrier.arrive.shared.b64                                  %0,  [%1];         "
+                                       "  // 1. " : "=l"(__state) : "r"(__as_ptr_smem(__addr)) : "memory");
      return __state;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -44,10 +41,9 @@ mbarrier_arrive(_CUDA_VSTD::uint64_t* __addr, const _CUDA_VSTD::uint32_t& __coun
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (_CUDA_VSTD::uint64_t __state;
-     asm("mbarrier.arrive.shared::cta.b64                             %0,  [%1], %2;    // 2. "
-         : "=l"(__state)
-         : "r"(__as_ptr_smem(__addr)), "r"(__count)
-         : "memory");
+     asm("mbarrier.arrive.shared::cta.b64                             %0,  [%1], %2;    "
+         "// 2. " : "=l"(__state) : "r"(__as_ptr_smem(__addr)),
+         "r"(__count) : "memory");
      return __state;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -165,10 +161,8 @@ mbarrier_arrive(sem_release_t, scope_cluster_t, space_cluster_t, _CUDA_VSTD::uin
   // __space == space_cluster (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("mbarrier.arrive.release.cluster.shared::cluster.b64                   _, [%0];                // 4a. "
-         :
-         : "r"(__as_ptr_remote_dsmem(__addr))
-         : "memory");),
+    (asm("mbarrier.arrive.release.cluster.shared::cluster.b64                   _, [%0];               "
+         " // 4a. " : : "r"(__as_ptr_remote_dsmem(__addr)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_mbarrier_arrive_is_not_supported_before_SM_90__();));
@@ -199,10 +193,9 @@ _CCCL_DEVICE static inline void mbarrier_arrive(
   // __space == space_cluster (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("mbarrier.arrive.release.cluster.shared::cluster.b64                   _, [%0], %1;         // 4b. "
-         :
-         : "r"(__as_ptr_remote_dsmem(__addr)), "r"(__count)
-         : "memory");),
+    (asm("mbarrier.arrive.release.cluster.shared::cluster.b64                   _, [%0], %1;         "
+         "// 4b. " : : "r"(__as_ptr_remote_dsmem(__addr)),
+         "r"(__count) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_mbarrier_arrive_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h
index 79301a57851..dc33b212e21 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive_expect_tx.h
@@ -73,10 +73,9 @@ _CCCL_DEVICE static inline void mbarrier_arrive_expect_tx(
   // __space == space_cluster (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("mbarrier.arrive.expect_tx.release.cluster.shared::cluster.b64   _, [%0], %1; // 9. "
-         :
-         : "r"(__as_ptr_remote_dsmem(__addr)), "r"(__tx_count)
-         : "memory");),
+    (asm("mbarrier.arrive.expect_tx.release.cluster.shared::cluster.b64   _, [%0], %1; // 9. " : : "r"(
+           __as_ptr_remote_dsmem(__addr)),
+         "r"(__tx_count) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_mbarrier_arrive_expect_tx_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h
index cbfb275baa4..45c444c5364 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_arrive_no_complete.h
@@ -19,10 +19,9 @@ mbarrier_arrive_no_complete(_CUDA_VSTD::uint64_t* __addr, const _CUDA_VSTD::uint
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_80,
     (_CUDA_VSTD::uint64_t __state;
-     asm("mbarrier.arrive.noComplete.shared.b64                       %0,  [%1], %2;    // 5. "
-         : "=l"(__state)
-         : "r"(__as_ptr_smem(__addr)), "r"(__count)
-         : "memory");
+     asm("mbarrier.arrive.noComplete.shared.b64                       %0,  [%1], %2;    "
+         "// 5. " : "=l"(__state) : "r"(__as_ptr_smem(__addr)),
+         "r"(__count) : "memory");
      return __state;),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_init.h b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_init.h
index d1e5c57c97e..6b3041de0d2 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_init.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_init.h
@@ -17,10 +17,7 @@ _CCCL_DEVICE static inline void mbarrier_init(_CUDA_VSTD::uint64_t* __addr, cons
 {
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_80,
-    (asm("mbarrier.init.shared.b64 [%0], %1;"
-         :
-         : "r"(__as_ptr_smem(__addr)), "r"(__count)
-         : "memory");),
+    (asm("mbarrier.init.shared.b64 [%0], %1;" : : "r"(__as_ptr_smem(__addr)), "r"(__count) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_mbarrier_init_is_not_supported_before_SM_80__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h
index f3dbb6ed1c3..9adc677c76d 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_test_wait.h
@@ -21,10 +21,8 @@ _CCCL_DEVICE static inline bool mbarrier_test_wait(_CUDA_VSTD::uint64_t* __addr,
      asm("{\n\t .reg .pred P_OUT; \n\t"
          "mbarrier.test_wait.shared.b64 P_OUT, [%1], %2;                                                  // 1. \n\t"
          "selp.b32 %0, 1, 0, P_OUT; \n"
-         "}"
-         : "=r"(__waitComplete)
-         : "r"(__as_ptr_smem(__addr)), "l"(__state)
-         : "memory");
+         "}" : "=r"(__waitComplete) : "r"(__as_ptr_smem(__addr)),
+         "l"(__state) : "memory");
      return static_cast<bool>(__waitComplete);),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h
index b975434b2de..1166b336d2d 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_test_wait_parity.h
@@ -22,10 +22,8 @@ mbarrier_test_wait_parity(_CUDA_VSTD::uint64_t* __addr, const _CUDA_VSTD::uint32
      asm("{\n\t .reg .pred P_OUT; \n\t"
          "mbarrier.test_wait.parity.shared.b64 P_OUT, [%1], %2;                                     // 3. \n\t"
          "selp.b32 %0, 1, 0, P_OUT; \n"
-         "}"
-         : "=r"(__waitComplete)
-         : "r"(__as_ptr_smem(__addr)), "r"(__phaseParity)
-         : "memory");
+         "}" : "=r"(__waitComplete) : "r"(__as_ptr_smem(__addr)),
+         "r"(__phaseParity) : "memory");
      return static_cast<bool>(__waitComplete);),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h
index dd50a2c9f41..52fa5a4928a 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_try_wait.h
@@ -21,10 +21,8 @@ _CCCL_DEVICE static inline bool mbarrier_try_wait(_CUDA_VSTD::uint64_t* __addr,
      asm("{\n\t .reg .pred P_OUT; \n\t"
          "mbarrier.try_wait.shared::cta.b64         P_OUT, [%1], %2;                                      // 5a. \n\t"
          "selp.b32 %0, 1, 0, P_OUT; \n"
-         "}"
-         : "=r"(__waitComplete)
-         : "r"(__as_ptr_smem(__addr)), "l"(__state)
-         : "memory");
+         "}" : "=r"(__waitComplete) : "r"(__as_ptr_smem(__addr)),
+         "l"(__state) : "memory");
      return static_cast<bool>(__waitComplete);),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -52,10 +50,9 @@ _CCCL_DEVICE static inline bool mbarrier_try_wait(
      asm("{\n\t .reg .pred P_OUT; \n\t"
          "mbarrier.try_wait.shared::cta.b64         P_OUT, [%1], %2, %3;                    // 5b. \n\t"
          "selp.b32 %0, 1, 0, P_OUT; \n"
-         "}"
-         : "=r"(__waitComplete)
-         : "r"(__as_ptr_smem(__addr)), "l"(__state), "r"(__suspendTimeHint)
-         : "memory");
+         "}" : "=r"(__waitComplete) : "r"(__as_ptr_smem(__addr)),
+         "l"(__state),
+         "r"(__suspendTimeHint) : "memory");
      return static_cast<bool>(__waitComplete);),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h
index d3deb3ca1d5..aa15e255352 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/mbarrier_try_wait_parity.h
@@ -22,10 +22,8 @@ mbarrier_try_wait_parity(_CUDA_VSTD::uint64_t* __addr, const _CUDA_VSTD::uint32_
      asm("{\n\t .reg .pred P_OUT; \n\t"
          "mbarrier.try_wait.parity.shared::cta.b64  P_OUT, [%1], %2;                                // 7a. \n\t"
          "selp.b32 %0, 1, 0, P_OUT; \n"
-         "}"
-         : "=r"(__waitComplete)
-         : "r"(__as_ptr_smem(__addr)), "r"(__phaseParity)
-         : "memory");
+         "}" : "=r"(__waitComplete) : "r"(__as_ptr_smem(__addr)),
+         "r"(__phaseParity) : "memory");
      return static_cast<bool>(__waitComplete);),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
@@ -53,10 +51,9 @@ _CCCL_DEVICE static inline bool mbarrier_try_wait_parity(
      asm("{\n\t .reg .pred P_OUT; \n\t"
          "mbarrier.try_wait.parity.shared::cta.b64  P_OUT, [%1], %2, %3;               // 7b. \n\t"
          "selp.b32 %0, 1, 0, P_OUT; \n"
-         "}"
-         : "=r"(__waitComplete)
-         : "r"(__as_ptr_smem(__addr)), "r"(__phaseParity), "r"(__suspendTimeHint)
-         : "memory");
+         "}" : "=r"(__waitComplete) : "r"(__as_ptr_smem(__addr)),
+         "r"(__phaseParity),
+         "r"(__suspendTimeHint) : "memory");
      return static_cast<bool>(__waitComplete);),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/red_async.h b/libcudacxx/include/cuda/__ptx/instructions/generated/red_async.h
index d88392f3635..74110933270 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/red_async.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/red_async.h
@@ -25,10 +25,10 @@ _CCCL_DEVICE static inline void red_async(
   // __op == op_inc (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.inc.u32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__value), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.inc.u32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__value),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -57,10 +57,10 @@ _CCCL_DEVICE static inline void red_async(
   // __op == op_dec (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.dec.u32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__value), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.dec.u32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__value),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -89,10 +89,10 @@ _CCCL_DEVICE static inline void red_async(
   // __op == op_min (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.min.u32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__value), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.min.u32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__value),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -121,10 +121,10 @@ _CCCL_DEVICE static inline void red_async(
   // __op == op_max (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.max.u32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__value), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.max.u32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__value),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -153,10 +153,10 @@ _CCCL_DEVICE static inline void red_async(
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.add.u32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__value), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.add.u32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__value),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -185,10 +185,10 @@ red_async(op_min_t, _CUDA_VSTD::int32_t* __dest, const _CUDA_VSTD::int32_t& __va
   // __op == op_min (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.min.s32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__value), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.min.s32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__value),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -217,10 +217,10 @@ red_async(op_max_t, _CUDA_VSTD::int32_t* __dest, const _CUDA_VSTD::int32_t& __va
   // __op == op_max (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.max.s32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__value), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.max.s32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__value),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -249,10 +249,10 @@ red_async(op_add_t, _CUDA_VSTD::int32_t* __dest, const _CUDA_VSTD::int32_t& __va
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.add.s32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__value), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.add.s32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__value),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -282,10 +282,10 @@ red_async(op_and_op_t, _B32* __dest, const _B32& __value, _CUDA_VSTD::uint64_t*
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.and.b32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__as_b32(__value)), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.and.b32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__as_b32(__value)),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -315,10 +315,10 @@ red_async(op_or_op_t, _B32* __dest, const _B32& __value, _CUDA_VSTD::uint64_t* _
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.or.b32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__as_b32(__value)), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.or.b32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__as_b32(__value)),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -348,10 +348,10 @@ red_async(op_xor_op_t, _B32* __dest, const _B32& __value, _CUDA_VSTD::uint64_t*
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.xor.b32  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "r"(__as_b32(__value)), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.xor.b32  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "r"(__as_b32(__value)),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -380,10 +380,10 @@ _CCCL_DEVICE static inline void red_async(
   // __op == op_add (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.add.u64  [%0], %1, [%2]; "
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "l"(__value), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.add.u64  [%0], %1, [%2]; " : : "r"(
+           __as_ptr_remote_dsmem(__dest)),
+         "l"(__value),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
@@ -411,10 +411,9 @@ red_async(op_add_t, _CUDA_VSTD::int64_t* __dest, const _CUDA_VSTD::int64_t& __va
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
     (asm("red.async.relaxed.cluster.shared::cluster.mbarrier::complete_tx::bytes.add.u64  [%0], %1, [%2]; // .u64 "
-         "intentional"
-         :
-         : "r"(__as_ptr_remote_dsmem(__dest)), "l"(__value), "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+         "intentional" : : "r"(__as_ptr_remote_dsmem(__dest)),
+         "l"(__value),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_red_async_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/st_async.h b/libcudacxx/include/cuda/__ptx/instructions/generated/st_async.h
index 18fd2c03a41..e6c3fcf1737 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/st_async.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/st_async.h
@@ -97,15 +97,13 @@ _CCCL_DEVICE static inline void st_async(_B32* __addr, const _B32 (&__value)[4],
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_PROVIDES_SM_90,
-    (asm("st.async.weak.shared::cluster.mbarrier::complete_tx::bytes.v4.b32 [%0], {%1, %2, %3, %4}, [%5];    // 3. "
-         :
-         : "r"(__as_ptr_remote_dsmem(__addr)),
-           "r"(__as_b32(__value[0])),
-           "r"(__as_b32(__value[1])),
-           "r"(__as_b32(__value[2])),
-           "r"(__as_b32(__value[3])),
-           "r"(__as_ptr_remote_dsmem(__remote_bar))
-         : "memory");),
+    (asm("st.async.weak.shared::cluster.mbarrier::complete_tx::bytes.v4.b32 [%0], {%1, %2, %3, %4}, [%5];    // "
+         "3. " : : "r"(__as_ptr_remote_dsmem(__addr)),
+         "r"(__as_b32(__value[0])),
+         "r"(__as_b32(__value[1])),
+         "r"(__as_b32(__value[2])),
+         "r"(__as_b32(__value[3])),
+         "r"(__as_ptr_remote_dsmem(__remote_bar)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_st_async_is_not_supported_before_SM_90__();));
diff --git a/libcudacxx/include/cuda/__ptx/instructions/generated/tensormap_replace.h b/libcudacxx/include/cuda/__ptx/instructions/generated/tensormap_replace.h
index 3889026750d..598b56f90b0 100644
--- a/libcudacxx/include/cuda/__ptx/instructions/generated/tensormap_replace.h
+++ b/libcudacxx/include/cuda/__ptx/instructions/generated/tensormap_replace.h
@@ -21,10 +21,8 @@ _CCCL_DEVICE static inline void tensormap_replace_global_address(space_global_t,
   static_assert(sizeof(_B64) == 8, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.global_address.global.b1024.b64    [%0], %1;"
-         :
-         : "l"(__as_ptr_gmem(__tm_addr)), "l"(__as_b64(__new_val))
-         : "memory");),
+    (asm("tensormap.replace.tile.global_address.global.b1024.b64    [%0], %1;" : : "l"(__as_ptr_gmem(__tm_addr)),
+         "l"(__as_b64(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_global_address_is_not_supported_before_SM_90a__();));
@@ -49,10 +47,8 @@ _CCCL_DEVICE static inline void tensormap_replace_global_address(space_shared_t,
   static_assert(sizeof(_B64) == 8, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.global_address.shared::cta.b1024.b64    [%0], %1;"
-         :
-         : "r"(__as_ptr_smem(__tm_addr)), "l"(__as_b64(__new_val))
-         : "memory");),
+    (asm("tensormap.replace.tile.global_address.shared::cta.b1024.b64    [%0], %1;" : : "r"(__as_ptr_smem(__tm_addr)),
+         "l"(__as_b64(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_global_address_is_not_supported_before_SM_90a__();));
@@ -77,10 +73,8 @@ _CCCL_DEVICE static inline void tensormap_replace_rank(space_global_t, void* __t
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.rank.global.b1024.b32              [%0], %1;"
-         :
-         : "l"(__as_ptr_gmem(__tm_addr)), "r"(__as_b32(__new_val))
-         : "memory");),
+    (asm("tensormap.replace.tile.rank.global.b1024.b32              [%0], %1;" : : "l"(__as_ptr_gmem(__tm_addr)),
+         "r"(__as_b32(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_rank_is_not_supported_before_SM_90a__();));
@@ -105,10 +99,8 @@ _CCCL_DEVICE static inline void tensormap_replace_rank(space_shared_t, void* __t
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.rank.shared::cta.b1024.b32              [%0], %1;"
-         :
-         : "r"(__as_ptr_smem(__tm_addr)), "r"(__as_b32(__new_val))
-         : "memory");),
+    (asm("tensormap.replace.tile.rank.shared::cta.b1024.b32              [%0], %1;" : : "r"(__as_ptr_smem(__tm_addr)),
+         "r"(__as_b32(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_rank_is_not_supported_before_SM_90a__();));
@@ -135,10 +127,9 @@ tensormap_replace_box_dim(space_global_t, void* __tm_addr, n32_t<_N32> __ord, _B
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.box_dim.global.b1024.b32           [%0], %1, %2;"
-         :
-         : "l"(__as_ptr_gmem(__tm_addr)), "n"(__ord.value), "r"(__as_b32(__new_val))
-         : "memory");),
+    (asm("tensormap.replace.tile.box_dim.global.b1024.b32           [%0], %1, %2;" : : "l"(__as_ptr_gmem(__tm_addr)),
+         "n"(__ord.value),
+         "r"(__as_b32(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_box_dim_is_not_supported_before_SM_90a__();));
@@ -165,10 +156,10 @@ tensormap_replace_box_dim(space_shared_t, void* __tm_addr, n32_t<_N32> __ord, _B
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.box_dim.shared::cta.b1024.b32           [%0], %1, %2;"
-         :
-         : "r"(__as_ptr_smem(__tm_addr)), "n"(__ord.value), "r"(__as_b32(__new_val))
-         : "memory");),
+    (asm(
+       "tensormap.replace.tile.box_dim.shared::cta.b1024.b32           [%0], %1, %2;" : : "r"(__as_ptr_smem(__tm_addr)),
+       "n"(__ord.value),
+       "r"(__as_b32(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_box_dim_is_not_supported_before_SM_90a__();));
@@ -195,10 +186,9 @@ tensormap_replace_global_dim(space_global_t, void* __tm_addr, n32_t<_N32> __ord,
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.global_dim.global.b1024.b32        [%0], %1, %2;"
-         :
-         : "l"(__as_ptr_gmem(__tm_addr)), "n"(__ord.value), "r"(__as_b32(__new_val))
-         : "memory");),
+    (asm("tensormap.replace.tile.global_dim.global.b1024.b32        [%0], %1, %2;" : : "l"(__as_ptr_gmem(__tm_addr)),
+         "n"(__ord.value),
+         "r"(__as_b32(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_global_dim_is_not_supported_before_SM_90a__();));
@@ -225,10 +215,10 @@ tensormap_replace_global_dim(space_shared_t, void* __tm_addr, n32_t<_N32> __ord,
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.global_dim.shared::cta.b1024.b32        [%0], %1, %2;"
-         :
-         : "r"(__as_ptr_smem(__tm_addr)), "n"(__ord.value), "r"(__as_b32(__new_val))
-         : "memory");),
+    (asm(
+       "tensormap.replace.tile.global_dim.shared::cta.b1024.b32        [%0], %1, %2;" : : "r"(__as_ptr_smem(__tm_addr)),
+       "n"(__ord.value),
+       "r"(__as_b32(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_global_dim_is_not_supported_before_SM_90a__();));
@@ -255,10 +245,9 @@ tensormap_replace_global_stride(space_global_t, void* __tm_addr, n32_t<_N32> __o
   static_assert(sizeof(_B64) == 8, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.global_stride.global.b1024.b64     [%0], %1, %2;"
-         :
-         : "l"(__as_ptr_gmem(__tm_addr)), "n"(__ord.value), "l"(__as_b64(__new_val))
-         : "memory");),
+    (asm("tensormap.replace.tile.global_stride.global.b1024.b64     [%0], %1, %2;" : : "l"(__as_ptr_gmem(__tm_addr)),
+         "n"(__ord.value),
+         "l"(__as_b64(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_global_stride_is_not_supported_before_SM_90a__();));
@@ -285,10 +274,10 @@ tensormap_replace_global_stride(space_shared_t, void* __tm_addr, n32_t<_N32> __o
   static_assert(sizeof(_B64) == 8, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.global_stride.shared::cta.b1024.b64     [%0], %1, %2;"
-         :
-         : "r"(__as_ptr_smem(__tm_addr)), "n"(__ord.value), "l"(__as_b64(__new_val))
-         : "memory");),
+    (asm(
+       "tensormap.replace.tile.global_stride.shared::cta.b1024.b64     [%0], %1, %2;" : : "r"(__as_ptr_smem(__tm_addr)),
+       "n"(__ord.value),
+       "l"(__as_b64(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_global_stride_is_not_supported_before_SM_90a__();));
@@ -315,10 +304,9 @@ tensormap_replace_element_size(space_global_t, void* __tm_addr, n32_t<_N32> __or
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.element_stride.global.b1024.b32    [%0], %1, %2;"
-         :
-         : "l"(__as_ptr_gmem(__tm_addr)), "n"(__ord.value), "r"(__as_b32(__new_val))
-         : "memory");),
+    (asm("tensormap.replace.tile.element_stride.global.b1024.b32    [%0], %1, %2;" : : "l"(__as_ptr_gmem(__tm_addr)),
+         "n"(__ord.value),
+         "r"(__as_b32(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_element_size_is_not_supported_before_SM_90a__();));
@@ -345,10 +333,10 @@ tensormap_replace_element_size(space_shared_t, void* __tm_addr, n32_t<_N32> __or
   static_assert(sizeof(_B32) == 4, "");
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.element_stride.shared::cta.b1024.b32    [%0], %1, %2;"
-         :
-         : "r"(__as_ptr_smem(__tm_addr)), "n"(__ord.value), "r"(__as_b32(__new_val))
-         : "memory");),
+    (asm(
+       "tensormap.replace.tile.element_stride.shared::cta.b1024.b32    [%0], %1, %2;" : : "r"(__as_ptr_smem(__tm_addr)),
+       "n"(__ord.value),
+       "r"(__as_b32(__new_val)) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_element_size_is_not_supported_before_SM_90a__();));
@@ -372,10 +360,8 @@ _CCCL_DEVICE static inline void tensormap_replace_elemtype(space_global_t, void*
   // __space == space_global (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.elemtype.global.b1024.b32          [%0], %1;"
-         :
-         : "l"(__as_ptr_gmem(__tm_addr)), "n"(__new_val.value)
-         : "memory");),
+    (asm("tensormap.replace.tile.elemtype.global.b1024.b32          [%0], %1;" : : "l"(__as_ptr_gmem(__tm_addr)),
+         "n"(__new_val.value) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_elemtype_is_not_supported_before_SM_90a__();));
@@ -399,10 +385,8 @@ _CCCL_DEVICE static inline void tensormap_replace_elemtype(space_shared_t, void*
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.elemtype.shared::cta.b1024.b32          [%0], %1;"
-         :
-         : "r"(__as_ptr_smem(__tm_addr)), "n"(__new_val.value)
-         : "memory");),
+    (asm("tensormap.replace.tile.elemtype.shared::cta.b1024.b32          [%0], %1;" : : "r"(__as_ptr_smem(__tm_addr)),
+         "n"(__new_val.value) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_elemtype_is_not_supported_before_SM_90a__();));
@@ -427,10 +411,8 @@ tensormap_replace_interleave_layout(space_global_t, void* __tm_addr, n32_t<_N32>
   // __space == space_global (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.interleave_layout.global.b1024.b32 [%0], %1;"
-         :
-         : "l"(__as_ptr_gmem(__tm_addr)), "n"(__new_val.value)
-         : "memory");),
+    (asm("tensormap.replace.tile.interleave_layout.global.b1024.b32 [%0], %1;" : : "l"(__as_ptr_gmem(__tm_addr)),
+         "n"(__new_val.value) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_interleave_layout_is_not_supported_before_SM_90a__();));
@@ -455,10 +437,8 @@ tensormap_replace_interleave_layout(space_shared_t, void* __tm_addr, n32_t<_N32>
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.interleave_layout.shared::cta.b1024.b32 [%0], %1;"
-         :
-         : "r"(__as_ptr_smem(__tm_addr)), "n"(__new_val.value)
-         : "memory");),
+    (asm("tensormap.replace.tile.interleave_layout.shared::cta.b1024.b32 [%0], %1;" : : "r"(__as_ptr_smem(__tm_addr)),
+         "n"(__new_val.value) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_interleave_layout_is_not_supported_before_SM_90a__();));
@@ -482,10 +462,8 @@ _CCCL_DEVICE static inline void tensormap_replace_swizzle_mode(space_global_t, v
   // __space == space_global (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.swizzle_mode.global.b1024.b32      [%0], %1;"
-         :
-         : "l"(__as_ptr_gmem(__tm_addr)), "n"(__new_val.value)
-         : "memory");),
+    (asm("tensormap.replace.tile.swizzle_mode.global.b1024.b32      [%0], %1;" : : "l"(__as_ptr_gmem(__tm_addr)),
+         "n"(__new_val.value) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_swizzle_mode_is_not_supported_before_SM_90a__();));
@@ -509,10 +487,8 @@ _CCCL_DEVICE static inline void tensormap_replace_swizzle_mode(space_shared_t, v
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.swizzle_mode.shared::cta.b1024.b32      [%0], %1;"
-         :
-         : "r"(__as_ptr_smem(__tm_addr)), "n"(__new_val.value)
-         : "memory");),
+    (asm("tensormap.replace.tile.swizzle_mode.shared::cta.b1024.b32      [%0], %1;" : : "r"(__as_ptr_smem(__tm_addr)),
+         "n"(__new_val.value) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_swizzle_mode_is_not_supported_before_SM_90a__();));
@@ -536,10 +512,8 @@ _CCCL_DEVICE static inline void tensormap_replace_fill_mode(space_global_t, void
   // __space == space_global (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.fill_mode.global.b1024.b32         [%0], %1;"
-         :
-         : "l"(__as_ptr_gmem(__tm_addr)), "n"(__new_val.value)
-         : "memory");),
+    (asm("tensormap.replace.tile.fill_mode.global.b1024.b32         [%0], %1;" : : "l"(__as_ptr_gmem(__tm_addr)),
+         "n"(__new_val.value) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_fill_mode_is_not_supported_before_SM_90a__();));
@@ -563,10 +537,8 @@ _CCCL_DEVICE static inline void tensormap_replace_fill_mode(space_shared_t, void
   // __space == space_shared (due to parameter type constraint)
   NV_IF_ELSE_TARGET(
     NV_HAS_FEATURE_SM_90a,
-    (asm("tensormap.replace.tile.fill_mode.shared::cta.b1024.b32         [%0], %1;"
-         :
-         : "r"(__as_ptr_smem(__tm_addr)), "n"(__new_val.value)
-         : "memory");),
+    (asm("tensormap.replace.tile.fill_mode.shared::cta.b1024.b32         [%0], %1;" : : "r"(__as_ptr_smem(__tm_addr)),
+         "n"(__new_val.value) : "memory");),
     (
       // Unsupported architectures will have a linker error with a semi-decent error message
       __cuda_ptx_tensormap_replace_fill_mode_is_not_supported_before_SM_90a__();));
diff --git a/libcudacxx/include/cuda/pipeline b/libcudacxx/include/cuda/pipeline
index d034c931644..7946e8bdc91 100644
--- a/libcudacxx/include/cuda/pipeline
+++ b/libcudacxx/include/cuda/pipeline
@@ -199,12 +199,9 @@ struct __pipeline_asm_helper
 {
   _CCCL_DEVICE static inline uint32_t __lane_id()
   {
-    NV_IF_ELSE_TARGET(
-      NV_IS_DEVICE,
-      (uint32_t __lane_id; asm volatile("mov.u32 %0, %%laneid;"
-                                        : "=r"(__lane_id));
-       return __lane_id;),
-      (return 0;))
+    NV_IF_ELSE_TARGET(NV_IS_DEVICE,
+                      (uint32_t __lane_id; asm volatile("mov.u32 %0, %%laneid;" : "=r"(__lane_id)); return __lane_id;),
+                      (return 0;))
   }
 };
 
@@ -546,9 +543,7 @@ _CCCL_DEVICE void __pipeline_consumer_wait(pipeline<thread_scope_thread>& __pipe
   (void) __pipeline;
   NV_IF_TARGET(NV_PROVIDES_SM_80, constexpr uint8_t __max_prior = 8;
 
-               asm volatile("cp.async.wait_group %0;"
-                            :
-                            : "n"(_Prior < __max_prior ? _Prior : __max_prior));)
+               asm volatile("cp.async.wait_group %0;" : : "n"(_Prior < __max_prior ? _Prior : __max_prior));)
 }
 
 _CCCL_DEVICE inline void __pipeline_consumer_wait(pipeline<thread_scope_thread>& __pipeline, uint8_t __prior)
diff --git a/libcudacxx/include/cuda/std/__atomic/types/base.h b/libcudacxx/include/cuda/std/__atomic/types/base.h
index bbd9086dce5..18eb3713bfe 100644
--- a/libcudacxx/include/cuda/std/__atomic/types/base.h
+++ b/libcudacxx/include/cuda/std/__atomic/types/base.h
@@ -98,8 +98,8 @@ _CCCL_HOST_DEVICE inline void __atomic_store_dispatch(_Sto* __a, _Up __val, memo
 }
 
 template <typename _Sto, typename _Sco, __atomic_storage_is_base<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE,
@@ -109,8 +109,8 @@ __atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {}) -> __at
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_base<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_exchange_dispatch(_Sto* __a, _Up __value, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_exchange_dispatch(_Sto* __a, _Up __value, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE,
@@ -160,8 +160,8 @@ _CCCL_HOST_DEVICE inline bool __atomic_compare_exchange_weak_dispatch(
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_base<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE,
@@ -171,8 +171,8 @@ __atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco =
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_base<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE,
@@ -182,8 +182,8 @@ __atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco =
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_base<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE,
@@ -193,8 +193,8 @@ __atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_base<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE,
@@ -204,8 +204,8 @@ __atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_base<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE,
@@ -215,8 +215,8 @@ __atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_base<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_max_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_max_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   NV_IF_TARGET(
     NV_IS_DEVICE,
@@ -225,8 +225,8 @@ __atomic_fetch_max_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_base<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_min_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_min_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   NV_IF_TARGET(
     NV_IS_DEVICE,
diff --git a/libcudacxx/include/cuda/std/__atomic/types/common.h b/libcudacxx/include/cuda/std/__atomic/types/common.h
index 13d326bfe79..6706ad5181b 100644
--- a/libcudacxx/include/cuda/std/__atomic/types/common.h
+++ b/libcudacxx/include/cuda/std/__atomic/types/common.h
@@ -78,9 +78,7 @@ _CCCL_HOST_DEVICE inline int __atomic_memcmp(void const* __lhs, void const* __rh
     NV_IS_DEVICE,
     (unsigned char const* __lhs_c; unsigned char const* __rhs_c;
      // NVCC recommended laundering through inline asm to compare padding bytes.
-     asm("mov.b64 %0, %2;\n mov.b64 %1, %3;"
-         : "=l"(__lhs_c), "=l"(__rhs_c)
-         : "l"(__lhs), "l"(__rhs));
+     asm("mov.b64 %0, %2;\n mov.b64 %1, %3;" : "=l"(__lhs_c), "=l"(__rhs_c) : "l"(__lhs), "l"(__rhs));
      while (__count--) {
        auto const __lhs_v = *__lhs_c++;
        auto const __rhs_v = *__rhs_c++;
diff --git a/libcudacxx/include/cuda/std/__atomic/types/locked.h b/libcudacxx/include/cuda/std/__atomic/types/locked.h
index 5538abcce68..c462c5d16a6 100644
--- a/libcudacxx/include/cuda/std/__atomic/types/locked.h
+++ b/libcudacxx/include/cuda/std/__atomic/types/locked.h
@@ -85,8 +85,8 @@ _CCCL_HOST_DEVICE inline void __atomic_store_dispatch(_Sto* __a, _Up __val, memo
 }
 
 template <typename _Sto, typename _Sco, __atomic_storage_is_locked<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   _Tp __old;
@@ -97,8 +97,8 @@ __atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {}) -> __at
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_locked<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_exchange_dispatch(_Sto* __a, _Up __value, memory_order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_exchange_dispatch(_Sto* __a, _Up __value, memory_order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   _Tp __old;
@@ -152,8 +152,8 @@ __atomic_compare_exchange_weak_dispatch(_Sto* __a, _Up* __expected, _Up __value,
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_locked<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   _Tp __old;
@@ -165,8 +165,8 @@ __atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order, _Sco = {}) ->
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_locked<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   _Tp __old;
@@ -178,8 +178,8 @@ __atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order, _Sco = {}) ->
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_locked<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   _Tp __old;
@@ -191,8 +191,8 @@ __atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {}) -
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_locked<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   _Tp __old;
@@ -204,8 +204,8 @@ __atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {}) ->
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_locked<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   _Tp __old;
diff --git a/libcudacxx/include/cuda/std/__atomic/types/small.h b/libcudacxx/include/cuda/std/__atomic/types/small.h
index 4f24753ca60..a4e969f0936 100644
--- a/libcudacxx/include/cuda/std/__atomic/types/small.h
+++ b/libcudacxx/include/cuda/std/__atomic/types/small.h
@@ -95,16 +95,16 @@ _CCCL_HOST_DEVICE inline void __atomic_store_dispatch(_Sto* __a, _Up __val, memo
 }
 
 template <typename _Sto, typename _Sco, __atomic_storage_is_small<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_load_dispatch(const _Sto* __a, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   return __atomic_small_from_32<_Tp>(__atomic_load_dispatch(&__a->__a_value, __order, _Sco{}));
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_small<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_exchange_dispatch(_Sto* __a, _Up __value, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_exchange_dispatch(_Sto* __a, _Up __value, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   return __atomic_small_from_32<_Tp>(
@@ -155,8 +155,8 @@ _CCCL_HOST_DEVICE inline bool __atomic_compare_exchange_strong_dispatch(
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_small<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   return __atomic_small_from_32<_Tp>(
@@ -164,8 +164,8 @@ __atomic_fetch_add_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco =
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_small<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   return __atomic_small_from_32<_Tp>(
@@ -173,8 +173,8 @@ __atomic_fetch_sub_dispatch(_Sto* __a, _Up __delta, memory_order __order, _Sco =
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_small<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   return __atomic_small_from_32<_Tp>(
@@ -182,8 +182,8 @@ __atomic_fetch_and_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_small<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   return __atomic_small_from_32<_Tp>(
@@ -191,8 +191,8 @@ __atomic_fetch_or_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_small<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   return __atomic_small_from_32<_Tp>(
@@ -200,8 +200,8 @@ __atomic_fetch_xor_dispatch(_Sto* __a, _Up __pattern, memory_order __order, _Sco
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_small<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_max_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_max_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   return __atomic_small_from_32<_Tp>(
@@ -209,8 +209,8 @@ __atomic_fetch_max_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {
 }
 
 template <typename _Sto, typename _Up, typename _Sco, __atomic_storage_is_small<_Sto> = 0>
-_CCCL_HOST_DEVICE inline auto
-__atomic_fetch_min_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {}) -> __atomic_underlying_t<_Sto>
+_CCCL_HOST_DEVICE inline auto __atomic_fetch_min_dispatch(_Sto* __a, _Up __val, memory_order __order, _Sco = {})
+  -> __atomic_underlying_t<_Sto>
 {
   using _Tp = __atomic_underlying_t<_Sto>;
   return __atomic_small_from_32<_Tp>(
diff --git a/libcudacxx/include/cuda/std/__concepts/concept_macros.h b/libcudacxx/include/cuda/std/__concepts/concept_macros.h
index 4cbf3e5e9b3..3acd8ae6841 100644
--- a/libcudacxx/include/cuda/std/__concepts/concept_macros.h
+++ b/libcudacxx/include/cuda/std/__concepts/concept_macros.h
@@ -158,7 +158,7 @@ namespace __cccl_unqualified_cuda_std = _CUDA_VSTD; // NOLINT(misc-unused-alias-
       {                                               \
         _CCCL_PP_CAT4(_CCCL_PP_EAT_SAME_AS_, _REQ)    \
       } -> _CCCL_CONCEPT_VSTD::same_as<_CCCL_PP_EVAL( \
-          _CCCL_CONCEPT_FRAGMENT_REQS_SAME_AS_AUX, _CCCL_PP_CAT4(_CCCL_CONCEPT_FRAGMENT_REQS_SAME_AS_, _REQ))>
+        _CCCL_CONCEPT_FRAGMENT_REQS_SAME_AS_AUX, _CCCL_PP_CAT4(_CCCL_CONCEPT_FRAGMENT_REQS_SAME_AS_, _REQ))>
 #    define _CCCL_PP_EAT_SAME_AS__Same_as(...)
 #    define _CCCL_CONCEPT_FRAGMENT_REQS_SAME_AS_AUX(_TYPE, ...) _CCCL_PP_EXPAND _TYPE
 #    define _CCCL_CONCEPT_FRAGMENT_REQS_SAME_AS__Same_as(...)   (__VA_ARGS__),
@@ -170,8 +170,7 @@ namespace __cccl_unqualified_cuda_std = _CUDA_VSTD; // NOLINT(misc-unused-alias-
 #    define _CCCL_CONCEPT _CCCL_INLINE_VAR constexpr bool
 
 #    define _CCCL_CONCEPT_FRAGMENT(_NAME, ...)                                                                         \
-      _LIBCUDACXX_HIDE_FROM_ABI auto _NAME##_CCCL_CONCEPT_FRAGMENT_impl_ _CCCL_CONCEPT_FRAGMENT_REQS_##__VA_ARGS__ > { \
-      }                                                                                                                \
+      _LIBCUDACXX_HIDE_FROM_ABI auto _NAME##_CCCL_CONCEPT_FRAGMENT_impl_ _CCCL_CONCEPT_FRAGMENT_REQS_##__VA_ARGS__> {} \
       template <class... _As>                                                                                          \
       _LIBCUDACXX_HIDE_FROM_ABI char _NAME##_CCCL_CONCEPT_FRAGMENT_(                                                   \
         ::__cccl_tag<_As...>*, decltype(&_NAME##_CCCL_CONCEPT_FRAGMENT_impl_<_As...>));                                \
diff --git a/libcudacxx/include/cuda/std/__cstddef/types.h b/libcudacxx/include/cuda/std/__cstddef/types.h
index 215b60fbc55..730243c32ee 100644
--- a/libcudacxx/include/cuda/std/__cstddef/types.h
+++ b/libcudacxx/include/cuda/std/__cstddef/types.h
@@ -26,7 +26,7 @@
 #  include <cstddef>
 #else
 #  if !defined(offsetof)
-#    define offsetof(type, member) (::size_t)((char*) &(((type*) 0)->member) - (char*) 0)
+#    define offsetof(type, member) (::size_t) ((char*) &(((type*) 0)->member) - (char*) 0)
 #  endif // !offsetof
 #endif // !_CCCL_COMPILER(NVRTC)
 
diff --git a/libcudacxx/include/cuda/std/__cuda/chrono.h b/libcudacxx/include/cuda/std/__cuda/chrono.h
index 1b3110a556c..c40284a18e8 100644
--- a/libcudacxx/include/cuda/std/__cuda/chrono.h
+++ b/libcudacxx/include/cuda/std/__cuda/chrono.h
@@ -40,8 +40,7 @@ _LIBCUDACXX_HIDE_FROM_ABI system_clock::time_point system_clock::now() noexcept
 {
   NV_DISPATCH_TARGET(
     NV_IS_DEVICE,
-    (uint64_t __time; asm volatile("mov.u64 %0, %%globaltimer;"
-                                   : "=l"(__time)::);
+    (uint64_t __time; asm volatile("mov.u64 %0, %%globaltimer;" : "=l"(__time)::);
      return time_point(duration_cast<duration>(nanoseconds(__time)));),
     NV_IS_HOST,
     (return time_point(duration_cast<duration>(nanoseconds(
diff --git a/libcudacxx/include/cuda/std/__functional/function.h b/libcudacxx/include/cuda/std/__functional/function.h
index 6870b89a88f..e2ec912e6fb 100644
--- a/libcudacxx/include/cuda/std/__functional/function.h
+++ b/libcudacxx/include/cuda/std/__functional/function.h
@@ -99,7 +99,7 @@ _LIBCUDACXX_HIDE_FROM_ABI bool __not_null(_Fp* __ptr)
 }
 
 template <class _Ret, class _Class>
-_LIBCUDACXX_HIDE_FROM_ABI bool __not_null(_Ret _Class::*__ptr)
+_LIBCUDACXX_HIDE_FROM_ABI bool __not_null(_Ret _Class::* __ptr)
 {
   return __ptr;
 }
diff --git a/libcudacxx/include/cuda/std/__functional/mem_fn.h b/libcudacxx/include/cuda/std/__functional/mem_fn.h
index 20a55850ea5..8327b4edfef 100644
--- a/libcudacxx/include/cuda/std/__functional/mem_fn.h
+++ b/libcudacxx/include/cuda/std/__functional/mem_fn.h
@@ -53,7 +53,7 @@ class __mem_fn : public __weak_result_type<_Tp>
 };
 
 template <class _Rp, class _Tp>
-_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 __mem_fn<_Rp _Tp::*> mem_fn(_Rp _Tp::*__pm) noexcept
+_LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX20 __mem_fn<_Rp _Tp::*> mem_fn(_Rp _Tp::* __pm) noexcept
 {
   return __mem_fn<_Rp _Tp::*>(__pm);
 }
diff --git a/libcudacxx/include/cuda/std/__iterator/access.h b/libcudacxx/include/cuda/std/__iterator/access.h
index 1cb9eb1382b..1fba194a63c 100644
--- a/libcudacxx/include/cuda/std/__iterator/access.h
+++ b/libcudacxx/include/cuda/std/__iterator/access.h
@@ -36,15 +36,15 @@ struct __fn
   }
 
   template <class _Cp>
-  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(_Cp& __c) const
-    noexcept(noexcept(__c.begin())) -> decltype(__c.begin())
+  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(_Cp& __c) const noexcept(noexcept(__c.begin()))
+    -> decltype(__c.begin())
   {
     return __c.begin();
   }
 
   template <class _Cp>
-  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(const _Cp& __c) const
-    noexcept(noexcept(__c.begin())) -> decltype(__c.begin())
+  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(const _Cp& __c) const noexcept(noexcept(__c.begin()))
+    -> decltype(__c.begin())
   {
     return __c.begin();
   }
@@ -67,15 +67,15 @@ struct __fn
   }
 
   template <class _Cp>
-  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(_Cp& __c) const
-    noexcept(noexcept(__c.end())) -> decltype(__c.end())
+  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(_Cp& __c) const noexcept(noexcept(__c.end()))
+    -> decltype(__c.end())
   {
     return __c.end();
   }
 
   template <class _Cp>
-  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(const _Cp& __c) const
-    noexcept(noexcept(__c.end())) -> decltype(__c.end())
+  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(const _Cp& __c) const noexcept(noexcept(__c.end()))
+    -> decltype(__c.end())
   {
     return __c.end();
   }
diff --git a/libcudacxx/include/cuda/std/__iterator/data.h b/libcudacxx/include/cuda/std/__iterator/data.h
index f51d84888df..2177c136d61 100644
--- a/libcudacxx/include/cuda/std/__iterator/data.h
+++ b/libcudacxx/include/cuda/std/__iterator/data.h
@@ -30,16 +30,16 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD
 
 _CCCL_EXEC_CHECK_DISABLE
 template <class _Cont>
-_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto
-data(_Cont& __c) noexcept(noexcept(__c.data())) -> decltype(__c.data())
+_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto data(_Cont& __c) noexcept(noexcept(__c.data()))
+  -> decltype(__c.data())
 {
   return __c.data();
 }
 
 _CCCL_EXEC_CHECK_DISABLE
 template <class _Cont>
-_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto
-data(const _Cont& __c) noexcept(noexcept(__c.data())) -> decltype(__c.data())
+_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto data(const _Cont& __c) noexcept(noexcept(__c.data()))
+  -> decltype(__c.data())
 {
   return __c.data();
 }
diff --git a/libcudacxx/include/cuda/std/__iterator/empty.h b/libcudacxx/include/cuda/std/__iterator/empty.h
index 4dea0eb53e0..e9775db3616 100644
--- a/libcudacxx/include/cuda/std/__iterator/empty.h
+++ b/libcudacxx/include/cuda/std/__iterator/empty.h
@@ -29,8 +29,8 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD
 #if _CCCL_STD_VER > 2011
 
 template <class _Cont>
-_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto
-empty(const _Cont& __c) noexcept(noexcept(__c.empty())) -> decltype(__c.empty())
+_CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto empty(const _Cont& __c) noexcept(noexcept(__c.empty()))
+  -> decltype(__c.empty())
 {
   return __c.empty();
 }
diff --git a/libcudacxx/include/cuda/std/__iterator/iter_move.h b/libcudacxx/include/cuda/std/__iterator/iter_move.h
index 22a13ef33ae..b8556ed1108 100644
--- a/libcudacxx/include/cuda/std/__iterator/iter_move.h
+++ b/libcudacxx/include/cuda/std/__iterator/iter_move.h
@@ -101,8 +101,9 @@ struct __fn
 
   _CCCL_TEMPLATE(class _Ip)
   _CCCL_REQUIRES(__move_deref<_Ip>)
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto operator()(_Ip&& __i) const noexcept(noexcept(
-    _CUDA_VSTD::move(*_CUDA_VSTD::forward<_Ip>(__i)))) -> decltype(_CUDA_VSTD::move(*_CUDA_VSTD::forward<_Ip>(__i)))
+  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto operator()(_Ip&& __i) const
+    noexcept(noexcept(_CUDA_VSTD::move(*_CUDA_VSTD::forward<_Ip>(__i))))
+      -> decltype(_CUDA_VSTD::move(*_CUDA_VSTD::forward<_Ip>(__i)))
   {
     return _CUDA_VSTD::move(*_CUDA_VSTD::forward<_Ip>(__i));
   }
diff --git a/libcudacxx/include/cuda/std/__iterator/iterator_traits.h b/libcudacxx/include/cuda/std/__iterator/iterator_traits.h
index 095880f7cce..27f9262e070 100644
--- a/libcudacxx/include/cuda/std/__iterator/iterator_traits.h
+++ b/libcudacxx/include/cuda/std/__iterator/iterator_traits.h
@@ -174,19 +174,20 @@ using _ITER_TRAITS = typename __iter_traits_cache<_Iter>::type;
 #  if defined(_GLIBCXX_DEBUG)
 _CCCL_TEMPLATE(class _Iter, class _Ty, class _Range)
 _CCCL_REQUIRES(_IsSame<_Iter, ::__gnu_debug::_Safe_iterator<_Ty*, _Range>>::value)
-_LIBCUDACXX_HIDE_FROM_ABI auto
-  __iter_concept_fn(::__gnu_debug::_Safe_iterator<_Ty*, _Range>, __priority_tag<3>) -> contiguous_iterator_tag;
+_LIBCUDACXX_HIDE_FROM_ABI auto __iter_concept_fn(::__gnu_debug::_Safe_iterator<_Ty*, _Range>, __priority_tag<3>)
+  -> contiguous_iterator_tag;
 #  endif
 #  if defined(__GLIBCXX__)
 _CCCL_TEMPLATE(class _Iter, class _Ty, class _Range)
 _CCCL_REQUIRES(_IsSame<_Iter, ::__gnu_cxx::__normal_iterator<_Ty*, _Range>>::value)
-_LIBCUDACXX_HIDE_FROM_ABI auto
-  __iter_concept_fn(::__gnu_cxx::__normal_iterator<_Ty*, _Range>, __priority_tag<3>) -> contiguous_iterator_tag;
+_LIBCUDACXX_HIDE_FROM_ABI auto __iter_concept_fn(::__gnu_cxx::__normal_iterator<_Ty*, _Range>, __priority_tag<3>)
+  -> contiguous_iterator_tag;
 #  endif // __GLIBCXX__
 #  if defined(_LIBCPP_VERSION)
 _CCCL_TEMPLATE(class _Iter, class _Ty)
 _CCCL_REQUIRES(_IsSame<_Iter, ::std::__wrap_iter<_Ty*>>::value)
-_LIBCUDACXX_HIDE_FROM_ABI auto __iter_concept_fn(::std::__wrap_iter<_Ty*>, __priority_tag<3>) -> contiguous_iterator_tag;
+_LIBCUDACXX_HIDE_FROM_ABI auto __iter_concept_fn(::std::__wrap_iter<_Ty*>, __priority_tag<3>)
+  -> contiguous_iterator_tag;
 #  elif defined(_MSVC_STL_VERSION) || defined(_IS_WRS)
 _CCCL_TEMPLATE(class _Iter)
 _CCCL_REQUIRES(_IsSame<_Iter, class _Iter::_Array_iterator>::value)
diff --git a/libcudacxx/include/cuda/std/__iterator/reverse_access.h b/libcudacxx/include/cuda/std/__iterator/reverse_access.h
index ffeed85b900..9c66c6a645d 100644
--- a/libcudacxx/include/cuda/std/__iterator/reverse_access.h
+++ b/libcudacxx/include/cuda/std/__iterator/reverse_access.h
@@ -47,15 +47,15 @@ struct __fn
   }
 
   template <class _Cp>
-  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(_Cp& __c) const
-    noexcept(noexcept(__c.rbegin())) -> decltype(__c.rbegin())
+  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(_Cp& __c) const noexcept(noexcept(__c.rbegin()))
+    -> decltype(__c.rbegin())
   {
     return __c.rbegin();
   }
 
   template <class _Cp>
-  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(const _Cp& __c) const
-    noexcept(noexcept(__c.rbegin())) -> decltype(__c.rbegin())
+  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(const _Cp& __c) const noexcept(noexcept(__c.rbegin()))
+    -> decltype(__c.rbegin())
   {
     return __c.rbegin();
   }
@@ -85,15 +85,15 @@ struct __fn
   }
 
   template <class _Cp>
-  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(_Cp& __c) const
-    noexcept(noexcept(__c.rend())) -> decltype(__c.rend())
+  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(_Cp& __c) const noexcept(noexcept(__c.rend()))
+    -> decltype(__c.rend())
   {
     return __c.rend();
   }
 
   template <class _Cp>
-  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(const _Cp& __c) const
-    noexcept(noexcept(__c.rend())) -> decltype(__c.rend())
+  _LIBCUDACXX_HIDE_FROM_ABI _CCCL_CONSTEXPR_CXX14 auto operator()(const _Cp& __c) const noexcept(noexcept(__c.rend()))
+    -> decltype(__c.rend())
   {
     return __c.rend();
   }
diff --git a/libcudacxx/include/cuda/std/__mdspan/extents.h b/libcudacxx/include/cuda/std/__mdspan/extents.h
index 981e51d35fc..b6ce539a901 100644
--- a/libcudacxx/include/cuda/std/__mdspan/extents.h
+++ b/libcudacxx/include/cuda/std/__mdspan/extents.h
@@ -251,7 +251,8 @@ class extents
   _CCCL_REQUIRES(
     /* multi-stage check to protect from invalid pack expansion when sizes don't match? */
     (decltype(__detail::__check_compatible_extents(
-      integral_constant<bool, sizeof...(_Extents) == sizeof...(_OtherExtents)>{},
+      integral_constant < bool,
+      sizeof...(_Extents) == sizeof...(_OtherExtents) > {},
       __indices_t{}, // _CUDA_VSTD::integer_sequence<size_t, _Extents...>{}
       _CUDA_VSTD::integer_sequence<size_t, _OtherExtents...>{}))::value))
   _LIBCUDACXX_HIDE_FROM_ABI __MDSPAN_CONDITIONAL_EXPLICIT(
diff --git a/libcudacxx/include/cuda/std/__mdspan/macros.h b/libcudacxx/include/cuda/std/__mdspan/macros.h
index b9b56adae37..36895751bb1 100644
--- a/libcudacxx/include/cuda/std/__mdspan/macros.h
+++ b/libcudacxx/include/cuda/std/__mdspan/macros.h
@@ -256,11 +256,11 @@
         return __MDSPAN_PP_REMOVE_PARENS(BODY);                                    \
       }
 #  else
-#    define __MDSPAN_DEDUCE_RETURN_TYPE_SINGLE_LINE(SIGNATURE, BODY)                          \
-      auto __MDSPAN_PP_REMOVE_PARENS(                                                         \
-        SIGNATURE) -> _CUDA_VSTD::remove_cv_t<_CUDA_VSTD::remove_reference_t<decltype(BODY)>> \
-      {                                                                                       \
-        return __MDSPAN_PP_REMOVE_PARENS(BODY);                                               \
+#    define __MDSPAN_DEDUCE_RETURN_TYPE_SINGLE_LINE(SIGNATURE, BODY)               \
+      auto __MDSPAN_PP_REMOVE_PARENS(SIGNATURE)                                    \
+        -> _CUDA_VSTD::remove_cv_t<_CUDA_VSTD::remove_reference_t<decltype(BODY)>> \
+      {                                                                            \
+        return __MDSPAN_PP_REMOVE_PARENS(BODY);                                    \
       }
 #    define __MDSPAN_DEDUCE_DECLTYPE_AUTO_RETURN_TYPE_SINGLE_LINE(SIGNATURE, BODY) \
       auto __MDSPAN_PP_REMOVE_PARENS(SIGNATURE) -> decltype(BODY)                  \
diff --git a/libcudacxx/include/cuda/std/__mdspan/mdspan.h b/libcudacxx/include/cuda/std/__mdspan/mdspan.h
index bf7625a8093..07918917125 100644
--- a/libcudacxx/include/cuda/std/__mdspan/mdspan.h
+++ b/libcudacxx/include/cuda/std/__mdspan/mdspan.h
@@ -472,24 +472,25 @@ class mdspan
 #  if defined(__MDSPAN_USE_CLASS_TEMPLATE_ARGUMENT_DEDUCTION)
 _CCCL_TEMPLATE(class _ElementType, class... _SizeTypes)
 _CCCL_REQUIRES(__fold_and_v<_CCCL_TRAIT(is_integral, _SizeTypes)...> _CCCL_AND(sizeof...(_SizeTypes) > 0))
-_CCCL_HOST_DEVICE explicit mdspan(_ElementType*,
-                                  _SizeTypes...) -> mdspan<_ElementType, dextents<size_t, sizeof...(_SizeTypes)>>;
+_CCCL_HOST_DEVICE explicit mdspan(_ElementType*, _SizeTypes...)
+  -> mdspan<_ElementType, dextents<size_t, sizeof...(_SizeTypes)>>;
 
 _CCCL_TEMPLATE(class _Pointer)
 _CCCL_REQUIRES(_CCCL_TRAIT(is_pointer, _CUDA_VSTD::remove_reference_t<_Pointer>))
-_CCCL_HOST_DEVICE
-mdspan(_Pointer&&) -> mdspan<_CUDA_VSTD::remove_pointer_t<_CUDA_VSTD::remove_reference_t<_Pointer>>, extents<size_t>>;
+_CCCL_HOST_DEVICE mdspan(_Pointer&&)
+  -> mdspan<_CUDA_VSTD::remove_pointer_t<_CUDA_VSTD::remove_reference_t<_Pointer>>, extents<size_t>>;
 _CCCL_TEMPLATE(class _CArray)
 _CCCL_REQUIRES(_CCCL_TRAIT(is_array, _CArray) _CCCL_AND(rank_v<_CArray> == 1))
 _CCCL_HOST_DEVICE mdspan(_CArray&)
   -> mdspan<_CUDA_VSTD::remove_all_extents_t<_CArray>, extents<size_t, _CUDA_VSTD::extent_v<_CArray, 0>>>;
 
 template <class _ElementType, class _SizeType, size_t _Np>
-_CCCL_HOST_DEVICE mdspan(_ElementType*,
-                         const _CUDA_VSTD::array<_SizeType, _Np>&) -> mdspan<_ElementType, dextents<size_t, _Np>>;
+_CCCL_HOST_DEVICE mdspan(_ElementType*, const _CUDA_VSTD::array<_SizeType, _Np>&)
+  -> mdspan<_ElementType, dextents<size_t, _Np>>;
 
 template <class _ElementType, class _SizeType, size_t _Np>
-_CCCL_HOST_DEVICE mdspan(_ElementType*, _CUDA_VSTD::span<_SizeType, _Np>) -> mdspan<_ElementType, dextents<size_t, _Np>>;
+_CCCL_HOST_DEVICE mdspan(_ElementType*, _CUDA_VSTD::span<_SizeType, _Np>)
+  -> mdspan<_ElementType, dextents<size_t, _Np>>;
 
 // This one is necessary because all the constructors take `data_handle_type`s, not
 // `_ElementType*`s, and `data_handle_type` is taken from `accessor_type::data_handle_type`, which
diff --git a/libcudacxx/include/cuda/std/__memory/allocator_traits.h b/libcudacxx/include/cuda/std/__memory/allocator_traits.h
index 726b857be48..b553056ad99 100644
--- a/libcudacxx/include/cuda/std/__memory/allocator_traits.h
+++ b/libcudacxx/include/cuda/std/__memory/allocator_traits.h
@@ -289,8 +289,8 @@ _LIBCUDACXX_HIDE_FROM_ABI typename pointer_traits<_Pointer>::element_type* __to_
 }
 #else // ^^^ C++17 ^^^ / vvv C++20 vvv
 template <class _Pointer>
-_LIBCUDACXX_HIDE_FROM_ABI auto
-__to_raw_pointer(const _Pointer& __p) noexcept -> decltype(pointer_traits<_Pointer>::to_address(__p))
+_LIBCUDACXX_HIDE_FROM_ABI auto __to_raw_pointer(const _Pointer& __p) noexcept
+  -> decltype(pointer_traits<_Pointer>::to_address(__p))
 {
   return pointer_traits<_Pointer>::to_address(__p);
 }
diff --git a/libcudacxx/include/cuda/std/__memory/pointer_traits.h b/libcudacxx/include/cuda/std/__memory/pointer_traits.h
index 66e738b46df..d102dde7a74 100644
--- a/libcudacxx/include/cuda/std/__memory/pointer_traits.h
+++ b/libcudacxx/include/cuda/std/__memory/pointer_traits.h
@@ -238,8 +238,8 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr auto to_address(_Tp* __p) noexcept
 }
 
 template <class _Pointer>
-_LIBCUDACXX_HIDE_FROM_ABI constexpr auto
-to_address(const _Pointer& __p) noexcept -> decltype(_CUDA_VSTD::__to_address(__p))
+_LIBCUDACXX_HIDE_FROM_ABI constexpr auto to_address(const _Pointer& __p) noexcept
+  -> decltype(_CUDA_VSTD::__to_address(__p))
 {
   return _CUDA_VSTD::__to_address(__p);
 }
diff --git a/libcudacxx/include/cuda/std/__ranges/access.h b/libcudacxx/include/cuda/std/__ranges/access.h
index c6ba238ea41..7a5d2aade97 100644
--- a/libcudacxx/include/cuda/std/__ranges/access.h
+++ b/libcudacxx/include/cuda/std/__ranges/access.h
@@ -283,8 +283,9 @@ struct __fn
   _CCCL_EXEC_CHECK_DISABLE
   _CCCL_TEMPLATE(class _Tp)
   _CCCL_REQUIRES(is_rvalue_reference_v<_Tp&&>)
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const noexcept(noexcept(
-    _CUDA_VRANGES::end(static_cast<const _Tp&&>(__t)))) -> decltype(_CUDA_VRANGES::end(static_cast<const _Tp&&>(__t)))
+  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const
+    noexcept(noexcept(_CUDA_VRANGES::end(static_cast<const _Tp&&>(__t))))
+      -> decltype(_CUDA_VRANGES::end(static_cast<const _Tp&&>(__t)))
   {
     return _CUDA_VRANGES::end(static_cast<const _Tp&&>(__t));
   }
diff --git a/libcudacxx/include/cuda/std/__ranges/data.h b/libcudacxx/include/cuda/std/__ranges/data.h
index a9c5db6f085..7f05385b120 100644
--- a/libcudacxx/include/cuda/std/__ranges/data.h
+++ b/libcudacxx/include/cuda/std/__ranges/data.h
@@ -115,8 +115,9 @@ struct __fn
 
   _CCCL_TEMPLATE(class _Tp)
   _CCCL_REQUIRES(is_rvalue_reference_v<_Tp&&>)
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const noexcept(noexcept(
-    _CUDA_VRANGES::data(static_cast<const _Tp&&>(__t)))) -> decltype(_CUDA_VRANGES::data(static_cast<const _Tp&&>(__t)))
+  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const
+    noexcept(noexcept(_CUDA_VRANGES::data(static_cast<const _Tp&&>(__t))))
+      -> decltype(_CUDA_VRANGES::data(static_cast<const _Tp&&>(__t)))
   {
     return _CUDA_VRANGES::data(static_cast<const _Tp&&>(__t));
   }
diff --git a/libcudacxx/include/cuda/std/__ranges/rend.h b/libcudacxx/include/cuda/std/__ranges/rend.h
index 3f21c323eba..42a3c37054f 100644
--- a/libcudacxx/include/cuda/std/__ranges/rend.h
+++ b/libcudacxx/include/cuda/std/__ranges/rend.h
@@ -161,8 +161,9 @@ struct __fn
   _CCCL_EXEC_CHECK_DISABLE
   _CCCL_TEMPLATE(class _Tp)
   _CCCL_REQUIRES(is_rvalue_reference_v<_Tp&&>)
-  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const noexcept(noexcept(
-    _CUDA_VRANGES::rend(static_cast<const _Tp&&>(__t)))) -> decltype(_CUDA_VRANGES::rend(static_cast<const _Tp&&>(__t)))
+  _CCCL_NODISCARD _LIBCUDACXX_HIDE_FROM_ABI constexpr auto operator()(_Tp&& __t) const
+    noexcept(noexcept(_CUDA_VRANGES::rend(static_cast<const _Tp&&>(__t))))
+      -> decltype(_CUDA_VRANGES::rend(static_cast<const _Tp&&>(__t)))
   {
     return _CUDA_VRANGES::rend(static_cast<const _Tp&&>(__t));
   }
diff --git a/libcudacxx/include/cuda/std/__ranges/subrange.h b/libcudacxx/include/cuda/std/__ranges/subrange.h
index 484ce8c1f46..b1b2d9c81d0 100644
--- a/libcudacxx/include/cuda/std/__ranges/subrange.h
+++ b/libcudacxx/include/cuda/std/__ranges/subrange.h
@@ -399,8 +399,8 @@ _CCCL_HOST_DEVICE subrange(_Iter, _Sent) -> subrange<_Iter, _Sent>;
 
 _CCCL_TEMPLATE(class _Iter, class _Sent)
 _CCCL_REQUIRES(input_or_output_iterator<_Iter> _CCCL_AND sentinel_for<_Sent, _Iter>)
-_CCCL_HOST_DEVICE
-subrange(_Iter, _Sent, make_unsigned_t<iter_difference_t<_Iter>>) -> subrange<_Iter, _Sent, subrange_kind::sized>;
+_CCCL_HOST_DEVICE subrange(_Iter, _Sent, make_unsigned_t<iter_difference_t<_Iter>>)
+  -> subrange<_Iter, _Sent, subrange_kind::sized>;
 
 _CCCL_TEMPLATE(class _Range)
 _CCCL_REQUIRES(borrowed_range<_Range>)
diff --git a/libcudacxx/include/cuda/std/__thread/threading_support.h b/libcudacxx/include/cuda/std/__thread/threading_support.h
index d2ebacf576f..31968a27365 100644
--- a/libcudacxx/include/cuda/std/__thread/threading_support.h
+++ b/libcudacxx/include/cuda/std/__thread/threading_support.h
@@ -45,9 +45,9 @@ _LIBCUDACXX_BEGIN_NAMESPACE_STD
 #  define _LIBCUDACXX_POLLING_COUNT 16
 
 #  if defined(__aarch64__)
-#    define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile("yield" :::);)
+#    define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile("yield" :: :);)
 #  elif defined(__x86_64__)
-#    define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile("pause" :::);)
+#    define __LIBCUDACXX_ASM_THREAD_YIELD (asm volatile("pause" :: :);)
 #  else // ^^^ __x86_64__ ^^^ / vvv !__x86_64__ vvv
 #    define __LIBCUDACXX_ASM_THREAD_YIELD (;)
 #  endif // !__x86_64__
diff --git a/libcudacxx/include/cuda/std/__thread/threading_support_cuda.h b/libcudacxx/include/cuda/std/__thread/threading_support_cuda.h
index c46cf508dca..cd3c5f12e07 100644
--- a/libcudacxx/include/cuda/std/__thread/threading_support_cuda.h
+++ b/libcudacxx/include/cuda/std/__thread/threading_support_cuda.h
@@ -35,8 +35,7 @@ _LIBCUDACXX_HIDE_FROM_ABI void __cccl_thread_sleep_for(_CUDA_VSTD::chrono::nanos
 {
   NV_IF_TARGET(NV_IS_DEVICE,
                (auto const __step = __ns.count(); assert(__step < numeric_limits<unsigned>::max());
-                asm volatile("nanosleep.u32 %0;" ::"r"((unsigned) __step)
-                             :);))
+                asm volatile("nanosleep.u32 %0;" ::"r"((unsigned) __step) :);))
 }
 
 _LIBCUDACXX_END_NAMESPACE_STD
diff --git a/libcudacxx/include/cuda/std/__type_traits/type_list.h b/libcudacxx/include/cuda/std/__type_traits/type_list.h
index 66652922ceb..f2dc0fffe43 100644
--- a/libcudacxx/include/cuda/std/__type_traits/type_list.h
+++ b/libcudacxx/include/cuda/std/__type_traits/type_list.h
@@ -1020,8 +1020,8 @@ struct _CCCL_TYPE_VISIBILITY_DEFAULT __type_value_list : __type_list<integral_co
 namespace __detail
 {
 template <class _Ty, _Ty _Start, _Ty _Stride, _Ty... _Is>
-_LIBCUDACXX_HIDE_FROM_ABI auto
-__type_iota_fn(integer_sequence<_Ty, _Is...>*) -> __type_value_list<_Ty, _Ty(_Start + (_Is * _Stride))...>;
+_LIBCUDACXX_HIDE_FROM_ABI auto __type_iota_fn(integer_sequence<_Ty, _Is...>*)
+  -> __type_value_list<_Ty, _Ty(_Start + (_Is * _Stride))...>;
 } // namespace __detail
 
 //! \brief Return an \c __type_value_list of size \c _Size starting at \c _Start
diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/span b/libcudacxx/include/cuda/std/detail/libcxx/include/span
index 19fdea2f4ce..b4e8fb04d95 100644
--- a/libcudacxx/include/cuda/std/detail/libcxx/include/span
+++ b/libcudacxx/include/cuda/std/detail/libcxx/include/span
@@ -785,8 +785,8 @@ _CCCL_HOST_DEVICE span(const array<_Tp, _Sz>&) -> span<const _Tp, _Sz>;
 
 _CCCL_TEMPLATE(class _It, class _EndOrSize)
 _CCCL_REQUIRES(contiguous_iterator<_It>)
-_CCCL_HOST_DEVICE span(_It,
-                       _EndOrSize) -> span<remove_reference_t<iter_reference_t<_It>>, __maybe_static_ext<_EndOrSize>>;
+_CCCL_HOST_DEVICE span(_It, _EndOrSize)
+  -> span<remove_reference_t<iter_reference_t<_It>>, __maybe_static_ext<_EndOrSize>>;
 
 _CCCL_TEMPLATE(class _Range)
 _CCCL_REQUIRES(_CUDA_VRANGES::contiguous_range<_Range>)
diff --git a/libcudacxx/include/cuda/std/detail/libcxx/include/variant b/libcudacxx/include/cuda/std/detail/libcxx/include/variant
index 908c76ae1ed..0f6ec9d29fc 100644
--- a/libcudacxx/include/cuda/std/detail/libcxx/include/variant
+++ b/libcudacxx/include/cuda/std/detail/libcxx/include/variant
@@ -1059,8 +1059,9 @@ _LIBCUDACXX_VARIANT_MOVE_CONSTRUCTOR(_Trait::_TriviallyAvailable,
 _LIBCUDACXX_VARIANT_MOVE_CONSTRUCTOR(
   _Trait::_Available,
   _LIBCUDACXX_HIDE_FROM_ABI __move_constructor(__move_constructor&& __that) noexcept(
-    __all<_CCCL_TRAIT(is_nothrow_move_constructible, _Types)...>::value)
-  : __move_constructor(__valueless_t{}) { this->__generic_construct(*this, _CUDA_VSTD::move(__that)); });
+    __all<_CCCL_TRAIT(is_nothrow_move_constructible, _Types)...>::value) : __move_constructor(__valueless_t{}) {
+    this->__generic_construct(*this, _CUDA_VSTD::move(__that));
+  });
 
 _LIBCUDACXX_VARIANT_MOVE_CONSTRUCTOR(_Trait::_Unavailable, __move_constructor(__move_constructor&&) = delete;);
 
@@ -1091,8 +1092,10 @@ _LIBCUDACXX_VARIANT_COPY_CONSTRUCTOR(
   _Trait::_TriviallyAvailable, _CCCL_HIDE_FROM_ABI __copy_constructor(const __copy_constructor& __that) = default;);
 
 _LIBCUDACXX_VARIANT_COPY_CONSTRUCTOR(
-  _Trait::_Available, _LIBCUDACXX_HIDE_FROM_ABI __copy_constructor(const __copy_constructor& __that)
-  : __copy_constructor(__valueless_t{}) { this->__generic_construct(*this, __that); });
+  _Trait::_Available,
+  _LIBCUDACXX_HIDE_FROM_ABI __copy_constructor(const __copy_constructor& __that) : __copy_constructor(__valueless_t{}) {
+    this->__generic_construct(*this, __that);
+  });
 
 _LIBCUDACXX_VARIANT_COPY_CONSTRUCTOR(_Trait::_Unavailable, __copy_constructor(const __copy_constructor&) = delete;);
 
@@ -1395,8 +1398,8 @@ template <class _Tp, size_t>
 struct __overload_bool
 {
   template <class _Up, class _Ap = remove_cvref_t<_Up>>
-  _LIBCUDACXX_HIDE_FROM_ABI auto
-  operator()(bool, _Up&&) const -> enable_if_t<_CCCL_TRAIT(is_same, _Ap, bool), type_identity<_Tp>>;
+  _LIBCUDACXX_HIDE_FROM_ABI auto operator()(bool, _Up&&) const
+    -> enable_if_t<_CCCL_TRAIT(is_same, _Ap, bool), type_identity<_Tp>>;
 };
 
 template <size_t _Idx>
@@ -2078,8 +2081,9 @@ _LIBCUDACXX_HIDE_FROM_ABI constexpr _Rp visit(_Visitor&& __visitor, _Vs&&... __v
 }
 
 template <class... _Types>
-_LIBCUDACXX_HIDE_FROM_ABI auto swap(variant<_Types...>& __lhs, variant<_Types...>& __rhs) noexcept(
-  noexcept(__lhs.swap(__rhs))) -> decltype(__lhs.swap(__rhs))
+_LIBCUDACXX_HIDE_FROM_ABI auto
+swap(variant<_Types...>& __lhs, variant<_Types...>& __rhs) noexcept(noexcept(__lhs.swap(__rhs)))
+  -> decltype(__lhs.swap(__rhs))
 {
   return __lhs.swap(__rhs);
 }
diff --git a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_max.pass.cpp b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_max.pass.cpp
index 2c6c155d784..cc4d90341d4 100644
--- a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_max.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_max.pass.cpp
@@ -20,8 +20,7 @@
 #include "test_macros.h"
 
 template <class T,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           cuda::thread_scope ThreadScope,
           bool Signed = cuda::std::is_signed<T>::value>
 struct TestFn
diff --git a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_min.pass.cpp b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_min.pass.cpp
index fae515acc94..8bd6a5cd685 100644
--- a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_min.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_fetch_min.pass.cpp
@@ -20,8 +20,7 @@
 #include "test_macros.h"
 
 template <class T,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           cuda::thread_scope ThreadScope,
           bool Signed = cuda::std::is_signed<T>::value>
 struct TestFn
diff --git a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_helpers.h b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_helpers.h
index cc54eda725e..c2be3033275 100644
--- a/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_helpers.h
+++ b/libcudacxx/test/libcudacxx/cuda/atomics/atomic.ext/atomic_helpers.h
@@ -27,8 +27,7 @@ struct UserAtomicType
 };
 
 template <template <class, template <typename, typename> class, cuda::thread_scope> class TestFunctor,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           cuda::thread_scope Scope
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
           = cuda::thread_scope_system
@@ -66,8 +65,7 @@ struct TestEachIntegralType
 };
 
 template <template <class, template <typename, typename> class, cuda::thread_scope> class TestFunctor,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           cuda::thread_scope Scope
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
           = cuda::thread_scope_system
@@ -83,8 +81,7 @@ struct TestEachFloatingPointType
 };
 
 template <template <class, template <typename, typename> class, cuda::thread_scope> class TestFunctor,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           cuda::thread_scope Scope
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
           = cuda::thread_scope_system
diff --git a/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp b/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp
index 5527b2359af..334053be29c 100644
--- a/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_1d.pass.cpp
@@ -52,7 +52,8 @@ int main(int, char**)
       // Required by concurrent_agents_launch to know how many we're launching
       cuda_thread_count = 512; init_tensor_map(gmem_tensor, GMEM_DIMS, SMEM_DIMS);),
     NV_IS_DEVICE,
-    (for (auto smem_coord
-          : TEST_SMEM_COORDS) { test<smem_len>(smem_coord, SMEM_DIMS_DEV, GMEM_DIMS_DEV, gmem_tensor, gmem_len); }));
+    (for (auto smem_coord : TEST_SMEM_COORDS) {
+      test<smem_len>(smem_coord, SMEM_DIMS_DEV, GMEM_DIMS_DEV, gmem_tensor, gmem_len);
+    }));
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp b/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp
index 6df27820c79..21250677f10 100644
--- a/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_2d.pass.cpp
@@ -57,7 +57,8 @@ int main(int, char**)
       // Required by concurrent_agents_launch to know how many we're launching
       cuda_thread_count = 512; init_tensor_map(gmem_tensor, GMEM_DIMS, SMEM_DIMS);),
     NV_IS_DEVICE,
-    (for (auto smem_coord
-          : TEST_SMEM_COORDS) { test<smem_len>(smem_coord, SMEM_DIMS_DEV, GMEM_DIMS_DEV, gmem_tensor, gmem_len); }));
+    (for (auto smem_coord : TEST_SMEM_COORDS) {
+      test<smem_len>(smem_coord, SMEM_DIMS_DEV, GMEM_DIMS_DEV, gmem_tensor, gmem_len);
+    }));
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp b/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp
index f765b02d540..88973305198 100644
--- a/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_3d.pass.cpp
@@ -52,7 +52,8 @@ int main(int, char**)
       // Required by concurrent_agents_launch to know how many we're launching
       cuda_thread_count = 512; init_tensor_map(gmem_tensor, GMEM_DIMS, SMEM_DIMS);),
     NV_IS_DEVICE,
-    (for (auto smem_coord
-          : TEST_SMEM_COORDS) { test<smem_len>(smem_coord, SMEM_DIMS_DEV, GMEM_DIMS_DEV, gmem_tensor, gmem_len); }));
+    (for (auto smem_coord : TEST_SMEM_COORDS) {
+      test<smem_len>(smem_coord, SMEM_DIMS_DEV, GMEM_DIMS_DEV, gmem_tensor, gmem_len);
+    }));
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp b/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp
index fa46fde4d43..c40f5784da2 100644
--- a/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_4d.pass.cpp
@@ -53,7 +53,8 @@ int main(int, char**)
       // Required by concurrent_agents_launch to know how many we're launching
       cuda_thread_count = 512; init_tensor_map(gmem_tensor, GMEM_DIMS, SMEM_DIMS);),
     NV_IS_DEVICE,
-    (for (auto smem_coord
-          : TEST_SMEM_COORDS) { test<smem_len>(smem_coord, SMEM_DIMS_DEV, GMEM_DIMS_DEV, gmem_tensor, gmem_len); }));
+    (for (auto smem_coord : TEST_SMEM_COORDS) {
+      test<smem_len>(smem_coord, SMEM_DIMS_DEV, GMEM_DIMS_DEV, gmem_tensor, gmem_len);
+    }));
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp b/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp
index 557a1277250..71089666b9c 100644
--- a/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/barrier/cp_async_bulk_tensor_5d.pass.cpp
@@ -53,7 +53,8 @@ int main(int, char**)
       // Required by concurrent_agents_launch to know how many we're launching
       cuda_thread_count = 512; init_tensor_map(gmem_tensor, GMEM_DIMS, SMEM_DIMS);),
     NV_IS_DEVICE,
-    (for (auto smem_coord
-          : TEST_SMEM_COORDS) { test<smem_len>(smem_coord, SMEM_DIMS_DEV, GMEM_DIMS_DEV, gmem_tensor, gmem_len); }));
+    (for (auto smem_coord : TEST_SMEM_COORDS) {
+      test<smem_len>(smem_coord, SMEM_DIMS_DEV, GMEM_DIMS_DEV, gmem_tensor, gmem_len);
+    }));
   return 0;
 }
diff --git a/libcudacxx/test/libcudacxx/cuda/memcpy_async.h b/libcudacxx/test/libcudacxx/cuda/memcpy_async.h
index 4d0504ab66d..7d75bf4b842 100644
--- a/libcudacxx/test/libcudacxx/cuda/memcpy_async.h
+++ b/libcudacxx/test/libcudacxx/cuda/memcpy_async.h
@@ -16,12 +16,9 @@
 #include "large_type.h"
 
 template <class T,
-          template <typename, typename>
-          class SourceSelector,
-          template <typename, typename>
-          class DestSelector,
-          template <typename, typename>
-          class BarrierSelector,
+          template <typename, typename> class SourceSelector,
+          template <typename, typename> class DestSelector,
+          template <typename, typename> class BarrierSelector,
           cuda::thread_scope BarrierScope,
           typename... CompletionF>
 __host__ __device__ __noinline__ void test_fully_specialized()
@@ -60,12 +57,9 @@ struct completion
 };
 
 template <class T,
-          template <typename, typename>
-          class SourceSelector,
-          template <typename, typename>
-          class DestSelector,
-          template <typename, typename>
-          class BarrierSelector>
+          template <typename, typename> class SourceSelector,
+          template <typename, typename> class DestSelector,
+          template <typename, typename> class BarrierSelector>
 __host__ __device__ __noinline__ void test_select_scope()
 {
   test_fully_specialized<T, SourceSelector, DestSelector, BarrierSelector, cuda::thread_scope_system>();
diff --git a/libcudacxx/test/libcudacxx/cuda/memcpy_async/group_memcpy_async.h b/libcudacxx/test/libcudacxx/cuda/memcpy_async/group_memcpy_async.h
index 574ed5ceb80..8b7fba78404 100644
--- a/libcudacxx/test/libcudacxx/cuda/memcpy_async/group_memcpy_async.h
+++ b/libcudacxx/test/libcudacxx/cuda/memcpy_async/group_memcpy_async.h
@@ -71,12 +71,9 @@ static_assert(std::is_trivially_copy_constructible<storage<uint64_t>>::value, ""
 #endif
 
 template <class T,
-          template <typename, typename>
-          class SourceSelector,
-          template <typename, typename>
-          class DestSelector,
-          template <typename, typename>
-          class BarrierSelector,
+          template <typename, typename> class SourceSelector,
+          template <typename, typename> class DestSelector,
+          template <typename, typename> class BarrierSelector,
           cuda::thread_scope BarrierScope,
           typename... CompletionF>
 __device__ __noinline__ void test_fully_specialized()
@@ -123,12 +120,9 @@ struct completion
 };
 
 template <class T,
-          template <typename, typename>
-          class SourceSelector,
-          template <typename, typename>
-          class DestSelector,
-          template <typename, typename>
-          class BarrierSelector>
+          template <typename, typename> class SourceSelector,
+          template <typename, typename> class DestSelector,
+          template <typename, typename> class BarrierSelector>
 __host__ __device__ __noinline__ void test_select_scope()
 {
   test_fully_specialized<T, SourceSelector, DestSelector, BarrierSelector, cuda::thread_scope_system>();
diff --git a/libcudacxx/test/libcudacxx/cuda/pipeline_memcpy_async_producer_consumer.pass.cpp b/libcudacxx/test/libcudacxx/cuda/pipeline_memcpy_async_producer_consumer.pass.cpp
index 14ae81da2da..6bedfb5bb8f 100644
--- a/libcudacxx/test/libcudacxx/cuda/pipeline_memcpy_async_producer_consumer.pass.cpp
+++ b/libcudacxx/test/libcudacxx/cuda/pipeline_memcpy_async_producer_consumer.pass.cpp
@@ -51,8 +51,7 @@ __device__ __noinline__ void test_consumer(T* dest, T* source, cuda::pipeline<Pi
 }
 
 template <class T,
-          template <typename, typename>
-          class PipelineSelector,
+          template <typename, typename> class PipelineSelector,
           cuda::thread_scope PipelineScope,
           uint8_t PipelineStages>
 __device__ __noinline__ void test_fully_specialized()
diff --git a/libcudacxx/test/libcudacxx/cuda/pipeline_memcpy_async_thread_scope_generic.h b/libcudacxx/test/libcudacxx/cuda/pipeline_memcpy_async_thread_scope_generic.h
index 787c6bd050e..86d9fa0f180 100644
--- a/libcudacxx/test/libcudacxx/cuda/pipeline_memcpy_async_thread_scope_generic.h
+++ b/libcudacxx/test/libcudacxx/cuda/pipeline_memcpy_async_thread_scope_generic.h
@@ -27,12 +27,9 @@ __host__ __device__ cuda::pipeline<scope> get_pipeline(cuda::pipeline_shared_sta
 
 template <cuda::thread_scope Scope,
           class T,
-          template <typename, typename>
-          class SourceSelector,
-          template <typename, typename>
-          class DestSelector,
-          template <typename, typename>
-          class PipelineSelector,
+          template <typename, typename> class SourceSelector,
+          template <typename, typename> class DestSelector,
+          template <typename, typename> class PipelineSelector,
           uint8_t PipelineStages>
 __host__ __device__ __noinline__ void test_fully_specialized()
 {
@@ -86,10 +83,8 @@ __host__ __device__ __noinline__ void test_fully_specialized()
 
 template <cuda::thread_scope Scope,
           class T,
-          template <typename, typename>
-          class SourceSelector,
-          template <typename, typename>
-          class DestSelector>
+          template <typename, typename> class SourceSelector,
+          template <typename, typename> class DestSelector>
 __host__ __device__ __noinline__ void test_select_pipeline()
 {
   constexpr uint8_t stages_count = 2;
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/bool.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/bool.pass.cpp
index 888c473feac..a36fd0ada46 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/bool.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/bool.pass.cpp
@@ -64,8 +64,7 @@
 
 template <template <cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ __noinline__ void do_test()
 {
   {
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point.pass.cpp
index 4f457985ed5..4af01d2a60b 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point.pass.cpp
@@ -148,8 +148,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<float, Scope>, float, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref.pass.cpp
index 82dba2c3302..e74a5b8bcb9 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref.pass.cpp
@@ -144,8 +144,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<float, Scope>, float, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref_constness.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref_constness.pass.cpp
index 87953269665..e9959955657 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref_constness.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/floating_point_ref_constness.pass.cpp
@@ -84,8 +84,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<float, Scope>, float, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/1b_integral_cuda.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/1b_integral_cuda.pass.cpp
index 5aa401c3f75..267afbd1034 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/1b_integral_cuda.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/1b_integral_cuda.pass.cpp
@@ -98,8 +98,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<char, Scope>, char, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/1b_integral_std.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/1b_integral_std.pass.cpp
index c3e809be2c0..617fb4481b9 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/1b_integral_std.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/1b_integral_std.pass.cpp
@@ -98,8 +98,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<char, Scope>, char, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/2b_integral_cuda.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/2b_integral_cuda.pass.cpp
index faf48d1d970..9efb262fd46 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/2b_integral_cuda.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/2b_integral_cuda.pass.cpp
@@ -98,8 +98,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<short, Scope>, short, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/2b_integral_std.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/2b_integral_std.pass.cpp
index 7fa15876db7..f9697062d5e 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/2b_integral_std.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/2b_integral_std.pass.cpp
@@ -98,8 +98,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<short, Scope>, short, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/4b_integral_cuda.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/4b_integral_cuda.pass.cpp
index 7df29b0dc04..1af63517f50 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/4b_integral_cuda.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/4b_integral_cuda.pass.cpp
@@ -98,8 +98,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<int, Scope>, int, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/4b_integral_std.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/4b_integral_std.pass.cpp
index 9c85979457d..cb2efeaab43 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/4b_integral_std.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/4b_integral_std.pass.cpp
@@ -98,8 +98,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<int, Scope>, int, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/8b_integral_cuda.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/8b_integral_cuda.pass.cpp
index 0ba82452f85..b0a90e5fccc 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/8b_integral_cuda.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/8b_integral_cuda.pass.cpp
@@ -98,8 +98,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<long, Scope>, long, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/8b_integral_std.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/8b_integral_std.pass.cpp
index 8263f359e6f..9d71b47735a 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/8b_integral_std.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/8b_integral_std.pass.cpp
@@ -98,8 +98,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<long, Scope>, long, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/integral_ref.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/integral_ref.pass.cpp
index b2da9693495..1193b49bede 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/integral_ref.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/integral_ref.pass.cpp
@@ -164,8 +164,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<int, Scope>, int, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/integral_ref_constness.pass.cpp b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/integral_ref_constness.pass.cpp
index 426551da2f3..7125e74b316 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/integral_ref_constness.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.generic/integral/integral_ref_constness.pass.cpp
@@ -164,8 +164,7 @@ __host__ __device__ __noinline__ void test()
 
 template <template <typename, cuda::thread_scope> class Atomic,
           cuda::thread_scope Scope,
-          template <typename, typename>
-          class Selector>
+          template <typename, typename> class Selector>
 __host__ __device__ void test_for_all_types()
 {
   test<Atomic<int, Scope>, int, Selector>();
diff --git a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h
index 1344c1aa2fb..9c0a8ef670c 100644
--- a/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h
+++ b/libcudacxx/test/libcudacxx/std/atomics/atomics.types.operations/atomics.types.operations.req/atomic_helpers.h
@@ -28,8 +28,7 @@ struct UserAtomicType
 };
 
 template <template <class, template <typename, typename> class, cuda::thread_scope> class TestFunctor,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           cuda::thread_scope Scope
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
           = cuda::thread_scope_system
@@ -67,8 +66,7 @@ struct TestEachIntegralType
 };
 
 template <template <class, template <typename, typename> class, cuda::thread_scope> class TestFunctor,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           cuda::thread_scope Scope
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
           = cuda::thread_scope_system
@@ -84,8 +82,7 @@ struct TestEachFloatingPointType
 };
 
 template <template <class, template <typename, typename> class, cuda::thread_scope> class TestFunctor,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           cuda::thread_scope Scope
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
           = cuda::thread_scope_system
@@ -104,8 +101,7 @@ struct TestEachAtomicType
 };
 
 template <template <class, template <typename, typename> class, cuda::thread_scope> class TestFunctor,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           cuda::thread_scope Scope
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
           = cuda::thread_scope_system
@@ -132,8 +128,7 @@ struct TestEachIntegralRefType
 };
 
 template <template <class, template <typename, typename> class, cuda::thread_scope> class TestFunctor,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           cuda::thread_scope Scope
 #if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 600
           = cuda::thread_scope_system
diff --git a/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concept.equalitycomparable/equality_comparable.compile.pass.cpp b/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concept.equalitycomparable/equality_comparable.compile.pass.cpp
index a2650b5bc68..3171d716ed0 100644
--- a/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concept.equalitycomparable/equality_comparable.compile.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concept.equalitycomparable/equality_comparable.compile.pass.cpp
@@ -51,25 +51,25 @@ static_assert(equality_comparable<int S::*>, "");
 static_assert(equality_comparable<int (S::*)()>, "");
 static_assert(equality_comparable<int (S::*)() noexcept>, "");
 static_assert(equality_comparable<int (S::*)() &>, "");
-static_assert(equality_comparable<int (S::*)() & noexcept>, "");
+static_assert(equality_comparable < int(S::*)() & noexcept >, "");
 static_assert(equality_comparable<int (S::*)() &&>, "");
 static_assert(equality_comparable < int(S::*)() && noexcept >, "");
 static_assert(equality_comparable<int (S::*)() const>, "");
 static_assert(equality_comparable<int (S::*)() const noexcept>, "");
 static_assert(equality_comparable<int (S::*)() const&>, "");
-static_assert(equality_comparable<int (S::*)() const & noexcept>, "");
+static_assert(equality_comparable < int(S::*)() const& noexcept >, "");
 static_assert(equality_comparable<int (S::*)() const&&>, "");
 static_assert(equality_comparable < int(S::*)() const&& noexcept >, "");
 static_assert(equality_comparable<int (S::*)() volatile>, "");
 static_assert(equality_comparable<int (S::*)() volatile noexcept>, "");
 static_assert(equality_comparable<int (S::*)() volatile&>, "");
-static_assert(equality_comparable<int (S::*)() volatile & noexcept>, "");
+static_assert(equality_comparable < int(S::*)() volatile & noexcept >, "");
 static_assert(equality_comparable<int (S::*)() volatile&&>, "");
 static_assert(equality_comparable < int(S::*)() volatile && noexcept >, "");
 static_assert(equality_comparable<int (S::*)() const volatile>, "");
 static_assert(equality_comparable<int (S::*)() const volatile noexcept>, "");
 static_assert(equality_comparable<int (S::*)() const volatile&>, "");
-static_assert(equality_comparable<int (S::*)() const volatile & noexcept>, "");
+static_assert(equality_comparable < int(S::*)() const volatile& noexcept >, "");
 static_assert(equality_comparable<int (S::*)() const volatile&&>, "");
 static_assert(equality_comparable < int(S::*)() const volatile&& noexcept >, "");
 
diff --git a/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concept.equalitycomparable/equality_comparable_with.compile.pass.cpp b/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concept.equalitycomparable/equality_comparable_with.compile.pass.cpp
index f2299543519..83561c30db7 100644
--- a/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concept.equalitycomparable/equality_comparable_with.compile.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concept.equalitycomparable/equality_comparable_with.compile.pass.cpp
@@ -79,13 +79,13 @@ static_assert(!check_equality_comparable_with<int, int (S::*)() volatile & noexc
 static_assert(!check_equality_comparable_with<int, int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int*, int*>(), "");
 static_assert(check_equality_comparable_with<int*, int[5]>(), "");
@@ -108,13 +108,13 @@ static_assert(!check_equality_comparable_with<int*, int (S::*)() volatile & noex
 static_assert(!check_equality_comparable_with<int*, int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int*, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int*, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int*, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int*, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int*, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int*, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int*, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int*, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int*, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int*, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int*, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int*, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int*, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int[5], int[5]>(), "");
 static_assert(!check_equality_comparable_with<int[5], int (*)()>(), "");
@@ -136,13 +136,13 @@ static_assert(!check_equality_comparable_with<int[5], int (S::*)() volatile & no
 static_assert(!check_equality_comparable_with<int[5], int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int[5], int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int[5], int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int[5], int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int[5], int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int[5], int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int[5], int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int[5], int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int[5], int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int[5], int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int[5], int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int[5], int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int[5], int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int[5], int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (*)(), int (*)()>(), "");
 static_assert(check_equality_comparable_with<int (*)(), int (&)()>(), "");
@@ -163,13 +163,13 @@ static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() volatile &
 static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int (*)(), int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int (*)(), int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int (*)(), int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int (*)(), int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (*)(), int (S::*)() const volatile && noexcept>(), "");
 
 #ifdef INVESTIGATE_COMPILER_BUG
 static_assert(check_equality_comparable_with<int (&)(), int (&)()>(), "");
@@ -191,13 +191,13 @@ static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() volatile &
 static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int (&)(), int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int (&)(), int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int (&)(), int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int (&)(), int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (&)(), int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)(), int (S::*)()>(), "");
 #ifndef TEST_COMPILER_BROKEN_SMF_NOEXCEPT
@@ -218,13 +218,13 @@ static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() volatil
 static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)(), int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)(), int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)(), int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)(), int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)(), int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() const>(), "");
@@ -242,13 +242,13 @@ static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)(
 static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() noexcept, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() noexcept, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() const, int (S::*)() const>(), "");
 #ifndef TEST_COMPILER_BROKEN_SMF_NOEXCEPT
@@ -267,13 +267,13 @@ static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() v
 static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() const noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() volatile>(), "");
@@ -290,14 +290,13 @@ static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (
 static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() const volatile & noexcept>(),
               "");
 static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const noexcept, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const noexcept,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(!check_equality_comparable_with<int (S::*)() const noexcept, int (S::*)() const volatile && noexcept>(),
               "");
 
 static_assert(check_equality_comparable_with<int (S::*)() volatile, int (S::*)() volatile>(), "");
@@ -315,13 +314,13 @@ static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)(
 static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() volatile noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() const volatile>(), "");
@@ -337,17 +336,14 @@ static_assert(!check_equality_comparable_with<int (S::*)() volatile noexcept, in
 static_assert(!check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() const volatile & noexcept>(),
               "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile noexcept,
-              int (S::*)() volatile&& noexcept > (),
-              "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile noexcept,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
+static_assert(
+  !check_equality_comparable_with<int (S::*)() volatile noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() const volatile, int (S::*)() const volatile>(), "");
 #ifndef TEST_COMPILER_BROKEN_SMF_NOEXCEPT
@@ -363,14 +359,13 @@ static_assert(!check_equality_comparable_with<int (S::*)() const volatile, int (
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile, int (S::*)() const volatile & noexcept>(),
               "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile, int (S::*)() const volatile && noexcept>(),
               "");
 
 static_assert(
@@ -388,20 +383,17 @@ static_assert(!check_equality_comparable_with<int (S::*)() const volatile noexce
 static_assert(
   !check_equality_comparable_with<int (S::*)() const volatile noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile noexcept, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile noexcept,
-              int (S::*)() const&& noexcept > (),
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile noexcept, int (S::*)() const && noexcept>(),
               "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile noexcept,
-              int (S::*)() volatile&& noexcept > (),
-              "");
+static_assert(
+  !check_equality_comparable_with<int (S::*)() const volatile noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile noexcept, int (S::*)() const volatile&&>(),
               "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile noexcept,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
+static_assert(
+  !check_equality_comparable_with<int (S::*)() const volatile noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() &, int (S::*)() &>(), "");
 #ifndef TEST_COMPILER_BROKEN_SMF_NOEXCEPT
@@ -414,13 +406,13 @@ static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() volat
 static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() &, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() &, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() &, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() &, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() &, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() const&>(), "");
@@ -430,13 +422,13 @@ static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*
 static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() & noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() & noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() & noexcept, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() & noexcept, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() & noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() const&, int (S::*)() const&>(), "");
 #ifndef TEST_COMPILER_BROKEN_SMF_NOEXCEPT
@@ -447,13 +439,13 @@ static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)()
 static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const&, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const&, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const&, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const&, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() const & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() volatile&>(), "");
@@ -462,14 +454,13 @@ static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int
 static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() const volatile & noexcept>(),
               "");
 static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const& noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const& noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const& noexcept, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const& noexcept,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(!check_equality_comparable_with<int (S::*)() const & noexcept, int (S::*)() const volatile && noexcept>(),
               "");
 
 static_assert(check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() volatile&>(), "");
@@ -479,30 +470,28 @@ static_assert(check_equality_comparable_with<int (S::*)() volatile&, int (S::*)(
 static_assert(!check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() const volatile&>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile&, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile&, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile&, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile&, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() const volatile&>(), "");
 static_assert(
   !check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile & noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile & noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile & noexcept,
-              int (S::*)() volatile&& noexcept > (),
+static_assert(!check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() volatile && noexcept>(),
               "");
 static_assert(!check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile & noexcept,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
+static_assert(
+  !check_equality_comparable_with<int (S::*)() volatile & noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() const volatile&, int (S::*)() const volatile&>(), "");
 #ifndef TEST_COMPILER_BROKEN_SMF_NOEXCEPT
@@ -510,98 +499,87 @@ static_assert(check_equality_comparable_with<int (S::*)() const volatile&, int (
               "");
 #endif // TEST_COMPILER_BROKEN_SMF_NOEXCEPT
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile&, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile&, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile&, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile&, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile&, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile&, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile&, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile&,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile&, int (S::*)() const volatile && noexcept>(),
               "");
 
 static_assert(
   check_equality_comparable_with<int (S::*)() const volatile & noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile & noexcept, int (S::*)() &&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile& noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile & noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile & noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile& noexcept,
-              int (S::*)() const&& noexcept > (),
+static_assert(!check_equality_comparable_with<int (S::*)() const volatile & noexcept, int (S::*)() const && noexcept>(),
               "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile & noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile& noexcept,
-              int (S::*)() volatile&& noexcept > (),
-              "");
+static_assert(
+  !check_equality_comparable_with<int (S::*)() const volatile & noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const volatile & noexcept, int (S::*)() const volatile&&>(),
               "");
-static_assert(!check_equality_comparable_with < int(S::*)() const volatile& noexcept,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
+static_assert(
+  !check_equality_comparable_with<int (S::*)() const volatile & noexcept, int (S::*)() const volatile && noexcept>(),
+  "");
 
 static_assert(check_equality_comparable_with<int (S::*)() &&, int (S::*)() &&>(), "");
 #ifndef TEST_COMPILER_BROKEN_SMF_NOEXCEPT
-static_assert(check_equality_comparable_with < int(S::*)() &&, int (S::*)() && noexcept > (), "");
+static_assert(check_equality_comparable_with<int (S::*)() &&, int (S::*)() && noexcept>(), "");
 #endif // TEST_COMPILER_BROKEN_SMF_NOEXCEPT
 static_assert(!check_equality_comparable_with<int (S::*)() &&, int (S::*)() const&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() &&, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() &&, int (S::*)() const && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() &&, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() &&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() &&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() &&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() &&, int (S::*)() const volatile&& noexcept > (), "");
-
-static_assert(check_equality_comparable_with < int(S::*)() && noexcept, int (S::*)() && noexcept > (), "");
-static_assert(!check_equality_comparable_with < int(S::*)() && noexcept, int (S::*)() const&& > (), "");
-static_assert(!check_equality_comparable_with < int(S::*)() && noexcept, int (S::*)() const&& noexcept > (), "");
-static_assert(!check_equality_comparable_with < int(S::*)() && noexcept, int (S::*)() volatile&& > (), "");
-static_assert(!check_equality_comparable_with < int(S::*)() && noexcept, int (S::*)() volatile&& noexcept > (), "");
-static_assert(!check_equality_comparable_with < int(S::*)() && noexcept, int (S::*)() const volatile&& > (), "");
-static_assert(!check_equality_comparable_with < int(S::*)() && noexcept,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
+static_assert(!check_equality_comparable_with<int (S::*)() &&, int (S::*)() const volatile && noexcept>(), "");
+
+static_assert(check_equality_comparable_with<int (S::*)() && noexcept, int (S::*)() && noexcept>(), "");
+static_assert(!check_equality_comparable_with<int (S::*)() && noexcept, int (S::*)() const&&>(), "");
+static_assert(!check_equality_comparable_with<int (S::*)() && noexcept, int (S::*)() const && noexcept>(), "");
+static_assert(!check_equality_comparable_with<int (S::*)() && noexcept, int (S::*)() volatile&&>(), "");
+static_assert(!check_equality_comparable_with<int (S::*)() && noexcept, int (S::*)() volatile && noexcept>(), "");
+static_assert(!check_equality_comparable_with<int (S::*)() && noexcept, int (S::*)() const volatile&&>(), "");
+static_assert(!check_equality_comparable_with<int (S::*)() && noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() const&&, int (S::*)() const&&>(), "");
 #ifndef TEST_COMPILER_BROKEN_SMF_NOEXCEPT
-static_assert(check_equality_comparable_with < int(S::*)() const&&, int (S::*)() const&& noexcept > (), "");
+static_assert(check_equality_comparable_with<int (S::*)() const&&, int (S::*)() const && noexcept>(), "");
 #endif // TEST_COMPILER_BROKEN_SMF_NOEXCEPT
 static_assert(!check_equality_comparable_with<int (S::*)() const&&, int (S::*)() volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const&&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const&&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_equality_comparable_with<int (S::*)() const&&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const&&, int (S::*)() const volatile&& noexcept > (), "");
-
-static_assert(check_equality_comparable_with < int(S::*)() const&& noexcept, int (S::*)() const&& noexcept > (), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const&& noexcept, int (S::*)() volatile&& > (), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const&& noexcept, int (S::*)() volatile&& noexcept > (), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const&& noexcept, int (S::*)() const volatile&& > (), "");
-static_assert(!check_equality_comparable_with < int(S::*)() const&& noexcept,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
+static_assert(!check_equality_comparable_with<int (S::*)() const&&, int (S::*)() const volatile && noexcept>(), "");
+
+static_assert(check_equality_comparable_with<int (S::*)() const && noexcept, int (S::*)() const && noexcept>(), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const && noexcept, int (S::*)() volatile&&>(), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const && noexcept, int (S::*)() volatile && noexcept>(), "");
+static_assert(!check_equality_comparable_with<int (S::*)() const && noexcept, int (S::*)() const volatile&&>(), "");
+static_assert(
+  !check_equality_comparable_with<int (S::*)() const && noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() volatile&&, int (S::*)() volatile&&>(), "");
 #ifndef TEST_COMPILER_BROKEN_SMF_NOEXCEPT
-static_assert(check_equality_comparable_with < int(S::*)() volatile&&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(check_equality_comparable_with<int (S::*)() volatile&&, int (S::*)() volatile && noexcept>(), "");
 #endif // TEST_COMPILER_BROKEN_SMF_NOEXCEPT
 static_assert(!check_equality_comparable_with<int (S::*)() volatile&&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile&&, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile&&, int (S::*)() const volatile && noexcept>(), "");
 
-static_assert(check_equality_comparable_with < int(S::*)() volatile && noexcept,
-              int (S::*)() volatile&& noexcept > (),
-              "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile && noexcept,
-              int (S::*)() const volatile&& > (),
-              "");
-static_assert(!check_equality_comparable_with < int(S::*)() volatile && noexcept,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(check_equality_comparable_with<int (S::*)() volatile && noexcept, int (S::*)() volatile && noexcept>(),
               "");
+static_assert(!check_equality_comparable_with<int (S::*)() volatile && noexcept, int (S::*)() const volatile&&>(), "");
+static_assert(
+  !check_equality_comparable_with<int (S::*)() volatile && noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_equality_comparable_with<int (S::*)() const volatile&&, int (S::*)() const volatile&&>(), "");
 #ifndef TEST_COMPILER_BROKEN_SMF_NOEXCEPT
-static_assert(check_equality_comparable_with < int(S::*)() const volatile&&,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(check_equality_comparable_with<int (S::*)() const volatile&&, int (S::*)() const volatile && noexcept>(),
               "");
 #endif // TEST_COMPILER_BROKEN_SMF_NOEXCEPT
-static_assert(check_equality_comparable_with < int(S::*)() const volatile&& noexcept,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
+static_assert(
+  check_equality_comparable_with<int (S::*)() const volatile && noexcept, int (S::*)() const volatile && noexcept>(),
+  "");
 
 static_assert(!check_equality_comparable_with<nullptr_t, int>(), "");
 static_assert(check_equality_comparable_with<nullptr_t, int*>(), "");
@@ -627,13 +605,13 @@ static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() volatile &
 static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() const volatile&>(), "");
 static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() const volatile & noexcept>(), "");
 static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() &&>(), "");
-static_assert(check_equality_comparable_with < nullptr_t, int (S::*)() && noexcept > (), "");
+static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() && noexcept>(), "");
 static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() const&&>(), "");
-static_assert(check_equality_comparable_with < nullptr_t, int (S::*)() const&& noexcept > (), "");
+static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() const && noexcept>(), "");
 static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() volatile&&>(), "");
-static_assert(check_equality_comparable_with < nullptr_t, int (S::*)() volatile&& noexcept > (), "");
+static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() volatile && noexcept>(), "");
 static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() const volatile&&>(), "");
-static_assert(check_equality_comparable_with < nullptr_t, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(check_equality_comparable_with<nullptr_t, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!equality_comparable_with<void, int>, "");
 static_assert(!equality_comparable_with<void, int*>, "");
@@ -651,13 +629,13 @@ static_assert(!equality_comparable_with<void, int (S::*)() volatile noexcept>, "
 static_assert(!equality_comparable_with<void, int (S::*)() const volatile>, "");
 static_assert(!equality_comparable_with<void, int (S::*)() const volatile noexcept>, "");
 static_assert(!equality_comparable_with<void, int (S::*)() &>, "");
-static_assert(!equality_comparable_with<void, int (S::*)() & noexcept>, "");
+static_assert(!equality_comparable_with < void, int (S::*)() & noexcept >, "");
 static_assert(!equality_comparable_with<void, int (S::*)() const&>, "");
-static_assert(!equality_comparable_with<void, int (S::*)() const & noexcept>, "");
+static_assert(!equality_comparable_with < void, int (S::*)() const& noexcept >, "");
 static_assert(!equality_comparable_with<void, int (S::*)() volatile&>, "");
-static_assert(!equality_comparable_with<void, int (S::*)() volatile & noexcept>, "");
+static_assert(!equality_comparable_with < void, int (S::*)() volatile& noexcept >, "");
 static_assert(!equality_comparable_with<void, int (S::*)() const volatile&>, "");
-static_assert(!equality_comparable_with<void, int (S::*)() const volatile & noexcept>, "");
+static_assert(!equality_comparable_with < void, int (S::*)() const volatile& noexcept >, "");
 static_assert(!equality_comparable_with<void, int (S::*)() &&>, "");
 static_assert(!equality_comparable_with < void, int (S::*)() && noexcept >, "");
 static_assert(!equality_comparable_with<void, int (S::*)() const&&>, "");
diff --git a/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concepts.totallyordered/totally_ordered.pass.cpp b/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concepts.totallyordered/totally_ordered.pass.cpp
index a76fb02b07a..a5d7fda4f85 100644
--- a/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concepts.totallyordered/totally_ordered.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concepts.totallyordered/totally_ordered.pass.cpp
@@ -88,25 +88,25 @@ static_assert(!totally_ordered<int S::*>, "");
 static_assert(!totally_ordered<int (S::*)()>, "");
 static_assert(!totally_ordered<int (S::*)() noexcept>, "");
 static_assert(!totally_ordered<int (S::*)() &>, "");
-static_assert(!totally_ordered<int (S::*)() & noexcept>, "");
+static_assert(!totally_ordered < int(S::*)() & noexcept >, "");
 static_assert(!totally_ordered<int (S::*)() &&>, "");
 static_assert(!totally_ordered < int(S::*)() && noexcept >, "");
 static_assert(!totally_ordered<int (S::*)() const>, "");
 static_assert(!totally_ordered<int (S::*)() const noexcept>, "");
 static_assert(!totally_ordered<int (S::*)() const&>, "");
-static_assert(!totally_ordered<int (S::*)() const & noexcept>, "");
+static_assert(!totally_ordered < int(S::*)() const& noexcept >, "");
 static_assert(!totally_ordered<int (S::*)() const&&>, "");
 static_assert(!totally_ordered < int(S::*)() const&& noexcept >, "");
 static_assert(!totally_ordered<int (S::*)() volatile>, "");
 static_assert(!totally_ordered<int (S::*)() volatile noexcept>, "");
 static_assert(!totally_ordered<int (S::*)() volatile&>, "");
-static_assert(!totally_ordered<int (S::*)() volatile & noexcept>, "");
+static_assert(!totally_ordered < int(S::*)() volatile & noexcept >, "");
 static_assert(!totally_ordered<int (S::*)() volatile&&>, "");
 static_assert(!totally_ordered < int(S::*)() volatile && noexcept >, "");
 static_assert(!totally_ordered<int (S::*)() const volatile>, "");
 static_assert(!totally_ordered<int (S::*)() const volatile noexcept>, "");
 static_assert(!totally_ordered<int (S::*)() const volatile&>, "");
-static_assert(!totally_ordered<int (S::*)() const volatile & noexcept>, "");
+static_assert(!totally_ordered < int(S::*)() const volatile& noexcept >, "");
 static_assert(!totally_ordered<int (S::*)() const volatile&&>, "");
 static_assert(!totally_ordered < int(S::*)() const volatile&& noexcept >, "");
 
diff --git a/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concepts.totallyordered/totally_ordered_with.pass.cpp b/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concepts.totallyordered/totally_ordered_with.pass.cpp
index a67a915346c..493cf05f637 100644
--- a/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concepts.totallyordered/totally_ordered_with.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/concepts/concepts.compare/concepts.totallyordered/totally_ordered_with.pass.cpp
@@ -85,13 +85,13 @@ static_assert(!check_totally_ordered_with<int, int (S::*)() volatile & noexcept>
 static_assert(!check_totally_ordered_with<int, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_totally_ordered_with<int*, int*>(), "");
 static_assert(check_totally_ordered_with<int*, int[5]>(), "");
@@ -114,13 +114,13 @@ static_assert(!check_totally_ordered_with<int*, int (S::*)() volatile & noexcept
 static_assert(!check_totally_ordered_with<int*, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int*, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int*, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int*, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int*, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int*, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int*, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int*, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int*, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int*, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int*, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int*, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int*, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int*, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_totally_ordered_with<int[5], int[5]>(), "");
 static_assert(!check_totally_ordered_with<int[5], int (*)()>(), "");
@@ -142,13 +142,13 @@ static_assert(!check_totally_ordered_with<int[5], int (S::*)() volatile & noexce
 static_assert(!check_totally_ordered_with<int[5], int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int[5], int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int[5], int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int[5], int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int[5], int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int[5], int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int[5], int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int[5], int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int[5], int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int[5], int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int[5], int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int[5], int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int[5], int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int[5], int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(check_totally_ordered_with<int (*)(), int (*)()>(), "");
 static_assert(check_totally_ordered_with<int (*)(), int (&)()>(), "");
@@ -169,13 +169,13 @@ static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() volatile & noe
 static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int (*)(), int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int (*)(), int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int (*)(), int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int (*)(), int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (*)(), int (S::*)() const volatile && noexcept>(), "");
 #ifdef INVESTIGATE_COMPILER_BUG
 static_assert(check_totally_ordered_with<int (&)(), int (&)()>(), "");
 #endif // INVESTIGATE_COMPILER_BUG
@@ -196,13 +196,13 @@ static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() volatile & noe
 static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int (&)(), int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int (&)(), int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int (&)(), int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int (&)(), int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (&)(), int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)()>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() noexcept>(), "");
@@ -221,13 +221,13 @@ static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() volatile &
 static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)(), int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)(), int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)(), int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)(), int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)(), int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() const>(), "");
@@ -245,13 +245,13 @@ static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() vo
 static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() noexcept, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() noexcept, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() const>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() const noexcept>(), "");
@@ -268,13 +268,13 @@ static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() volat
 static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() const noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() volatile>(), "");
@@ -290,13 +290,13 @@ static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*
 static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const noexcept, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const noexcept, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() volatile>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() volatile noexcept>(), "");
@@ -311,13 +311,13 @@ static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() vo
 static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() volatile noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() const volatile>(), "");
@@ -332,14 +332,13 @@ static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S
 static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() const volatile & noexcept>(),
               "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile noexcept, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile noexcept,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(!check_totally_ordered_with<int (S::*)() volatile noexcept, int (S::*)() const volatile && noexcept>(),
               "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() const volatile>(), "");
@@ -353,13 +352,13 @@ static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile noexcept, int (S::*)() const volatile noexcept>(),
               "");
@@ -374,17 +373,15 @@ static_assert(!check_totally_ordered_with<int (S::*)() const volatile noexcept,
 static_assert(
   !check_totally_ordered_with<int (S::*)() const volatile noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile noexcept, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile noexcept,
-              int (S::*)() volatile&& noexcept > (),
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile noexcept, int (S::*)() volatile && noexcept>(),
               "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile noexcept,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
+static_assert(
+  !check_totally_ordered_with<int (S::*)() const volatile noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() &>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() & noexcept>(), "");
@@ -395,13 +392,13 @@ static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() volatile
 static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() &, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() &, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() &, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() &, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() &, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() const&>(), "");
@@ -411,13 +408,13 @@ static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)()
 static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() & noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() & noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() & noexcept, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() & noexcept, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() & noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() const&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() const & noexcept>(), "");
@@ -426,13 +423,13 @@ static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() vola
 static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const&, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() const & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() volatile&>(), "");
@@ -440,14 +437,13 @@ static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S:
 static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const& noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const& noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const& noexcept, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const& noexcept,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(!check_totally_ordered_with<int (S::*)() const & noexcept, int (S::*)() const volatile && noexcept>(),
               "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() volatile&>(), "");
@@ -455,109 +451,99 @@ static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() v
 static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile&, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile&, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile&, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile&, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() const volatile & noexcept>(),
               "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile & noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile & noexcept, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile & noexcept, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile & noexcept,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(!check_totally_ordered_with<int (S::*)() volatile & noexcept, int (S::*)() const volatile && noexcept>(),
               "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile&, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile&, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile&, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile&, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile&, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile&, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile&, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile&, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile&, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile&,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile&, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(
   !check_totally_ordered_with<int (S::*)() const volatile & noexcept, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile & noexcept, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile& noexcept, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile & noexcept, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile & noexcept, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile& noexcept,
-              int (S::*)() const&& noexcept > (),
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile & noexcept, int (S::*)() const && noexcept>(),
               "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile & noexcept, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile& noexcept,
-              int (S::*)() volatile&& noexcept > (),
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile & noexcept, int (S::*)() volatile && noexcept>(),
               "");
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile & noexcept, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile& noexcept,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
+static_assert(
+  !check_totally_ordered_with<int (S::*)() const volatile & noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() &&, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() &&, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() &&, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() &&, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() &&, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() &&, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() &&, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() &&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() &&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() &&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() &&, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() &&, int (S::*)() const volatile && noexcept>(), "");
 
-static_assert(!check_totally_ordered_with < int(S::*)() && noexcept, int (S::*)() && noexcept > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() && noexcept, int (S::*)() const&& > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() && noexcept, int (S::*)() const&& noexcept > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() && noexcept, int (S::*)() volatile&& > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() && noexcept, int (S::*)() volatile&& noexcept > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() && noexcept, int (S::*)() const volatile&& > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() && noexcept, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() && noexcept, int (S::*)() && noexcept>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() && noexcept, int (S::*)() const&&>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() && noexcept, int (S::*)() const && noexcept>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() && noexcept, int (S::*)() volatile&&>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() && noexcept, int (S::*)() volatile && noexcept>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() && noexcept, int (S::*)() const volatile&&>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() && noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() const&&, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&&, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const&&, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const&&, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const&&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() const&&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&&, int (S::*)() const volatile&& noexcept > (), "");
-
-static_assert(!check_totally_ordered_with < int(S::*)() const&& noexcept, int (S::*)() const&& noexcept > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&& noexcept, int (S::*)() volatile&& > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&& noexcept, int (S::*)() volatile&& noexcept > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&& noexcept, int (S::*)() const volatile&& > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const&& noexcept,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(!check_totally_ordered_with<int (S::*)() const&&, int (S::*)() const volatile && noexcept>(), "");
+
+static_assert(!check_totally_ordered_with<int (S::*)() const && noexcept, int (S::*)() const && noexcept>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const && noexcept, int (S::*)() volatile&&>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const && noexcept, int (S::*)() volatile && noexcept>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const && noexcept, int (S::*)() const volatile&&>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() const && noexcept, int (S::*)() const volatile && noexcept>(),
               "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() volatile&&, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile&&, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile&&, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<int (S::*)() volatile&&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile&&, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile&&, int (S::*)() const volatile && noexcept>(), "");
 
-static_assert(!check_totally_ordered_with < int(S::*)() volatile && noexcept, int (S::*)() volatile&& noexcept > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile && noexcept, int (S::*)() const volatile&& > (), "");
-static_assert(!check_totally_ordered_with < int(S::*)() volatile && noexcept,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(!check_totally_ordered_with<int (S::*)() volatile && noexcept, int (S::*)() volatile && noexcept>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile && noexcept, int (S::*)() const volatile&&>(), "");
+static_assert(!check_totally_ordered_with<int (S::*)() volatile && noexcept, int (S::*)() const volatile && noexcept>(),
               "");
 
 static_assert(!check_totally_ordered_with<int (S::*)() const volatile&&, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile&&,
-              int (S::*)() const volatile&& noexcept > (),
-              "");
-static_assert(!check_totally_ordered_with < int(S::*)() const volatile&& noexcept,
-              int (S::*)() const volatile&& noexcept > (),
+static_assert(!check_totally_ordered_with<int (S::*)() const volatile&&, int (S::*)() const volatile && noexcept>(),
               "");
+static_assert(
+  !check_totally_ordered_with<int (S::*)() const volatile && noexcept, int (S::*)() const volatile && noexcept>(), "");
 
 #if !defined(TEST_COMPILER_GCC) && defined(INVESTIGATE_COMPILER_BUG)
 static_assert(!check_totally_ordered_with<nullptr_t, int>(), "");
@@ -586,13 +572,13 @@ static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() volatile & noe
 static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() const volatile&>(), "");
 static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() const volatile & noexcept>(), "");
 static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() &&>(), "");
-static_assert(!check_totally_ordered_with < nullptr_t, int (S::*)() && noexcept > (), "");
+static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() && noexcept>(), "");
 static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() const&&>(), "");
-static_assert(!check_totally_ordered_with < nullptr_t, int (S::*)() const&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() const && noexcept>(), "");
 static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() volatile&&>(), "");
-static_assert(!check_totally_ordered_with < nullptr_t, int (S::*)() volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() volatile && noexcept>(), "");
 static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() const volatile&&>(), "");
-static_assert(!check_totally_ordered_with < nullptr_t, int (S::*)() const volatile&& noexcept > (), "");
+static_assert(!check_totally_ordered_with<nullptr_t, int (S::*)() const volatile && noexcept>(), "");
 
 static_assert(!equality_comparable_with<void, int>, "");
 static_assert(!equality_comparable_with<void, int*>, "");
@@ -610,13 +596,13 @@ static_assert(!equality_comparable_with<void, int (S::*)() volatile noexcept>, "
 static_assert(!equality_comparable_with<void, int (S::*)() const volatile>, "");
 static_assert(!equality_comparable_with<void, int (S::*)() const volatile noexcept>, "");
 static_assert(!equality_comparable_with<void, int (S::*)() &>, "");
-static_assert(!equality_comparable_with<void, int (S::*)() & noexcept>, "");
+static_assert(!equality_comparable_with < void, int (S::*)() & noexcept >, "");
 static_assert(!equality_comparable_with<void, int (S::*)() const&>, "");
-static_assert(!equality_comparable_with<void, int (S::*)() const & noexcept>, "");
+static_assert(!equality_comparable_with < void, int (S::*)() const& noexcept >, "");
 static_assert(!equality_comparable_with<void, int (S::*)() volatile&>, "");
-static_assert(!equality_comparable_with<void, int (S::*)() volatile & noexcept>, "");
+static_assert(!equality_comparable_with < void, int (S::*)() volatile& noexcept >, "");
 static_assert(!equality_comparable_with<void, int (S::*)() const volatile&>, "");
-static_assert(!equality_comparable_with<void, int (S::*)() const volatile & noexcept>, "");
+static_assert(!equality_comparable_with < void, int (S::*)() const volatile& noexcept >, "");
 static_assert(!equality_comparable_with<void, int (S::*)() &&>, "");
 static_assert(!equality_comparable_with < void, int (S::*)() && noexcept >, "");
 static_assert(!equality_comparable_with<void, int (S::*)() const&&>, "");
diff --git a/libcudacxx/test/libcudacxx/std/concepts/concepts.object/copyable.compile.pass.cpp b/libcudacxx/test/libcudacxx/std/concepts/concepts.object/copyable.compile.pass.cpp
index 6769ea45c40..51314e675ac 100644
--- a/libcudacxx/test/libcudacxx/std/concepts/concepts.object/copyable.compile.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/concepts/concepts.object/copyable.compile.pass.cpp
@@ -36,25 +36,25 @@ static_assert(copyable<int S::*>, "");
 static_assert(copyable<int (S::*)()>, "");
 static_assert(copyable<int (S::*)() noexcept>, "");
 static_assert(copyable<int (S::*)() &>, "");
-static_assert(copyable<int (S::*)() & noexcept>, "");
+static_assert(copyable < int(S::*)() & noexcept >, "");
 static_assert(copyable<int (S::*)() &&>, "");
 static_assert(copyable < int(S::*)() && noexcept >, "");
 static_assert(copyable<int (S::*)() const>, "");
 static_assert(copyable<int (S::*)() const noexcept>, "");
 static_assert(copyable<int (S::*)() const&>, "");
-static_assert(copyable<int (S::*)() const & noexcept>, "");
+static_assert(copyable < int(S::*)() const& noexcept >, "");
 static_assert(copyable<int (S::*)() const&&>, "");
 static_assert(copyable < int(S::*)() const&& noexcept >, "");
 static_assert(copyable<int (S::*)() volatile>, "");
 static_assert(copyable<int (S::*)() volatile noexcept>, "");
 static_assert(copyable<int (S::*)() volatile&>, "");
-static_assert(copyable<int (S::*)() volatile & noexcept>, "");
+static_assert(copyable < int(S::*)() volatile & noexcept >, "");
 static_assert(copyable<int (S::*)() volatile&&>, "");
 static_assert(copyable < int(S::*)() volatile && noexcept >, "");
 static_assert(copyable<int (S::*)() const volatile>, "");
 static_assert(copyable<int (S::*)() const volatile noexcept>, "");
 static_assert(copyable<int (S::*)() const volatile&>, "");
-static_assert(copyable<int (S::*)() const volatile & noexcept>, "");
+static_assert(copyable < int(S::*)() const volatile& noexcept >, "");
 static_assert(copyable<int (S::*)() const volatile&&>, "");
 static_assert(copyable < int(S::*)() const volatile&& noexcept >, "");
 
diff --git a/libcudacxx/test/libcudacxx/std/concepts/concepts.object/movable.compile.pass.cpp b/libcudacxx/test/libcudacxx/std/concepts/concepts.object/movable.compile.pass.cpp
index 52cd49b311d..68006e6d465 100644
--- a/libcudacxx/test/libcudacxx/std/concepts/concepts.object/movable.compile.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/concepts/concepts.object/movable.compile.pass.cpp
@@ -37,25 +37,25 @@ static_assert(movable<int S::*>, "");
 static_assert(movable<int (S::*)()>, "");
 static_assert(movable<int (S::*)() noexcept>, "");
 static_assert(movable<int (S::*)() &>, "");
-static_assert(movable<int (S::*)() & noexcept>, "");
+static_assert(movable < int(S::*)() & noexcept >, "");
 static_assert(movable<int (S::*)() &&>, "");
 static_assert(movable < int(S::*)() && noexcept >, "");
 static_assert(movable<int (S::*)() const>, "");
 static_assert(movable<int (S::*)() const noexcept>, "");
 static_assert(movable<int (S::*)() const&>, "");
-static_assert(movable<int (S::*)() const & noexcept>, "");
+static_assert(movable < int(S::*)() const& noexcept >, "");
 static_assert(movable<int (S::*)() const&&>, "");
 static_assert(movable < int(S::*)() const&& noexcept >, "");
 static_assert(movable<int (S::*)() volatile>, "");
 static_assert(movable<int (S::*)() volatile noexcept>, "");
 static_assert(movable<int (S::*)() volatile&>, "");
-static_assert(movable<int (S::*)() volatile & noexcept>, "");
+static_assert(movable < int(S::*)() volatile & noexcept >, "");
 static_assert(movable<int (S::*)() volatile&&>, "");
 static_assert(movable < int(S::*)() volatile && noexcept >, "");
 static_assert(movable<int (S::*)() const volatile>, "");
 static_assert(movable<int (S::*)() const volatile noexcept>, "");
 static_assert(movable<int (S::*)() const volatile&>, "");
-static_assert(movable<int (S::*)() const volatile & noexcept>, "");
+static_assert(movable < int(S::*)() const volatile& noexcept >, "");
 static_assert(movable<int (S::*)() const volatile&&>, "");
 static_assert(movable < int(S::*)() const volatile&& noexcept >, "");
 
diff --git a/libcudacxx/test/libcudacxx/std/concepts/concepts.object/regular.compile.pass.cpp b/libcudacxx/test/libcudacxx/std/concepts/concepts.object/regular.compile.pass.cpp
index cf8a4608289..d476b3cbf29 100644
--- a/libcudacxx/test/libcudacxx/std/concepts/concepts.object/regular.compile.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/concepts/concepts.object/regular.compile.pass.cpp
@@ -40,25 +40,25 @@ static_assert(regular<int S::*>, "");
 static_assert(regular<int (S::*)()>, "");
 static_assert(regular<int (S::*)() noexcept>, "");
 static_assert(regular<int (S::*)() &>, "");
-static_assert(regular<int (S::*)() & noexcept>, "");
+static_assert(regular < int(S::*)() & noexcept >, "");
 static_assert(regular<int (S::*)() &&>, "");
 static_assert(regular < int(S::*)() && noexcept >, "");
 static_assert(regular<int (S::*)() const>, "");
 static_assert(regular<int (S::*)() const noexcept>, "");
 static_assert(regular<int (S::*)() const&>, "");
-static_assert(regular<int (S::*)() const & noexcept>, "");
+static_assert(regular < int(S::*)() const& noexcept >, "");
 static_assert(regular<int (S::*)() const&&>, "");
 static_assert(regular < int(S::*)() const&& noexcept >, "");
 static_assert(regular<int (S::*)() volatile>, "");
 static_assert(regular<int (S::*)() volatile noexcept>, "");
 static_assert(regular<int (S::*)() volatile&>, "");
-static_assert(regular<int (S::*)() volatile & noexcept>, "");
+static_assert(regular < int(S::*)() volatile & noexcept >, "");
 static_assert(regular<int (S::*)() volatile&&>, "");
 static_assert(regular < int(S::*)() volatile && noexcept >, "");
 static_assert(regular<int (S::*)() const volatile>, "");
 static_assert(regular<int (S::*)() const volatile noexcept>, "");
 static_assert(regular<int (S::*)() const volatile&>, "");
-static_assert(regular<int (S::*)() const volatile & noexcept>, "");
+static_assert(regular < int(S::*)() const volatile& noexcept >, "");
 static_assert(regular<int (S::*)() const volatile&&>, "");
 static_assert(regular < int(S::*)() const volatile&& noexcept >, "");
 
@@ -69,25 +69,25 @@ static_assert(regular<int U::*>, "");
 static_assert(regular<int (U::*)()>, "");
 static_assert(regular<int (U::*)() noexcept>, "");
 static_assert(regular<int (U::*)() &>, "");
-static_assert(regular<int (U::*)() & noexcept>, "");
+static_assert(regular < int(U::*)() & noexcept >, "");
 static_assert(regular<int (U::*)() &&>, "");
 static_assert(regular < int(U::*)() && noexcept >, "");
 static_assert(regular<int (U::*)() const>, "");
 static_assert(regular<int (U::*)() const noexcept>, "");
 static_assert(regular<int (U::*)() const&>, "");
-static_assert(regular<int (U::*)() const & noexcept>, "");
+static_assert(regular < int(U::*)() const& noexcept >, "");
 static_assert(regular<int (U::*)() const&&>, "");
 static_assert(regular < int(U::*)() const&& noexcept >, "");
 static_assert(regular<int (U::*)() volatile>, "");
 static_assert(regular<int (U::*)() volatile noexcept>, "");
 static_assert(regular<int (U::*)() volatile&>, "");
-static_assert(regular<int (U::*)() volatile & noexcept>, "");
+static_assert(regular < int(U::*)() volatile & noexcept >, "");
 static_assert(regular<int (U::*)() volatile&&>, "");
 static_assert(regular < int(U::*)() volatile && noexcept >, "");
 static_assert(regular<int (U::*)() const volatile>, "");
 static_assert(regular<int (U::*)() const volatile noexcept>, "");
 static_assert(regular<int (U::*)() const volatile&>, "");
-static_assert(regular<int (U::*)() const volatile & noexcept>, "");
+static_assert(regular < int(U::*)() const volatile& noexcept >, "");
 static_assert(regular<int (U::*)() const volatile&&>, "");
 static_assert(regular < int(U::*)() const volatile&& noexcept >, "");
 
diff --git a/libcudacxx/test/libcudacxx/std/concepts/concepts.object/semiregular.compile.pass.cpp b/libcudacxx/test/libcudacxx/std/concepts/concepts.object/semiregular.compile.pass.cpp
index 44e7b55b803..b2087fd4a75 100644
--- a/libcudacxx/test/libcudacxx/std/concepts/concepts.object/semiregular.compile.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/concepts/concepts.object/semiregular.compile.pass.cpp
@@ -36,25 +36,25 @@ static_assert(semiregular<int S::*>, "");
 static_assert(semiregular<int (S::*)()>, "");
 static_assert(semiregular<int (S::*)() noexcept>, "");
 static_assert(semiregular<int (S::*)() &>, "");
-static_assert(semiregular<int (S::*)() & noexcept>, "");
+static_assert(semiregular < int(S::*)() & noexcept >, "");
 static_assert(semiregular<int (S::*)() &&>, "");
 static_assert(semiregular < int(S::*)() && noexcept >, "");
 static_assert(semiregular<int (S::*)() const>, "");
 static_assert(semiregular<int (S::*)() const noexcept>, "");
 static_assert(semiregular<int (S::*)() const&>, "");
-static_assert(semiregular<int (S::*)() const & noexcept>, "");
+static_assert(semiregular < int(S::*)() const& noexcept >, "");
 static_assert(semiregular<int (S::*)() const&&>, "");
 static_assert(semiregular < int(S::*)() const&& noexcept >, "");
 static_assert(semiregular<int (S::*)() volatile>, "");
 static_assert(semiregular<int (S::*)() volatile noexcept>, "");
 static_assert(semiregular<int (S::*)() volatile&>, "");
-static_assert(semiregular<int (S::*)() volatile & noexcept>, "");
+static_assert(semiregular < int(S::*)() volatile & noexcept >, "");
 static_assert(semiregular<int (S::*)() volatile&&>, "");
 static_assert(semiregular < int(S::*)() volatile && noexcept >, "");
 static_assert(semiregular<int (S::*)() const volatile>, "");
 static_assert(semiregular<int (S::*)() const volatile noexcept>, "");
 static_assert(semiregular<int (S::*)() const volatile&>, "");
-static_assert(semiregular<int (S::*)() const volatile & noexcept>, "");
+static_assert(semiregular < int(S::*)() const volatile& noexcept >, "");
 static_assert(semiregular<int (S::*)() const volatile&&>, "");
 static_assert(semiregular < int(S::*)() const volatile&& noexcept >, "");
 
diff --git a/libcudacxx/test/libcudacxx/std/containers/sequences/array/array.creation/to_array.pass.cpp b/libcudacxx/test/libcudacxx/std/containers/sequences/array/array.creation/to_array.pass.cpp
index eb4ffd3998d..e5b6aaf298e 100644
--- a/libcudacxx/test/libcudacxx/std/containers/sequences/array/array.creation/to_array.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/containers/sequences/array/array.creation/to_array.pass.cpp
@@ -94,7 +94,7 @@ __host__ __device__ constexpr bool tests()
 #if defined(TEST_COMPILER_NVRTC) && defined(TEST_COMPILER_MSVC)
   // Test C99 compound literal.
   {
-    auto arr = cuda::std::to_array((int[]){3, 4});
+    auto arr = cuda::std::to_array((int[]) {3, 4});
     ASSERT_SAME_TYPE(decltype(arr), cuda::std::array<int, 2>);
     assert(arr[0] == 3);
     assert(arr[1] == 4);
diff --git a/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/incrementable_traits.compile.pass.cpp b/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/incrementable_traits.compile.pass.cpp
index 6afc7f82fe8..b913eaac535 100644
--- a/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/incrementable_traits.compile.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.assoc.types/incrementable.traits/incrementable_traits.compile.pass.cpp
@@ -158,12 +158,12 @@ static_assert(!check_has_difference_type<int (*)() noexcept>, "");
 static_assert(!check_has_difference_type<int (&)()>, "");
 static_assert(!check_has_difference_type<int (&)() noexcept>, "");
 
-#define TEST_POINTER_TO_MEMBER_FUNCTION(type, cv)                               \
-  static_assert(!check_has_difference_type<int (type::*)() cv>, "");            \
-  static_assert(!check_has_difference_type<int (type::*)() cv noexcept>, "");   \
-  static_assert(!check_has_difference_type<int (type::*)() cv&>, "");           \
-  static_assert(!check_has_difference_type<int (type::*)() cv & noexcept>, ""); \
-  static_assert(!check_has_difference_type<int (type::*)() cv&&>, "");          \
+#define TEST_POINTER_TO_MEMBER_FUNCTION(type, cv)                                 \
+  static_assert(!check_has_difference_type<int (type::*)() cv>, "");              \
+  static_assert(!check_has_difference_type<int (type::*)() cv noexcept>, "");     \
+  static_assert(!check_has_difference_type<int (type::*)() cv&>, "");             \
+  static_assert(!check_has_difference_type < int(type::*)() cv & noexcept >, ""); \
+  static_assert(!check_has_difference_type<int (type::*)() cv&&>, "");            \
   static_assert(!check_has_difference_type < int(type::*)() cv && noexcept >, "");
 
 struct empty
diff --git a/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.readable/indirectly_readable.compile.pass.cpp b/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.readable/indirectly_readable.compile.pass.cpp
index 7331af44c38..b90e5b74218 100644
--- a/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.readable/indirectly_readable.compile.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.readable/indirectly_readable.compile.pass.cpp
@@ -221,19 +221,19 @@ static_assert(!cuda::std::indirectly_readable<int S::*>, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)()>, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() noexcept>, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() &>, "");
-static_assert(!cuda::std::indirectly_readable<int (S::*)() & noexcept>, "");
+static_assert(!cuda::std::indirectly_readable < int(S::*)() & noexcept >, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() &&>, "");
 static_assert(!cuda::std::indirectly_readable < int(S::*)() && noexcept >, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() const>, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() const noexcept>, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() const&>, "");
-static_assert(!cuda::std::indirectly_readable<int (S::*)() const & noexcept>, "");
+static_assert(!cuda::std::indirectly_readable < int(S::*)() const& noexcept >, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() const&&>, "");
 static_assert(!cuda::std::indirectly_readable < int(S::*)() const&& noexcept >, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() volatile>, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() volatile noexcept>, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() volatile&>, "");
-static_assert(!cuda::std::indirectly_readable<int (S::*)() volatile & noexcept>, "");
+static_assert(!cuda::std::indirectly_readable < int(S::*)() volatile & noexcept >, "");
 static_assert(!cuda::std::indirectly_readable<int (S::*)() volatile&&>, "");
 static_assert(!cuda::std::indirectly_readable < int(S::*)() volatile && noexcept >, "");
 
diff --git a/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/weakly_incrementable.compile.pass.cpp b/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/weakly_incrementable.compile.pass.cpp
index 333a27abcd1..0087bb825dd 100644
--- a/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/weakly_incrementable.compile.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/iterators/iterator.requirements/iterator.concepts/iterator.concept.winc/weakly_incrementable.compile.pass.cpp
@@ -36,12 +36,12 @@ struct S
 {};
 static_assert(!cuda::std::weakly_incrementable<int S::*>, "");
 
-#define CHECK_POINTER_TO_MEMBER_FUNCTIONS(qualifier)                                      \
-  static_assert(!cuda::std::weakly_incrementable<int (S::*)() qualifier>, "");            \
-  static_assert(!cuda::std::weakly_incrementable<int (S::*)() qualifier noexcept>, "");   \
-  static_assert(!cuda::std::weakly_incrementable<int (S::*)() qualifier&>, "");           \
-  static_assert(!cuda::std::weakly_incrementable<int (S::*)() qualifier & noexcept>, ""); \
-  static_assert(!cuda::std::weakly_incrementable<int (S::*)() qualifier&&>, "");          \
+#define CHECK_POINTER_TO_MEMBER_FUNCTIONS(qualifier)                                        \
+  static_assert(!cuda::std::weakly_incrementable<int (S::*)() qualifier>, "");              \
+  static_assert(!cuda::std::weakly_incrementable<int (S::*)() qualifier noexcept>, "");     \
+  static_assert(!cuda::std::weakly_incrementable<int (S::*)() qualifier&>, "");             \
+  static_assert(!cuda::std::weakly_incrementable < int(S::*)() qualifier & noexcept >, ""); \
+  static_assert(!cuda::std::weakly_incrementable<int (S::*)() qualifier&&>, "");            \
   static_assert(!cuda::std::weakly_incrementable < int(S::*)() qualifier && noexcept >, "");
 
 #define NO_QUALIFIER
diff --git a/libcudacxx/test/libcudacxx/std/thread/thread.barrier/completion.pass.cpp b/libcudacxx/test/libcudacxx/std/thread/thread.barrier/completion.pass.cpp
index f956bbe8119..59abb795d1b 100644
--- a/libcudacxx/test/libcudacxx/std/thread/thread.barrier/completion.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/thread/thread.barrier/completion.pass.cpp
@@ -23,8 +23,7 @@
 #include "test_macros.h"
 
 template <template <typename> class Barrier,
-          template <typename, typename>
-          class Selector,
+          template <typename, typename> class Selector,
           typename Initializer = constructor_initializer>
 __host__ __device__ void test()
 {
diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.bind_front/bind_front.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.bind_front/bind_front.pass.cpp
index dd0689a868e..caacd9f073b 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.bind_front/bind_front.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.bind_front/bind_front.pass.cpp
@@ -49,8 +49,8 @@ template <class... Args>
 struct is_bind_frontable
 {
   template <class... LocalArgs>
-  __host__ __device__ static auto
-  test(int) -> decltype((void) cuda::std::bind_front(cuda::std::declval<LocalArgs>()...), cuda::std::true_type());
+  __host__ __device__ static auto test(int)
+    -> decltype((void) cuda::std::bind_front(cuda::std::declval<LocalArgs>()...), cuda::std::true_type());
 
   template <class...>
   __host__ __device__ static cuda::std::false_type test(...);
diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.invoke/invoke.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.invoke/invoke.pass.cpp
index 3585e6dce99..b35ce02fee3 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.invoke/invoke.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/func.invoke/invoke.pass.cpp
@@ -122,7 +122,7 @@ template <class Signature, class Expect, class Functor>
 __host__ __device__ void test_b12(Functor&& f)
 {
   // Create the callable object.
-  typedef Signature TestClass::*ClassFunc;
+  typedef Signature TestClass::* ClassFunc;
   ClassFunc func_ptr = &TestClass::operator();
 
   // Create the dummy arg.
@@ -145,7 +145,7 @@ template <class Expect, class Functor>
 __host__ __device__ void test_b34(Functor&& f)
 {
   // Create the callable object.
-  typedef int TestClass::*ClassFunc;
+  typedef int TestClass::* ClassFunc;
   ClassFunc func_ptr = &TestClass::data;
 
   // Check that the deduced return type of invoke is what is expected.
diff --git a/libcudacxx/test/libcudacxx/std/utilities/function.objects/refwrap/refwrap.invoke/invoke.compile.fail.cpp b/libcudacxx/test/libcudacxx/std/utilities/function.objects/refwrap/refwrap.invoke/invoke.compile.fail.cpp
index faa8bde67e7..2cd67ca6247 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/function.objects/refwrap/refwrap.invoke/invoke.compile.fail.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/function.objects/refwrap/refwrap.invoke/invoke.compile.fail.cpp
@@ -35,7 +35,7 @@ __host__ __device__ void test_int_1()
 {
   // member data pointer
   {
-    int A_int_1::*fp = &A_int_1::data_;
+    int A_int_1::* fp = &A_int_1::data_;
     cuda::std::reference_wrapper<int A_int_1::*> r1(fp);
     A_int_1 a;
     assert(r1(a) == 5);
diff --git a/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/common_type.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/common_type.pass.cpp
index 96988b28bc0..8a187fa4297 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/common_type.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/common_type.pass.cpp
@@ -88,7 +88,8 @@ template <class Tp>
 using always_bool = typename always_bool_imp<Tp>::type;
 
 template <class... Args>
-__host__ __device__ constexpr auto no_common_type_imp(int) -> always_bool<typename cuda::std::common_type<Args...>::type>
+__host__ __device__ constexpr auto no_common_type_imp(int)
+  -> always_bool<typename cuda::std::common_type<Args...>::type>
 {
   return false;
 }
diff --git a/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp
index 2f7eb636ae4..445927790d3 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/result_of.pass.cpp
@@ -404,7 +404,7 @@ int main(int, char**)
     test_result_of<PMS3CV(S&, int, long), const int&>();
   }
   { // pointer to member data
-    typedef char S::*PMD;
+    typedef char S::* PMD;
     test_result_of<PMD(S&), char&>();
     test_result_of<PMD(S*), char&>();
     test_result_of<PMD(S* const), char&>();
diff --git a/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp
index dac77e6d9b6..a34dbeca162 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/meta/meta.trans/meta.trans.other/result_of11.pass.cpp
@@ -77,7 +77,7 @@ __host__ __device__ void test_result_of_imp()
 int main(int, char**)
 {
   {
-    typedef char F::*PMD;
+    typedef char F::* PMD;
     test_result_of_imp<PMD(F&), char&>();
     test_result_of_imp<PMD(F const&), char const&>();
     test_result_of_imp<PMD(F volatile&), char volatile&>();
diff --git a/libcudacxx/test/libcudacxx/std/utilities/template.bitset/bitset.members/to_ullong.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/template.bitset/bitset.members/to_ullong.pass.cpp
index 25a0d2aff05..04da8fea7e2 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/template.bitset/bitset.members/to_ullong.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/template.bitset/bitset.members/to_ullong.pass.cpp
@@ -20,7 +20,7 @@ template <cuda::std::size_t N>
 __host__ __device__ TEST_CONSTEXPR_CXX14 void test_to_ullong()
 {
   const cuda::std::size_t M = sizeof(unsigned long long) * CHAR_BIT < N ? sizeof(unsigned long long) * CHAR_BIT : N;
-  const bool is_M_zero      = cuda::std::integral_constant<bool, M == 0>::value; // avoid compiler warnings
+  const bool is_M_zero      = cuda::std::integral_constant < bool, M == 0 > ::value; // avoid compiler warnings
   const cuda::std::size_t X =
     is_M_zero ? sizeof(unsigned long long) * CHAR_BIT - 1 : sizeof(unsigned long long) * CHAR_BIT - M;
   const unsigned long long max = is_M_zero ? 0 : (unsigned long long) (-1) >> X;
diff --git a/libcudacxx/test/libcudacxx/std/utilities/template.bitset/bitset.members/to_ulong.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/template.bitset/bitset.members/to_ulong.pass.cpp
index 61953443dfd..c44a923622c 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/template.bitset/bitset.members/to_ulong.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/template.bitset/bitset.members/to_ulong.pass.cpp
@@ -21,7 +21,7 @@ template <cuda::std::size_t N>
 __host__ __device__ TEST_CONSTEXPR_CXX14 void test_to_ulong()
 {
   const cuda::std::size_t M   = sizeof(unsigned long) * CHAR_BIT < N ? sizeof(unsigned long) * CHAR_BIT : N;
-  const bool is_M_zero        = cuda::std::integral_constant<bool, M == 0>::value; // avoid compiler warnings
+  const bool is_M_zero        = cuda::std::integral_constant < bool, M == 0 > ::value; // avoid compiler warnings
   const cuda::std::size_t X   = is_M_zero ? sizeof(unsigned long) * CHAR_BIT - 1 : sizeof(unsigned long) * CHAR_BIT - M;
   const cuda::std::size_t max = is_M_zero ? 0 : cuda::std::size_t(cuda::std::numeric_limits<unsigned long>::max()) >> X;
   cuda::std::size_t tests[]   = {
diff --git a/libcudacxx/test/libcudacxx/std/utilities/tuple/tuple.tuple/tuple.apply/apply_extended_types.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/tuple/tuple.tuple/tuple.apply/apply_extended_types.pass.cpp
index 2a72004f266..31e4d56c3f8 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/tuple/tuple.tuple/tuple.apply/apply_extended_types.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/tuple/tuple.tuple/tuple.apply/apply_extended_types.pass.cpp
@@ -137,7 +137,7 @@ __host__ __device__ void test_ext_int_0()
   typedef int (T::*mem2_t)() const;
   mem2_t mem2 = &T::mem2;
 
-  typedef int const T::*obj1_t;
+  typedef int const T::* obj1_t;
   obj1_t obj1 = &T::obj1;
 
   // member function w/ref
diff --git a/libcudacxx/test/libcudacxx/std/utilities/utility/utility.swap/swap.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/utility/utility.swap/swap.pass.cpp
index a27ed8d89fd..fbba514ca6b 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/utility/utility.swap/swap.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/utility/utility.swap/swap.pass.cpp
@@ -75,8 +75,8 @@ struct NotMoveAssignable
 };
 
 template <class Tp>
-__host__ __device__ auto
-can_swap_test(int) -> decltype(cuda::std::swap(cuda::std::declval<Tp>(), cuda::std::declval<Tp>()));
+__host__ __device__ auto can_swap_test(int)
+  -> decltype(cuda::std::swap(cuda::std::declval<Tp>(), cuda::std::declval<Tp>()));
 
 template <class Tp>
 __host__ __device__ auto can_swap_test(...) -> cuda::std::false_type;
diff --git a/libcudacxx/test/libcudacxx/std/utilities/utility/utility.swap/swap_array.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/utility/utility.swap/swap_array.pass.cpp
index ba4cbef2595..4cc02b7405b 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/utility/utility.swap/swap_array.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/utility/utility.swap/swap_array.pass.cpp
@@ -59,8 +59,8 @@ struct NotMoveConstructible
 };
 
 template <class Tp>
-__host__ __device__ auto
-can_swap_test(int) -> decltype(cuda::std::swap(cuda::std::declval<Tp>(), cuda::std::declval<Tp>()));
+__host__ __device__ auto can_swap_test(int)
+  -> decltype(cuda::std::swap(cuda::std::declval<Tp>(), cuda::std::declval<Tp>()));
 
 template <class Tp>
 __host__ __device__ auto can_swap_test(...) -> cuda::std::false_type;
diff --git a/libcudacxx/test/libcudacxx/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp b/libcudacxx/test/libcudacxx/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp
index 84f234ea4ee..69da5a87905 100644
--- a/libcudacxx/test/libcudacxx/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp
+++ b/libcudacxx/test/libcudacxx/std/utilities/variant/variant.variant/variant.swap/swap.pass.cpp
@@ -472,8 +472,8 @@ void test_exceptions_different_alternatives()
 #endif // !TEST_HAS_NO_EXCEPTIONS
 
 template <class Var>
-__host__ __device__ constexpr auto
-has_swap_member_imp(int) -> decltype(cuda::std::declval<Var&>().swap(cuda::std::declval<Var&>()), true)
+__host__ __device__ constexpr auto has_swap_member_imp(int)
+  -> decltype(cuda::std::declval<Var&>().swap(cuda::std::declval<Var&>()), true)
 {
   return true;
 }
diff --git a/libcudacxx/test/support/archetypes.h b/libcudacxx/test/support/archetypes.h
index 1621e2d07e9..07f18944854 100644
--- a/libcudacxx/test/support/archetypes.h
+++ b/libcudacxx/test/support/archetypes.h
@@ -343,7 +343,8 @@ namespace NonThrowingTypes
 namespace NonTrivialTypes
 {
 #define DEFINE_CTOR \
-  {}
+  {                 \
+  }
 #define DEFINE_CTOR_ANNOTATIONS __host__ __device__
 #define DEFINE_ASSIGN \
   {                   \
diff --git a/libcudacxx/test/support/charconv_test_helpers.h b/libcudacxx/test/support/charconv_test_helpers.h
index 09e50e90efe..c334ebfab54 100644
--- a/libcudacxx/test/support/charconv_test_helpers.h
+++ b/libcudacxx/test/support/charconv_test_helpers.h
@@ -128,8 +128,8 @@ struct to_chars_test_base
     return r;
   }
 
-  static auto
-  fromchars(char const* p, char const* ep, int base = 10) -> decltype(fromchars(p, ep, base, std::is_signed<X>()))
+  static auto fromchars(char const* p, char const* ep, int base = 10)
+    -> decltype(fromchars(p, ep, base, std::is_signed<X>()))
   {
     return fromchars(p, ep, base, std::is_signed<X>());
   }
diff --git a/libcudacxx/test/support/concurrent_agents.h b/libcudacxx/test/support/concurrent_agents.h
index 0e17de0b2c5..6b57b3531a0 100644
--- a/libcudacxx/test/support/concurrent_agents.h
+++ b/libcudacxx/test/support/concurrent_agents.h
@@ -50,8 +50,7 @@ __host__ __device__ void concurrent_agents_launch(Fs... fs)
      __syncthreads();),
     (std::thread threads[]{std::thread{std::forward<Fs>(fs)}...};
 
-     for (auto&& thread
-          : threads) { thread.join(); }))
+     for (auto&& thread : threads) { thread.join(); }))
 }
 
 #endif // _CONCURRENT_AGENTS_H
diff --git a/libcudacxx/test/support/counting_predicates.h b/libcudacxx/test/support/counting_predicates.h
index 85bce9875c6..227466e139d 100644
--- a/libcudacxx/test/support/counting_predicates.h
+++ b/libcudacxx/test/support/counting_predicates.h
@@ -91,16 +91,16 @@ class counting_predicate
   {}
 
   template <class... Args>
-  __host__ __device__ TEST_CONSTEXPR_CXX14 auto
-  operator()(Args&&... args) -> decltype(pred_(cuda::std::forward<Args>(args)...))
+  __host__ __device__ TEST_CONSTEXPR_CXX14 auto operator()(Args&&... args)
+    -> decltype(pred_(cuda::std::forward<Args>(args)...))
   {
     ++(*count_);
     return pred_(cuda::std::forward<Args>(args)...);
   }
 
   template <class... Args>
-  __host__ __device__ TEST_CONSTEXPR_CXX14 auto
-  operator()(Args&&... args) const -> decltype(pred_(cuda::std::forward<Args>(args)...))
+  __host__ __device__ TEST_CONSTEXPR_CXX14 auto operator()(Args&&... args) const
+    -> decltype(pred_(cuda::std::forward<Args>(args)...))
   {
     ++(*count_);
     return pred_(cuda::std::forward<Args>(args)...);
diff --git a/libcudacxx/test/support/cuda_space_selector.h b/libcudacxx/test/support/cuda_space_selector.h
index 2217699fbee..c848ff95e26 100644
--- a/libcudacxx/test/support/cuda_space_selector.h
+++ b/libcudacxx/test/support/cuda_space_selector.h
@@ -129,8 +129,7 @@ struct default_initializer
 };
 
 template <typename T,
-          template <typename, cuda::std::size_t>
-          class Provider,
+          template <typename, cuda::std::size_t> class Provider,
           typename Initializer           = constructor_initializer,
           cuda::std::size_t SharedOffset = 0>
 class memory_selector
diff --git a/libcudacxx/test/support/is_transparent.h b/libcudacxx/test/support/is_transparent.h
index 35fa5ed1af7..39241874449 100644
--- a/libcudacxx/test/support/is_transparent.h
+++ b/libcudacxx/test/support/is_transparent.h
@@ -17,8 +17,8 @@
 struct transparent_less
 {
   template <class T, class U>
-  constexpr auto operator()(T&& t, U&& u) const
-    noexcept(noexcept(std::forward<T>(t) < std::forward<U>(u))) -> decltype(std::forward<T>(t) < std::forward<U>(u))
+  constexpr auto operator()(T&& t, U&& u) const noexcept(noexcept(std::forward<T>(t) < std::forward<U>(u)))
+    -> decltype(std::forward<T>(t) < std::forward<U>(u))
   {
     return std::forward<T>(t) < std::forward<U>(u);
   }
@@ -28,8 +28,8 @@ struct transparent_less
 struct transparent_less_not_referenceable
 {
   template <class T, class U>
-  constexpr auto operator()(T&& t, U&& u) const
-    noexcept(noexcept(std::forward<T>(t) < std::forward<U>(u))) -> decltype(std::forward<T>(t) < std::forward<U>(u))
+  constexpr auto operator()(T&& t, U&& u) const noexcept(noexcept(std::forward<T>(t) < std::forward<U>(u)))
+    -> decltype(std::forward<T>(t) < std::forward<U>(u))
   {
     return std::forward<T>(t) < std::forward<U>(u);
   }
@@ -39,8 +39,8 @@ struct transparent_less_not_referenceable
 struct transparent_less_no_type
 {
   template <class T, class U>
-  constexpr auto operator()(T&& t, U&& u) const
-    noexcept(noexcept(std::forward<T>(t) < std::forward<U>(u))) -> decltype(std::forward<T>(t) < std::forward<U>(u))
+  constexpr auto operator()(T&& t, U&& u) const noexcept(noexcept(std::forward<T>(t) < std::forward<U>(u)))
+    -> decltype(std::forward<T>(t) < std::forward<U>(u))
   {
     return std::forward<T>(t) < std::forward<U>(u);
   }
@@ -52,8 +52,8 @@ struct transparent_less_no_type
 struct transparent_less_private
 {
   template <class T, class U>
-  constexpr auto operator()(T&& t, U&& u) const
-    noexcept(noexcept(std::forward<T>(t) < std::forward<U>(u))) -> decltype(std::forward<T>(t) < std::forward<U>(u))
+  constexpr auto operator()(T&& t, U&& u) const noexcept(noexcept(std::forward<T>(t) < std::forward<U>(u)))
+    -> decltype(std::forward<T>(t) < std::forward<U>(u))
   {
     return std::forward<T>(t) < std::forward<U>(u);
   }
@@ -65,8 +65,8 @@ struct transparent_less_private
 struct transparent_less_not_a_type
 {
   template <class T, class U>
-  constexpr auto operator()(T&& t, U&& u) const
-    noexcept(noexcept(std::forward<T>(t) < std::forward<U>(u))) -> decltype(std::forward<T>(t) < std::forward<U>(u))
+  constexpr auto operator()(T&& t, U&& u) const noexcept(noexcept(std::forward<T>(t) < std::forward<U>(u)))
+    -> decltype(std::forward<T>(t) < std::forward<U>(u))
   {
     return std::forward<T>(t) < std::forward<U>(u);
   }
diff --git a/libcudacxx/test/support/rapid-cxx-test.h b/libcudacxx/test/support/rapid-cxx-test.h
index 6429df670dc..792bb6451db 100644
--- a/libcudacxx/test/support/rapid-cxx-test.h
+++ b/libcudacxx/test/support/rapid-cxx-test.h
@@ -444,87 +444,87 @@
 //
 ////////////////////////////////////////////////////////////////////////////////
 
-#define TEST_WARN_EQUAL_COLLECTIONS(...)                                         \
-  do                                                                             \
-  {                                                                              \
-    TEST_SET_CHECKPOINT();                                                       \
-    ::rapid_cxx_test::test_outcome m_f(                                          \
-      ::rapid_cxx_test::failure_type::none,                                      \
-      __FILE__,                                                                  \
-      TEST_FUNC_NAME(),                                                          \
-      __LINE__,                                                                  \
-      "TEST_WARN_EQUAL_COLLECTIONS(" #__VA_ARGS__ ")",                           \
-      "");                                                                       \
-    if (not ::rapid_cxx_test::detail::check_equal_collections_impl(__VA_ARGS__)) \
-    {                                                                            \
-      m_f.type = ::rapid_cxx_test::failure_type::warn;                           \
-    }                                                                            \
-    ::rapid_cxx_test::get_reporter().report(m_f);                                \
+#define TEST_WARN_EQUAL_COLLECTIONS(...)                                        \
+  do                                                                            \
+  {                                                                             \
+    TEST_SET_CHECKPOINT();                                                      \
+    ::rapid_cxx_test::test_outcome m_f(                                         \
+      ::rapid_cxx_test::failure_type::none,                                     \
+      __FILE__,                                                                 \
+      TEST_FUNC_NAME(),                                                         \
+      __LINE__,                                                                 \
+      "TEST_WARN_EQUAL_COLLECTIONS(" #__VA_ARGS__ ")",                          \
+      "");                                                                      \
+    if (not::rapid_cxx_test::detail::check_equal_collections_impl(__VA_ARGS__)) \
+    {                                                                           \
+      m_f.type = ::rapid_cxx_test::failure_type::warn;                          \
+    }                                                                           \
+    ::rapid_cxx_test::get_reporter().report(m_f);                               \
   } while (false)
 #
 
-#define TEST_CHECK_EQUAL_COLLECTIONS(...)                                        \
-  do                                                                             \
-  {                                                                              \
-    TEST_SET_CHECKPOINT();                                                       \
-    ::rapid_cxx_test::test_outcome m_f(                                          \
-      ::rapid_cxx_test::failure_type::none,                                      \
-      __FILE__,                                                                  \
-      TEST_FUNC_NAME(),                                                          \
-      __LINE__,                                                                  \
-      "TEST_CHECK_EQUAL_COLLECTIONS(" #__VA_ARGS__ ")",                          \
-      "");                                                                       \
-    if (not ::rapid_cxx_test::detail::check_equal_collections_impl(__VA_ARGS__)) \
-    {                                                                            \
-      m_f.type = ::rapid_cxx_test::failure_type::check;                          \
-    }                                                                            \
-    ::rapid_cxx_test::get_reporter().report(m_f);                                \
+#define TEST_CHECK_EQUAL_COLLECTIONS(...)                                       \
+  do                                                                            \
+  {                                                                             \
+    TEST_SET_CHECKPOINT();                                                      \
+    ::rapid_cxx_test::test_outcome m_f(                                         \
+      ::rapid_cxx_test::failure_type::none,                                     \
+      __FILE__,                                                                 \
+      TEST_FUNC_NAME(),                                                         \
+      __LINE__,                                                                 \
+      "TEST_CHECK_EQUAL_COLLECTIONS(" #__VA_ARGS__ ")",                         \
+      "");                                                                      \
+    if (not::rapid_cxx_test::detail::check_equal_collections_impl(__VA_ARGS__)) \
+    {                                                                           \
+      m_f.type = ::rapid_cxx_test::failure_type::check;                         \
+    }                                                                           \
+    ::rapid_cxx_test::get_reporter().report(m_f);                               \
   } while (false)
 #
 
-#define TEST_REQUIRE_EQUAL_COLLECTIONS(...)                                      \
-  do                                                                             \
-  {                                                                              \
-    TEST_SET_CHECKPOINT();                                                       \
-    ::rapid_cxx_test::test_outcome m_f(                                          \
-      ::rapid_cxx_test::failure_type::none,                                      \
-      __FILE__,                                                                  \
-      TEST_FUNC_NAME(),                                                          \
-      __LINE__,                                                                  \
-      "TEST_REQUIRE_EQUAL_COLLECTIONS(" #__VA_ARGS__ ")",                        \
-      "");                                                                       \
-    if (not ::rapid_cxx_test::detail::check_equal_collections_impl(__VA_ARGS__)) \
-    {                                                                            \
-      m_f.type = ::rapid_cxx_test::failure_type::require;                        \
-    }                                                                            \
-    ::rapid_cxx_test::get_reporter().report(m_f);                                \
-    if (m_f.type != ::rapid_cxx_test::failure_type::none)                        \
-    {                                                                            \
-      return;                                                                    \
-    }                                                                            \
+#define TEST_REQUIRE_EQUAL_COLLECTIONS(...)                                     \
+  do                                                                            \
+  {                                                                             \
+    TEST_SET_CHECKPOINT();                                                      \
+    ::rapid_cxx_test::test_outcome m_f(                                         \
+      ::rapid_cxx_test::failure_type::none,                                     \
+      __FILE__,                                                                 \
+      TEST_FUNC_NAME(),                                                         \
+      __LINE__,                                                                 \
+      "TEST_REQUIRE_EQUAL_COLLECTIONS(" #__VA_ARGS__ ")",                       \
+      "");                                                                      \
+    if (not::rapid_cxx_test::detail::check_equal_collections_impl(__VA_ARGS__)) \
+    {                                                                           \
+      m_f.type = ::rapid_cxx_test::failure_type::require;                       \
+    }                                                                           \
+    ::rapid_cxx_test::get_reporter().report(m_f);                               \
+    if (m_f.type != ::rapid_cxx_test::failure_type::none)                       \
+    {                                                                           \
+      return;                                                                   \
+    }                                                                           \
   } while (false)
 #
 
-#define TEST_ASSERT_EQUAL_COLLECTIONS(...)                                       \
-  do                                                                             \
-  {                                                                              \
-    TEST_SET_CHECKPOINT();                                                       \
-    ::rapid_cxx_test::test_outcome m_f(                                          \
-      ::rapid_cxx_test::failure_type::none,                                      \
-      __FILE__,                                                                  \
-      TEST_FUNC_NAME(),                                                          \
-      __LINE__,                                                                  \
-      "TEST_ASSERT_EQUAL_COLLECTIONS(" #__VA_ARGS__ ")",                         \
-      "");                                                                       \
-    if (not ::rapid_cxx_test::detail::check_equal_collections_impl(__VA_ARGS__)) \
-    {                                                                            \
-      m_f.type = ::rapid_cxx_test::failure_type::assert;                         \
-    }                                                                            \
-    ::rapid_cxx_test::get_reporter().report(m_f);                                \
-    if (m_f.type != ::rapid_cxx_test::failure_type::none)                        \
-    {                                                                            \
-      ::std::abort();                                                            \
-    }                                                                            \
+#define TEST_ASSERT_EQUAL_COLLECTIONS(...)                                      \
+  do                                                                            \
+  {                                                                             \
+    TEST_SET_CHECKPOINT();                                                      \
+    ::rapid_cxx_test::test_outcome m_f(                                         \
+      ::rapid_cxx_test::failure_type::none,                                     \
+      __FILE__,                                                                 \
+      TEST_FUNC_NAME(),                                                         \
+      __LINE__,                                                                 \
+      "TEST_ASSERT_EQUAL_COLLECTIONS(" #__VA_ARGS__ ")",                        \
+      "");                                                                      \
+    if (not::rapid_cxx_test::detail::check_equal_collections_impl(__VA_ARGS__)) \
+    {                                                                           \
+      m_f.type = ::rapid_cxx_test::failure_type::assert;                        \
+    }                                                                           \
+    ::rapid_cxx_test::get_reporter().report(m_f);                               \
+    if (m_f.type != ::rapid_cxx_test::failure_type::none)                       \
+    {                                                                           \
+      ::std::abort();                                                           \
+    }                                                                           \
   } while (false)
 #
 
diff --git a/libcudacxx/test/support/test_convertible.h b/libcudacxx/test/support/test_convertible.h
index 843a4e36849..5de6472f652 100644
--- a/libcudacxx/test/support/test_convertible.h
+++ b/libcudacxx/test/support/test_convertible.h
@@ -24,8 +24,8 @@ template <class Tp>
 __host__ __device__ void eat_type(Tp);
 
 template <class Tp, class... Args>
-__host__ __device__ constexpr auto
-test_convertible_imp(int) -> decltype(eat_type<Tp>({cuda::std::declval<Args>()...}), true)
+__host__ __device__ constexpr auto test_convertible_imp(int)
+  -> decltype(eat_type<Tp>({cuda::std::declval<Args>()...}), true)
 {
   return true;
 }
diff --git a/thrust/testing/async_transform.cu b/thrust/testing/async_transform.cu
index 0f94f9d88f3..bfb30006ff2 100644
--- a/thrust/testing/async_transform.cu
+++ b/thrust/testing/async_transform.cu
@@ -128,10 +128,8 @@ DEFINE_SYNC_TRANSFORM_UNARY_INVOKER(
 ///////////////////////////////////////////////////////////////////////////////
 
 template <template <typename> class AsyncTransformUnaryInvoker,
-          template <typename>
-          class SyncTransformUnaryInvoker,
-          template <typename>
-          class UnaryOperation>
+          template <typename> class SyncTransformUnaryInvoker,
+          template <typename> class UnaryOperation>
 struct test_async_transform_unary
 {
   template <typename T>
@@ -222,10 +220,8 @@ DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
 ///////////////////////////////////////////////////////////////////////////////
 
 template <template <typename> class AsyncTransformUnaryInvoker,
-          template <typename>
-          class SyncTransformUnaryInvoker,
-          template <typename>
-          class UnaryOperation>
+          template <typename> class SyncTransformUnaryInvoker,
+          template <typename> class UnaryOperation>
 struct test_async_transform_unary_inplace
 {
   template <typename T>
@@ -305,10 +301,8 @@ DECLARE_GENERIC_SIZED_UNITTEST_WITH_TYPES_AND_NAME(
 ///////////////////////////////////////////////////////////////////////////////
 
 template <template <typename> class AsyncTransformUnaryInvoker,
-          template <typename>
-          class SyncTransformUnaryInvoker,
-          template <typename>
-          class UnaryOperation>
+          template <typename> class SyncTransformUnaryInvoker,
+          template <typename> class UnaryOperation>
 struct test_async_transform_unary_counting_iterator
 {
   template <typename T>
diff --git a/thrust/testing/cuda/transform.cu b/thrust/testing/cuda/transform.cu
index 2e474ccfb5a..594194cd183 100644
--- a/thrust/testing/cuda/transform.cu
+++ b/thrust/testing/cuda/transform.cu
@@ -347,8 +347,8 @@ DECLARE_UNITTEST(TestTransformBinaryCudaStreams);
 
 struct sum_five
 {
-  _CCCL_HOST_DEVICE auto
-  operator()(std::int8_t a, std::int16_t b, std::int32_t c, std::int64_t d, float e) const -> double
+  _CCCL_HOST_DEVICE auto operator()(std::int8_t a, std::int16_t b, std::int32_t c, std::int64_t d, float e) const
+    -> double
   {
     return a + b + c + d + e;
   }
@@ -373,8 +373,8 @@ public:
   }
 
   template <typename Tuple>
-  _CCCL_HOST_DEVICE auto
-  operator()(Tuple&& t) const -> decltype(detail::zip_detail::apply(std::declval<sum_five>(), THRUST_FWD(t)))
+  _CCCL_HOST_DEVICE auto operator()(Tuple&& t) const
+    -> decltype(detail::zip_detail::apply(std::declval<sum_five>(), THRUST_FWD(t)))
   {
     // not calling func, so we would get a wrong result if we were called
     return {};
diff --git a/thrust/testing/unittest/testframework.h b/thrust/testing/unittest/testframework.h
index f3f9c942b35..cd5799e0ca0 100644
--- a/thrust/testing/unittest/testframework.h
+++ b/thrust/testing/unittest/testframework.h
@@ -571,10 +571,8 @@ class VariableUnitTest : public UnitTest
 
 template <template <typename> class TestName,
           typename TypeList,
-          template <typename, typename>
-          class Vector,
-          template <typename>
-          class Alloc>
+          template <typename, typename> class Vector,
+          template <typename> class Alloc>
 struct VectorUnitTest : public UnitTest
 {
   VectorUnitTest()
diff --git a/thrust/thrust/detail/functional/actor.h b/thrust/thrust/detail/functional/actor.h
index 2de51b62285..c727be09306 100644
--- a/thrust/thrust/detail/functional/actor.h
+++ b/thrust/thrust/detail/functional/actor.h
@@ -82,8 +82,8 @@ template <unsigned int Pos>
 struct argument
 {
   template <typename... Ts>
-  _CCCL_HOST_DEVICE auto
-  eval(Ts&&... args) const -> decltype(thrust::get<Pos>(thrust::tuple<Ts&&...>{THRUST_FWD(args)...}))
+  _CCCL_HOST_DEVICE auto eval(Ts&&... args) const
+    -> decltype(thrust::get<Pos>(thrust::tuple<Ts&&...>{THRUST_FWD(args)...}))
   {
     return thrust::get<Pos>(thrust::tuple<Ts&&...>{THRUST_FWD(args)...});
   }
diff --git a/thrust/thrust/detail/functional/operators.h b/thrust/thrust/detail/functional/operators.h
index f4f89f5f7d7..57c52b92520 100644
--- a/thrust/thrust/detail/functional/operators.h
+++ b/thrust/thrust/detail/functional/operators.h
@@ -263,8 +263,8 @@ struct unary_plus
 
   _CCCL_EXEC_CHECK_DISABLE
   template <typename T1>
-  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const
-    noexcept(noexcept(+THRUST_FWD(t1))) -> decltype(+THRUST_FWD(t1))
+  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(+THRUST_FWD(t1)))
+    -> decltype(+THRUST_FWD(t1))
   {
     return +THRUST_FWD(t1);
   }
@@ -277,8 +277,8 @@ struct prefix_increment
 
   _CCCL_EXEC_CHECK_DISABLE
   template <typename T1>
-  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const
-    noexcept(noexcept(++THRUST_FWD(t1))) -> decltype(++THRUST_FWD(t1))
+  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(++THRUST_FWD(t1)))
+    -> decltype(++THRUST_FWD(t1))
   {
     return ++THRUST_FWD(t1);
   }
@@ -291,8 +291,8 @@ struct postfix_increment
 
   _CCCL_EXEC_CHECK_DISABLE
   template <typename T1>
-  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const
-    noexcept(noexcept(THRUST_FWD(t1)++)) -> decltype(THRUST_FWD(t1)++)
+  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(THRUST_FWD(t1)++))
+    -> decltype(THRUST_FWD(t1)++)
   {
     return THRUST_FWD(t1)++;
   }
@@ -305,8 +305,8 @@ struct prefix_decrement
 
   _CCCL_EXEC_CHECK_DISABLE
   template <typename T1>
-  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const
-    noexcept(noexcept(--THRUST_FWD(t1))) -> decltype(--THRUST_FWD(t1))
+  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(--THRUST_FWD(t1)))
+    -> decltype(--THRUST_FWD(t1))
   {
     return --THRUST_FWD(t1);
   }
@@ -319,8 +319,8 @@ struct postfix_decrement
 
   _CCCL_EXEC_CHECK_DISABLE
   template <typename T1>
-  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const
-    noexcept(noexcept(THRUST_FWD(t1)--)) -> decltype(THRUST_FWD(t1)--)
+  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(THRUST_FWD(t1)--))
+    -> decltype(THRUST_FWD(t1)--)
   {
     return THRUST_FWD(t1)--;
   }
@@ -333,8 +333,8 @@ struct bit_not
 
   _CCCL_EXEC_CHECK_DISABLE
   template <typename T1>
-  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const
-    noexcept(noexcept(~THRUST_FWD(t1))) -> decltype(~THRUST_FWD(t1))
+  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1) const noexcept(noexcept(~THRUST_FWD(t1)))
+    -> decltype(~THRUST_FWD(t1))
   {
     return ~THRUST_FWD(t1);
   }
diff --git a/thrust/thrust/detail/tuple_transform.h b/thrust/thrust/detail/tuple_transform.h
index 49ccdf11cdd..c7cdfcd5b2e 100644
--- a/thrust/thrust/detail/tuple_transform.h
+++ b/thrust/thrust/detail/tuple_transform.h
@@ -35,8 +35,7 @@ namespace detail
 {
 
 template <typename Tuple,
-          template <typename>
-          class UnaryMetaFunction,
+          template <typename> class UnaryMetaFunction,
           typename UnaryFunction,
           typename IndexSequence = thrust::make_index_sequence<thrust::tuple_size<Tuple>::value>>
 struct tuple_transform_functor;
diff --git a/thrust/thrust/detail/type_traits/pointer_traits.h b/thrust/thrust/detail/type_traits/pointer_traits.h
index 9ac54ee7860..7570aba2242 100644
--- a/thrust/thrust/detail/type_traits/pointer_traits.h
+++ b/thrust/thrust/detail/type_traits/pointer_traits.h
@@ -84,8 +84,7 @@ struct rebind_pointer<Ptr<OldT, Tail...>, T>
 template <template <typename, typename, typename, typename...> class Ptr,
           typename OldT,
           typename Tag,
-          template <typename...>
-          class Ref,
+          template <typename...> class Ref,
           typename... RefTail,
           typename... PtrTail,
           typename T>
@@ -100,11 +99,9 @@ struct rebind_pointer<Ptr<OldT, Tag, Ref<OldT, RefTail...>, PtrTail...>, T>
 template <template <typename, typename, typename, typename...> class Ptr,
           typename OldT,
           typename Tag,
-          template <typename...>
-          class Ref,
+          template <typename...> class Ref,
           typename... RefTail,
-          template <typename...>
-          class DerivedPtr,
+          template <typename...> class DerivedPtr,
           typename... DerivedPtrTail,
           typename T>
 struct rebind_pointer<Ptr<OldT, Tag, Ref<OldT, RefTail...>, DerivedPtr<OldT, DerivedPtrTail...>>, T>
@@ -130,8 +127,7 @@ struct rebind_pointer<Ptr<OldT, Tag, typename std::add_lvalue_reference<OldT>::t
 template <template <typename, typename, typename, typename...> class Ptr,
           typename OldT,
           typename Tag,
-          template <typename...>
-          class DerivedPtr,
+          template <typename...> class DerivedPtr,
           typename... DerivedPtrTail,
           typename T>
 struct rebind_pointer<Ptr<OldT, Tag, typename std::add_lvalue_reference<OldT>::type, DerivedPtr<OldT, DerivedPtrTail...>>,
diff --git a/thrust/thrust/functional.h b/thrust/thrust/functional.h
index 16c464f702e..a72a3473840 100644
--- a/thrust/thrust/functional.h
+++ b/thrust/thrust/functional.h
@@ -884,8 +884,8 @@ struct project1st<void, void>
   using is_transparent = void;
   _CCCL_EXEC_CHECK_DISABLE
   template <typename T1, typename T2>
-  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&&) const
-    noexcept(noexcept(THRUST_FWD(t1))) -> decltype(THRUST_FWD(t1))
+  _CCCL_HOST_DEVICE constexpr auto operator()(T1&& t1, T2&&) const noexcept(noexcept(THRUST_FWD(t1)))
+    -> decltype(THRUST_FWD(t1))
   {
     return THRUST_FWD(t1);
   }
@@ -943,8 +943,8 @@ struct project2nd<void, void>
   using is_transparent = void;
   _CCCL_EXEC_CHECK_DISABLE
   template <typename T1, typename T2>
-  _CCCL_HOST_DEVICE constexpr auto operator()(T1&&, T2&& t2) const
-    noexcept(noexcept(THRUST_FWD(t2))) -> decltype(THRUST_FWD(t2))
+  _CCCL_HOST_DEVICE constexpr auto operator()(T1&&, T2&& t2) const noexcept(noexcept(THRUST_FWD(t2)))
+    -> decltype(THRUST_FWD(t2))
   {
     return THRUST_FWD(t2);
   }
@@ -967,15 +967,15 @@ struct not_fun_t
   F f;
 
   template <typename... Ts>
-  _CCCL_HOST_DEVICE auto
-  operator()(Ts&&... args) noexcept(noexcept(!f(std::forward<Ts>(args)...))) -> decltype(!f(std::forward<Ts>(args)...))
+  _CCCL_HOST_DEVICE auto operator()(Ts&&... args) noexcept(noexcept(!f(std::forward<Ts>(args)...)))
+    -> decltype(!f(std::forward<Ts>(args)...))
   {
     return !f(std::forward<Ts>(args)...);
   }
 
   template <typename... Ts>
-  _CCCL_HOST_DEVICE auto operator()(Ts&&... args) const
-    noexcept(noexcept(!f(std::forward<Ts>(args)...))) -> decltype(!f(std::forward<Ts>(args)...))
+  _CCCL_HOST_DEVICE auto operator()(Ts&&... args) const noexcept(noexcept(!f(std::forward<Ts>(args)...)))
+    -> decltype(!f(std::forward<Ts>(args)...))
   {
     return !f(std::forward<Ts>(args)...);
   }
diff --git a/thrust/thrust/optional.h b/thrust/thrust/optional.h
index 788f6597953..3cef29ae7ec 100644
--- a/thrust/thrust/optional.h
+++ b/thrust/thrust/optional.h
@@ -154,16 +154,18 @@ template <
 #  endif
   typename = enable_if_t<std::is_member_pointer<decay_t<Fn>>::value>,
   int      = 0>
-_CCCL_HOST_DEVICE constexpr auto invoke(Fn&& f, Args&&... args) noexcept(
-  noexcept(std::mem_fn(f)(std::forward<Args>(args)...))) -> decltype(std::mem_fn(f)(std::forward<Args>(args)...))
+_CCCL_HOST_DEVICE constexpr auto
+invoke(Fn&& f, Args&&... args) noexcept(noexcept(std::mem_fn(f)(std::forward<Args>(args)...)))
+  -> decltype(std::mem_fn(f)(std::forward<Args>(args)...))
 {
   return std::mem_fn(f)(std::forward<Args>(args)...);
 }
 
 _CCCL_EXEC_CHECK_DISABLE
 template <typename Fn, typename... Args, typename = enable_if_t<!std::is_member_pointer<decay_t<Fn>>::value>>
-_CCCL_HOST_DEVICE constexpr auto invoke(Fn&& f, Args&&... args) noexcept(noexcept(
-  std::forward<Fn>(f)(std::forward<Args>(args)...))) -> decltype(std::forward<Fn>(f)(std::forward<Args>(args)...))
+_CCCL_HOST_DEVICE constexpr auto
+invoke(Fn&& f, Args&&... args) noexcept(noexcept(std::forward<Fn>(f)(std::forward<Args>(args)...)))
+  -> decltype(std::forward<Fn>(f)(std::forward<Args>(args)...))
 {
   return std::forward<Fn>(f)(std::forward<Args>(args)...);
 }
diff --git a/thrust/thrust/system/cuda/detail/transform.h b/thrust/thrust/system/cuda/detail/transform.h
index 4cf879c3eaa..64154a8f6da 100644
--- a/thrust/thrust/system/cuda/detail/transform.h
+++ b/thrust/thrust/system/cuda/detail/transform.h
@@ -276,8 +276,8 @@ OutputIt THRUST_FUNCTION cub_transform_many(
 }
 
 template <typename... Ts, std::size_t... Is>
-THRUST_FUNCTION auto
-convert_to_std_tuple(tuple<Ts...> t, ::cuda::std::index_sequence<Is...>) -> ::cuda::std::tuple<Ts...>
+THRUST_FUNCTION auto convert_to_std_tuple(tuple<Ts...> t, ::cuda::std::index_sequence<Is...>)
+  -> ::cuda::std::tuple<Ts...>
 {
   return ::cuda::std::tuple<Ts...>{get<Is>(t)...};
 }
diff --git a/thrust/thrust/type_traits/is_contiguous_iterator.h b/thrust/thrust/type_traits/is_contiguous_iterator.h
index 7950422a4e0..95f6c503b15 100644
--- a/thrust/thrust/type_traits/is_contiguous_iterator.h
+++ b/thrust/thrust/type_traits/is_contiguous_iterator.h
@@ -229,8 +229,8 @@ using unwrap_contiguous_iterator_t = typename detail::contiguous_iterator_traits
 
 //! Converts a contiguous iterator to its underlying raw pointer.
 template <typename ContiguousIterator>
-_CCCL_HOST_DEVICE auto
-unwrap_contiguous_iterator(ContiguousIterator it) -> unwrap_contiguous_iterator_t<ContiguousIterator>
+_CCCL_HOST_DEVICE auto unwrap_contiguous_iterator(ContiguousIterator it)
+  -> unwrap_contiguous_iterator_t<ContiguousIterator>
 {
   static_assert(thrust::is_contiguous_iterator<ContiguousIterator>::value,
                 "unwrap_contiguous_iterator called with non-contiguous iterator.");