Skip to content

Commit

Permalink
scratch
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexMaclean committed Jan 13, 2025
1 parent 08028d6 commit c57002f
Show file tree
Hide file tree
Showing 8 changed files with 127 additions and 103 deletions.
10 changes: 9 additions & 1 deletion llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Transforms/InstCombine/InstCombiner.h"
#include <optional>
using namespace llvm;
Expand Down Expand Up @@ -562,4 +563,11 @@ Value *NVPTXTTIImpl::rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
}
}
return nullptr;
}
}

/// Report the address space that \p V may be assumed to live in.
///
/// \param V the value being queried.
/// \returns ADDRESS_SPACE_LOCAL when \p V is an AllocaInst; for any other
/// value, -1, which wraps to the maximum unsigned value on return.
unsigned NVPTXTTIImpl::getAssumedAddrSpace(const Value *V) const {
  // Only allocas are recognized here; everything else gets the -1 result.
  if (!isa<AllocaInst>(V))
    return -1;

  return ADDRESS_SPACE_LOCAL;
}
2 changes: 2 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXTargetTransformInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,8 @@ class NVPTXTTIImpl : public BasicTTIImplBase<NVPTXTTIImpl> {

Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
Value *NewV) const;

/// Returns ADDRESS_SPACE_LOCAL if \p V is an AllocaInst, and -1 (converted to
/// unsigned) for every other value.
unsigned getAssumedAddrSpace(const Value *V) const;
};

} // end namespace llvm
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/local-stack-frame.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
; Ensure we access the local stack properly

; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX32: st.volatile.u32 [%SP], %r{{[0-9]+}};
; PTX32: add.u32 %r[[SP_REG:[0-9]+]], %SPL, 0;
; PTX32: st.local.u32 [%r[[SP_REG]]], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX64: st.volatile.u32 [%SP], %r{{[0-9]+}};
; PTX64: add.u64 %rd[[SP_REG:[0-9]+]], %SPL, 0;
; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}};
define void @foo(i32 %a) {
%local = alloca i32, align 4
store volatile i32 %a, ptr %local
Expand Down
43 changes: 16 additions & 27 deletions llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -29,31 +29,32 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: .reg .pred %p<2>;
; PTX-NEXT: .reg .b16 %rs<3>;
; PTX-NEXT: .reg .b32 %r<11>;
; PTX-NEXT: .reg .b64 %rd<9>;
; PTX-NEXT: .reg .b64 %rd<10>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.u64 %SPL, __local_depot0;
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.u8 %rs1, [non_kernel_function_param_1];
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
; PTX-NEXT: setp.eq.b16 %p1, %rs2, 1;
; PTX-NEXT: ld.param.s32 %rd1, [non_kernel_function_param_2];
; PTX-NEXT: ld.param.u64 %rd2, [non_kernel_function_param_0+8];
; PTX-NEXT: st.u64 [%SP+8], %rd2;
; PTX-NEXT: ld.param.u64 %rd3, [non_kernel_function_param_0];
; PTX-NEXT: st.u64 [%SP], %rd3;
; PTX-NEXT: mov.u64 %rd4, gi;
; PTX-NEXT: cvta.global.u64 %rd5, %rd4;
; PTX-NEXT: add.u64 %rd6, %SP, 0;
; PTX-NEXT: selp.b64 %rd7, %rd6, %rd5, %p1;
; PTX-NEXT: add.s64 %rd8, %rd7, %rd1;
; PTX-NEXT: ld.u8 %r1, [%rd8];
; PTX-NEXT: ld.u8 %r2, [%rd8+1];
; PTX-NEXT: add.u64 %rd1, %SP, 0;
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
; PTX-NEXT: ld.param.s32 %rd3, [non_kernel_function_param_2];
; PTX-NEXT: ld.param.u64 %rd4, [non_kernel_function_param_0+8];
; PTX-NEXT: st.local.u64 [%rd2+8], %rd4;
; PTX-NEXT: ld.param.u64 %rd5, [non_kernel_function_param_0];
; PTX-NEXT: st.local.u64 [%rd2], %rd5;
; PTX-NEXT: mov.u64 %rd6, gi;
; PTX-NEXT: cvta.global.u64 %rd7, %rd6;
; PTX-NEXT: selp.b64 %rd8, %rd1, %rd7, %p1;
; PTX-NEXT: add.s64 %rd9, %rd8, %rd3;
; PTX-NEXT: ld.u8 %r1, [%rd9];
; PTX-NEXT: ld.u8 %r2, [%rd9+1];
; PTX-NEXT: shl.b32 %r3, %r2, 8;
; PTX-NEXT: or.b32 %r4, %r3, %r1;
; PTX-NEXT: ld.u8 %r5, [%rd8+2];
; PTX-NEXT: ld.u8 %r5, [%rd9+2];
; PTX-NEXT: shl.b32 %r6, %r5, 16;
; PTX-NEXT: ld.u8 %r7, [%rd8+3];
; PTX-NEXT: ld.u8 %r7, [%rd9+3];
; PTX-NEXT: shl.b32 %r8, %r7, 24;
; PTX-NEXT: or.b32 %r9, %r8, %r6;
; PTX-NEXT: or.b32 %r10, %r9, %r4;
Expand Down Expand Up @@ -90,7 +91,6 @@ define ptx_kernel void @grid_const_int(ptr byval(i32) align 4 %input1, i32 %inpu
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP]], [[INPUT2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT3]], align 4
; OPT-NEXT: ret void
;
%tmp = load i32, ptr %input1, align 4
%add = add i32 %tmp, %input2
store i32 %add, ptr %out
Expand Down Expand Up @@ -125,7 +125,6 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
; OPT-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
; OPT-NEXT: store i32 [[ADD]], ptr [[OUT5]], align 4
; OPT-NEXT: ret void
;
%gep1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%gep2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%int1 = load i32, ptr %gep1
Expand Down Expand Up @@ -166,7 +165,6 @@ define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; OPT-NEXT: [[INPUT_PARAM_GEN:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT_PARAM_GEN]])
; OPT-NEXT: ret void
;
%call = call i32 @escape(ptr %input)
ret void
}
Expand Down Expand Up @@ -224,7 +222,6 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; OPT-NEXT: store i32 [[A]], ptr [[A_ADDR]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape3(ptr [[INPUT_PARAM_GEN]], ptr [[A_ADDR]], ptr [[B_PARAM_GEN]])
; OPT-NEXT: ret void
;
%a.addr = alloca i32, align 4
store i32 %a, ptr %a.addr, align 4
%call = call i32 @escape3(ptr %input, ptr %a.addr, ptr %b)
Expand Down Expand Up @@ -252,7 +249,6 @@ define ptx_kernel void @grid_const_memory_escape(ptr byval(%struct.s) align 4 %i
; OPT-NEXT: [[INPUT1:%.*]] = call ptr @llvm.nvvm.ptr.param.to.gen.p0.p101(ptr addrspace(101) [[INPUT_PARAM]])
; OPT-NEXT: store ptr [[INPUT1]], ptr [[ADDR5]], align 8
; OPT-NEXT: ret void
;
store ptr %input, ptr %addr, align 8
ret void
}
Expand Down Expand Up @@ -286,7 +282,6 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
; OPT-NEXT: [[TMP2:%.*]] = call i64 asm "add.s64 $0, $1, $2
; OPT-NEXT: store i64 [[TMP2]], ptr [[RESULT5]], align 8
; OPT-NEXT: ret void
;
%tmpptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%tmpptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
%1 = call i64 asm "add.s64 $0, $1, $2;", "=l,l,l"(ptr %tmpptr1, ptr %tmpptr2) #1
Expand Down Expand Up @@ -335,7 +330,6 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; OPT-NEXT: store i32 [[TWICE]], ptr [[OUTPUT5]], align 4
; OPT-NEXT: [[CALL:%.*]] = call i32 @escape(ptr [[INPUT1_GEN]])
; OPT-NEXT: ret void
;
%val = load i32, ptr %input
%twice = add i32 %val, %val
store i32 %twice, ptr %output
Expand Down Expand Up @@ -389,7 +383,6 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; OPT-NEXT: [[ADD:%.*]] = add i32 [[VAL1]], [[VAL2]]
; OPT-NEXT: [[CALL2:%.*]] = call i32 @escape(ptr [[PTR1]])
; OPT-NEXT: ret i32 [[ADD]]
;
%ptr1 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 0
%val1 = load i32, ptr %ptr1
%ptr2 = getelementptr inbounds %struct.s, ptr %input, i32 0, i32 1
Expand Down Expand Up @@ -442,7 +435,6 @@ define ptx_kernel void @grid_const_phi(ptr byval(%struct.s) align 4 %input1, ptr
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
;

%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
Expand Down Expand Up @@ -508,7 +500,6 @@ define ptx_kernel void @grid_const_phi_ngc(ptr byval(%struct.s) align 4 %input1,
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
br i1 %less, label %first, label %second
Expand Down Expand Up @@ -562,7 +553,6 @@ define ptx_kernel void @grid_const_select(ptr byval(i32) align 4 %input1, ptr by
; OPT-NEXT: [[VALLOADED:%.*]] = load i32, ptr [[PTRNEW]], align 4
; OPT-NEXT: store i32 [[VALLOADED]], ptr [[INOUT2]], align 4
; OPT-NEXT: ret void
;
%val = load i32, ptr %inout
%less = icmp slt i32 %val, 0
%ptrnew = select i1 %less, ptr %input1, ptr %input2
Expand Down Expand Up @@ -594,7 +584,6 @@ define ptx_kernel i32 @grid_const_ptrtoint(ptr byval(i32) %input) {
; OPT-NEXT: [[PTRVAL:%.*]] = ptrtoint ptr [[INPUT1]] to i32
; OPT-NEXT: [[KEEPALIVE:%.*]] = add i32 [[INPUT3]], [[PTRVAL]]
; OPT-NEXT: ret i32 [[KEEPALIVE]]
;
%val = load i32, ptr %input
%ptrval = ptrtoint ptr %input to i32
%keepalive = add i32 %val, %ptrval
Expand Down
13 changes: 7 additions & 6 deletions llvm/test/CodeGen/NVPTX/lower-args.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,25 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
; PTX-NEXT: .local .align 8 .b8 __local_depot1[8];
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b64 %rd<5>;
; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.u64 %SPL, __local_depot1;
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.u64 %rd1, [load_padding_param_0];
; PTX-NEXT: st.u64 [%SP], %rd1;
; PTX-NEXT: add.u64 %rd2, %SP, 0;
; PTX-NEXT: add.u64 %rd1, %SP, 0;
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
; PTX-NEXT: ld.param.u64 %rd3, [load_padding_param_0];
; PTX-NEXT: st.local.u64 [%rd2], %rd3;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: st.param.b64 [param0], %rd1;
; PTX-NEXT: .param .b64 retval0;
; PTX-NEXT: call.uni (retval0),
; PTX-NEXT: escape,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: );
; PTX-NEXT: ld.param.b64 %rd3, [retval0];
; PTX-NEXT: ld.param.b64 %rd4, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
%tmp = call ptr @escape(ptr nonnull align 16 %arg)
Expand Down
104 changes: 53 additions & 51 deletions llvm/test/CodeGen/NVPTX/variadics-backend.ll
Original file line number Diff line number Diff line change
Expand Up @@ -148,35 +148,34 @@ entry:
define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics2(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 2 .b8 __local_depot2[4];
; CHECK-PTX-NEXT: .local .align 1 .b8 __local_depot2[3];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b16 %rs<4>;
; CHECK-PTX-NEXT: .reg .b32 %r<7>;
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
; CHECK-PTX-NEXT: .reg .b64 %rd<9>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: ld.param.u32 %r1, [variadics2_param_0];
; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics2_param_1];
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd3];
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd3+4];
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd3+7];
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs1;
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd3+5];
; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd3+6];
; CHECK-PTX-NEXT: shl.b16 %rs4, %rs3, 8;
; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2;
; CHECK-PTX-NEXT: st.u16 [%SP], %rs5;
; CHECK-PTX-NEXT: ld.u64 %rd4, [%rd3+8];
; CHECK-PTX-NEXT: add.u64 %rd3, %SPL, 0;
; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 7;
; CHECK-PTX-NEXT: and.b64 %rd5, %rd4, -8;
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd5];
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd5+4];
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd5+7];
; CHECK-PTX-NEXT: st.local.u8 [%rd3+2], %rs1;
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd5+6];
; CHECK-PTX-NEXT: st.local.u8 [%rd3+1], %rs2;
; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd5+5];
; CHECK-PTX-NEXT: st.local.u8 [%rd3], %rs3;
; CHECK-PTX-NEXT: ld.u64 %rd6, [%rd5+8];
; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2;
; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
; CHECK-PTX-NEXT: cvt.u64.u32 %rd5, %r5;
; CHECK-PTX-NEXT: add.s64 %rd6, %rd5, %rd4;
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd6;
; CHECK-PTX-NEXT: cvt.u64.u32 %rd7, %r5;
; CHECK-PTX-NEXT: add.s64 %rd8, %rd7, %rd6;
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd8;
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-PTX-NEXT: ret;
entry:
Expand Down Expand Up @@ -213,39 +212,39 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<10>;
; CHECK-PTX-NEXT: .reg .b16 %rs<8>;
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
; CHECK-PTX-NEXT: .reg .b64 %rd<9>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: mov.u64 %rd1, __const_$_bar_$_s1;
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd2];
; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
; CHECK-PTX-NEXT: mov.u64 %rd3, __const_$_bar_$_s1;
; CHECK-PTX-NEXT: add.s64 %rd4, %rd3, 7;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd4];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs2, %rs1;
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs2;
; CHECK-PTX-NEXT: add.s64 %rd3, %rd1, 5;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd3];
; CHECK-PTX-NEXT: st.local.u8 [%rd2+2], %rs2;
; CHECK-PTX-NEXT: add.s64 %rd5, %rd3, 6;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd5];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs4, %rs3;
; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 6;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd4];
; CHECK-PTX-NEXT: st.local.u8 [%rd2+1], %rs4;
; CHECK-PTX-NEXT: add.s64 %rd6, %rd3, 5;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd6];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5;
; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8;
; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4;
; CHECK-PTX-NEXT: st.u16 [%SP], %rs8;
; CHECK-PTX-NEXT: st.local.u8 [%rd2], %rs6;
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
; CHECK-PTX-NEXT: mov.b16 %rs9, 1;
; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs9;
; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 8;
; CHECK-PTX-NEXT: mov.b16 %rs7, 1;
; CHECK-PTX-NEXT: st.u8 [%SP+12], %rs7;
; CHECK-PTX-NEXT: mov.b64 %rd7, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7;
; CHECK-PTX-NEXT: add.u64 %rd8, %SP, 8;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd8;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics2,
Expand Down Expand Up @@ -384,26 +383,29 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<3>;
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
; CHECK-PTX-NEXT: .reg .b64 %rd<11>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd1, [__const_$_qux_$_s];
; CHECK-PTX-NEXT: st.u64 [%SP], %rd1;
; CHECK-PTX-NEXT: mov.u64 %rd2, __const_$_qux_$_s;
; CHECK-PTX-NEXT: add.s64 %rd3, %rd2, 8;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd4, [%rd3];
; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd4;
; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 16;
; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd3, [__const_$_qux_$_s];
; CHECK-PTX-NEXT: st.local.u64 [%rd2], %rd3;
; CHECK-PTX-NEXT: mov.u64 %rd4, __const_$_qux_$_s;
; CHECK-PTX-NEXT: add.s64 %rd5, %rd4, 8;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd6, [%rd5];
; CHECK-PTX-NEXT: st.local.u64 [%rd2+8], %rd6;
; CHECK-PTX-NEXT: mov.b64 %rd7, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7;
; CHECK-PTX-NEXT: ld.u64 %rd8, [%SP];
; CHECK-PTX-NEXT: ld.u64 %rd9, [%SP+8];
; CHECK-PTX-NEXT: add.u64 %rd10, %SP, 16;
; CHECK-PTX-NEXT: { // callseq 3, 0
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd1;
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd4;
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd8;
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd9;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd10;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics4,
Expand Down
17 changes: 17 additions & 0 deletions llvm/test/Transforms/InferAddressSpaces/NVPTX/alloca.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes=infer-address-spaces %s | FileCheck %s

target triple = "nvptx64-nvidia-cuda"


; A load straight from an alloca through a generic pointer. The CHECK lines
; expect the alloca to be addrspacecast to addrspace(5) and the load to go
; through that addrspace(5) pointer.
define float @load_alloca() {
; CHECK-LABEL: define float @load_alloca() {
; CHECK-NEXT: [[ADDR:%.*]] = alloca float, align 4
; CHECK-NEXT: [[TMP1:%.*]] = addrspacecast ptr [[ADDR]] to ptr addrspace(5)
; CHECK-NEXT: [[VAL:%.*]] = load float, ptr addrspace(5) [[TMP1]], align 4
; CHECK-NEXT: ret float [[VAL]]
;
%addr = alloca float
%val = load float, ptr %addr
ret float %val
}
Loading

0 comments on commit c57002f

Please sign in to comment.