Skip to content

Commit

Permalink
update tests
Browse files (browse the repository at this point in the history)
  • Loading branch information
AlexMaclean committed Jan 7, 2025
1 parent e980945 commit 5c69260
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 86 deletions.
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/NVPTX/local-stack-frame.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
; Ensure we access the local stack properly

; PTX32: mov.u32 %SPL, __local_depot{{[0-9]+}};
; PTX32: cvta.local.u32 %SP, %SPL;
; PTX32: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX32: st.volatile.u32 [%SP], %r{{[0-9]+}};
; PTX32: add.u32 %r[[SP_REG:[0-9]+]], %SPL, 0;
; PTX32: st.local.u32 [%r[[SP_REG]]], %r{{[0-9]+}};
; PTX64: mov.u64 %SPL, __local_depot{{[0-9]+}};
; PTX64: cvta.local.u64 %SP, %SPL;
; PTX64: ld.param.u32 %r{{[0-9]+}}, [foo_param_0];
; PTX64: st.volatile.u32 [%SP], %r{{[0-9]+}};
; PTX64: add.u64 %rd[[SP_REG:[0-9]+]], %SPL, 0;
; PTX64: st.local.u32 [%rd[[SP_REG]]], %r{{[0-9]+}};
define void @foo(i32 %a) {
%local = alloca i32, align 4
store volatile i32 %a, ptr %local
Expand Down
14 changes: 7 additions & 7 deletions llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,17 @@ define dso_local noundef i32 @non_kernel_function(ptr nocapture noundef readonly
; PTX-NEXT: ld.param.u8 %rs1, [non_kernel_function_param_1];
; PTX-NEXT: and.b16 %rs2, %rs1, 1;
; PTX-NEXT: setp.eq.b16 %p1, %rs2, 1;
; PTX-NEXT: ld.param.s32 %rd1, [non_kernel_function_param_2];
; PTX-NEXT: add.u64 %rd2, %SP, 0;
; PTX-NEXT: or.b64 %rd3, %rd2, 8;
; PTX-NEXT: add.u64 %rd1, %SP, 0;
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
; PTX-NEXT: ld.param.s32 %rd3, [non_kernel_function_param_2];
; PTX-NEXT: ld.param.u64 %rd4, [non_kernel_function_param_0+8];
; PTX-NEXT: st.u64 [%rd3], %rd4;
; PTX-NEXT: st.local.u64 [%rd2+8], %rd4;
; PTX-NEXT: ld.param.u64 %rd5, [non_kernel_function_param_0];
; PTX-NEXT: st.u64 [%SP], %rd5;
; PTX-NEXT: st.local.u64 [%rd2], %rd5;
; PTX-NEXT: mov.u64 %rd6, gi;
; PTX-NEXT: cvta.global.u64 %rd7, %rd6;
; PTX-NEXT: selp.b64 %rd8, %rd2, %rd7, %p1;
; PTX-NEXT: add.s64 %rd9, %rd8, %rd1;
; PTX-NEXT: selp.b64 %rd8, %rd1, %rd7, %p1;
; PTX-NEXT: add.s64 %rd9, %rd8, %rd3;
; PTX-NEXT: ld.u8 %r1, [%rd9];
; PTX-NEXT: ld.u8 %r2, [%rd9+1];
; PTX-NEXT: shl.b32 %r3, %r2, 8;
Expand Down
13 changes: 7 additions & 6 deletions llvm/test/CodeGen/NVPTX/lower-args.ll
Original file line number Diff line number Diff line change
Expand Up @@ -40,24 +40,25 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
; PTX-NEXT: .local .align 8 .b8 __local_depot1[8];
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
; PTX-NEXT: .reg .b64 %rd<5>;
; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.u64 %SPL, __local_depot1;
; PTX-NEXT: cvta.local.u64 %SP, %SPL;
; PTX-NEXT: ld.param.u64 %rd1, [load_padding_param_0];
; PTX-NEXT: st.u64 [%SP], %rd1;
; PTX-NEXT: add.u64 %rd2, %SP, 0;
; PTX-NEXT: add.u64 %rd1, %SP, 0;
; PTX-NEXT: add.u64 %rd2, %SPL, 0;
; PTX-NEXT: ld.param.u64 %rd3, [load_padding_param_0];
; PTX-NEXT: st.local.u64 [%rd2], %rd3;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: st.param.b64 [param0], %rd1;
; PTX-NEXT: .param .b64 retval0;
; PTX-NEXT: call.uni (retval0),
; PTX-NEXT: escape,
; PTX-NEXT: (
; PTX-NEXT: param0
; PTX-NEXT: );
; PTX-NEXT: ld.param.b64 %rd3, [retval0];
; PTX-NEXT: ld.param.b64 %rd4, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
%tmp = call ptr @escape(ptr nonnull align 16 %arg)
Expand Down
112 changes: 57 additions & 55 deletions llvm/test/CodeGen/NVPTX/variadics-backend.ll
Original file line number Diff line number Diff line change
Expand Up @@ -148,39 +148,38 @@ entry:
define dso_local i32 @variadics2(i32 noundef %first, ...) {
; CHECK-PTX-LABEL: variadics2(
; CHECK-PTX: {
; CHECK-PTX-NEXT: .local .align 2 .b8 __local_depot2[4];
; CHECK-PTX-NEXT: .local .align 1 .b8 __local_depot2[3];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<6>;
; CHECK-PTX-NEXT: .reg .b16 %rs<4>;
; CHECK-PTX-NEXT: .reg .b32 %r<7>;
; CHECK-PTX-NEXT: .reg .b64 %rd<11>;
; CHECK-PTX-NEXT: .reg .b64 %rd<13>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot2;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: ld.param.u32 %r1, [variadics2_param_0];
; CHECK-PTX-NEXT: ld.param.u64 %rd1, [variadics2_param_1];
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT: and.b64 %rd3, %rd2, -8;
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd3];
; CHECK-PTX-NEXT: or.b64 %rd4, %rd3, 4;
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd4];
; CHECK-PTX-NEXT: or.b64 %rd5, %rd3, 5;
; CHECK-PTX-NEXT: or.b64 %rd6, %rd3, 7;
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd6];
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs1;
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd5];
; CHECK-PTX-NEXT: or.b64 %rd7, %rd3, 6;
; CHECK-PTX-NEXT: add.u64 %rd3, %SPL, 0;
; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 7;
; CHECK-PTX-NEXT: and.b64 %rd5, %rd4, -8;
; CHECK-PTX-NEXT: ld.u32 %r2, [%rd5];
; CHECK-PTX-NEXT: or.b64 %rd6, %rd5, 4;
; CHECK-PTX-NEXT: ld.s8 %r3, [%rd6];
; CHECK-PTX-NEXT: or.b64 %rd7, %rd5, 5;
; CHECK-PTX-NEXT: or.b64 %rd8, %rd5, 7;
; CHECK-PTX-NEXT: ld.u8 %rs1, [%rd8];
; CHECK-PTX-NEXT: st.local.u8 [%rd3+2], %rs1;
; CHECK-PTX-NEXT: or.b64 %rd9, %rd5, 6;
; CHECK-PTX-NEXT: ld.u8 %rs2, [%rd9];
; CHECK-PTX-NEXT: st.local.u8 [%rd3+1], %rs2;
; CHECK-PTX-NEXT: ld.u8 %rs3, [%rd7];
; CHECK-PTX-NEXT: shl.b16 %rs4, %rs3, 8;
; CHECK-PTX-NEXT: or.b16 %rs5, %rs4, %rs2;
; CHECK-PTX-NEXT: st.u16 [%SP], %rs5;
; CHECK-PTX-NEXT: ld.u64 %rd8, [%rd3+8];
; CHECK-PTX-NEXT: st.local.u8 [%rd3], %rs3;
; CHECK-PTX-NEXT: ld.u64 %rd10, [%rd5+8];
; CHECK-PTX-NEXT: add.s32 %r4, %r1, %r2;
; CHECK-PTX-NEXT: add.s32 %r5, %r4, %r3;
; CHECK-PTX-NEXT: cvt.u64.u32 %rd9, %r5;
; CHECK-PTX-NEXT: add.s64 %rd10, %rd9, %rd8;
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd10;
; CHECK-PTX-NEXT: cvt.u64.u32 %rd11, %r5;
; CHECK-PTX-NEXT: add.s64 %rd12, %rd11, %rd10;
; CHECK-PTX-NEXT: cvt.u32.u64 %r6, %rd12;
; CHECK-PTX-NEXT: st.param.b32 [func_retval0], %r6;
; CHECK-PTX-NEXT: ret;
entry:
Expand Down Expand Up @@ -217,40 +216,40 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot3[24];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b16 %rs<10>;
; CHECK-PTX-NEXT: .reg .b16 %rs<8>;
; CHECK-PTX-NEXT: .reg .b32 %r<4>;
; CHECK-PTX-NEXT: .reg .b64 %rd<8>;
; CHECK-PTX-NEXT: .reg .b64 %rd<10>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot3;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: mov.u64 %rd1, __const_$_bar_$_s1;
; CHECK-PTX-NEXT: add.s64 %rd2, %rd1, 7;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd2];
; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
; CHECK-PTX-NEXT: mov.u64 %rd3, __const_$_bar_$_s1;
; CHECK-PTX-NEXT: add.s64 %rd4, %rd3, 7;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs1, [%rd4];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs2, %rs1;
; CHECK-PTX-NEXT: st.u8 [%SP+2], %rs2;
; CHECK-PTX-NEXT: add.s64 %rd3, %rd1, 5;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd3];
; CHECK-PTX-NEXT: st.local.u8 [%rd2+2], %rs2;
; CHECK-PTX-NEXT: add.s64 %rd5, %rd3, 6;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs3, [%rd5];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs4, %rs3;
; CHECK-PTX-NEXT: add.s64 %rd4, %rd1, 6;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd4];
; CHECK-PTX-NEXT: st.local.u8 [%rd2+1], %rs4;
; CHECK-PTX-NEXT: add.s64 %rd6, %rd3, 5;
; CHECK-PTX-NEXT: ld.global.nc.u8 %rs5, [%rd6];
; CHECK-PTX-NEXT: cvt.u16.u8 %rs6, %rs5;
; CHECK-PTX-NEXT: shl.b16 %rs7, %rs6, 8;
; CHECK-PTX-NEXT: or.b16 %rs8, %rs7, %rs4;
; CHECK-PTX-NEXT: st.u16 [%SP], %rs8;
; CHECK-PTX-NEXT: st.local.u8 [%rd2], %rs6;
; CHECK-PTX-NEXT: mov.b32 %r1, 1;
; CHECK-PTX-NEXT: st.u32 [%SP+8], %r1;
; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 8;
; CHECK-PTX-NEXT: or.b64 %rd6, %rd5, 4;
; CHECK-PTX-NEXT: mov.b16 %rs9, 1;
; CHECK-PTX-NEXT: st.u8 [%rd6], %rs9;
; CHECK-PTX-NEXT: mov.b64 %rd7, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7;
; CHECK-PTX-NEXT: add.u64 %rd7, %SP, 8;
; CHECK-PTX-NEXT: or.b64 %rd8, %rd7, 4;
; CHECK-PTX-NEXT: mov.b16 %rs7, 1;
; CHECK-PTX-NEXT: st.u8 [%rd8], %rs7;
; CHECK-PTX-NEXT: mov.b64 %rd9, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd9;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd7;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics2,
Expand Down Expand Up @@ -389,26 +388,29 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
; CHECK-PTX-NEXT: .reg .b32 %r<3>;
; CHECK-PTX-NEXT: .reg .b64 %rd<7>;
; CHECK-PTX-NEXT: .reg .b64 %rd<11>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
; CHECK-PTX-NEXT: mov.u64 %SPL, __local_depot7;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd1, [__const_$_qux_$_s];
; CHECK-PTX-NEXT: st.u64 [%SP], %rd1;
; CHECK-PTX-NEXT: mov.u64 %rd2, __const_$_qux_$_s;
; CHECK-PTX-NEXT: add.s64 %rd3, %rd2, 8;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd4, [%rd3];
; CHECK-PTX-NEXT: st.u64 [%SP+8], %rd4;
; CHECK-PTX-NEXT: mov.b64 %rd5, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd5;
; CHECK-PTX-NEXT: add.u64 %rd6, %SP, 16;
; CHECK-PTX-NEXT: add.u64 %rd2, %SPL, 0;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd3, [__const_$_qux_$_s];
; CHECK-PTX-NEXT: st.local.u64 [%rd2], %rd3;
; CHECK-PTX-NEXT: mov.u64 %rd4, __const_$_qux_$_s;
; CHECK-PTX-NEXT: add.s64 %rd5, %rd4, 8;
; CHECK-PTX-NEXT: ld.global.nc.u64 %rd6, [%rd5];
; CHECK-PTX-NEXT: st.local.u64 [%rd2+8], %rd6;
; CHECK-PTX-NEXT: mov.b64 %rd7, 1;
; CHECK-PTX-NEXT: st.u64 [%SP+16], %rd7;
; CHECK-PTX-NEXT: ld.u64 %rd8, [%SP];
; CHECK-PTX-NEXT: ld.u64 %rd9, [%SP+8];
; CHECK-PTX-NEXT: add.u64 %rd10, %SP, 16;
; CHECK-PTX-NEXT: { // callseq 3, 0
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd1;
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd4;
; CHECK-PTX-NEXT: st.param.b64 [param0], %rd8;
; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd9;
; CHECK-PTX-NEXT: .param .b64 param1;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd6;
; CHECK-PTX-NEXT: st.param.b64 [param1], %rd10;
; CHECK-PTX-NEXT: .param .b32 retval0;
; CHECK-PTX-NEXT: call.uni (retval0),
; CHECK-PTX-NEXT: variadics4,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,38 +9,43 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
; CHECK-NEXT: .local .align 8 .b8 __local_depot0[32];
; CHECK-NEXT: .reg .b32 %SP;
; CHECK-NEXT: .reg .b32 %SPL;
; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<13>;
; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-NEXT: .reg .b64 %rd<17>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: mov.u32 %SPL, __local_depot0;
; CHECK-NEXT: cvta.local.u32 %SP, %SPL;
; CHECK-NEXT: ld.param.u32 %r1, [caller_St8x4_param_1];
; CHECK-NEXT: add.u32 %r3, %SPL, 0;
; CHECK-NEXT: ld.param.u64 %rd1, [caller_St8x4_param_0+24];
; CHECK-NEXT: st.u64 [%SP+24], %rd1;
; CHECK-NEXT: st.local.u64 [%r3+24], %rd1;
; CHECK-NEXT: ld.param.u64 %rd2, [caller_St8x4_param_0+16];
; CHECK-NEXT: st.u64 [%SP+16], %rd2;
; CHECK-NEXT: st.local.u64 [%r3+16], %rd2;
; CHECK-NEXT: ld.param.u64 %rd3, [caller_St8x4_param_0+8];
; CHECK-NEXT: st.u64 [%SP+8], %rd3;
; CHECK-NEXT: st.local.u64 [%r3+8], %rd3;
; CHECK-NEXT: ld.param.u64 %rd4, [caller_St8x4_param_0];
; CHECK-NEXT: st.u64 [%SP], %rd4;
; CHECK-NEXT: st.local.u64 [%r3], %rd4;
; CHECK-NEXT: ld.u64 %rd5, [%SP+8];
; CHECK-NEXT: ld.u64 %rd6, [%SP];
; CHECK-NEXT: ld.u64 %rd7, [%SP+24];
; CHECK-NEXT: ld.u64 %rd8, [%SP+16];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 16 .b8 param0[32];
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd4, %rd3};
; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd2, %rd1};
; CHECK-NEXT: st.param.v2.b64 [param0], {%rd6, %rd5};
; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd8, %rd7};
; CHECK-NEXT: .param .align 16 .b8 retval0[32];
; CHECK-NEXT: call.uni (retval0),
; CHECK-NEXT: callee_St8x4,
; CHECK-NEXT: (
; CHECK-NEXT: param0
; CHECK-NEXT: );
; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [retval0];
; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [retval0+16];
; CHECK-NEXT: ld.param.v2.b64 {%rd9, %rd10}, [retval0];
; CHECK-NEXT: ld.param.v2.b64 {%rd11, %rd12}, [retval0+16];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: st.u64 [%r1], %rd5;
; CHECK-NEXT: st.u64 [%r1+8], %rd6;
; CHECK-NEXT: st.u64 [%r1+16], %rd7;
; CHECK-NEXT: st.u64 [%r1+24], %rd8;
; CHECK-NEXT: st.u64 [%r1], %rd9;
; CHECK-NEXT: st.u64 [%r1+8], %rd10;
; CHECK-NEXT: st.u64 [%r1+16], %rd11;
; CHECK-NEXT: st.u64 [%r1+24], %rd12;
; CHECK-NEXT: ret;
%call = tail call fastcc [4 x i64] @callee_St8x4(ptr noundef nonnull byval(%struct.St8x4) align 8 %in) #2
%.fca.0.extract = extractvalue [4 x i64] %call, 0
Expand Down

0 comments on commit 5c69260

Please sign in to comment.