-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[RISCV][VLOPT] Add strided, unit strided, and indexed loads to isSupported #121705
base: main
Are you sure you want to change the base?
[RISCV][VLOPT] Add strided, unit strided, and indexed loads to isSupported #121705
Conversation
@llvm/pr-subscribers-backend-risc-v Author: Michael Maitland (michaelmaitland) ChangesAdd to getOperandInfo too since that is needed to reduce the VL. Patch is 140.18 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/121705.diff 29 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
index 32d552625a8e8b..7ea0f797f85d73 100644
--- a/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
+++ b/llvm/lib/Target/RISCV/RISCVVLOptimizer.cpp
@@ -257,17 +257,31 @@ static OperandInfo getOperandInfo(const MachineOperand &MO,
// Vector Unit-Stride Instructions
// Vector Strided Instructions
/// Dest EEW encoded in the instruction and EMUL=(EEW/SEW)*LMUL
+ case RISCV::VLE8_V:
case RISCV::VSE8_V:
+ case RISCV::VLM_V:
+ case RISCV::VSM_V:
+ case RISCV::VLSE8_V:
case RISCV::VSSE8_V:
+ case RISCV::VLE8FF_V:
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(3, MI), 3);
+ case RISCV::VLE16_V:
case RISCV::VSE16_V:
+ case RISCV::VLSE16_V:
case RISCV::VSSE16_V:
+ case RISCV::VLE16FF_V:
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(4, MI), 4);
+ case RISCV::VLE32_V:
case RISCV::VSE32_V:
+ case RISCV::VLSE32_V:
case RISCV::VSSE32_V:
+ case RISCV::VLE32FF_V:
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(5, MI), 5);
+ case RISCV::VLE64_V:
case RISCV::VSE64_V:
+ case RISCV::VLSE64_V:
case RISCV::VSSE64_V:
+ case RISCV::VLE64FF_V:
return OperandInfo(RISCVVType::getEMULEqualsEEWDivSEWTimesLMUL(6, MI), 6);
// Vector Indexed Instructions
@@ -732,6 +746,22 @@ static bool isSupportedInstr(const MachineInstr &MI) {
return false;
switch (RVV->BaseInstr) {
+ // Vector Unit-Stride Instructions
+ // Vector Strided Instructions
+ case RISCV::VLE8_V:
+ case RISCV::VLM_V:
+ case RISCV::VLSE8_V:
+ case RISCV::VLE8FF_V:
+ case RISCV::VLE16_V:
+ case RISCV::VLSE16_V:
+ case RISCV::VLE16FF_V:
+ case RISCV::VLE32_V:
+ case RISCV::VLSE32_V:
+ case RISCV::VLE32FF_V:
+ case RISCV::VLE64_V:
+ case RISCV::VLSE64_V:
+ case RISCV::VLE64FF_V:
+
// Vector Single-Width Integer Add and Subtract
case RISCV::VADD_VI:
case RISCV::VADD_VV:
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
index 10d24927d9b783..4d34621cd5f243 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -1445,10 +1445,9 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64(<vscale x 1 x i64> %va, <vscale
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a4, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
-; RV32-NEXT: vsetvli a4, zero, e64, m1, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v9, (a6), zero
; RV32-NEXT: lui a4, 61681
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a3, v0.t
; RV32-NEXT: addi a5, a5, -256
; RV32-NEXT: vand.vx v11, v8, a5, v0.t
@@ -1595,9 +1594,7 @@ define <vscale x 1 x i64> @vp_bitreverse_nxv1i64_unmasked(<vscale x 1 x i64> %va
; RV32-NEXT: vand.vx v13, v8, a1
; RV32-NEXT: vand.vx v12, v12, a1
; RV32-NEXT: vor.vv v11, v12, v11
-; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v13, v13, a4
; RV32-NEXT: vor.vv v10, v10, v13
; RV32-NEXT: vsrl.vi v13, v8, 8
@@ -1730,10 +1727,9 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64(<vscale x 2 x i64> %va, <vscale
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a4, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
-; RV32-NEXT: vsetvli a4, zero, e64, m2, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v10, (a6), zero
; RV32-NEXT: lui a4, 61681
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v12, v8, a3, v0.t
; RV32-NEXT: addi a5, a5, -256
; RV32-NEXT: vand.vx v14, v8, a5, v0.t
@@ -1880,9 +1876,7 @@ define <vscale x 2 x i64> @vp_bitreverse_nxv2i64_unmasked(<vscale x 2 x i64> %va
; RV32-NEXT: vand.vx v18, v8, a1
; RV32-NEXT: vand.vx v16, v16, a1
; RV32-NEXT: vor.vv v10, v16, v10
-; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v16, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v18, v18, a4
; RV32-NEXT: vor.vv v12, v12, v18
; RV32-NEXT: vsrl.vi v18, v8, 8
@@ -2015,10 +2009,9 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64(<vscale x 4 x i64> %va, <vscale
; RV32-NEXT: addi a6, sp, 8
; RV32-NEXT: sw a4, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
-; RV32-NEXT: vsetvli a4, zero, e64, m4, ta, ma
+; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v12, (a6), zero
; RV32-NEXT: lui a4, 61681
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a3, v0.t
; RV32-NEXT: addi a5, a5, -256
; RV32-NEXT: vand.vx v20, v8, a5, v0.t
@@ -2165,9 +2158,7 @@ define <vscale x 4 x i64> @vp_bitreverse_nxv4i64_unmasked(<vscale x 4 x i64> %va
; RV32-NEXT: vand.vx v28, v8, a1
; RV32-NEXT: vand.vx v24, v24, a1
; RV32-NEXT: vor.vv v12, v24, v12
-; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v28, v28, a4
; RV32-NEXT: vor.vv v16, v16, v28
; RV32-NEXT: vsrl.vi v28, v8, 8
@@ -2315,7 +2306,6 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
@@ -2323,7 +2313,6 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64(<vscale x 7 x i64> %va, <vscale
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: lui a3, 4080
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vx v24, v8, a3, v0.t
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
@@ -2528,9 +2517,7 @@ define <vscale x 7 x i64> @vp_bitreverse_nxv7i64_unmasked(<vscale x 7 x i64> %va
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vand.vx v16, v16, a5
; RV32-NEXT: vsrl.vi v0, v8, 8
@@ -2704,7 +2691,6 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a5), zero
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 3
@@ -2712,7 +2698,6 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64(<vscale x 8 x i64> %va, <vscale
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
; RV32-NEXT: lui a3, 4080
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vx v24, v8, a3, v0.t
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
; RV32-NEXT: addi a5, sp, 16
@@ -2917,9 +2902,7 @@ define <vscale x 8 x i64> @vp_bitreverse_nxv8i64_unmasked(<vscale x 8 x i64> %va
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vand.vx v16, v16, a5
; RV32-NEXT: vsrl.vi v0, v8, 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
index 0dc1d0c32ac449..0c58cca0f94726 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -523,11 +523,9 @@ define <vscale x 1 x i64> @vp_bswap_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v9, v8, a2, v0.t
-; RV32-NEXT: addi a1, a3, -256
-; RV32-NEXT: vand.vx v10, v8, a1, v0.t
-; RV32-NEXT: vsetvli a3, zero, e64, m1, ta, ma
+; RV32-NEXT: addi a0, a3, -256
+; RV32-NEXT: vand.vx v10, v8, a0, v0.t
; RV32-NEXT: vlse64.v v11, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v10, v10, a4, v0.t
; RV32-NEXT: vor.vv v9, v9, v10, v0.t
; RV32-NEXT: vand.vx v10, v8, a5, v0.t
@@ -538,7 +536,7 @@ define <vscale x 1 x i64> @vp_bswap_nxv1i64(<vscale x 1 x i64> %va, <vscale x 1
; RV32-NEXT: vor.vv v9, v9, v10, v0.t
; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v12, v8, a4, v0.t
-; RV32-NEXT: vand.vx v12, v12, a1, v0.t
+; RV32-NEXT: vand.vx v12, v12, a0, v0.t
; RV32-NEXT: vor.vv v10, v12, v10, v0.t
; RV32-NEXT: vsrl.vi v12, v8, 24, v0.t
; RV32-NEXT: vand.vx v12, v12, a5, v0.t
@@ -609,15 +607,13 @@ define <vscale x 1 x i64> @vp_bswap_nxv1i64_unmasked(<vscale x 1 x i64> %va, i32
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsll.vx v10, v8, a2
-; RV32-NEXT: addi a1, a3, -256
+; RV32-NEXT: addi a0, a3, -256
; RV32-NEXT: vsrl.vx v11, v8, a2
; RV32-NEXT: vsrl.vx v12, v8, a4
-; RV32-NEXT: vand.vx v13, v8, a1
-; RV32-NEXT: vand.vx v12, v12, a1
+; RV32-NEXT: vand.vx v13, v8, a0
+; RV32-NEXT: vand.vx v12, v12, a0
; RV32-NEXT: vor.vv v11, v12, v11
-; RV32-NEXT: vsetvli a1, zero, e64, m1, ta, ma
; RV32-NEXT: vlse64.v v12, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
; RV32-NEXT: vsll.vx v13, v13, a4
; RV32-NEXT: vor.vv v10, v10, v13
; RV32-NEXT: vsrl.vi v13, v8, 8
@@ -695,11 +691,9 @@ define <vscale x 2 x i64> @vp_bswap_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v10, v8, a2, v0.t
-; RV32-NEXT: addi a1, a3, -256
-; RV32-NEXT: vand.vx v12, v8, a1, v0.t
-; RV32-NEXT: vsetvli a3, zero, e64, m2, ta, ma
+; RV32-NEXT: addi a0, a3, -256
+; RV32-NEXT: vand.vx v12, v8, a0, v0.t
; RV32-NEXT: vlse64.v v14, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v12, v12, a4, v0.t
; RV32-NEXT: vor.vv v10, v10, v12, v0.t
; RV32-NEXT: vand.vx v12, v8, a5, v0.t
@@ -710,7 +704,7 @@ define <vscale x 2 x i64> @vp_bswap_nxv2i64(<vscale x 2 x i64> %va, <vscale x 2
; RV32-NEXT: vor.vv v10, v10, v12, v0.t
; RV32-NEXT: vsrl.vx v12, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v16, v8, a4, v0.t
-; RV32-NEXT: vand.vx v16, v16, a1, v0.t
+; RV32-NEXT: vand.vx v16, v16, a0, v0.t
; RV32-NEXT: vor.vv v12, v16, v12, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 24, v0.t
; RV32-NEXT: vand.vx v16, v16, a5, v0.t
@@ -781,15 +775,13 @@ define <vscale x 2 x i64> @vp_bswap_nxv2i64_unmasked(<vscale x 2 x i64> %va, i32
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsll.vx v12, v8, a2
-; RV32-NEXT: addi a1, a3, -256
+; RV32-NEXT: addi a0, a3, -256
; RV32-NEXT: vsrl.vx v14, v8, a2
; RV32-NEXT: vsrl.vx v16, v8, a4
-; RV32-NEXT: vand.vx v18, v8, a1
-; RV32-NEXT: vand.vx v16, v16, a1
+; RV32-NEXT: vand.vx v18, v8, a0
+; RV32-NEXT: vand.vx v16, v16, a0
; RV32-NEXT: vor.vv v14, v16, v14
-; RV32-NEXT: vsetvli a1, zero, e64, m2, ta, ma
; RV32-NEXT: vlse64.v v16, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma
; RV32-NEXT: vsll.vx v18, v18, a4
; RV32-NEXT: vor.vv v12, v12, v18
; RV32-NEXT: vsrl.vi v18, v8, 8
@@ -867,11 +859,9 @@ define <vscale x 4 x i64> @vp_bswap_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
-; RV32-NEXT: addi a1, a3, -256
-; RV32-NEXT: vand.vx v20, v8, a1, v0.t
-; RV32-NEXT: vsetvli a3, zero, e64, m4, ta, ma
+; RV32-NEXT: addi a0, a3, -256
+; RV32-NEXT: vand.vx v20, v8, a0, v0.t
; RV32-NEXT: vlse64.v v12, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v20, v20, a4, v0.t
; RV32-NEXT: vor.vv v16, v16, v20, v0.t
; RV32-NEXT: vand.vx v20, v8, a5, v0.t
@@ -882,7 +872,7 @@ define <vscale x 4 x i64> @vp_bswap_nxv4i64(<vscale x 4 x i64> %va, <vscale x 4
; RV32-NEXT: vor.vv v16, v16, v20, v0.t
; RV32-NEXT: vsrl.vx v20, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t
-; RV32-NEXT: vand.vx v24, v24, a1, v0.t
+; RV32-NEXT: vand.vx v24, v24, a0, v0.t
; RV32-NEXT: vor.vv v20, v24, v20, v0.t
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT: vand.vx v24, v24, a5, v0.t
@@ -953,15 +943,13 @@ define <vscale x 4 x i64> @vp_bswap_nxv4i64_unmasked(<vscale x 4 x i64> %va, i32
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsll.vx v16, v8, a2
-; RV32-NEXT: addi a1, a3, -256
+; RV32-NEXT: addi a0, a3, -256
; RV32-NEXT: vsrl.vx v20, v8, a2
; RV32-NEXT: vsrl.vx v24, v8, a4
-; RV32-NEXT: vand.vx v28, v8, a1
-; RV32-NEXT: vand.vx v24, v24, a1
+; RV32-NEXT: vand.vx v28, v8, a0
+; RV32-NEXT: vand.vx v24, v24, a0
; RV32-NEXT: vor.vv v20, v24, v20
-; RV32-NEXT: vsetvli a1, zero, e64, m4, ta, ma
; RV32-NEXT: vlse64.v v24, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma
; RV32-NEXT: vsll.vx v28, v28, a4
; RV32-NEXT: vor.vv v16, v16, v28
; RV32-NEXT: vsrl.vi v28, v8, 8
@@ -1043,51 +1031,49 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64(<vscale x 7 x i64> %va, <vscale x 7
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
-; RV32-NEXT: addi a1, a3, -256
-; RV32-NEXT: vand.vx v24, v8, a1, v0.t
+; RV32-NEXT: addi a0, a3, -256
+; RV32-NEXT: vand.vx v24, v8, a0, v0.t
; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vlse64.v v16, (a5), zero
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: lui a1, 4080
+; RV32-NEXT: vand.vx v24, v8, a1, v0.t
+; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
+; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
-; RV32-NEXT: vlse64.v v16, (a5), zero
+; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
-; RV32-NEXT: slli a3, a3, 3
+; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: lui a3, 4080
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
-; RV32-NEXT: vand.vx v24, v8, a3, v0.t
-; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
-; RV32-NEXT: addi a0, sp, 16
-; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vv v24, v8, v16, v0.t
-; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
-; RV32-NEXT: vor.vv v16, v24, v16, v0.t
-; RV32-NEXT: csrr a0, vlenb
-; RV32-NEXT: slli a0, a0, 4
-; RV32-NEXT: add a0, sp, a0
-; RV32-NEXT: addi a0, a0, 16
-; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t
; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t
-; RV32-NEXT: vand.vx v24, v24, a1, v0.t
+; RV32-NEXT: vand.vx v24, v24, a0, v0.t
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
-; RV32-NEXT: vand.vx v24, v24, a3, v0.t
+; RV32-NEXT: vand.vx v24, v24, a1, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
@@ -1193,24 +1179,22 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v24, v8, a2
-; RV32-NEXT: addi a1, a3, -256
+; RV32-NEXT: addi a0, a3, -256
; RV32-NEXT: vsrl.vx v16, v8, a2
; RV32-NEXT: vsrl.vx v0, v8, a4
-; RV32-NEXT: vand.vx v0, v0, a1
+; RV32-NEXT: vand.vx v0, v0, a0
; RV32-NEXT: vor.vv v16, v0, v16
-; RV32-NEXT: csrr a2, vlenb
-; RV32-NEXT: slli a2, a2, 3
-; RV32-NEXT: add a2, sp, a2
-; RV32-NEXT: addi a2, a2, 16
-; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill
-; RV32-NEXT: vand.vx v0, v8, a1
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vx v0, v8, a0
; RV32-NEXT: vsll.vx v0, v0, a4
; RV32-NEXT: vor.vv v16, v24, v0
-; RV32-NEXT: addi a1, sp, 16
-; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma
+; RV32-NEXT: addi a0, sp, 16
+; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill
; RV32-NEXT: vlse64.v v0, (a6), zero
-; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsrl.vi v16, v8, 24
; RV32-NEXT: vand.vx v16, v16, a5
; RV32-NEXT: vsrl.vi v24, v8, 8
@@ -1221,7 +1205,6 @@ define <vscale x 7 x i64> @vp_bswap_nxv7i64_unmasked(<vscale x 7 x i64> %va, i32
; RV32-NEXT: vsll.vi v8, v8, 24
; RV32-NEXT: vsll.vi v24, v24, 8
; RV32-NEXT: vor.vv v8, v8, v24
-; RV32-NEXT: addi a0, sp, 16
; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v24, v8
; RV32-NEXT: csrr a0, vlenb
@@ -1318,51 +1301,49 @@ define <vscale x 8 x i64> @vp_bswap_nxv8i64(<vscale x 8 x i64> %va, <vscale x 8
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a2, v0.t
-; RV32-NEXT: addi a1, a3, -256
-; RV32-NEXT: vand.vx v24, v8, a1, v0.t
+; RV32-NEXT: addi a0, a3, -256
+; RV32-NEXT: vand.vx v24, v8, a0, v0.t
; RV32-NEXT: vsll.vx v24, v24, a4, v0.t
; RV32-NEXT: vor.vv v16, v16, v24, v0.t
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 4
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: vlse64.v v16, (a5), zero
+; RV32-NEXT: csrr a1, vlenb
+; RV32-NEXT: slli a1, a1, 3
+; RV32-NEXT: add a1, sp, a1
+; RV32-NEXT: addi a1, a1, 16
+; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
+; RV32-NEXT: lui a1, 4080
+; RV32-NEXT: vand.vx v24, v8, a1, v0.t
+; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
+; RV32-NEXT: addi a3, sp, 16
+; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill
+; RV32-NEXT: vand.vv v24, v8, v16, v0.t
+; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
+; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload
+; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a3, vlenb
; RV32-NEXT: slli a3, a3, 4
; RV32-NEXT: add a3, sp, a3
; RV32-NEXT: addi a3, a3, 16
-; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill
-; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma
-; RV32-NEXT: ...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Didn't see changes from first commit, whoops
66be576
to
3d05abd
Compare
case RISCV::VLE8_V: | ||
case RISCV::VLM_V: | ||
case RISCV::VLSE8_V: | ||
case RISCV::VLE8FF_V: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it safe to reduce the VL of a fault-only-first load? I think that would potentially affect the result of its output VL operand.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These instructions execute as a regular load except that they will only take a trap caused by a synchronous exception on element 0. If element 0 raises an exception, vl is not modified, and the trap is taken. If an element > 0 raises an exception, the corresponding trap is not taken, and the vector length vl is reduced to the index of the element that would have raised an exception.
My thought was that it should not affect the result. Is this an incorrect understanding?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will affect the resulting VL though. I'm thinking if someone wrote something like:
%a = call { <vscale x 1 x i64>, i64 } @llvm.riscv.vleff.nxv1i64(<vscale x 1 x i64> undef, ptr %0, i64 %avl)
%vl.ff = extractvalue { <vscale x 1 x i64>, i64 } %a, 1
%vl.ff might change if the vleff had its AVL reduced.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it had its AVL reduced, which caused a fault which would have happened to no longer occur, would this produce a different result?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah exactly
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Got it. Will remove.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can't reduce the VL of a load if the load was marked volatile in IR. We need to check the volatile flag of the memory operand.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added the check and a test
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We need to prevent reducing the VL of a volatile load.
Add to getOperandInfo too since that is needed to reduce the VL.