From 07e6c614294d27cf3662329eaac7462f5b1976a8 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen@sifive.com>
Date: Fri, 20 Dec 2024 02:32:46 -0800
Subject: [PATCH 1/5] [SingleSource/Vectorizer] Add unit tests for FindLastIV
 pattern.

---
 SingleSource/UnitTests/Vectorizer/common.h    |   8 +
 .../UnitTests/Vectorizer/find-last.cpp        | 246 ++++++++++++++++++
 .../Vectorizer/find-last.reference_output     |   9 +
 3 files changed, 263 insertions(+)
 create mode 100644 SingleSource/UnitTests/Vectorizer/find-last.cpp
 create mode 100644 SingleSource/UnitTests/Vectorizer/find-last.reference_output
diff --git a/SingleSource/UnitTests/Vectorizer/common.h b/SingleSource/UnitTests/Vectorizer/common.h
index d8cd421bf3..86b860a28e 100644
--- a/SingleSource/UnitTests/Vectorizer/common.h
+++ b/SingleSource/UnitTests/Vectorizer/common.h
@@ -9,6 +9,14 @@
     Init _Pragma("clang loop vectorize(enable)") Loop                          \
   };
 
+#define DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(Init, Loop, Type)                    \
+  auto ScalarFn = [](auto *A, auto *B, Type TC) -> Type {                      \
+    Init _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop     \
+  };                                                                           \
+  auto VectorFn = [](auto *A, auto *B, Type TC) -> Type {                      \
+    Init _Pragma("clang loop vectorize(enable)") Loop                          \
+  };
+
 #define DEFINE_SCALAR_AND_VECTOR_FN3(Loop)                                     \
   auto ScalarFn = [](auto *A, auto *B, auto *C, unsigned TC) {                 \
     _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop          \
diff --git a/SingleSource/UnitTests/Vectorizer/find-last.cpp b/SingleSource/UnitTests/Vectorizer/find-last.cpp
new file mode 100644
index 0000000000..035487397c
--- /dev/null
+++ b/SingleSource/UnitTests/Vectorizer/find-last.cpp
@@ -0,0 +1,246 @@
+#include <algorithm>
+#include <functional>
+#include <iostream>
+#include <limits>
+#include <memory>
+#include <stdint.h>
+
+#include "common.h"
+
+template <typename RetTy, typename Ty>
+using Fn2Ty = std::function<RetTy(Ty *, Ty *, RetTy)>;
+template <typename RetTy, typename Ty>
+static void checkVectorFunction(Fn2Ty<RetTy, Ty> ScalarFn,
+                                Fn2Ty<RetTy, Ty> VectorFn, const char *Name) {
+  std::cout << "Checking " << Name << "\n";
+
+  unsigned N = 1000;
+  std::unique_ptr<Ty[]> Src1(new Ty[N]);
+  std::unique_ptr<Ty[]> Src2(new Ty[N]);
+  init_data(Src1, N);
+  init_data(Src2, N);
+
+  // Test VectorFn with different input data.
+  {
+    // Check with random inputs.
+    auto Reference = ScalarFn(&Src1[0], &Src2[0], N);
+    auto ToCheck = VectorFn(&Src1[0], &Src2[0], N);
+    if (Reference != ToCheck) {
+      std::cerr << "Miscompare\n";
+      exit(1);
+    }
+  }
+
+  {
+    // Check with Src1 > Src2 for all elements.
+    for (unsigned I = 0; I != N; ++I) {
+      Src1[I] = std::numeric_limits<Ty>::max();
+      Src2[I] = std::numeric_limits<Ty>::min();
+    }
+    auto Reference = ScalarFn(&Src1[0], &Src2[0], N);
+    auto ToCheck = VectorFn(&Src1[0], &Src2[0], N);
+    if (Reference != ToCheck) {
+      std::cerr << "Miscompare\n";
+      exit(1);
+    }
+  }
+
+  {
+    // Check with Src1 < Src2 for all elements.
+    for (unsigned I = 0; I != N; ++I) {
+      Src1[I] = std::numeric_limits<Ty>::min();
+      Src2[I] = std::numeric_limits<Ty>::max();
+    }
+    auto Reference = ScalarFn(&Src1[0], &Src2[0], N);
+    auto ToCheck = VectorFn(&Src1[0], &Src2[0], N);
+    if (Reference != ToCheck) {
+      std::cerr << "Miscompare\n";
+      exit(1);
+    }
+  }
+
+  {
+    // Check with only Src1[998] > Src2[998].
+    for (unsigned I = 0; I != N; ++I)
+      Src1[I] = Src2[I] = std::numeric_limits<Ty>::min();
+    Src1[998] = std::numeric_limits<Ty>::max();
+    auto Reference = ScalarFn(&Src1[0], &Src2[0], N);
+    auto ToCheck = VectorFn(&Src1[0], &Src2[0], N);
+    if (Reference != ToCheck) {
+      std::cerr << "Miscompare\n";
+      exit(1);
+    }
+  }
+
+  {
+    // Check with only Src1[0] > Src2[0].
+    for (unsigned I = 0; I != N; ++I)
+      Src1[I] = Src2[I] = std::numeric_limits<Ty>::min();
+    Src1[0] = std::numeric_limits<Ty>::max();
+    auto Reference = ScalarFn(&Src1[0], &Src2[0], N);
+    auto ToCheck = VectorFn(&Src1[0], &Src2[0], N);
+    if (Reference != ToCheck) {
+      std::cerr << "Miscompare\n";
+      exit(1);
+    }
+  }
+
+  {
+    // Check with only Src1[N - 1] > Src2[N - 1].
+    for (unsigned I = 0; I != N; ++I)
+      Src1[I] = Src2[I] = std::numeric_limits<Ty>::min();
+    Src1[N - 1] = std::numeric_limits<Ty>::max();
+    auto Reference = ScalarFn(&Src1[0], &Src2[0], N);
+    auto ToCheck = VectorFn(&Src1[0], &Src2[0], N);
+    if (Reference != ToCheck) {
+      std::cerr << "Miscompare\n";
+      exit(1);
+    }
+  }
+
+  {
+    // Check with only Src1[0] > Src2[0] and Src1[N - 1] > Src2[N - 1].
+    for (unsigned I = 0; I != N; ++I)
+      Src1[I] = Src2[I] = std::numeric_limits<Ty>::min();
+    Src1[0] = Src1[N - 1] = std::numeric_limits<Ty>::max();
+    auto Reference = ScalarFn(&Src1[0], &Src2[0], N);
+    auto ToCheck = VectorFn(&Src1[0], &Src2[0], N);
+    if (Reference != ToCheck) {
+      std::cerr << "Miscompare\n";
+      exit(1);
+    }
+  }
+}
+
+int main(void) {
+  rng = std::mt19937(15);
+
+  {
+    // Find the last index where A[I] > B[I] and update Rdx when the condition
+    // is true.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+        int32_t Rdx = -1;,
+        for (int32_t I = 0; I < TC; I++) {
+          Rdx = A[I] > B[I] ? I : Rdx;
+        }
+        return Rdx;,
+        int32_t
+        );
+    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
+                                          "findlast_true_update");
+  }
+
+  {
+    // Update Rdx when the condition A[I] > B[I] is false.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+        int32_t Rdx = -1;,
+        for (int32_t I = 0; I < TC; I++) {
+          Rdx = A[I] > B[I] ? Rdx : I;
+        }
+        return Rdx;,
+        int32_t
+        );
+    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
+                                          "findlast_false_update");
+  }
+
+  {
+    // Find the last index with the start value TC.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+        int32_t Rdx = TC;,
+        for (int32_t I = 0; I < TC; I++) {
+          Rdx = A[I] > B[I] ? I : Rdx;
+        }
+        return Rdx;,
+        int32_t
+        );
+    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
+                                          "findlast_start_TC");
+  }
+
+  {
+    // Increment the induction variable by 2.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+        int32_t Rdx = -1;,
+        for (int32_t I = 0; I < TC; I += 2) {
+          Rdx = A[I] > B[I] ? I : Rdx;
+        }
+        return Rdx;,
+        int32_t
+        );
+    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
+                                          "findlast_inc_2");
+  }
+
+  {
+    // Check with decreasing induction variable.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+        int32_t Rdx = -1;,
+        for (int32_t I = TC; I > 0; I--) {
+          Rdx = A[I] > B[I] ? I : Rdx;
+        }
+        return Rdx;,
+        int32_t
+        );
+    checkVectorFunction<int32_t, int32_t>(
+        ScalarFn, VectorFn, "findlast_start_decreasing_induction");
+  }
+
+  {
+    // Check with the induction variable starts from 3.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+        int32_t Rdx = -1;,
+        for (int32_t I = 3; I < TC; I++) {
+          Rdx = A[I] > B[I] ? I : Rdx;
+        }
+        return Rdx;,
+        int32_t
+        );
+    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
+                                          "findlast_iv_start_3");
+  }
+
+  {
+    // Check with start value of 3 and induction variable starts at 3.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+        int32_t Rdx = 3;,
+        for (int32_t I = 3; I < TC; I++) {
+          Rdx = A[I] > B[I] ? I : Rdx;
+        }
+        return Rdx;,
+        int32_t
+        );
+    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
+                                          "findlast_start_3_iv_start_3");
+  }
+
+  {
+    // Check with start value of 2 and induction variable starts at 3.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+        int32_t Rdx = 2;,
+        for (int32_t I = 3; I < TC; I++) {
+          Rdx = A[I] > B[I] ? I : Rdx;
+        }
+        return Rdx;,
+        int32_t
+        );
+    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
+                                          "findlast_start_2_iv_start_3");
+  }
+
+  {
+    // Check with start value of 4 and induction variable starts at 3.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+        int32_t Rdx = 4;,
+        for (int32_t I = 3; I < TC; I++) {
+          Rdx = A[I] > B[I] ? I : Rdx;
+        }
+        return Rdx;,
+        int32_t
+        );
+    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
+                                          "findlast_start_4_iv_start_3");
+  }
+
+  return 0;
+}
diff --git a/SingleSource/UnitTests/Vectorizer/find-last.reference_output b/SingleSource/UnitTests/Vectorizer/find-last.reference_output
new file mode 100644
index 0000000000..cfaf6d1a7d
--- /dev/null
+++ b/SingleSource/UnitTests/Vectorizer/find-last.reference_output
@@ -0,0 +1,9 @@
+Checking findlast_true_update
+Checking findlast_false_update
+Checking findlast_start_TC
+Checking findlast_inc_2
+Checking findlast_start_decreasing_induction
+Checking findlast_iv_start_3
+Checking findlast_start_3_iv_start_3
+Checking findlast_start_2_iv_start_3
+Checking findlast_start_4_iv_start_3

From 803fa53317abf7c7fcc99c923cf15835efb820b2 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen@sifive.com>
Date: Mon, 13 Jan 2025 22:42:43 -0800
Subject: [PATCH 2/5] Add exit 0

---
 SingleSource/UnitTests/Vectorizer/find-last.reference_output | 1 +
 1 file changed, 1 insertion(+)

diff --git a/SingleSource/UnitTests/Vectorizer/find-last.reference_output b/SingleSource/UnitTests/Vectorizer/find-last.reference_output
index cfaf6d1a7d..121c3de457 100644
--- a/SingleSource/UnitTests/Vectorizer/find-last.reference_output
+++ b/SingleSource/UnitTests/Vectorizer/find-last.reference_output
@@ -7,3 +7,4 @@ Checking findlast_iv_start_3
 Checking findlast_start_3_iv_start_3
 Checking findlast_start_2_iv_start_3
 Checking findlast_start_4_iv_start_3
+exit 0

From dbfbf535425ce7045ef00b6d84241c6c4f1aa8ac Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen@sifive.com>
Date: Tue, 14 Jan 2025 00:03:28 -0800
Subject: [PATCH 3/5] NFC, Macro

---
 .../UnitTests/Vectorizer/find-last.cpp        | 126 +++++++++---------
 1 file changed, 62 insertions(+), 64 deletions(-)

diff --git a/SingleSource/UnitTests/Vectorizer/find-last.cpp b/SingleSource/UnitTests/Vectorizer/find-last.cpp
index 035487397c..f47a0c87d1 100644
--- a/SingleSource/UnitTests/Vectorizer/find-last.cpp
+++ b/SingleSource/UnitTests/Vectorizer/find-last.cpp
@@ -115,31 +115,36 @@ static void checkVectorFunction(Fn2Ty<RetTy, Ty> ScalarFn,
 int main(void) {
   rng = std::mt19937(15);
 
+#define INC_COND(Start, Step, RetTy) for (RetTy I = Start; I < TC; I += Step)
+// Start at TC - 1: A and B hold only TC elements, so A[TC] is out of bounds.
+#define DEC_COND(End, Step, RetTy) for (RetTy I = TC - 1; I > End; I -= Step)
+#define DEFINE_FINDLAST_LOOP_BODY(TrueVal, FalseVal, ForCond)                  \
+  ForCond { Rdx = A[I] > B[I] ? TrueVal : FalseVal; }                          \
+  return Rdx;
+
   {
     // Find the last index where A[I] > B[I] and update Rdx when the condition
     // is true.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
-        int32_t Rdx = -1;,
-        for (int32_t I = 0; I < TC; I++) {
-          Rdx = A[I] > B[I] ? I : Rdx;
-        }
-        return Rdx;,
-        int32_t
-        );
+	int32_t Rdx = -1;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
+	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-                                          "findlast_true_update");
+					  "findlast_true_update");
   }
 
   {
     // Update Rdx when the condition A[I] > B[I] is false.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
-        int32_t Rdx = -1;,
-        for (int32_t I = 0; I < TC; I++) {
-          Rdx = A[I] > B[I] ? Rdx : I;
-        }
-        return Rdx;,
-        int32_t
-        );
+	int32_t Rdx = -1;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ Rdx, /* FalseVal= */ I,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
+	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
                                           "findlast_false_update");
   }
@@ -147,13 +152,12 @@ int main(void) {
   {
     // Find the last index with the start value TC.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
-        int32_t Rdx = TC;,
-        for (int32_t I = 0; I < TC; I++) {
-          Rdx = A[I] > B[I] ? I : Rdx;
-        }
-        return Rdx;,
-        int32_t
-        );
+	int32_t Rdx = TC;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
+	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
                                           "findlast_start_TC");
   }
@@ -161,13 +165,12 @@ int main(void) {
   {
     // Increment the induction variable by 2.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
-        int32_t Rdx = -1;,
-        for (int32_t I = 0; I < TC; I += 2) {
-          Rdx = A[I] > B[I] ? I : Rdx;
-        }
-        return Rdx;,
-        int32_t
-        );
+	int32_t Rdx = -1;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 0, /* Step= */ 2, /* RetTy= */ int32_t)),
+	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
                                           "findlast_inc_2");
   }
@@ -175,13 +178,12 @@ int main(void) {
   {
     // Check with decreasing induction variable.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
-        int32_t Rdx = -1;,
-        for (int32_t I = TC; I > 0; I--) {
-          Rdx = A[I] > B[I] ? I : Rdx;
-        }
-        return Rdx;,
-        int32_t
-        );
+	int32_t Rdx = -1;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    DEC_COND(/* End= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
+	int32_t);
     checkVectorFunction<int32_t, int32_t>(
         ScalarFn, VectorFn, "findlast_start_decreasing_induction");
   }
@@ -189,13 +191,12 @@ int main(void) {
   {
     // Check with the induction variable starts from 3.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
-        int32_t Rdx = -1;,
-        for (int32_t I = 3; I < TC; I++) {
-          Rdx = A[I] > B[I] ? I : Rdx;
-        }
-        return Rdx;,
-        int32_t
-        );
+	int32_t Rdx = -1;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
+	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
                                           "findlast_iv_start_3");
   }
@@ -203,13 +204,12 @@ int main(void) {
   {
     // Check with start value of 3 and induction variable starts at 3.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
-        int32_t Rdx = 3;,
-        for (int32_t I = 3; I < TC; I++) {
-          Rdx = A[I] > B[I] ? I : Rdx;
-        }
-        return Rdx;,
-        int32_t
-        );
+	int32_t Rdx = 3;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
+	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
                                           "findlast_start_3_iv_start_3");
   }
@@ -217,13 +217,12 @@ int main(void) {
   {
     // Check with start value of 2 and induction variable starts at 3.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
-        int32_t Rdx = 2;,
-        for (int32_t I = 3; I < TC; I++) {
-          Rdx = A[I] > B[I] ? I : Rdx;
-        }
-        return Rdx;,
-        int32_t
-        );
+	int32_t Rdx = 2;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
+	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
                                           "findlast_start_2_iv_start_3");
   }
@@ -231,13 +230,12 @@ int main(void) {
   {
     // Check with start value of 4 and induction variable starts at 3.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
-        int32_t Rdx = 4;,
-        for (int32_t I = 3; I < TC; I++) {
-          Rdx = A[I] > B[I] ? I : Rdx;
-        }
-        return Rdx;,
-        int32_t
-        );
+	int32_t Rdx = 4;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
+	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
                                           "findlast_start_4_iv_start_3");
   }

From 8f6111e4d9cfb22320843c4ebfb6f177b4392596 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen@sifive.com>
Date: Tue, 14 Jan 2025 00:17:10 -0800
Subject: [PATCH 4/5] Test FFindLastIV

---
 .../UnitTests/Vectorizer/find-last.cpp        | 36 ++++++++++++++-----
 .../Vectorizer/find-last.reference_output     | 27 +++++++++-----
 2 files changed, 45 insertions(+), 18 deletions(-)

diff --git a/SingleSource/UnitTests/Vectorizer/find-last.cpp b/SingleSource/UnitTests/Vectorizer/find-last.cpp
index f47a0c87d1..fadccbd40c 100644
--- a/SingleSource/UnitTests/Vectorizer/find-last.cpp
+++ b/SingleSource/UnitTests/Vectorizer/find-last.cpp
@@ -133,7 +133,9 @@ int main(void) {
 	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-					  "findlast_true_update");
+					  "findlast_icmp_true_update");
+    checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
+					"findlast_fcmp_true_update");
   }
 
   {
@@ -146,7 +148,9 @@ int main(void) {
 	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-                                          "findlast_false_update");
+					  "findlast_icmp_false_update");
+    checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
+					"findlast_fcmp_false_update");
   }
 
   {
@@ -159,7 +163,9 @@ int main(void) {
 	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-                                          "findlast_start_TC");
+					  "findlast_icmp_start_TC");
+    checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
+					"findlast_fcmp_start_TC");
   }
 
   {
@@ -172,7 +178,9 @@ int main(void) {
 	    INC_COND(/* Start= */ 0, /* Step= */ 2, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-                                          "findlast_inc_2");
+					  "findlast_icmp_inc_2");
+    checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
+					"findlast_fcmp_inc_2");
   }
 
   {
@@ -185,7 +193,9 @@ int main(void) {
 	    DEC_COND(/* End= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(
-        ScalarFn, VectorFn, "findlast_start_decreasing_induction");
+        ScalarFn, VectorFn, "findlast_icmp_start_decreasing_induction");
+    checkVectorFunction<int32_t, float>(
+        ScalarFn, VectorFn, "findlast_fcmp_start_decreasing_induction");
   }
 
   {
@@ -198,7 +208,9 @@ int main(void) {
 	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-                                          "findlast_iv_start_3");
+					  "findlast_icmp_iv_start_3");
+    checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
+					"findlast_fcmp_iv_start_3");
   }
 
   {
@@ -211,7 +223,9 @@ int main(void) {
 	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-                                          "findlast_start_3_iv_start_3");
+					  "findlast_icmp_start_3_iv_start_3");
+    checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
+					"findlast_fcmp_start_3_iv_start_3");
   }
 
   {
@@ -224,7 +238,9 @@ int main(void) {
 	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-                                          "findlast_start_2_iv_start_3");
+					  "findlast_icmp_start_2_iv_start_3");
+    checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
+					"findlast_fcmp_start_2_iv_start_3");
   }
 
   {
@@ -237,7 +253,9 @@ int main(void) {
 	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-                                          "findlast_start_4_iv_start_3");
+					  "findlast_icmp_start_4_iv_start_3");
+    checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
+					"findlast_fcmp_start_4_iv_start_3");
   }
 
   return 0;
diff --git a/SingleSource/UnitTests/Vectorizer/find-last.reference_output b/SingleSource/UnitTests/Vectorizer/find-last.reference_output
index 121c3de457..e37d337343 100644
--- a/SingleSource/UnitTests/Vectorizer/find-last.reference_output
+++ b/SingleSource/UnitTests/Vectorizer/find-last.reference_output
@@ -1,10 +1,19 @@
-Checking findlast_true_update
-Checking findlast_false_update
-Checking findlast_start_TC
-Checking findlast_inc_2
-Checking findlast_start_decreasing_induction
-Checking findlast_iv_start_3
-Checking findlast_start_3_iv_start_3
-Checking findlast_start_2_iv_start_3
-Checking findlast_start_4_iv_start_3
+Checking findlast_icmp_true_update
+Checking findlast_fcmp_true_update
+Checking findlast_icmp_false_update
+Checking findlast_fcmp_false_update
+Checking findlast_icmp_start_TC
+Checking findlast_fcmp_start_TC
+Checking findlast_icmp_inc_2
+Checking findlast_fcmp_inc_2
+Checking findlast_icmp_start_decreasing_induction
+Checking findlast_fcmp_start_decreasing_induction
+Checking findlast_icmp_iv_start_3
+Checking findlast_fcmp_iv_start_3
+Checking findlast_icmp_start_3_iv_start_3
+Checking findlast_fcmp_start_3_iv_start_3
+Checking findlast_icmp_start_2_iv_start_3
+Checking findlast_fcmp_start_2_iv_start_3
+Checking findlast_icmp_start_4_iv_start_3
+Checking findlast_fcmp_start_4_iv_start_3
 exit 0

From f8c2b31f66a2c890734752ce19c82181b148b560 Mon Sep 17 00:00:00 2001
From: Mel Chen <mel.chen@sifive.com>
Date: Tue, 14 Jan 2025 00:45:19 -0800
Subject: [PATCH 5/5] Add s16 test case

---
 .../UnitTests/Vectorizer/find-last.cpp        | 180 +++++++++++++++---
 .../Vectorizer/find-last.reference_output     |  45 +++--
 2 files changed, 176 insertions(+), 49 deletions(-)

diff --git a/SingleSource/UnitTests/Vectorizer/find-last.cpp b/SingleSource/UnitTests/Vectorizer/find-last.cpp
index fadccbd40c..8ebd608175 100644
--- a/SingleSource/UnitTests/Vectorizer/find-last.cpp
+++ b/SingleSource/UnitTests/Vectorizer/find-last.cpp
@@ -123,8 +123,8 @@ int main(void) {
   return Rdx;
 
   {
-    // Find the last index where A[I] > B[I] and update Rdx when the condition
-    // is true.
+    // Find the last index where A[I] > B[I] and update the 32-bit Rdx when the
+    // condition is true.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
 	int32_t Rdx = -1;,
 	DEFINE_FINDLAST_LOOP_BODY(
@@ -133,13 +133,27 @@ int main(void) {
 	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-					  "findlast_icmp_true_update");
+					  "findlast_icmp_s32_true_update");
     checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
-					"findlast_fcmp_true_update");
+					"findlast_fcmp_s32_true_update");
   }
 
   {
-    // Update Rdx when the condition A[I] > B[I] is false.
+    // Find the last index where A[I] > B[I] and update the 16-bit Rdx when the
+    // condition is true.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+	int16_t Rdx = -1;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int16_t)),
+	int16_t);
+    checkVectorFunction<int16_t, int16_t>(ScalarFn, VectorFn,
+					  "findlast_icmp_s16_true_update");
+  }
+
+  {
+    // Update the 32-bit Rdx when the condition A[I] > B[I] is false.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
 	int32_t Rdx = -1;,
 	DEFINE_FINDLAST_LOOP_BODY(
@@ -148,13 +162,26 @@ int main(void) {
 	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-					  "findlast_icmp_false_update");
+					  "findlast_icmp_s32_false_update");
     checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
-					"findlast_fcmp_false_update");
+					"findlast_fcmp_s32_false_update");
+  }
+
+  {
+    // Update the 16-bit Rdx when the condition A[I] > B[I] is false.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+	int16_t Rdx = -1;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ Rdx, /* FalseVal= */ I,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int16_t)),
+	int16_t);
+    checkVectorFunction<int16_t, int16_t>(ScalarFn, VectorFn,
+					  "findlast_icmp_s16_false_update");
   }
 
   {
-    // Find the last index with the start value TC.
+    // Find the last 32-bit index with the start value TC.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
 	int32_t Rdx = TC;,
 	DEFINE_FINDLAST_LOOP_BODY(
@@ -163,13 +190,26 @@ int main(void) {
 	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-					  "findlast_icmp_start_TC");
+					  "findlast_icmp_s32_start_TC");
     checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
-					"findlast_fcmp_start_TC");
+					"findlast_fcmp_s32_start_TC");
   }
 
   {
-    // Increment the induction variable by 2.
+    // Find the last 16-bit index with the start value TC.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+	int16_t Rdx = TC;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 0, /* Step= */ 1, /* RetTy= */ int16_t)),
+	int16_t);
+    checkVectorFunction<int16_t, int16_t>(ScalarFn, VectorFn,
+					  "findlast_icmp_s16_start_TC");
+  }
+
+  {
+    // Increment the 32-bit induction variable by 2.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
 	int32_t Rdx = -1;,
 	DEFINE_FINDLAST_LOOP_BODY(
@@ -178,13 +218,26 @@ int main(void) {
 	    INC_COND(/* Start= */ 0, /* Step= */ 2, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-					  "findlast_icmp_inc_2");
+					  "findlast_icmp_s32_inc_2");
     checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
-					"findlast_fcmp_inc_2");
+					"findlast_fcmp_s32_inc_2");
   }
 
   {
-    // Check with decreasing induction variable.
+    // Increment the 16-bit induction variable by 2.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+	int16_t Rdx = -1;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 0, /* Step= */ 2, /* RetTy= */ int16_t)),
+	int16_t);
+    checkVectorFunction<int16_t, int16_t>(ScalarFn, VectorFn,
+					  "findlast_icmp_s16_inc_2");
+  }
+
+  {
+    // Check with a decreasing 32-bit induction variable.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
 	int32_t Rdx = -1;,
 	DEFINE_FINDLAST_LOOP_BODY(
@@ -193,13 +246,26 @@ int main(void) {
 	    DEC_COND(/* End= */ 0, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(
-        ScalarFn, VectorFn, "findlast_icmp_start_decreasing_induction");
+        ScalarFn, VectorFn, "findlast_icmp_s32_start_decreasing_induction");
     checkVectorFunction<int32_t, float>(
-        ScalarFn, VectorFn, "findlast_fcmp_start_decreasing_induction");
+        ScalarFn, VectorFn, "findlast_fcmp_s32_start_decreasing_induction");
+  }
+
+  {
+    // Check with a decreasing 16-bit induction variable.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+	int16_t Rdx = -1;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    DEC_COND(/* End= */ 0, /* Step= */ 1, /* RetTy= */ int16_t)),
+	int16_t);
+    checkVectorFunction<int16_t, int16_t>(
+        ScalarFn, VectorFn, "findlast_icmp_s16_start_decreasing_induction");
   }
 
   {
-    // Check with the induction variable starts from 3.
+    // Check with a 32-bit induction variable that starts from 3.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
 	int32_t Rdx = -1;,
 	DEFINE_FINDLAST_LOOP_BODY(
@@ -208,13 +274,26 @@ int main(void) {
 	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
     checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-					  "findlast_icmp_iv_start_3");
+					  "findlast_icmp_s32_iv_start_3");
     checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
-					"findlast_fcmp_iv_start_3");
+					"findlast_fcmp_s32_iv_start_3");
   }
 
   {
-    // Check with start value of 3 and induction variable starts at 3.
+    // Check with a 16-bit induction variable that starts from 3.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+	int16_t Rdx = -1;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int16_t)),
+	int16_t);
+    checkVectorFunction<int16_t, int16_t>(ScalarFn, VectorFn,
+					  "findlast_icmp_s16_iv_start_3");
+  }
+
+  {
+    // Check with start value 3 and a 32-bit induction variable starting at 3.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
 	int32_t Rdx = 3;,
 	DEFINE_FINDLAST_LOOP_BODY(
@@ -222,14 +301,27 @@ int main(void) {
 	    /* ForCond= */
 	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
-    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-					  "findlast_icmp_start_3_iv_start_3");
+    checkVectorFunction<int32_t, int32_t>(
+	ScalarFn, VectorFn, "findlast_icmp_s32_start_3_iv_start_3");
     checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
-					"findlast_fcmp_start_3_iv_start_3");
+					"findlast_fcmp_s32_start_3_iv_start_3");
   }
 
   {
-    // Check with start value of 2 and induction variable starts at 3.
+    // Check with start value 3 and a 16-bit induction variable starting at 3.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+	int16_t Rdx = 3;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int16_t)),
+	int16_t);
+    checkVectorFunction<int16_t, int16_t>(
+	ScalarFn, VectorFn, "findlast_icmp_s16_start_3_iv_start_3");
+  }
+
+  {
+    // Check with start value 2 and a 32-bit induction variable starting at 3.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
 	int32_t Rdx = 2;,
 	DEFINE_FINDLAST_LOOP_BODY(
@@ -237,14 +329,27 @@ int main(void) {
 	    /* ForCond= */
 	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
-    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-					  "findlast_icmp_start_2_iv_start_3");
+    checkVectorFunction<int32_t, int32_t>(
+	ScalarFn, VectorFn, "findlast_icmp_s32_start_2_iv_start_3");
     checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
-					"findlast_fcmp_start_2_iv_start_3");
+					"findlast_fcmp_s32_start_2_iv_start_3");
   }
 
   {
-    // Check with start value of 4 and induction variable starts at 3.
+    // Check with start value 2 and a 16-bit induction variable starting at 3.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+	int16_t Rdx = 2;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int16_t)),
+	int16_t);
+    checkVectorFunction<int16_t, int16_t>(
+	ScalarFn, VectorFn, "findlast_icmp_s16_start_2_iv_start_3");
+  }
+
+  {
+    // Check with start value 4 and a 32-bit induction variable starting at 3.
     DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
 	int32_t Rdx = 4;,
 	DEFINE_FINDLAST_LOOP_BODY(
@@ -252,10 +357,23 @@ int main(void) {
 	    /* ForCond= */
 	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int32_t)),
 	int32_t);
-    checkVectorFunction<int32_t, int32_t>(ScalarFn, VectorFn,
-					  "findlast_icmp_start_4_iv_start_3");
+    checkVectorFunction<int32_t, int32_t>(
+	ScalarFn, VectorFn, "findlast_icmp_s32_start_4_iv_start_3");
     checkVectorFunction<int32_t, float>(ScalarFn, VectorFn,
-					"findlast_fcmp_start_4_iv_start_3");
+					"findlast_fcmp_s32_start_4_iv_start_3");
+  }
+
+  {
+    // Check with start value 4 and a 16-bit induction variable starting at 3.
+    DEFINE_SCALAR_AND_VECTOR_FN2_TYPE(
+	int16_t Rdx = 4;,
+	DEFINE_FINDLAST_LOOP_BODY(
+	    /* TrueVal= */ I, /* FalseVal= */ Rdx,
+	    /* ForCond= */
+	    INC_COND(/* Start= */ 3, /* Step= */ 1, /* RetTy= */ int16_t)),
+	int16_t);
+    checkVectorFunction<int16_t, int16_t>(
+	ScalarFn, VectorFn, "findlast_icmp_s16_start_4_iv_start_3");
   }
 
   return 0;
diff --git a/SingleSource/UnitTests/Vectorizer/find-last.reference_output b/SingleSource/UnitTests/Vectorizer/find-last.reference_output
index e37d337343..6b5cf100cf 100644
--- a/SingleSource/UnitTests/Vectorizer/find-last.reference_output
+++ b/SingleSource/UnitTests/Vectorizer/find-last.reference_output
@@ -1,19 +1,28 @@
-Checking findlast_icmp_true_update
-Checking findlast_fcmp_true_update
-Checking findlast_icmp_false_update
-Checking findlast_fcmp_false_update
-Checking findlast_icmp_start_TC
-Checking findlast_fcmp_start_TC
-Checking findlast_icmp_inc_2
-Checking findlast_fcmp_inc_2
-Checking findlast_icmp_start_decreasing_induction
-Checking findlast_fcmp_start_decreasing_induction
-Checking findlast_icmp_iv_start_3
-Checking findlast_fcmp_iv_start_3
-Checking findlast_icmp_start_3_iv_start_3
-Checking findlast_fcmp_start_3_iv_start_3
-Checking findlast_icmp_start_2_iv_start_3
-Checking findlast_fcmp_start_2_iv_start_3
-Checking findlast_icmp_start_4_iv_start_3
-Checking findlast_fcmp_start_4_iv_start_3
+Checking findlast_icmp_s32_true_update
+Checking findlast_fcmp_s32_true_update
+Checking findlast_icmp_s16_true_update
+Checking findlast_icmp_s32_false_update
+Checking findlast_fcmp_s32_false_update
+Checking findlast_icmp_s16_false_update
+Checking findlast_icmp_s32_start_TC
+Checking findlast_fcmp_s32_start_TC
+Checking findlast_icmp_s16_start_TC
+Checking findlast_icmp_s32_inc_2
+Checking findlast_fcmp_s32_inc_2
+Checking findlast_icmp_s16_inc_2
+Checking findlast_icmp_s32_start_decreasing_induction
+Checking findlast_fcmp_s32_start_decreasing_induction
+Checking findlast_icmp_s16_start_decreasing_induction
+Checking findlast_icmp_s32_iv_start_3
+Checking findlast_fcmp_s32_iv_start_3
+Checking findlast_icmp_s16_iv_start_3
+Checking findlast_icmp_s32_start_3_iv_start_3
+Checking findlast_fcmp_s32_start_3_iv_start_3
+Checking findlast_icmp_s16_start_3_iv_start_3
+Checking findlast_icmp_s32_start_2_iv_start_3
+Checking findlast_fcmp_s32_start_2_iv_start_3
+Checking findlast_icmp_s16_start_2_iv_start_3
+Checking findlast_icmp_s32_start_4_iv_start_3
+Checking findlast_fcmp_s32_start_4_iv_start_3
+Checking findlast_icmp_s16_start_4_iv_start_3
 exit 0