Lower neon_vmaxvq_u8, neon_vmaxvq_s8, neon_vmaxv_u8 and neon_vmaxv_s8 #1265

Merged 1 commit on Jan 7, 2025
32 changes: 28 additions & 4 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2351,6 +2351,26 @@ emitCommonNeonCallPattern0(CIRGenFunction &cgf, llvm::StringRef intrincsName,
return builder.createBitcast(res, resultType);
}

/// The function `emitCommonNeonVecAcrossCall` implements a common way
/// to lower NEON intrinsics that match the following pattern:
/// 1. There is only one argument, which is of vector type.
/// 2. The result of the intrinsic is the element type of the input vector.
/// This kind of intrinsic is typically used for operations that reduce
/// across the lanes of the input vector.

static mlir::Value emitCommonNeonVecAcrossCall(CIRGenFunction &cgf,
                                               llvm::StringRef intrincsName,
                                               mlir::Type eltTy,
                                               unsigned vecLen,
                                               const clang::CallExpr *e) {
  CIRGenBuilderTy &builder = cgf.getBuilder();
  mlir::Value op = cgf.emitScalarExpr(e->getArg(0));
  cir::VectorType vTy =
      cir::VectorType::get(&cgf.getMLIRContext(), eltTy, vecLen);
  llvm::SmallVector<mlir::Value, 1> args{op};
  return emitNeonCall(builder, {vTy}, args, intrincsName, eltTy,
                      cgf.getLoc(e->getExprLoc()));
}

mlir::Value CIRGenFunction::emitCommonNeonBuiltinExpr(
unsigned builtinID, unsigned llvmIntrinsic, unsigned altLLVMIntrinsic,
const char *nameHint, unsigned modifier, const CallExpr *e,
@@ -4246,25 +4266,29 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E,
llvm_unreachable("NEON::BI__builtin_neon_vaddvq_s16 NYI");
}
case NEON::BI__builtin_neon_vmaxv_u8: {
llvm_unreachable("NEON::BI__builtin_neon_vmaxv_u8 NYI");
return emitCommonNeonVecAcrossCall(*this, "aarch64.neon.umaxv", UInt8Ty, 8,
E);
}
case NEON::BI__builtin_neon_vmaxv_u16: {
llvm_unreachable("NEON::BI__builtin_neon_vmaxv_u16 NYI");
}
case NEON::BI__builtin_neon_vmaxvq_u8: {
llvm_unreachable("NEON::BI__builtin_neon_vmaxvq_u8 NYI");
return emitCommonNeonVecAcrossCall(*this, "aarch64.neon.umaxv", UInt8Ty, 16,
E);
}
case NEON::BI__builtin_neon_vmaxvq_u16: {
llvm_unreachable("NEON::BI__builtin_neon_vmaxvq_u16 NYI");
}
case NEON::BI__builtin_neon_vmaxv_s8: {
llvm_unreachable("NEON::BI__builtin_neon_vmaxv_s8 NYI");
return emitCommonNeonVecAcrossCall(*this, "aarch64.neon.smaxv", SInt8Ty, 8,
E);
}
case NEON::BI__builtin_neon_vmaxv_s16: {
llvm_unreachable("NEON::BI__builtin_neon_vmaxv_s16 NYI");
}
case NEON::BI__builtin_neon_vmaxvq_s8: {
llvm_unreachable("NEON::BI__builtin_neon_vmaxvq_s8 NYI");
return emitCommonNeonVecAcrossCall(*this, "aarch64.neon.smaxv", SInt8Ty, 16,
E);
}
case NEON::BI__builtin_neon_vmaxvq_s16: {
llvm_unreachable("NEON::BI__builtin_neon_vmaxvq_s16 NYI");
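To make the "across" pattern that the new helper handles concrete, here is a minimal scalar-equivalent sketch of what an across-vector max such as vmaxv_u8 computes: one vector input folded into one element-typed result. The function name scalar_vmaxv_u8 is illustrative only and is not part of this patch.

#include <stdint.h>

// Scalar model of vmaxv_u8: reduce 8 unsigned 8-bit lanes to their maximum.
static uint8_t scalar_vmaxv_u8(const uint8_t lanes[8]) {
  uint8_t max = lanes[0];
  for (int i = 1; i < 8; ++i) // fold the remaining lanes into the running max
    if (lanes[i] > max)
      max = lanes[i];
  return max; // the result has the element type, not the vector type
}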
48 changes: 48 additions & 0 deletions clang/test/CIR/CodeGen/AArch64/neon-misc.c
@@ -1727,3 +1727,51 @@ uint64_t test_vaddlvq_u32(uint32x4_t a) {
// LLVM-NEXT: [[VADDLVQ_U32_I:%.*]] = call i64 @llvm.aarch64.neon.uaddlv.i64.v4i32(<4 x i32> [[A]])
// LLVM-NEXT: ret i64 [[VADDLVQ_U32_I]]
}

int8_t test_vmaxv_s8(int8x8_t a) {
  return vmaxv_s8(a);

  // CIR-LABEL: vmaxv_s8
  // CIR: cir.llvm.intrinsic "aarch64.neon.smaxv" {{%.*}} : (!cir.vector<!s8i x 8>) -> !s8i

  // LLVM-LABEL: @test_vmaxv_s8
  // LLVM-SAME: (<8 x i8> [[a:%.*]])
  // LLVM: [[res:%.*]] = call i8 @llvm.aarch64.neon.smaxv.i8.v8i8(<8 x i8> [[a]])
  // LLVM: ret i8 [[res]]
}

uint8_t test_vmaxv_u8(uint8x8_t a) {
  return vmaxv_u8(a);

  // CIR-LABEL: vmaxv_u8
  // CIR: cir.llvm.intrinsic "aarch64.neon.umaxv" {{%.*}} : (!cir.vector<!u8i x 8>) -> !u8i

  // LLVM-LABEL: @test_vmaxv_u8
  // LLVM-SAME: (<8 x i8> [[a:%.*]])
  // LLVM: [[res:%.*]] = call i8 @llvm.aarch64.neon.umaxv.i8.v8i8(<8 x i8> [[a]])
  // LLVM: ret i8 [[res]]
}

int8_t test_vmaxvq_s8(int8x16_t a) {
  return vmaxvq_s8(a);

  // CIR-LABEL: vmaxvq_s8
  // CIR: cir.llvm.intrinsic "aarch64.neon.smaxv" {{%.*}} : (!cir.vector<!s8i x 16>) -> !s8i

  // LLVM-LABEL: @test_vmaxvq_s8
  // LLVM-SAME: (<16 x i8> [[a:%.*]])
  // LLVM: [[res:%.*]] = call i8 @llvm.aarch64.neon.smaxv.i8.v16i8(<16 x i8> [[a]])
  // LLVM: ret i8 [[res]]
}

uint8_t test_vmaxvq_u8(uint8x16_t a) {
  return vmaxvq_u8(a);

  // CIR-LABEL: vmaxvq_u8
  // CIR: cir.llvm.intrinsic "aarch64.neon.umaxv" {{%.*}} : (!cir.vector<!u8i x 16>) -> !u8i

  // LLVM-LABEL: @test_vmaxvq_u8
  // LLVM-SAME: (<16 x i8> [[a:%.*]])
  // LLVM: [[res:%.*]] = call i8 @llvm.aarch64.neon.umaxv.i8.v16i8(<16 x i8> [[a]])
  // LLVM: ret i8 [[res]]
}
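For context, here is a small usage sketch of one of the intrinsics this patch lowers, assuming an AArch64 target and <arm_neon.h>; the function max_byte_16 is illustrative and not part of the test file.

#include <arm_neon.h>
#include <stdint.h>

// Returns the largest byte in a 16-byte block using the across-vector max.
uint8_t max_byte_16(const uint8_t *p) {
  uint8x16_t v = vld1q_u8(p); // load 16 unsigned 8-bit lanes
  return vmaxvq_u8(v);        // reduce to a single uint8_t, lowered via aarch64.neon.umaxv
}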