llvm · andrewbriand · Jan 6, 2025
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -54500,10 +54500,22 @@ static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
 
   // If either operand is NaN, the 2nd source operand (Op0) is passed through.
   SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0);
-  SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType, Op0, Op0, ISD::SETUO);
 
   // If Op0 is a NaN, select Op1. Otherwise, select the max. If both operands
   // are NaN, the NaN value of Op1 is the result.
+
+  // Manually lower f32 SSE Select to fix Bug 25959
+  if (!Subtarget.hasAVX() && VT == MVT::f32) {
+    unsigned SSECC = 3; // UNORD
+    SDValue IsOp0Nan = DAG.getNode(X86ISD::FSETCC, DL, VT, Op0, Op0,
+                                   DAG.getTargetConstant(SSECC, DL, MVT::i8));
+    SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, IsOp0Nan, MinOrMax);
+    SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Op1, IsOp0Nan);
+    return DAG.getNode(X86ISD::FOR, DL, VT, And, AndN);
+  }
+
+  SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType, Op0, Op0, ISD::SETUO);
+
   return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
 }
 

diff --git a/llvm/test/CodeGen/X86/fmaxnum.ll b/llvm/test/CodeGen/X86/fmaxnum.ll
@@ -24,14 +24,12 @@ declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>)
 define float @test_fmaxf(float %x, float %y) {
 ; SSE-LABEL: test_fmaxf:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movaps %xmm0, %xmm2
-; SSE-NEXT:    cmpunordss %xmm0, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm3
-; SSE-NEXT:    andps %xmm1, %xmm3
-; SSE-NEXT:    maxss %xmm0, %xmm1
-; SSE-NEXT:    andnps %xmm1, %xmm2
-; SSE-NEXT:    orps %xmm3, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    movaps %xmm1, %xmm2
+; SSE-NEXT:    maxss %xmm0, %xmm2
+; SSE-NEXT:    cmpunordss %xmm0, %xmm0
+; SSE-NEXT:    andps %xmm0, %xmm1
+; SSE-NEXT:    andnps %xmm2, %xmm0
+; SSE-NEXT:    orps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test_fmaxf:
@@ -113,14 +111,12 @@ define x86_fp80 @test_fmaxl(x86_fp80 %x, x86_fp80 %y) {
 define float @test_intrinsic_fmaxf(float %x, float %y) {
 ; SSE-LABEL: test_intrinsic_fmaxf:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movaps %xmm0, %xmm2
-; SSE-NEXT:    cmpunordss %xmm0, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm3
-; SSE-NEXT:    andps %xmm1, %xmm3
-; SSE-NEXT:    maxss %xmm0, %xmm1
-; SSE-NEXT:    andnps %xmm1, %xmm2
-; SSE-NEXT:    orps %xmm3, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    movaps %xmm1, %xmm2
+; SSE-NEXT:    maxss %xmm0, %xmm2
+; SSE-NEXT:    cmpunordss %xmm0, %xmm0
+; SSE-NEXT:    andps %xmm0, %xmm1
+; SSE-NEXT:    andnps %xmm2, %xmm0
+; SSE-NEXT:    orps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test_intrinsic_fmaxf:

diff --git a/llvm/test/CodeGen/X86/fminnum.ll b/llvm/test/CodeGen/X86/fminnum.ll
@@ -24,14 +24,12 @@ declare <8 x double> @llvm.minnum.v8f64(<8 x double>, <8 x double>)
 define float @test_fminf(float %x, float %y) {
 ; SSE-LABEL: test_fminf:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movaps %xmm0, %xmm2
-; SSE-NEXT:    cmpunordss %xmm0, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm3
-; SSE-NEXT:    andps %xmm1, %xmm3
-; SSE-NEXT:    minss %xmm0, %xmm1
-; SSE-NEXT:    andnps %xmm1, %xmm2
-; SSE-NEXT:    orps %xmm3, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    movaps %xmm1, %xmm2
+; SSE-NEXT:    minss %xmm0, %xmm2
+; SSE-NEXT:    cmpunordss %xmm0, %xmm0
+; SSE-NEXT:    andps %xmm0, %xmm1
+; SSE-NEXT:    andnps %xmm2, %xmm0
+; SSE-NEXT:    orps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test_fminf:
@@ -113,14 +111,12 @@ define x86_fp80 @test_fminl(x86_fp80 %x, x86_fp80 %y) {
 define float @test_intrinsic_fminf(float %x, float %y) {
 ; SSE-LABEL: test_intrinsic_fminf:
 ; SSE:       # %bb.0:
-; SSE-NEXT:    movaps %xmm0, %xmm2
-; SSE-NEXT:    cmpunordss %xmm0, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm3
-; SSE-NEXT:    andps %xmm1, %xmm3
-; SSE-NEXT:    minss %xmm0, %xmm1
-; SSE-NEXT:    andnps %xmm1, %xmm2
-; SSE-NEXT:    orps %xmm3, %xmm2
-; SSE-NEXT:    movaps %xmm2, %xmm0
+; SSE-NEXT:    movaps %xmm1, %xmm2
+; SSE-NEXT:    minss %xmm0, %xmm2
+; SSE-NEXT:    cmpunordss %xmm0, %xmm0
+; SSE-NEXT:    andps %xmm0, %xmm1
+; SSE-NEXT:    andnps %xmm2, %xmm0
+; SSE-NEXT:    orps %xmm1, %xmm0
 ; SSE-NEXT:    retq
 ;
 ; AVX1-LABEL: test_intrinsic_fminf: