Skip to content

Commit

Permalink
[AMD] NFC: Remove unused conversion functions (#5693)
Browse files Browse the repository at this point in the history
  • Loading branch information
antiagainst authored Jan 24, 2025
1 parent d9fd9c5 commit ad16e3d
Showing 1 changed file with 0 additions and 121 deletions.
121 changes: 0 additions & 121 deletions third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -853,127 +853,6 @@ static ConverterT Fp16_to_Fp8E4M3FNUZ(AMD::ISAFamily isaFamily) {
: Fp16_to_Fp8E4M3FNUZ_SW;
}

// Converts four FP8 E4M3 values to BF16 using integer bit manipulation only.
//
// Reads v[0..3] (so `v` must hold at least four values) and returns four
// bf16 values in the same order. Two inputs are processed per 32-bit word:
// each fp8 byte is placed in an odd i8 lane of a <4 x i8> vector, which is
// presumably the upper byte of a 16-bit half once bitcast to i32 (assumes
// little-endian lane layout -- TODO confirm).
//
// WARN: subnormal inputs (0bs0000xxx) are not handled.
static SmallVector<Value> Fp8E4M3_to_Bf16(Location loc,
                                          ConversionPatternRewriter &rewriter,
                                          const SmallVector<Value> &v) {
  auto i8x4Ty = vec_ty(i8_ty, 4);

  // Builds the i32 bit pattern of <4 x i8> {0, first, 0, second}.
  auto packIntoOddLanes = [&](Value first, Value second) -> Value {
    Value lanes = undef(i8x4Ty);
    lanes = insert_element(i8x4Ty, lanes, int_val(8, 0), i32_val(0));
    lanes = insert_element(i8x4Ty, lanes, first, i32_val(1));
    lanes = insert_element(i8x4Ty, lanes, int_val(8, 0), i32_val(2));
    lanes = insert_element(i8x4Ty, lanes, second, i32_val(3));
    Value packed = bitcast(lanes, i32_ty);
    return packed;
  };

  Value packed01 = packIntoOddLanes(v[0], v[1]);
  Value packed23 = packIntoOddLanes(v[2], v[3]);

  // Clear bit 15 of every 16-bit half, then shift right by 4 so the remaining
  // bits sit 4 positions lower (moving the 4-bit exponent / 3-bit mantissa
  // into the bf16 field positions).
  Value magnitude01 = and_(i32_ty, packed01, i32_val(0x7fff7fff));
  Value magnitude23 = and_(i32_ty, packed23, i32_val(0x7fff7fff));
  magnitude01 = lshr(i32_ty, magnitude01, i32_val(4));
  magnitude23 = lshr(i32_ty, magnitude23, i32_val(4));

  // 0x3c00 == 120 << 7: adds 120 to the exponent field of each half
  // (presumably the bias difference 127 - 7 between bf16 and E4M3).
  magnitude01 = add(i32_ty, magnitude01, i32_val(0x3c003c00));
  magnitude23 = add(i32_ty, magnitude23, i32_val(0x3c003c00));

  // Bit 15 of each half is taken unchanged from the packed input.
  Value sign01 = and_(i32_ty, packed01, i32_val(0x80008000));
  Value sign23 = and_(i32_ty, packed23, i32_val(0x80008000));

  auto bf16x2Ty = vec_ty(bf16_ty, 2);
  Value pair01 = or_(i32_ty, sign01, magnitude01);
  Value pair23 = or_(i32_ty, sign23, magnitude23);
  pair01 = bitcast(pair01, bf16x2Ty);
  pair23 = bitcast(pair23, bf16x2Ty);

  // Unpack both <2 x bf16> vectors, preserving the input order.
  SmallVector<Value> results;
  for (Value pair : {pair01, pair23})
    for (int lane = 0; lane < 2; ++lane)
      results.push_back(extract_element(bf16_ty, pair, i32_val(lane)));
  return results;
}

// Converts four BF16 values to FP8 E4M3 using integer bit manipulation only.
//
// Reads v[0..3] (so `v` must hold at least four values) and returns four i8
// values, in the same order, carrying the fp8 bit patterns. Two inputs are
// processed per 32-bit word, one per 16-bit half.
static SmallVector<Value> Bf16_to_Fp8E4M3(Location loc,
                                          ConversionPatternRewriter &rewriter,
                                          const SmallVector<Value> &v) {
  auto bf16x2Ty = vec_ty(bf16_ty, 2);
  auto i8x4Ty = vec_ty(i8_ty, 4);

  // Pack the inputs pairwise and reinterpret each pair as an i32.
  Value pair01 = undef(bf16x2Ty);
  Value pair23 = undef(bf16x2Ty);
  pair01 = insert_element(bf16x2Ty, pair01, v[0], i32_val(0));
  pair01 = insert_element(bf16x2Ty, pair01, v[1], i32_val(1));
  pair23 = insert_element(bf16x2Ty, pair23, v[2], i32_val(0));
  pair23 = insert_element(bf16x2Ty, pair23, v[3], i32_val(1));
  pair01 = bitcast(pair01, i32_ty);
  pair23 = bitcast(pair23, i32_ty);

  // Copies bytes `firstByte` and `secondByte` of each <4 x i8> source into
  // lanes 0..3 of `dst` (src01 fills lanes 0-1, src23 fills lanes 2-3) and
  // returns the result reinterpreted as an i32.
  auto gatherBytes = [&](Value dst, Value src01, Value src23, int firstByte,
                         int secondByte) -> Value {
    dst = insert_element(i8x4Ty, dst,
                         extract_element(i8_ty, src01, i32_val(firstByte)),
                         i32_val(0));
    dst = insert_element(i8x4Ty, dst,
                         extract_element(i8_ty, src01, i32_val(secondByte)),
                         i32_val(1));
    dst = insert_element(i8x4Ty, dst,
                         extract_element(i8_ty, src23, i32_val(firstByte)),
                         i32_val(2));
    dst = insert_element(i8x4Ty, dst,
                         extract_element(i8_ty, src23, i32_val(secondByte)),
                         i32_val(3));
    Value packed = bitcast(dst, i32_ty);
    return packed;
  };

  // Clamps each 16-bit half of `bits` independently to [0x3c00, 0x43f0]
  // (unsigned compare), so the later bias subtraction cannot go below zero
  // and the shifted result fits in 7 bits.
  auto clampHalves = [&](Value bits) -> Value {
    Value upper = and_(i32_ty, bits, i32_val(0xffff0000));
    upper = umax(i32_ty, upper, i32_val(0x3c000000));
    upper = umin(i32_ty, upper, i32_val(0x43f00000));
    Value lower = and_(i32_ty, bits, i32_val(0x0000ffff));
    lower = umax(i32_ty, lower, i32_val(0x3c00));
    lower = umin(i32_ty, lower, i32_val(0x43f0));
    Value merged = or_(i32_ty, upper, lower);
    return merged;
  };

  // Sign bits: bit 15 of every half ends up as bit 7 of bytes 1 and 3, which
  // are then gathered into one byte per output element.
  Value signBits01 = and_(i32_ty, pair01, i32_val(0x80008000));
  Value signBits23 = and_(i32_ty, pair23, i32_val(0x80008000));
  Value sign = undef(i8x4Ty);
  signBits01 = bitcast(signBits01, i8x4Ty);
  signBits23 = bitcast(signBits23, i8x4Ty);
  sign = gatherBytes(sign, signBits01, signBits23, 1, 3);

  // Magnitudes: drop the sign, clamp, then rebias and narrow.
  Value abs01 = and_(i32_ty, pair01, i32_val(0x7fff7fff));
  Value abs23 = and_(i32_ty, pair23, i32_val(0x7fff7fff));
  abs01 = clampHalves(abs01);
  abs23 = clampHalves(abs23);

  // Add 8 per half (half of the 4 bits about to be shifted out), subtract
  // 0x3c00 (120 << 7) from each half's exponent field, then shift right by 4.
  abs01 = add(i32_ty, abs01, i32_val(0x80008));
  abs23 = add(i32_ty, abs23, i32_val(0x80008));
  abs01 = sub(i32_ty, abs01, i32_val(0x3c003c00));
  abs23 = sub(i32_ty, abs23, i32_val(0x3c003c00));
  abs01 = lshr(i32_ty, abs01, i32_val(4));
  abs23 = lshr(i32_ty, abs23, i32_val(4));

  // The narrowed magnitude of each half now lives in bytes 0 and 2.
  abs01 = bitcast(abs01, i8x4Ty);
  abs23 = bitcast(abs23, i8x4Ty);
  Value magnitude = undef(i8x4Ty);
  magnitude = gatherBytes(magnitude, abs01, abs23, 0, 2);

  // Merge sign and magnitude, then split into the four result bytes.
  Value fp8x4 = or_(i32_ty, magnitude, sign);
  fp8x4 = bitcast(fp8x4, i8x4Ty);

  SmallVector<Value> results;
  for (int lane = 0; lane < 4; ++lane)
    results.push_back(extract_element(i8_ty, fp8x4, i32_val(lane)));
  return results;
}

template <typename SourceOp, typename DestOp>
struct ElementwiseOpConversion
: public ElementwiseOpConversionBase<
Expand Down

0 comments on commit ad16e3d

Please sign in to comment.