From 36e75933d06623ab3e3ead5f821414fc5e2df719 Mon Sep 17 00:00:00 2001 From: Nirvedh Meshram <96096277+nirvedhmeshram@users.noreply.github.com> Date: Thu, 30 Jan 2025 10:20:14 -0600 Subject: [PATCH] [GPU] Allow vectorization for dynamic shapes with inner static dims (#19850) For the Tile and Fuse vectorize pipeline config setup, needing all dims static for vectorization seems unnecessary as dynamic dims will get tiled to 1 anyway due to other checks that skip dynamic dims. Now, if the innermost dim is static we will end up vectorizing. Fixes: https://github.com/iree-org/iree/issues/19843 --------- Signed-off-by: Nirvedh Meshram --- .../Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp | 3 +-- .../Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir | 8 ++++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp index d90e18b90d36..3c06d851360b 100644 --- a/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp +++ b/compiler/src/iree/compiler/Codegen/Dialect/GPU/TargetUtils/ConfigUtils.cpp @@ -559,11 +559,10 @@ LogicalResult setTileAndFuseLoweringConfig(IREE::GPU::TargetAttr target, [](AffineMap map) { return map.isProjectedPermutation(); }); bool powTwo = llvm::all_of(linalgOp->getOperands(), elementHasPowerOfTwoBitwidth); - bool staticShape = llvm::none_of(loopBounds, ShapedType::isDynamic); // Require all affine maps to be projected permutation so that we can // generate vector transfer ops. - bool vectorizable = projPerm && powTwo && staticShape; + bool vectorizable = projPerm && powTwo; const unsigned minBitwidth = getMinElementBitwidth(linalgOp); // Make sure we use a tile size that results in some integral number of bytes. 
diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir index 46243914bdd2..fde9e940c977 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/test/ROCDL/config_tile_and_fuse.mlir @@ -179,8 +179,8 @@ module { // CHECK: linalg.matmul {{.*}}lowering_config = #iree_gpu.lowering_config // CHECK-SAME: promote_operands = [0, 1] // CHECK-SAME: reduction = [0, 0, 4] -// CHECK-SAME: thread = [1, 1, 0] -// CHECK-SAME: workgroup = [1, 64, 0] +// CHECK-SAME: thread = [1, 4, 0] +// CHECK-SAME: workgroup = [1, 256, 0] // ----- @@ -198,8 +198,8 @@ module { // CHECK-LABEL: func.func @elementwise_dynamic_dim // CHECK-SAME: #iree_codegen.translation_info // CHECK: linalg.add {{.*}}lowering_config = #iree_gpu.lowering_config -// CHECK-SAME: thread = [1, 1] -// CHECK-SAME: workgroup = [1, 64] +// CHECK-SAME: thread = [1, 4] +// CHECK-SAME: workgroup = [1, 256] // -----