From 164e9acb3cd7d0a6c8da047af1a23a95710e6202 Mon Sep 17 00:00:00 2001 From: lezcano Date: Tue, 11 Feb 2025 22:44:21 +0000 Subject: [PATCH] Clamp vectorisation to avoid computing the padding with unrealistic vectorisation lengths --- lib/Analysis/Allocation.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/lib/Analysis/Allocation.cpp b/lib/Analysis/Allocation.cpp index 7024711a6434a..86bd4cac4a1d0 100644 --- a/lib/Analysis/Allocation.cpp +++ b/lib/Analysis/Allocation.cpp @@ -130,6 +130,16 @@ ScratchConfig getScratchConfigForCvt(RankedTensorType srcTy, unsigned contiguousShapeDim = scratchConfig.repShape[scratchConfig.order[0]]; scratchConfig.inVec = std::min(scratchConfig.inVec, contiguousShapeDim); scratchConfig.outVec = std::min(scratchConfig.outVec, contiguousShapeDim); + // Clamp the vector length to 128 / element bitwidth as this is the max + // vectorisation + auto inBitWidth = isa(srcTy.getElementType()) + ? kPtrBitWidth + : srcTy.getElementTypeBitWidth(); + auto outBitWidth = isa(dstTy.getElementType()) + ? kPtrBitWidth + : dstTy.getElementTypeBitWidth(); + scratchConfig.inVec = std::min(scratchConfig.inVec, 128 / inBitWidth); + scratchConfig.outVec = std::min(scratchConfig.outVec, 128 / outBitWidth); // No padding is required if the tensor is 1-D, or if all dimensions except // the first accessed dimension have a size of 1.