Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove (X, Y) coordinates from NpuDmaMemcpyNdOp #1971

Draft
wants to merge 20 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 5 additions & 7 deletions include/aie/Dialect/AIEX/IR/AIEX.td
Original file line number Diff line number Diff line change
Expand Up @@ -494,9 +494,9 @@ def AIE_NpuDmaMemcpyNdOp: AIEX_Op<"npu.dma_memcpy_nd", [
let description = [{
An n-dimensional half DMA operator.

Programs a DMA on coordinates (`x`, `y`) to access a memory `memref` with an access
pattern specified by `offsets`, `sizes` and `strides` or `static_offsets`, `static_sizes`
and `static_strides`. The operator references the target channel through the `metadata`
Programs a DMA to access a memory `memref` with an access pattern specified by `offsets`,
`sizes` and `strides` or `static_offsets`, `static_sizes` and `static_strides`. The operator
references the target DMA coordinates (`x`, `y`) and channel through the `metadata`
symbol and specifies a descriptor `id`, which becomes the `bd_id` when the operation is
lowered further. The `issue_token` attribute specifies whether the execution of this
operation should issue a token which can be received and read for synchronization purposes.
Expand Down Expand Up @@ -557,9 +557,7 @@ def AIE_NpuDmaMemcpyNdOp: AIEX_Op<"npu.dma_memcpy_nd", [
}];

let arguments = (
ins I64Attr:$x,
I64Attr:$y,
AnyMemRef:$memref,
ins AnyMemRef:$memref,
// NOTE: these are in reverse order: offset3, offset2, ...
Variadic<I64>:$offsets,
Variadic<I64>:$sizes,
Expand All @@ -580,7 +578,7 @@ def AIE_NpuDmaMemcpyNdOp: AIEX_Op<"npu.dma_memcpy_nd", [
);

let assemblyFormat = [{
`(` $x `,` $y `,` $memref ``
`(` $memref ``
custom<DynamicIndexList>($offsets, $static_offsets) ``
custom<DynamicIndexList>($sizes, $static_sizes) ``
custom<DynamicIndexList>($strides, $static_strides) ``
Expand Down
17 changes: 0 additions & 17 deletions lib/Dialect/AIEX/IR/AIEXDialect.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,10 +370,6 @@ LogicalResult AIEX::NpuDmaMemcpyNdOp::verify() {
llvm::map_to_vector(llvm::reverse(getMixedStrides()), [](OpFoldResult s) {
return getConstantIntValue(s).value();
});
llvm::SmallVector<int64_t, 4> hardwareSizes(4);
llvm::SmallVector<int64_t, 4> hardwareStrides(4);
getHardwareStridesWraps(targetModel, buffer, inputSizes, inputStrides,
hardwareSizes, hardwareStrides);
int64_t offset = getOffsetInBytes();

// The experimental HSA target uses this op on AIE1, skip all the AIE2
Expand All @@ -385,19 +381,6 @@ LogicalResult AIEX::NpuDmaMemcpyNdOp::verify() {
return emitOpError("Offset must be 4-byte-aligned.");
}

// dma_memcpy_nd transfers of the form [1, 1, 1, len][0, 0, 0, 1] do not
// specify any data layout transformation, but simply express a contiguous
// transfer of `len`. For backwards compatibility, we allow this to proceed
// even if it exceeds the maximum stride/wrap size of any one dimension,
// and simply do not lower any data layout transformations, since there is
// no other way to express this at the dma_memcpy_nd interface otherwise.
bool skipTransformationChecks = isLinearTransferWithoutTransformation();
if (failed(verifyStridesWraps(*this, buffer, getX(), getY(), inputSizes,
inputStrides, hardwareSizes, hardwareStrides,
skipTransformationChecks))) {
return failure();
}

// packet header
if (auto packetInfo = getPacket()) {
if (packetInfo->getPktType() > 7)
Expand Down
2 changes: 1 addition & 1 deletion lib/Dialect/AIEX/Transforms/AIECtrlPacketToDma.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ struct AIECtrlPacketToDmaPass : AIECtrlPacketToDmaBase<AIECtrlPacketToDmaPass> {

StringRef metadata = builder.getStringAttr(shimDmaAllocName);
builder.create<NpuDmaMemcpyNdOp>(
builder.getUnknownLoc(), 0, 0, newBlockArg,
builder.getUnknownLoc(), newBlockArg, SmallVector<Value>{},
AndraBisca marked this conversation as resolved.
Show resolved Hide resolved
SmallVector<Value>{}, SmallVector<Value>{},
SmallVector<Value>{}, ArrayRef(staticOffsets),
ArrayRef(staticSizes), ArrayRef(staticStrides),
Expand Down
13 changes: 13 additions & 0 deletions lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,19 @@ struct DmaToNpuPattern : OpConversionPattern<NpuDmaMemcpyNdOp> {
// row
row = IntegerAttr::get(i32ty, 0);

// dma_memcpy_nd transfers of the form [1, 1, 1, len][0, 0, 0, 1] do not
// specify any data layout transformation; they simply express a contiguous
// transfer of `len`. For backwards compatibility, such a transfer is allowed
// to proceed even if it exceeds the maximum stride/wrap size of any one
// dimension, and no data layout transformation is lowered for it, because
// the dma_memcpy_nd interface offers no other way to express it.
bool skipTransformationChecks = op.isLinearTransferWithoutTransformation();
if (failed(verifyStridesWraps(op, bufferType, col, 0, inputSizes,
inputStrides, sizes, strides,
skipTransformationChecks))) {
return failure();
}

// arg_idx
AIEX::RuntimeSequenceOp seq_op =
op->getParentOfType<AIEX::RuntimeSequenceOp>();
Expand Down
4 changes: 0 additions & 4 deletions python/dialects/aiex.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,6 @@ def __init__(
strides: MixedValues | None = None,
issue_token: bool | None = None,
):
x = 0
y = 0
if tap and not (offsets is None and sizes is None and strides is None):
raise ValueError(
"NpuDmaMemcpyNd can take either a TileAccessPattern OR (sizes and/or strides and/or offsets), but not both."
Expand Down Expand Up @@ -92,8 +90,6 @@ def __init__(
if isinstance(metadata, ObjectFifoCreateOp):
metadata = metadata.sym_name.value
super().__init__(
x,
y,
mem,
dynamic_offsets,
dynamic_sizes,
Expand Down
4 changes: 2 additions & 2 deletions test/Conversion/DmaToNpu/aiert_insts.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ module {
%c8 = arith.constant 8 : i64
%c16 = arith.constant 16 : i64
%c32 = arith.constant 32 : i64
aiex.npu.dma_memcpy_nd (0, 0, %out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c32][%c0,%c0,%c0, %c1]) { metadata = @of_toMem, id = 1 : i64, issue_token = true } : memref<64xi32>
aiex.npu.dma_memcpy_nd (0, 0, %in[%c0,%c2,%c0,%c0][%c1,%c2,%c2,%c8][%c0,%c16,%c8, %c1]) { metadata = @of_fromMem, id = 0 : i64, issue_token = false } : memref<4x2x8xi32>
aiex.npu.dma_memcpy_nd (%out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c32][%c0,%c0,%c0, %c1]) { metadata = @of_toMem, id = 1 : i64, issue_token = true } : memref<64xi32>
aiex.npu.dma_memcpy_nd (%in[%c0,%c2,%c0,%c0][%c1,%c2,%c2,%c8][%c0,%c16,%c8, %c1]) { metadata = @of_fromMem, id = 0 : i64, issue_token = false } : memref<4x2x8xi32>
}
aie.shim_dma_allocation @of_fromMem (MM2S, 0, 0)
aie.shim_dma_allocation @of_toMem (S2MM, 0, 0)
Expand Down
2 changes: 1 addition & 1 deletion test/Conversion/DmaToNpu/bad_dma_to_npu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ module @shimDmaMemcpy{
aie.device(xcve2302) {
memref.global "public" @toMem : memref<1xbf16>
aiex.runtime_sequence(%arg0: memref<1xbf16>, %arg1: memref<1xbf16>, %arg2: memref<1xbf16>) {
aiex.npu.dma_memcpy_nd (0, 0, %arg0[0, 0, 0, 0][4, 4, 64, 64][0, 64, 256, 1]) {id = 0 : i64, metadata = @toMem} : memref<1xbf16>
aiex.npu.dma_memcpy_nd (%arg0[0, 0, 0, 0][4, 4, 64, 64][0, 64, 256, 1]) {id = 0 : i64, metadata = @toMem} : memref<1xbf16>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
}
aie.shim_dma_allocation @toMem (S2MM, 0, 0)
Expand Down
2 changes: 1 addition & 1 deletion test/Conversion/DmaToNpu/bad_dma_to_npu_datatype.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ module @shimDmaMemcpy{
aie.device(xcve2302) {
memref.global "public" @toMem : memref<65536xi64>
aiex.runtime_sequence(%arg0: memref<65536xi64>, %arg1: memref<65536xi64>, %arg2: memref<65536xi64>) {
aiex.npu.dma_memcpy_nd (0, 0, %arg0[0, 0, 0, 0][4, 4, 64, 64][0, 64, 256, 1]) {id = 0 : i64, metadata = @toMem} : memref<65536xi64>
aiex.npu.dma_memcpy_nd (%arg0[0, 0, 0, 0][4, 4, 64, 64][0, 64, 256, 1]) {id = 0 : i64, metadata = @toMem} : memref<65536xi64>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
}
aie.shim_dma_allocation @toMem (S2MM, 0, 0)
Expand Down
4 changes: 2 additions & 2 deletions test/Conversion/DmaToNpu/dma_to_npu.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ module {
memref.global "public" @toMem : memref<16xi32>
memref.global "public" @fromMem : memref<16xi32>
aiex.runtime_sequence(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd (0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_memcpy_nd (0, 1, %arg1[0, 0, 0, 16][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @fromMem, id = 0 : i64 } : memref<16xi32>
aiex.npu.dma_memcpy_nd (%arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64 } : memref<16xi32>
aiex.npu.dma_memcpy_nd (%arg1[0, 0, 0, 16][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @fromMem, id = 0 : i64 } : memref<16xi32>
}
aie.shim_dma_allocation @fromMem (MM2S, 0, 0)
aie.shim_dma_allocation @toMem (S2MM, 0, 0)
Expand Down
4 changes: 2 additions & 2 deletions test/Conversion/DmaToNpu/dma_to_npu_issue_token.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ module {
memref.global "public" @toMem : memref<16xi32>
memref.global "public" @fromMem : memref<16xi32>
aiex.runtime_sequence(%arg0: memref<16xi32>, %arg1: memref<16xi32>) {
aiex.npu.dma_memcpy_nd (0, 0, %arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64, issue_token = true } : memref<16xi32>
aiex.npu.dma_memcpy_nd (0, 1, %arg1[0, 0, 0, 16][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @fromMem, id = 0 : i64, issue_token = false } : memref<16xi32>
aiex.npu.dma_memcpy_nd (%arg0[0, 0, 0, 0][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @toMem, id = 1 : i64, issue_token = true } : memref<16xi32>
aiex.npu.dma_memcpy_nd (%arg1[0, 0, 0, 16][1, 1, 16, 16][0, 0, 64, 1]) { metadata = @fromMem, id = 0 : i64, issue_token = false } : memref<16xi32>
}
aie.shim_dma_allocation @fromMem (MM2S, 0, 0)
aie.shim_dma_allocation @toMem (S2MM, 0, 0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ module @shimDmaMemcpy{
aie.device(xcve2302) {
memref.global "public" @toMem : memref<65536xbf16>
aiex.runtime_sequence(%arg0: memref<65536xbf16>, %arg1: memref<65536xbf16>, %arg2: memref<65536xbf16>) {
aiex.npu.dma_memcpy_nd (2, 0, %arg0[0, 0, 0, 0][4, 4, 64, 64][0, 64, 256, 1]) {id = 0 : i64, metadata = @toMem} : memref<65536xbf16>
aiex.npu.dma_memcpy_nd (%arg0[0, 0, 0, 0][4, 4, 64, 64][0, 64, 256, 1]) {id = 0 : i64, metadata = @toMem} : memref<65536xbf16>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
}
aie.shim_dma_allocation @toMem (S2MM, 0, 2)
Expand Down
4 changes: 2 additions & 2 deletions test/Targets/AIETargetHSA/input_with_addresses.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ module {
aie.shim_dma_allocation @out0(S2MM, 0, 6)

aiex.runtime_sequence(%arg0: memref<64xi32>, %arg1: memref<32xi32>, %arg2: memref<64xi32>) {
aiex.npu.dma_memcpy_nd (0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0, 1]) {id = 0 : i64, metadata = @out0} : memref<64xi32>
aiex.npu.dma_memcpy_nd (0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0, 1]) {id = 1 : i64, metadata = @in0} : memref<64xi32>
aiex.npu.dma_memcpy_nd (%arg2[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0, 1]) {id = 0 : i64, metadata = @out0} : memref<64xi32>
aiex.npu.dma_memcpy_nd (%arg0[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0, 1]) {id = 1 : i64, metadata = @in0} : memref<64xi32>
aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32}
}
}
Expand Down
2 changes: 1 addition & 1 deletion test/Targets/NPU/npu_dma_memcpy.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ module {
aie.shim_dma_allocation @airMemcpyId12(MM2S, 0, 0)
memref.global "public" @airMemcpyId12 : memref<1x2x1x32x32xi32, 1 : i32>
aiex.runtime_sequence (%arg0: memref<2x64x64xi32>, %arg1: memref<2x64x64xi32>, %arg2: memref<2x64x64xi32>) {
aiex.npu.dma_memcpy_nd(0, 0, %arg0[1, 0, 0, 0][1, 2, 32, 32][4096, 2048, 64, 1]) {id = 0 : i64, metadata = @airMemcpyId12} : memref<2x64x64xi32>
aiex.npu.dma_memcpy_nd(%arg0[1, 0, 0, 0][1, 2, 32, 32][4096, 2048, 64, 1]) {id = 0 : i64, metadata = @airMemcpyId12} : memref<2x64x64xi32>
}
}
}
32 changes: 16 additions & 16 deletions test/dialect/AIEX/bad_npu_nd.mlir
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ module {
%c1920 = arith.constant 1920 : i64
%c1080 = arith.constant 1080 : i64
// expected-error@+1 {{Size 0 exceeds the [0:1023] range}}
aiex.npu.dma_memcpy_nd (0, 0, %in[%c0,%c0,%c0,%c0][%c1,%c1,%c1080,%c1920][%c0,%c0,%c1920,%c1]) { metadata = @of_fromMem, id = 0 : i64 } : memref<1920x1080xi32>
aiex.npu.dma_memcpy_nd (%in[%c0,%c0,%c0,%c0][%c1,%c1,%c1080,%c1920][%c0,%c0,%c1920,%c1]) { metadata = @of_fromMem, id = 0 : i64 } : memref<1920x1080xi32>
}
aie.shim_dma_allocation @of_fromMem (MM2S, 0, 0)
}
Expand All @@ -39,7 +39,7 @@ module {
%c32 = arith.constant 32 : i64
%c128 = arith.constant 128 : i64
// expected-error@+1 {{Size 3 exceeds the [1:64] range}}
aiex.npu.dma_memcpy_nd (0, 0, %in[%c0,%c0,%c0,%c0][%c128,%c2,%c2,%c8][%c0,%c16,%c8,%c1]) { metadata = @of_fromMem, id = 0 : i64 } : memref<128x4x2x8xi32>
aiex.npu.dma_memcpy_nd (%in[%c0,%c0,%c0,%c0][%c128,%c2,%c2,%c8][%c0,%c16,%c8,%c1]) { metadata = @of_fromMem, id = 0 : i64 } : memref<128x4x2x8xi32>
}
aie.shim_dma_allocation @of_fromMem (MM2S, 0, 0)
}
Expand All @@ -55,7 +55,7 @@ module {
%c2 = arith.constant 2 : i64
%c2097152 = arith.constant 2097152 : i64
// expected-error@+1 {{Stride 1 exceeds the [1:1048576] range}}
aiex.npu.dma_memcpy_nd (0, 0, %in[%c0,%c0,%c0,%c0][%c1,%c1,%c2,%c2][%c0,%c0,%c2097152,%c1]) { metadata = @of_fromMem, id = 0 : i64 } : memref<8388608xi32>
aiex.npu.dma_memcpy_nd (%in[%c0,%c0,%c0,%c0][%c1,%c1,%c2,%c2][%c0,%c0,%c2097152,%c1]) { metadata = @of_fromMem, id = 0 : i64 } : memref<8388608xi32>
}
aie.shim_dma_allocation @of_fromMem (MM2S, 0, 0)
}
Expand All @@ -73,7 +73,7 @@ module {
%c2 = arith.constant 2 : i64
%c8 = arith.constant 8 : i64
// expected-error@+1 {{Offset must be 4-byte-aligned}}
aiex.npu.dma_memcpy_nd (0, 0, %a[%c0,%c0,%c0,%c1][%c1,%c1,%c1,%c8][%c0,%c0,%c1,%c1]) { metadata = @fifo, id = 0 : i64 } : memref<8xi8>
aiex.npu.dma_memcpy_nd (%a[%c0,%c0,%c0,%c1][%c1,%c1,%c1,%c8][%c0,%c0,%c1,%c1]) { metadata = @fifo, id = 0 : i64 } : memref<8xi8>
}
aie.shim_dma_allocation @fifo (MM2S, 0, 0)
}
Expand All @@ -95,7 +95,7 @@ module {
%c2048 = arith.constant 2048 : i64
// Although 2048 exceeds the 0:1023 limit for size 0, since the elements are i8s,
// this should be a size of 512 in address granularity (4 bytes) and hence pass the test.
aiex.npu.dma_memcpy_nd (0, 0, %a[%c0,%c0,%c0,%c0][%c1,%c1,%c2,%c2048][%c0,%c0,%c4,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi8>
aiex.npu.dma_memcpy_nd (%a[%c0,%c0,%c0,%c0][%c1,%c1,%c2,%c2048][%c0,%c0,%c4,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi8>
}
aie.shim_dma_allocation @objectfifo (MM2S, 0, 0)
}
Expand All @@ -113,7 +113,7 @@ module {
%c8 = arith.constant 8 : i64
%c2048 = arith.constant 2048 : i64
// expected-error@+1 {{Size 0 exceeds the [0:1023] range}}
aiex.npu.dma_memcpy_nd (0, 0, %a[%c0,%c0,%c0,%c0][%c1,%c1,%c2,%c2048][%c0,%c0,%c4,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi16>
aiex.npu.dma_memcpy_nd (%a[%c0,%c0,%c0,%c0][%c1,%c1,%c2,%c2048][%c0,%c0,%c4,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi16>
}
aie.shim_dma_allocation @objectfifo (MM2S, 0, 0)
}
Expand All @@ -132,7 +132,7 @@ module {
%c2 = arith.constant 2 : i64 // Stride of 2 i8s = 2 bytes < 4 byte granularity, should not be possible
%c8 = arith.constant 8 : i64
// expected-error@+1 {{Stride 1 is 2 elements * 1 bytes = 2 bytes, which is not divisible by 4}}
aiex.npu.dma_memcpy_nd (0, 0, %a[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c8][%c0,%c0,%c2,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi8>
aiex.npu.dma_memcpy_nd (%a[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c8][%c0,%c0,%c2,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi8>
}
aie.shim_dma_allocation @objectfifo (MM2S, 0, 0)
}
Expand All @@ -149,7 +149,7 @@ module {
%c4 = arith.constant 4 : i64
%c8 = arith.constant 8 : i64
// expected-error@+1 {{2 elements at 1 bytes each equal 2 bytes, which is not divisible by 4}}
aiex.npu.dma_memcpy_nd (0, 0, %a[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c2][%c0,%c0,%c4,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi8>
aiex.npu.dma_memcpy_nd (%a[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c2][%c0,%c0,%c4,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi8>
}
aie.shim_dma_allocation @objectfifo (MM2S, 0, 0)
}
Expand All @@ -168,7 +168,7 @@ module {
%c4 = arith.constant 4 : i64
%c8 = arith.constant 8 : i64
// expected-error@+1 {{Stride 0 is 2 elements * 1 bytes = 2 bytes, which is not divisible by 4}}
aiex.npu.dma_memcpy_nd (0, 0, %a[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c8][%c0,%c0,%c0,%c2]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi8>
aiex.npu.dma_memcpy_nd (%a[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c8][%c0,%c0,%c0,%c2]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi8>
}
aie.shim_dma_allocation @objectfifo (MM2S, 0, 0)
}
Expand All @@ -186,7 +186,7 @@ module {
%c3 = arith.constant 3 : i64
%c8 = arith.constant 8 : i64
// expected-error@+1 {{3 elements at 2 bytes each equal 6 bytes, which is not divisible by 4}}
aiex.npu.dma_memcpy_nd (0, 0, %a[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c3][%c0,%c0,%c0,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi16>
aiex.npu.dma_memcpy_nd (%a[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c3][%c0,%c0,%c0,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi16>
}
aie.shim_dma_allocation @objectfifo (MM2S, 0, 0)
}
Expand All @@ -204,7 +204,7 @@ module {
%c4 = arith.constant 4 : i64
%c8 = arith.constant 8 : i64
// expected-error@+1 {{Unsupported tile type at (0, 0) Must be ShimNOC, Mem or Core.}}
aiex.npu.dma_memcpy_nd (0, 0, %a[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c4][%c0,%c0,%c0,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi16>
aiex.npu.dma_memcpy_nd (%a[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c4][%c0,%c0,%c0,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi16>
}
aie.shim_dma_allocation @objectfifo (MM2S, 0, 0)
}
Expand All @@ -223,9 +223,9 @@ module {
%c3 = arith.constant 3 : i64
%c8 = arith.constant 8 : i64
%c1572864 = arith.constant 1572864 : i64
aiex.npu.dma_memcpy_nd (0, 0, %a[%c1,%c0,%c0,%c0][%c1,%c1,%c1,%c2][%c1572864,%c0,%c0,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi32>
aiex.npu.dma_memcpy_nd (%a[%c1,%c0,%c0,%c0][%c1,%c1,%c1,%c2][%c1572864,%c0,%c0,%c1]) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi32>
// expected-error@+1 {{Stride 3 exceeds the [1:1048576] range.}}
aiex.npu.dma_memcpy_nd (0, 0, %a[%c1,%c0,%c0,%c0][%c2,%c1,%c1,%c2][%c1572864,%c0,%c0,%c1]) { metadata = @objectfifo, id = 1 : i64 } : memref<8xi32>
aiex.npu.dma_memcpy_nd (%a[%c1,%c0,%c0,%c0][%c2,%c1,%c1,%c2][%c1572864,%c0,%c0,%c1]) { metadata = @objectfifo, id = 1 : i64 } : memref<8xi32>
}
aie.shim_dma_allocation @objectfifo (MM2S, 0, 0)
}
Expand All @@ -243,9 +243,9 @@ module {
%c2 = arith.constant 2 : i64
%c3 = arith.constant 3 : i64
%c8 = arith.constant 8 : i64
aiex.npu.dma_memcpy_nd (0, 0, %a[%c1,%c0,%c0,%c0][%c1,%c1,%c1,%c2][%c0,%c0,%c0,%c1], packet = <pkt_id = 31, pkt_type = 7>) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi32>
aiex.npu.dma_memcpy_nd (%a[%c1,%c0,%c0,%c0][%c1,%c1,%c1,%c2][%c0,%c0,%c0,%c1], packet = <pkt_id = 31, pkt_type = 7>) { metadata = @objectfifo, id = 0 : i64 } : memref<8xi32>
// expected-error@+1 {{Packet ID field can only hold 5 bits.}}
aiex.npu.dma_memcpy_nd (0, 0, %a[%c1,%c0,%c0,%c0][%c2,%c1,%c1,%c2][%c0,%c0,%c0,%c1], packet = <pkt_id = 32, pkt_type = 2>) { metadata = @objectfifo, id = 1 : i64 } : memref<8xi32>
aiex.npu.dma_memcpy_nd (%a[%c1,%c0,%c0,%c0][%c2,%c1,%c1,%c2][%c0,%c0,%c0,%c1], packet = <pkt_id = 32, pkt_type = 2>) { metadata = @objectfifo, id = 1 : i64 } : memref<8xi32>
}
aie.shim_dma_allocation @objectfifo (MM2S, 0, 0)
}
Expand All @@ -264,7 +264,7 @@ module {
%c3 = arith.constant 3 : i64
%c8 = arith.constant 8 : i64
// expected-error@+1 {{Packet type field can only hold 3 bits.}}
aiex.npu.dma_memcpy_nd (0, 0, %a[%c1,%c0,%c0,%c0][%c2,%c1,%c1,%c2][%c0,%c0,%c0,%c1], packet = <pkt_id = 2, pkt_type = 8>) { metadata = @objectfifo, id = 1 : i64 } : memref<8xi32>
aiex.npu.dma_memcpy_nd (%a[%c1,%c0,%c0,%c0][%c2,%c1,%c1,%c2][%c0,%c0,%c0,%c1], packet = <pkt_id = 2, pkt_type = 8>) { metadata = @objectfifo, id = 1 : i64 } : memref<8xi32>
}
aie.shim_dma_allocation @objectfifo (MM2S, 0, 0)
}
Expand Down
Loading
Loading