From 932eea6de7bd2ae765aa535b60404fafafa815ee Mon Sep 17 00:00:00 2001 From: AndraBisca Date: Thu, 7 Mar 2024 06:08:54 -0700 Subject: [PATCH 01/13] Add generation of ipu sync --- lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp | 27 +++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp b/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp index 8c57a35343..8273050c67 100644 --- a/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp +++ b/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp @@ -332,6 +332,30 @@ struct DmaToIpuPattern : OpConversionPattern { } }; +void insertIpuSyncOpForResults(AIE::DeviceOp device) { + device.walk([&](mlir::func::FuncOp f) { + SmallVector dmas; + f.walk([&](AIEX::IpuDmaMemcpyNdOp dma) { dmas.push_back(dma); }); + for (auto dma : dmas) { + if (auto infoOp = getAllocOpForSymbol(device, dma.getMetadata())) { + if (infoOp->getChannelDir() == AIE::DMAChannelDir::S2MM) { + // Found dma op copying results to host + OpBuilder builder(dma); + auto col = builder.getI32IntegerAttr(infoOp->getCol()); + auto row = builder.getI32IntegerAttr(0); + auto dir = builder.getI32IntegerAttr(0); + auto chan = builder.getI32IntegerAttr(infoOp->getChannelIndex()); + auto col_num = builder.getI32IntegerAttr(1); + auto row_num = builder.getI32IntegerAttr(1); + builder.setInsertionPointAfter(dma); + builder.create(dma->getLoc(), col, row, dir, chan, + col_num, row_num); + } + } + } + }); +} + struct AIEDmaToIpuPass : AIEDmaToIpuBase { void runOnOperation() override { @@ -350,6 +374,9 @@ struct AIEDmaToIpuPass : AIEDmaToIpuBase { patterns.insert(&getContext()); patterns.insert(&getContext()); + // Insert sync op after copying data out to host + insertIpuSyncOpForResults(device); + if (failed(applyPartialConversion(device, target, std::move(patterns)))) signalPassFailure(); } From f22379805aad024a7eaf9187b61d52a3916cd736 Mon Sep 17 00:00:00 2001 From: AndraBisca Date: Fri, 8 Mar 2024 07:46:54 -0700 Subject: [PATCH 02/13] Move ipu_sync 
generation to AIEDmaToIpu and update tests --- lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp | 4 ++- .../ipu-xrt/add_one_objFifo/aie2.py | 1 - .../ipu-xrt/log_hello_world/hello_world.py | 1 - .../ipu-xrt/matrix_multiplication/aie2.py | 1 - .../matrix_multiplication_array/aie2.py | 2 -- .../matrix_multiplication_column/aie2.py | 1 - .../matrix_vector_multiplication/aie2.py | 2 -- .../ipu-xrt/passthrough_hardware/aie2.py | 1 - .../ipu-xrt/vector_scalar/aie2.py | 1 - .../color_detect/aie2_colorDetect.py | 1 - .../color_threshold/aie2_colorThreshold.py | 1 - .../edge_detect/aie2_edgeDetect.py | 1 - .../vision_pipelines/passthrough/aie2.py | 1 - .../aie.mlir | 1 - test/ipu-xrt/add_314_using_dma_op/aie.mlir | 1 - test/ipu-xrt/add_one_objFifo/aie.mlir | 1 - test/ipu-xrt/add_one_using_dma/aie.mlir | 1 - test/ipu-xrt/cascade_flows/aie.mlir | 1 - ...dd_256_using_dma_op_no_double_buffering.py | 3 -- test/ipu-xrt/e2e/test_locks.py | 24 --------------- test/ipu-xrt/e2e/test_manual_dpu_args.py | 3 -- .../ipu-xrt/e2e/test_nonsquare_matrix_mult.py | 20 ------------- .../test_nonsquare_matrix_mult_vectorized.py | 20 ------------- .../ipu-xrt/e2e/test_offsets_sizes_strides.py | 5 ---- test/ipu-xrt/e2e/test_repeat_count.py | 20 ------------- .../e2e/test_shared_buffers_init_value.py | 8 ----- test/ipu-xrt/e2e/test_square_matrix_mult.py | 20 ------------- .../e2e/test_square_matrix_mult_vectorized.py | 20 ------------- test/ipu-xrt/e2e/test_tiled_matrix_add.py | 10 ------- ...iled_nonsquare_spatial_tile_matrix_mult.py | 3 -- .../test_tiled_nonsquare_tile_matrix_mult.py | 10 ------- ...d_nonsquare_tile_matrix_mult_vectorized.py | 30 ------------------- test/ipu-xrt/e2e/test_tiled_vec_add.py | 20 ------------- .../e2e/test_tiled_vec_add_vectorized.py | 20 ------------- test/ipu-xrt/e2e/test_vec_dot.py | 20 ------------- .../matrix_multiplication_using_dma/aie.mlir | 1 - test/ipu-xrt/two_col/aie.mlir | 1 - test/ipu-xrt/vector_scalar_using_dma/aie.mlir | 1 - 38 files changed, 3
insertions(+), 279 deletions(-) diff --git a/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp b/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp index 8273050c67..3a775175ef 100644 --- a/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp +++ b/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp @@ -335,6 +335,8 @@ struct DmaToIpuPattern : OpConversionPattern { void insertIpuSyncOpForResults(AIE::DeviceOp device) { device.walk([&](mlir::func::FuncOp f) { SmallVector dmas; + Operation* returnOp = nullptr; + f.walk([&](mlir::func::ReturnOp op) { returnOp = op.getOperation(); }); f.walk([&](AIEX::IpuDmaMemcpyNdOp dma) { dmas.push_back(dma); }); for (auto dma : dmas) { if (auto infoOp = getAllocOpForSymbol(device, dma.getMetadata())) { @@ -347,7 +349,7 @@ void insertIpuSyncOpForResults(AIE::DeviceOp device) { auto chan = builder.getI32IntegerAttr(infoOp->getChannelIndex()); auto col_num = builder.getI32IntegerAttr(1); auto row_num = builder.getI32IntegerAttr(1); - builder.setInsertionPointAfter(dma); + builder.setInsertionPoint(returnOp); builder.create(dma->getLoc(), col, row, dir, chan, col_num, row_num); } diff --git a/reference_designs/ipu-xrt/add_one_objFifo/aie2.py b/reference_designs/ipu-xrt/add_one_objFifo/aie2.py index dc6d37ff96..843d4fe1e9 100644 --- a/reference_designs/ipu-xrt/add_one_objFifo/aie2.py +++ b/reference_designs/ipu-xrt/add_one_objFifo/aie2.py @@ -67,7 +67,6 @@ def sequence(inTensor, notUsed, outTensor): ipu_dma_memcpy_nd( metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 64] ) - ipu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/reference_designs/ipu-xrt/log_hello_world/hello_world.py b/reference_designs/ipu-xrt/log_hello_world/hello_world.py index b017d110b7..9ff55d4ce2 100644 --- a/reference_designs/ipu-xrt/log_hello_world/hello_world.py +++ b/reference_designs/ipu-xrt/log_hello_world/hello_world.py @@ -56,7 +56,6 @@ def sequence(in_mem, out_mem, logout): ipu_dma_memcpy_nd( metadata="logoutOF", bd_id=2, mem=logout, sizes=[1, 1, 1, N] 
) - ipu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/reference_designs/ipu-xrt/matrix_multiplication/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication/aie2.py index 5e9dbeab9c..221b2e7cca 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication/aie2.py @@ -297,7 +297,6 @@ def sequence(A, B, C): strides=[n_in_i32s, k_x_N_in_i32s, N_in_i32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py index f2993bfa99..e4d9bda2b8 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication_array/aie2.py @@ -354,8 +354,6 @@ def sequence(A, B, C): sizes=[N_div_n_div_n_cols, K_div_k, k, n_in_i32s], strides=[n_x_n_cols_in_i32s, k_x_N_in_i32s, N_in_i32s], ) - for i in range(n_cols): - ipu_sync(column=i, row=0, direction=0, channel=0) # print(ctx.module.operation.verify()) print(ctx.module) diff --git a/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py index 5838dc598b..5b46311962 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py @@ -246,7 +246,6 @@ def sequence(A, B, C): strides=[n_in_i32s, k_x_N_in_i32s, N_in_i32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py b/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py index fb4a8e97e8..387c1efb94 100644 --- a/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py +++ b/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py @@ -204,8 +204,6 @@ def sequence(A, B, C): strides=[0, 0, 0], ) - for i in 
range(n_cores): - ipu_sync(column=i, row=0, direction=0, channel=0) print(ctx.module) diff --git a/reference_designs/ipu-xrt/passthrough_hardware/aie2.py b/reference_designs/ipu-xrt/passthrough_hardware/aie2.py index cf2e665af4..dea568bc23 100755 --- a/reference_designs/ipu-xrt/passthrough_hardware/aie2.py +++ b/reference_designs/ipu-xrt/passthrough_hardware/aie2.py @@ -52,7 +52,6 @@ def core_body(): def sequence(A, B, C): ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/reference_designs/ipu-xrt/vector_scalar/aie2.py b/reference_designs/ipu-xrt/vector_scalar/aie2.py index 851eae41cc..4658505db3 100755 --- a/reference_designs/ipu-xrt/vector_scalar/aie2.py +++ b/reference_designs/ipu-xrt/vector_scalar/aie2.py @@ -61,7 +61,6 @@ def core_body(): def sequence(A, B, C): ipu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, N]) ipu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, N]) - ipu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py b/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py index a14dc5f0b8..9b251445de 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py +++ b/reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py @@ -266,7 +266,6 @@ def sequence(I, B, O): mem=O, sizes=[1, 1, 1, height * lineWidthInInt32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py b/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py index 6a49466b64..baee92c9fa 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py +++ 
b/reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py @@ -284,7 +284,6 @@ def sequence(inTensor, notUsed, outTensor): mem=outTensor, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) # print(ctx.module.operation.verify()) print(ctx.module) diff --git a/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py b/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py index dd481cfc65..06be836dff 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py +++ b/reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py @@ -312,7 +312,6 @@ def sequence(I, B, O): mem=I, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) # print(ctx.module.operation.verify()) print(ctx.module) diff --git a/reference_designs/ipu-xrt/vision_pipelines/passthrough/aie2.py b/reference_designs/ipu-xrt/vision_pipelines/passthrough/aie2.py index 5422f803d1..d811748d04 100644 --- a/reference_designs/ipu-xrt/vision_pipelines/passthrough/aie2.py +++ b/reference_designs/ipu-xrt/vision_pipelines/passthrough/aie2.py @@ -165,7 +165,6 @@ def sequence(inTensor, notUsed, outTensor): mem=outTensor, sizes=[1, 1, 1, tensorSizeInInt32s], ) - ipu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/test/ipu-xrt/add_256_using_dma_op_no_double_buffering/aie.mlir b/test/ipu-xrt/add_256_using_dma_op_no_double_buffering/aie.mlir index e44add4a05..60c34af00d 100644 --- a/test/ipu-xrt/add_256_using_dma_op_no_double_buffering/aie.mlir +++ b/test/ipu-xrt/add_256_using_dma_op_no_double_buffering/aie.mlir @@ -102,7 +102,6 @@ module { func.func @bobsyouruncle(%arg0: memref<64xi32>, %arg1: memref<32xi32>, %arg2: memref<64xi32>) { aiex.ipu.dma_memcpy_nd(0, 0, %arg0[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0]) {id = 0 : i64, metadata = @this_just_creates_a_symbol_and_the_type_means_nothing_in} : memref<64xi32> 
aiex.ipu.dma_memcpy_nd(0, 0, %arg2[0, 0, 0, 0][1, 1, 1, 64][0, 0, 0]) {id = 1 : i64, metadata = @this_just_creates_a_symbol_and_the_type_means_nothing_out} : memref<64xi32> - aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } } diff --git a/test/ipu-xrt/add_314_using_dma_op/aie.mlir b/test/ipu-xrt/add_314_using_dma_op/aie.mlir index 646f263804..f8e382bbe8 100644 --- a/test/ipu-xrt/add_314_using_dma_op/aie.mlir +++ b/test/ipu-xrt/add_314_using_dma_op/aie.mlir @@ -67,7 +67,6 @@ module { %c64_i64 = arith.constant 64 : i64 aiex.ipu.dma_memcpy_nd(0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<64xi32> aiex.ipu.dma_memcpy_nd(0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64][%c1_i64, %c1_i64, %c1_i64, %c64_i64][%c0_i64, %c0_i64, %c0_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64xi32> - aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } diff --git a/test/ipu-xrt/add_one_objFifo/aie.mlir b/test/ipu-xrt/add_one_objFifo/aie.mlir index 3b55edb0d7..89666ce480 100644 --- a/test/ipu-xrt/add_one_objFifo/aie.mlir +++ b/test/ipu-xrt/add_one_objFifo/aie.mlir @@ -46,7 +46,6 @@ module { %c64 = arith.constant 64 : i64 aiex.ipu.dma_memcpy_nd (0, 0, %out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<64xi32> aiex.ipu.dma_memcpy_nd (0, 0, %in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0]) { metadata = @objFifo_in0, id = 0 : i64 } : memref<64xi32> - aiex.ipu.sync { column = 0 : i32, row = 0 : i32, direction = 0 : i32, channel = 0 : i32, column_num = 1 : i32, row_num = 1 : i32 } return } } diff --git a/test/ipu-xrt/add_one_using_dma/aie.mlir b/test/ipu-xrt/add_one_using_dma/aie.mlir index 058ae034bc..003249ed9f 100644 --- 
a/test/ipu-xrt/add_one_using_dma/aie.mlir +++ b/test/ipu-xrt/add_one_using_dma/aie.mlir @@ -78,7 +78,6 @@ module { %c64_i64 = arith.constant 64 : i64 aiex.ipu.dma_memcpy_nd(0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64] [%c1_i64, %c1_i64, %c1_i64, %c64_i64] [%c0_i64, %c0_i64, %c0_i64]) {id = 0 : i64, metadata = @objFifo_in0} : memref<64xi32> aiex.ipu.dma_memcpy_nd(0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64] [%c1_i64, %c1_i64, %c1_i64, %c64_i64] [%c0_i64, %c0_i64, %c0_i64]) {id = 1 : i64, metadata = @objFifo_out0} : memref<64xi32> - aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } diff --git a/test/ipu-xrt/cascade_flows/aie.mlir b/test/ipu-xrt/cascade_flows/aie.mlir index 967c3ecedc..59a13f76bc 100644 --- a/test/ipu-xrt/cascade_flows/aie.mlir +++ b/test/ipu-xrt/cascade_flows/aie.mlir @@ -62,7 +62,6 @@ module { %c64 = arith.constant 64 : i64 aiex.ipu.dma_memcpy_nd (0, 0, %out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<64xi32> aiex.ipu.dma_memcpy_nd (0, 0, %in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c64][%c0,%c0,%c0]) { metadata = @objFifo_in0, id = 0 : i64 } : memref<64xi32> - aiex.ipu.sync { column = 0 : i32, row = 0 : i32, direction = 0 : i32, channel = 0 : i32, column_num = 1 : i32, row_num = 1 : i32 } return } } diff --git a/test/ipu-xrt/e2e/test_add_256_using_dma_op_no_double_buffering.py b/test/ipu-xrt/e2e/test_add_256_using_dma_op_no_double_buffering.py index 8af3ee9cf5..aa877496a5 100644 --- a/test/ipu-xrt/e2e/test_add_256_using_dma_op_no_double_buffering.py +++ b/test/ipu-xrt/e2e/test_add_256_using_dma_op_no_double_buffering.py @@ -132,9 +132,6 @@ def bobsyouruncle( [0, 0, 0], ) - aiex.ipu_sync( - channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_locks.py b/test/ipu-xrt/e2e/test_locks.py index 3f50bf1da6..6fc1512696 
100644 --- a/test/ipu-xrt/e2e/test_locks.py +++ b/test/ipu-xrt/e2e/test_locks.py @@ -129,14 +129,6 @@ def memtile_dma(): bd_id=bd_id, ) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=flow_to_shim.dest_channel, - column=column, - direction=0, - row=0, - ) - ) compile_without_vectorization(ctx.module, workdir) xclbin_path = make_xclbin(ctx.module, workdir) @@ -266,14 +258,6 @@ def memtile_dma(): bd_id=bd_id, ) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=flow_to_shim.dest_channel, - column=shim_tile_column, - direction=0, - row=0, - ) - ) compile_without_vectorization(ctx.module, workdir) xclbin_path = make_xclbin(ctx.module, workdir) @@ -424,14 +408,6 @@ def memtile_dma(): bd_id=bd_id, ) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=flow_to_shim.dest_channel, - column=shim_tile_column, - direction=0, - row=0, - ) - ) compile_without_vectorization(ctx.module, workdir) xclbin_path = make_xclbin(ctx.module, workdir) diff --git a/test/ipu-xrt/e2e/test_manual_dpu_args.py b/test/ipu-xrt/e2e/test_manual_dpu_args.py index 3016384071..ef4109435d 100644 --- a/test/ipu-xrt/e2e/test_manual_dpu_args.py +++ b/test/ipu-xrt/e2e/test_manual_dpu_args.py @@ -135,7 +135,6 @@ def dma6(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend(aiex.ipu.sync(column=col)) xclbin.load_ipu_instructions(ipu_insts) @@ -243,7 +242,6 @@ def dma6(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend(aiex.ipu.sync(column=col)) xclbin.load_ipu_instructions(ipu_insts) @@ -326,7 +324,6 @@ def dma3(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend(aiex.ipu.sync(column=col)) xclbin.load_ipu_instructions(ipu_insts) diff --git a/test/ipu-xrt/e2e/test_nonsquare_matrix_mult.py b/test/ipu-xrt/e2e/test_nonsquare_matrix_mult.py index 20c5998709..482b9ad108 100644 --- a/test/ipu-xrt/e2e/test_nonsquare_matrix_mult.py +++ 
b/test/ipu-xrt/e2e/test_nonsquare_matrix_mult.py @@ -131,16 +131,6 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -349,16 +339,6 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_nonsquare_matrix_mult_vectorized.py b/test/ipu-xrt/e2e/test_nonsquare_matrix_mult_vectorized.py index ae1079fd4e..3897ed1271 100644 --- a/test/ipu-xrt/e2e/test_nonsquare_matrix_mult_vectorized.py +++ b/test/ipu-xrt/e2e/test_nonsquare_matrix_mult_vectorized.py @@ -152,16 +152,6 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -442,16 +432,6 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_offsets_sizes_strides.py b/test/ipu-xrt/e2e/test_offsets_sizes_strides.py index 1262d59bf3..6d4ef84d38 100644 --- a/test/ipu-xrt/e2e/test_offsets_sizes_strides.py +++ b/test/ipu-xrt/e2e/test_offsets_sizes_strides.py @@ -160,11 +160,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 - ) - ) 
@aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_repeat_count.py b/test/ipu-xrt/e2e/test_repeat_count.py index c769770283..b60cf3e2e6 100644 --- a/test/ipu-xrt/e2e/test_repeat_count.py +++ b/test/ipu-xrt/e2e/test_repeat_count.py @@ -121,16 +121,6 @@ def dma6(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=col, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) assert ctx.module.operation.verify() @@ -226,16 +216,6 @@ def dma3(): S2MM, channel_index, col, bd_id, repeats=iters - 1 ) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=col, - column_num=1, - direction=S2MM, - row=0, - row_num=1, - ) - ) xclbin.load_ipu_instructions(ipu_insts) diff --git a/test/ipu-xrt/e2e/test_shared_buffers_init_value.py b/test/ipu-xrt/e2e/test_shared_buffers_init_value.py index 461031ebb3..825dd3a665 100644 --- a/test/ipu-xrt/e2e/test_shared_buffers_init_value.py +++ b/test/ipu-xrt/e2e/test_shared_buffers_init_value.py @@ -187,14 +187,6 @@ def memtile_dma(): bd_id=bd_id, ) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=flow_to_shim.dest_channel, - column=shim_tile_column, - direction=S2MM, - row=0, - ) - ) compile_without_vectorization(ctx.module, workdir) xclbin_path = make_xclbin(ctx.module, workdir) diff --git a/test/ipu-xrt/e2e/test_square_matrix_mult.py b/test/ipu-xrt/e2e/test_square_matrix_mult.py index 6f746fc490..80dbbc9923 100644 --- a/test/ipu-xrt/e2e/test_square_matrix_mult.py +++ b/test/ipu-xrt/e2e/test_square_matrix_mult.py @@ -131,16 +131,6 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -339,16 +329,6 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) 
- ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_square_matrix_mult_vectorized.py b/test/ipu-xrt/e2e/test_square_matrix_mult_vectorized.py index b11e4463f8..933de57843 100644 --- a/test/ipu-xrt/e2e/test_square_matrix_mult_vectorized.py +++ b/test/ipu-xrt/e2e/test_square_matrix_mult_vectorized.py @@ -153,16 +153,6 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -441,16 +431,6 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_tiled_matrix_add.py b/test/ipu-xrt/e2e/test_tiled_matrix_add.py index 00755104ed..941e655909 100644 --- a/test/ipu-xrt/e2e/test_tiled_matrix_add.py +++ b/test/ipu-xrt/e2e/test_tiled_matrix_add.py @@ -158,11 +158,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -421,11 +416,6 @@ def ipu(): S2MM, output_c_tile_0_1_to_tile_0_0.dest_channel, col, bd_id ) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 - ) - ) @aie.memtile_dma(mem_tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_tiled_nonsquare_spatial_tile_matrix_mult.py b/test/ipu-xrt/e2e/test_tiled_nonsquare_spatial_tile_matrix_mult.py index 1b19015d33..3de08ca285 100644 --- 
a/test/ipu-xrt/e2e/test_tiled_nonsquare_spatial_tile_matrix_mult.py +++ b/test/ipu-xrt/e2e/test_tiled_nonsquare_spatial_tile_matrix_mult.py @@ -340,7 +340,6 @@ def memtile_dma_c_1(): # fmt: off for i, (column, channel, bd_id) in enumerate(channels): ipu_insts.extend(shim_tensor_slice(M, N, tile_rows_C, tile_cols_C, offsets[i], column, S2MM, channel, bd_id, 2)) - ipu_insts.extend(aiex.ipu.sync(channel=channel, column=column)) # fmt: on compile_without_vectorization(ctx.module, workdir) @@ -665,7 +664,6 @@ def memtile_dma_c_1(): # fmt: off for i, (column, channel, bd_id) in enumerate(channels): ipu_insts.extend(shim_tensor_slice(M, N, tile_rows_C, tile_cols_C, offsets[i], column, S2MM, channel, bd_id, 2)) - ipu_insts.extend(aiex.ipu.sync(channel=channel, column=column)) # fmt: on mod_aie = mod_aie.finish() @@ -804,7 +802,6 @@ def memtile_dma(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, dest_channel, col, bd_id) ) - ipu_insts.extend(aiex.ipu.sync(column=col)) xclbin.load_ipu_instructions(ipu_insts) wraps = list(map(np.asarray, views)) diff --git a/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult.py b/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult.py index fdce41d8ae..92b3edceb2 100644 --- a/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult.py +++ b/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult.py @@ -209,11 +209,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -503,11 +498,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult_vectorized.py 
b/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult_vectorized.py index 036400fb1a..35a0d26e19 100644 --- a/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult_vectorized.py +++ b/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult_vectorized.py @@ -234,16 +234,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -596,16 +586,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -950,16 +930,6 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.write32(S2MM, channel_index, col, bd_id)) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_tiled_vec_add.py b/test/ipu-xrt/e2e/test_tiled_vec_add.py index ab0cd13769..6106969725 100644 --- a/test/ipu-xrt/e2e/test_tiled_vec_add.py +++ b/test/ipu-xrt/e2e/test_tiled_vec_add.py @@ -141,16 +141,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -364,16 +354,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_tiled_vec_add_vectorized.py 
b/test/ipu-xrt/e2e/test_tiled_vec_add_vectorized.py index d0990390ae..f9de6c4f2f 100644 --- a/test/ipu-xrt/e2e/test_tiled_vec_add_vectorized.py +++ b/test/ipu-xrt/e2e/test_tiled_vec_add_vectorized.py @@ -163,16 +163,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -447,16 +437,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_vec_dot.py b/test/ipu-xrt/e2e/test_vec_dot.py index 7a2012a1d0..a38fa1292c 100644 --- a/test/ipu-xrt/e2e/test_vec_dot.py +++ b/test/ipu-xrt/e2e/test_vec_dot.py @@ -147,16 +147,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -375,16 +365,6 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) - ipu_insts.extend( - aiex.ipu.sync( - channel=0, - column=0, - column_num=1, - direction=0, - row=0, - row_num=1, - ) - ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/matrix_multiplication_using_dma/aie.mlir b/test/ipu-xrt/matrix_multiplication_using_dma/aie.mlir index d54eaa445c..aed99f5dfe 100644 --- a/test/ipu-xrt/matrix_multiplication_using_dma/aie.mlir +++ b/test/ipu-xrt/matrix_multiplication_using_dma/aie.mlir @@ -116,7 +116,6 @@ module { aiex.ipu.dma_memcpy_nd(0, 0, %arg1[%c0_i64, %c0_i64, %c0_i64, %c0_i64] [%c2_i64, %c4_i64, %c32_i64, %c32_i64] [%c32_i64, %c2048_i64, %c64_i64]) {id 
= 2 : i64, metadata = @inB} : memref<8192xi32> aiex.ipu.dma_memcpy_nd(0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c4096_i64] [%c2_i64, %c4_i64, %c64_i64, %c16_i64] [%c0_i64, %c16_i64, %c64_i64]) {id = 3 : i64, metadata = @inA} : memref<8192xi32> aiex.ipu.dma_memcpy_nd(0, 0, %arg1[%c0_i64, %c0_i64, %c0_i64, %c0_i64] [%c2_i64, %c4_i64, %c32_i64, %c32_i64] [%c32_i64, %c2048_i64, %c64_i64]) {id = 4 : i64, metadata = @inB} : memref<8192xi32> - aiex.ipu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } diff --git a/test/ipu-xrt/two_col/aie.mlir b/test/ipu-xrt/two_col/aie.mlir index 10975fd06b..7b8214793b 100644 --- a/test/ipu-xrt/two_col/aie.mlir +++ b/test/ipu-xrt/two_col/aie.mlir @@ -133,7 +133,6 @@ module { aiex.ipu.rtp_write(1, 5, 1, 0) { buffer_sym_name = "rtp3" } aiex.ipu.dma_memcpy_nd (0, 0, %out[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c2048][%c0,%c0,%c0]) { metadata = @objFifo_out0, id = 1 : i64 } : memref<2048xi32> aiex.ipu.dma_memcpy_nd (0, 0, %in[%c0,%c0,%c0,%c0][%c1,%c1,%c1,%c2048][%c0,%c0,%c0]) { metadata = @objFifo_in0, id = 0 : i64 } : memref<2048xi32> - aiex.ipu.sync { column = 0 : i32, row = 0 : i32, direction = 0 : i32, channel = 0 : i32, column_num = 1 : i32, row_num = 1 : i32 } return } } diff --git a/test/ipu-xrt/vector_scalar_using_dma/aie.mlir b/test/ipu-xrt/vector_scalar_using_dma/aie.mlir index ebdd9aaefb..948316635a 100644 --- a/test/ipu-xrt/vector_scalar_using_dma/aie.mlir +++ b/test/ipu-xrt/vector_scalar_using_dma/aie.mlir @@ -68,7 +68,6 @@ module { %c4096_i64 = arith.constant 4096 : i64 aiex.ipu.dma_memcpy_nd(0, 0, %arg2[%c0_i64, %c0_i64, %c0_i64, %c0_i64] [%c1_i64, %c1_i64, %c1_i64, %c4096_i64] [%c0_i64, %c0_i64, %c0_i64]) {id = 0 : i64, metadata = @out} : memref<4096xi32> aiex.ipu.dma_memcpy_nd(0, 0, %arg0[%c0_i64, %c0_i64, %c0_i64, %c0_i64] [%c1_i64, %c1_i64, %c1_i64, %c4096_i64] [%c0_i64, %c0_i64, %c0_i64]) {id = 1 : i64, metadata = @in} : memref<4096xi32> - aiex.ipu.sync 
{channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } From 9e6f5825664b76ae99ec7b295bd3c936c759bfc5 Mon Sep 17 00:00:00 2001 From: AndraBisca Date: Fri, 8 Mar 2024 08:01:58 -0700 Subject: [PATCH 03/13] Revert e2e changes --- ...dd_256_using_dma_op_no_double_buffering.py | 3 ++ test/ipu-xrt/e2e/test_locks.py | 24 +++++++++++++++ test/ipu-xrt/e2e/test_manual_dpu_args.py | 3 ++ .../ipu-xrt/e2e/test_nonsquare_matrix_mult.py | 20 +++++++++++++ .../test_nonsquare_matrix_mult_vectorized.py | 20 +++++++++++++ .../ipu-xrt/e2e/test_offsets_sizes_strides.py | 5 ++++ test/ipu-xrt/e2e/test_repeat_count.py | 20 +++++++++++++ .../e2e/test_shared_buffers_init_value.py | 8 +++++ test/ipu-xrt/e2e/test_square_matrix_mult.py | 20 +++++++++++++ .../e2e/test_square_matrix_mult_vectorized.py | 20 +++++++++++++ test/ipu-xrt/e2e/test_tiled_matrix_add.py | 10 +++++++ ...iled_nonsquare_spatial_tile_matrix_mult.py | 3 ++ .../test_tiled_nonsquare_tile_matrix_mult.py | 10 +++++++ ...d_nonsquare_tile_matrix_mult_vectorized.py | 30 +++++++++++++++++++ test/ipu-xrt/e2e/test_tiled_vec_add.py | 20 +++++++++++++ .../e2e/test_tiled_vec_add_vectorized.py | 20 +++++++++++++ test/ipu-xrt/e2e/test_vec_dot.py | 20 +++++++++++++ 17 files changed, 256 insertions(+) diff --git a/test/ipu-xrt/e2e/test_add_256_using_dma_op_no_double_buffering.py b/test/ipu-xrt/e2e/test_add_256_using_dma_op_no_double_buffering.py index aa877496a5..8af3ee9cf5 100644 --- a/test/ipu-xrt/e2e/test_add_256_using_dma_op_no_double_buffering.py +++ b/test/ipu-xrt/e2e/test_add_256_using_dma_op_no_double_buffering.py @@ -132,6 +132,9 @@ def bobsyouruncle( [0, 0, 0], ) + aiex.ipu_sync( + channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_locks.py b/test/ipu-xrt/e2e/test_locks.py index 6fc1512696..3f50bf1da6 100644 --- a/test/ipu-xrt/e2e/test_locks.py +++ 
b/test/ipu-xrt/e2e/test_locks.py @@ -129,6 +129,14 @@ def memtile_dma(): bd_id=bd_id, ) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=flow_to_shim.dest_channel, + column=column, + direction=0, + row=0, + ) + ) compile_without_vectorization(ctx.module, workdir) xclbin_path = make_xclbin(ctx.module, workdir) @@ -258,6 +266,14 @@ def memtile_dma(): bd_id=bd_id, ) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=flow_to_shim.dest_channel, + column=shim_tile_column, + direction=0, + row=0, + ) + ) compile_without_vectorization(ctx.module, workdir) xclbin_path = make_xclbin(ctx.module, workdir) @@ -408,6 +424,14 @@ def memtile_dma(): bd_id=bd_id, ) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=flow_to_shim.dest_channel, + column=shim_tile_column, + direction=0, + row=0, + ) + ) compile_without_vectorization(ctx.module, workdir) xclbin_path = make_xclbin(ctx.module, workdir) diff --git a/test/ipu-xrt/e2e/test_manual_dpu_args.py b/test/ipu-xrt/e2e/test_manual_dpu_args.py index ef4109435d..3016384071 100644 --- a/test/ipu-xrt/e2e/test_manual_dpu_args.py +++ b/test/ipu-xrt/e2e/test_manual_dpu_args.py @@ -135,6 +135,7 @@ def dma6(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend(aiex.ipu.sync(column=col)) xclbin.load_ipu_instructions(ipu_insts) @@ -242,6 +243,7 @@ def dma6(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend(aiex.ipu.sync(column=col)) xclbin.load_ipu_instructions(ipu_insts) @@ -324,6 +326,7 @@ def dma3(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend(aiex.ipu.sync(column=col)) xclbin.load_ipu_instructions(ipu_insts) diff --git a/test/ipu-xrt/e2e/test_nonsquare_matrix_mult.py b/test/ipu-xrt/e2e/test_nonsquare_matrix_mult.py index 482b9ad108..20c5998709 100644 --- a/test/ipu-xrt/e2e/test_nonsquare_matrix_mult.py +++ b/test/ipu-xrt/e2e/test_nonsquare_matrix_mult.py @@ -131,6 +131,16 @@ def ipu(): ) 
) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -339,6 +349,16 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_nonsquare_matrix_mult_vectorized.py b/test/ipu-xrt/e2e/test_nonsquare_matrix_mult_vectorized.py index 3897ed1271..ae1079fd4e 100644 --- a/test/ipu-xrt/e2e/test_nonsquare_matrix_mult_vectorized.py +++ b/test/ipu-xrt/e2e/test_nonsquare_matrix_mult_vectorized.py @@ -152,6 +152,16 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -432,6 +442,16 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_offsets_sizes_strides.py b/test/ipu-xrt/e2e/test_offsets_sizes_strides.py index 6d4ef84d38..1262d59bf3 100644 --- a/test/ipu-xrt/e2e/test_offsets_sizes_strides.py +++ b/test/ipu-xrt/e2e/test_offsets_sizes_strides.py @@ -160,6 +160,11 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_repeat_count.py 
b/test/ipu-xrt/e2e/test_repeat_count.py index b60cf3e2e6..c769770283 100644 --- a/test/ipu-xrt/e2e/test_repeat_count.py +++ b/test/ipu-xrt/e2e/test_repeat_count.py @@ -121,6 +121,16 @@ def dma6(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=col, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) assert ctx.module.operation.verify() @@ -216,6 +226,16 @@ def dma3(): S2MM, channel_index, col, bd_id, repeats=iters - 1 ) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=col, + column_num=1, + direction=S2MM, + row=0, + row_num=1, + ) + ) xclbin.load_ipu_instructions(ipu_insts) diff --git a/test/ipu-xrt/e2e/test_shared_buffers_init_value.py b/test/ipu-xrt/e2e/test_shared_buffers_init_value.py index 825dd3a665..461031ebb3 100644 --- a/test/ipu-xrt/e2e/test_shared_buffers_init_value.py +++ b/test/ipu-xrt/e2e/test_shared_buffers_init_value.py @@ -187,6 +187,14 @@ def memtile_dma(): bd_id=bd_id, ) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=flow_to_shim.dest_channel, + column=shim_tile_column, + direction=S2MM, + row=0, + ) + ) compile_without_vectorization(ctx.module, workdir) xclbin_path = make_xclbin(ctx.module, workdir) diff --git a/test/ipu-xrt/e2e/test_square_matrix_mult.py b/test/ipu-xrt/e2e/test_square_matrix_mult.py index 80dbbc9923..6f746fc490 100644 --- a/test/ipu-xrt/e2e/test_square_matrix_mult.py +++ b/test/ipu-xrt/e2e/test_square_matrix_mult.py @@ -131,6 +131,16 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -329,6 +339,16 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + 
row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_square_matrix_mult_vectorized.py b/test/ipu-xrt/e2e/test_square_matrix_mult_vectorized.py index 933de57843..b11e4463f8 100644 --- a/test/ipu-xrt/e2e/test_square_matrix_mult_vectorized.py +++ b/test/ipu-xrt/e2e/test_square_matrix_mult_vectorized.py @@ -153,6 +153,16 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -431,6 +441,16 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id)) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_tiled_matrix_add.py b/test/ipu-xrt/e2e/test_tiled_matrix_add.py index 941e655909..00755104ed 100644 --- a/test/ipu-xrt/e2e/test_tiled_matrix_add.py +++ b/test/ipu-xrt/e2e/test_tiled_matrix_add.py @@ -158,6 +158,11 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -416,6 +421,11 @@ def ipu(): S2MM, output_c_tile_0_1_to_tile_0_0.dest_channel, col, bd_id ) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 + ) + ) @aie.memtile_dma(mem_tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_tiled_nonsquare_spatial_tile_matrix_mult.py b/test/ipu-xrt/e2e/test_tiled_nonsquare_spatial_tile_matrix_mult.py index 3de08ca285..1b19015d33 100644 --- a/test/ipu-xrt/e2e/test_tiled_nonsquare_spatial_tile_matrix_mult.py +++ 
b/test/ipu-xrt/e2e/test_tiled_nonsquare_spatial_tile_matrix_mult.py @@ -340,6 +340,7 @@ def memtile_dma_c_1(): # fmt: off for i, (column, channel, bd_id) in enumerate(channels): ipu_insts.extend(shim_tensor_slice(M, N, tile_rows_C, tile_cols_C, offsets[i], column, S2MM, channel, bd_id, 2)) + ipu_insts.extend(aiex.ipu.sync(channel=channel, column=column)) # fmt: on compile_without_vectorization(ctx.module, workdir) @@ -664,6 +665,7 @@ def memtile_dma_c_1(): # fmt: off for i, (column, channel, bd_id) in enumerate(channels): ipu_insts.extend(shim_tensor_slice(M, N, tile_rows_C, tile_cols_C, offsets[i], column, S2MM, channel, bd_id, 2)) + ipu_insts.extend(aiex.ipu.sync(channel=channel, column=column)) # fmt: on mod_aie = mod_aie.finish() @@ -802,6 +804,7 @@ def memtile_dma(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, dest_channel, col, bd_id) ) + ipu_insts.extend(aiex.ipu.sync(column=col)) xclbin.load_ipu_instructions(ipu_insts) wraps = list(map(np.asarray, views)) diff --git a/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult.py b/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult.py index 92b3edceb2..fdce41d8ae 100644 --- a/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult.py +++ b/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult.py @@ -209,6 +209,11 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -498,6 +503,11 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, column=0, column_num=1, direction=0, row=0, row_num=1 + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult_vectorized.py b/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult_vectorized.py index 
35a0d26e19..036400fb1a 100644 --- a/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult_vectorized.py +++ b/test/ipu-xrt/e2e/test_tiled_nonsquare_tile_matrix_mult_vectorized.py @@ -234,6 +234,16 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -586,6 +596,16 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -930,6 +950,16 @@ def ipu(): ) ) ipu_insts.extend(aiex.ipu.write32(S2MM, channel_index, col, bd_id)) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_tiled_vec_add.py b/test/ipu-xrt/e2e/test_tiled_vec_add.py index 6106969725..ab0cd13769 100644 --- a/test/ipu-xrt/e2e/test_tiled_vec_add.py +++ b/test/ipu-xrt/e2e/test_tiled_vec_add.py @@ -141,6 +141,16 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -354,6 +364,16 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_tiled_vec_add_vectorized.py b/test/ipu-xrt/e2e/test_tiled_vec_add_vectorized.py index f9de6c4f2f..d0990390ae 100644 --- 
a/test/ipu-xrt/e2e/test_tiled_vec_add_vectorized.py +++ b/test/ipu-xrt/e2e/test_tiled_vec_add_vectorized.py @@ -163,6 +163,16 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -437,6 +447,16 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): diff --git a/test/ipu-xrt/e2e/test_vec_dot.py b/test/ipu-xrt/e2e/test_vec_dot.py index a38fa1292c..7a2012a1d0 100644 --- a/test/ipu-xrt/e2e/test_vec_dot.py +++ b/test/ipu-xrt/e2e/test_vec_dot.py @@ -147,6 +147,16 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): @@ -365,6 +375,16 @@ def ipu(): ipu_insts.extend( aiex.ipu.shimtile_push_queue(S2MM, channel_index, col, bd_id) ) + ipu_insts.extend( + aiex.ipu.sync( + channel=0, + column=0, + column_num=1, + direction=0, + row=0, + row_num=1, + ) + ) @aie.memtile_dma(tile_0_1) def memtile_dma_0_1(): From 812ec6642179c7e1ba1ee9f81224cccd495dfa12 Mon Sep 17 00:00:00 2001 From: Andra Bisca Date: Fri, 8 Mar 2024 16:25:42 +0100 Subject: [PATCH 04/13] Update lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp b/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp index 3a775175ef..af49ba763f 100644 --- 
a/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp +++ b/lib/Dialect/AIEX/Transforms/AIEDmaToIpu.cpp @@ -335,7 +335,7 @@ struct DmaToIpuPattern : OpConversionPattern { void insertIpuSyncOpForResults(AIE::DeviceOp device) { device.walk([&](mlir::func::FuncOp f) { SmallVector dmas; - Operation* returnOp = nullptr; + Operation *returnOp = nullptr; f.walk([&](mlir::func::ReturnOp op) { returnOp = op.getOperation(); }); f.walk([&](AIEX::IpuDmaMemcpyNdOp dma) { dmas.push_back(dma); }); for (auto dma : dmas) { From 6642c301b328e8a727eb96a1038797c18e0b38e5 Mon Sep 17 00:00:00 2001 From: Andra Bisca Date: Fri, 8 Mar 2024 16:25:50 +0100 Subject: [PATCH 05/13] Update reference_designs/ipu-xrt/matrix_multiplication/aie2.py Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- reference_designs/ipu-xrt/matrix_multiplication/aie2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/reference_designs/ipu-xrt/matrix_multiplication/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication/aie2.py index 1d6fa1fce0..49d220c5a2 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication/aie2.py +++ b/reference_designs/ipu-xrt/matrix_multiplication/aie2.py @@ -297,7 +297,6 @@ def sequence(A, B, C): strides=[n_in_i32s, k_x_N_in_i32s, N_in_i32s], ) - print(ctx.module) From fef09fda0624449b23aeb76c0c3a34d4958b7d54 Mon Sep 17 00:00:00 2001 From: Andra Bisca Date: Fri, 8 Mar 2024 16:25:58 +0100 Subject: [PATCH 06/13] Update reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py b/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py index 5b46311962..0692eb2550 100644 --- a/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py +++ 
b/reference_designs/ipu-xrt/matrix_multiplication_column/aie2.py @@ -246,7 +246,6 @@ def sequence(A, B, C): strides=[n_in_i32s, k_x_N_in_i32s, N_in_i32s], ) - print(ctx.module) From 3b88caf55c7ae4185ea5292d8891086503dc9b12 Mon Sep 17 00:00:00 2001 From: Andra Bisca Date: Fri, 8 Mar 2024 16:26:03 +0100 Subject: [PATCH 07/13] Update reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py b/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py index 387c1efb94..8a6bb56db2 100644 --- a/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py +++ b/reference_designs/ipu-xrt/matrix_vector_multiplication/aie2.py @@ -204,7 +204,6 @@ def sequence(A, B, C): strides=[0, 0, 0], ) - print(ctx.module) From 0133741b3bc95144825cc9b94c0a7d456b11a69f Mon Sep 17 00:00:00 2001 From: AndraBisca Date: Tue, 4 Jun 2024 15:03:25 -0600 Subject: [PATCH 08/13] Remove conflicts --- programming_examples/vision/color_detect/aie2_colorDetect.py | 4 ---- .../vision/color_threshold/aie2_colorThreshold.py | 4 ---- programming_examples/vision/edge_detect/aie2_edgeDetect.py | 4 ---- programming_examples/vision/vision_passthrough/aie2.py | 4 ---- .../vision/vision_passthrough/aie2_lineBased_8b_1080.mlir | 1 - .../vision/vision_passthrough/aie2_lineBased_8b_8k.mlir | 1 - .../vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir | 1 - 7 files changed, 19 deletions(-) diff --git a/programming_examples/vision/color_detect/aie2_colorDetect.py b/programming_examples/vision/color_detect/aie2_colorDetect.py index 513efdcecf..736ef2a6b3 100644 --- a/programming_examples/vision/color_detect/aie2_colorDetect.py +++ b/programming_examples/vision/color_detect/aie2_colorDetect.py @@ -266,10 +266,6 @@ def sequence(I, B, O): 
mem=O, sizes=[1, 1, 1, height * lineWidthInInt32s], ) -<<<<<<< HEAD:reference_designs/ipu-xrt/vision_pipelines/color_detect/aie2_colorDetect.py -======= - npu_sync(column=0, row=0, direction=0, channel=0) ->>>>>>> 9be8d710b4b376c65d48e14fc7056c3c709afcd0:programming_examples/vision/color_detect/aie2_colorDetect.py print(ctx.module) diff --git a/programming_examples/vision/color_threshold/aie2_colorThreshold.py b/programming_examples/vision/color_threshold/aie2_colorThreshold.py index 6d879bad1e..7766cee3c5 100644 --- a/programming_examples/vision/color_threshold/aie2_colorThreshold.py +++ b/programming_examples/vision/color_threshold/aie2_colorThreshold.py @@ -284,10 +284,6 @@ def sequence(inTensor, notUsed, outTensor): mem=outTensor, sizes=[1, 1, 1, tensorSizeInInt32s], ) -<<<<<<< HEAD:reference_designs/ipu-xrt/vision_pipelines/color_threshold/aie2_colorThreshold.py -======= - npu_sync(column=0, row=0, direction=0, channel=0) ->>>>>>> 9be8d710b4b376c65d48e14fc7056c3c709afcd0:programming_examples/vision/color_threshold/aie2_colorThreshold.py # print(ctx.module.operation.verify()) print(ctx.module) diff --git a/programming_examples/vision/edge_detect/aie2_edgeDetect.py b/programming_examples/vision/edge_detect/aie2_edgeDetect.py index 52f948e15b..c927cdd8c0 100644 --- a/programming_examples/vision/edge_detect/aie2_edgeDetect.py +++ b/programming_examples/vision/edge_detect/aie2_edgeDetect.py @@ -312,10 +312,6 @@ def sequence(I, B, O): mem=I, sizes=[1, 1, 1, tensorSizeInInt32s], ) -<<<<<<< HEAD:reference_designs/ipu-xrt/vision_pipelines/edge_detect/aie2_edgeDetect.py -======= - npu_sync(column=0, row=0, direction=0, channel=0) ->>>>>>> 9be8d710b4b376c65d48e14fc7056c3c709afcd0:programming_examples/vision/edge_detect/aie2_edgeDetect.py # print(ctx.module.operation.verify()) print(ctx.module) diff --git a/programming_examples/vision/vision_passthrough/aie2.py b/programming_examples/vision/vision_passthrough/aie2.py index 02a8695802..dad0ee09ff 100644 --- 
a/programming_examples/vision/vision_passthrough/aie2.py +++ b/programming_examples/vision/vision_passthrough/aie2.py @@ -165,10 +165,6 @@ def sequence(inTensor, notUsed, outTensor): mem=outTensor, sizes=[1, 1, 1, tensorSizeInInt32s], ) -<<<<<<< HEAD:reference_designs/ipu-xrt/vision_pipelines/passthrough/aie2.py -======= - npu_sync(column=0, row=0, direction=0, channel=0) ->>>>>>> 9be8d710b4b376c65d48e14fc7056c3c709afcd0:programming_examples/vision/vision_passthrough/aie2.py print(ctx.module) diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir index 0621e0b622..13c36fbf9e 100644 --- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir +++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_1080.mlir @@ -55,7 +55,6 @@ module @passThroughLine_aie2 { //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words]) aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<518400xi32> aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<518400xi32> - aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } } diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir index c2c31b0d9b..67efe2e747 100644 --- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir +++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_8k.mlir @@ -56,7 +56,6 @@ module @passThroughLine_aie2 { //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words]) aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, 
%c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @inOF, id = 1 : i64 } : memref<2073600xi32> aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %c1, %totalLenRGBA][%c0, %c0, %c0]) { metadata = @outOF, id = 0 : i64 } : memref<2073600xi32> - aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } } diff --git a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir index dd66475ca5..394ba07bd8 100644 --- a/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir +++ b/programming_examples/vision/vision_passthrough/aie2_lineBased_8b_tiny.mlir @@ -55,7 +55,6 @@ module @passThroughLine_aie2 { //dma_memcpy_nd ([offset in 32b words][length in 32b words][stride in 32b words]) aiex.npu.dma_memcpy_nd (0, 0, %in[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @inOF, id = 1 : i64 } : memref<1152xi32> aiex.npu.dma_memcpy_nd (0, 0, %out[%c0, %c0, %c0, %c0][%c1, %c1, %tileheight, %tilewidth][%c0, %c0, %tilewidth]) { metadata = @outOF, id = 0 : i64 } : memref<1152xi32> - aiex.npu.sync {channel = 0 : i32, column = 0 : i32, column_num = 1 : i32, direction = 0 : i32, row = 0 : i32, row_num = 1 : i32} return } } From 6b05733f14ab7795bae7a51a04fff10887808f4d Mon Sep 17 00:00:00 2001 From: Andra Bisca Date: Tue, 4 Jun 2024 23:03:55 +0200 Subject: [PATCH 09/13] Update lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp b/lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp index 134f23d15f..7ef7669a50 100644 --- a/lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp +++ 
b/lib/Dialect/AIEX/Transforms/AIEDmaToNpu.cpp @@ -439,7 +439,8 @@ void insertNpuSyncOpForResults(AIE::DeviceOp device) { f.walk([&](mlir::func::ReturnOp op) { returnOp = op.getOperation(); }); f.walk([&](AIEX::NpuDmaMemcpyNdOp dma) { dmas.push_back(dma); }); for (auto dma : dmas) { - if (auto infoOp = getAllocOpForSymbol(shimDmaAllocOps, dma.getMetadata())) { + if (auto infoOp = + getAllocOpForSymbol(shimDmaAllocOps, dma.getMetadata())) { if (infoOp->getChannelDir() == AIE::DMAChannelDir::S2MM) { // Found dma op copying results to host OpBuilder builder(dma); From 9f7250987d3ee9240f18be00d3a7e804b48528be Mon Sep 17 00:00:00 2001 From: Andra Bisca Date: Tue, 4 Jun 2024 23:04:19 +0200 Subject: [PATCH 10/13] Update programming_examples/ml/bottleneck/aie2.py Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- programming_examples/ml/bottleneck/aie2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/programming_examples/ml/bottleneck/aie2.py b/programming_examples/ml/bottleneck/aie2.py index e7a85d0905..b4f7620d34 100644 --- a/programming_examples/ml/bottleneck/aie2.py +++ b/programming_examples/ml/bottleneck/aie2.py @@ -631,7 +631,6 @@ def sequence(inputFromL3, weightsFromL3, outputToL3): sizes=[1, 1, 1, totalWeightsSize32b], ) - print(ctx.module) From 5122553e21db440b1b432dc6098fa6d43d1f205c Mon Sep 17 00:00:00 2001 From: Andra Bisca Date: Tue, 4 Jun 2024 23:04:29 +0200 Subject: [PATCH 11/13] Update programming_examples/ml/resnet/layers_conv2_x/aie2.py Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- programming_examples/ml/resnet/layers_conv2_x/aie2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/programming_examples/ml/resnet/layers_conv2_x/aie2.py b/programming_examples/ml/resnet/layers_conv2_x/aie2.py index f6cf8ddb5d..150128d887 100755 --- a/programming_examples/ml/resnet/layers_conv2_x/aie2.py +++ b/programming_examples/ml/resnet/layers_conv2_x/aie2.py @@ -986,7 
+986,6 @@ def sequence(inputFromL3, weightsFromL3, outputToL3): sizes=[1, 1, 1, totalWeightsSize32b_rest], ) - res = ctx.module.operation.verify() if res == True: print(ctx.module) From 6edffdcaba17107713b63a4a4585f286efce6b21 Mon Sep 17 00:00:00 2001 From: AndraBisca Date: Tue, 4 Jun 2024 15:08:39 -0600 Subject: [PATCH 12/13] Remove leftover syncs --- .../section-2e/02_external_mem_to_core/ext_to_core.py | 1 - .../section-2e/03_external_mem_to_core_L2/ext_to_core_L2.py | 4 ---- .../section-2/section-2e/05_join_L2/distribute_and_join_L2.py | 1 - third_party/bootgen | 2 +- 4 files changed, 1 insertion(+), 7 deletions(-) diff --git a/programming_guide/section-2/section-2e/02_external_mem_to_core/ext_to_core.py b/programming_guide/section-2/section-2e/02_external_mem_to_core/ext_to_core.py index 6925e6bd2d..6ad968e7eb 100644 --- a/programming_guide/section-2/section-2e/02_external_mem_to_core/ext_to_core.py +++ b/programming_guide/section-2/section-2e/02_external_mem_to_core/ext_to_core.py @@ -60,7 +60,6 @@ def sequence(inTensor, notUsed, outTensor): npu_dma_memcpy_nd( metadata="in", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 48] ) - npu_sync(column=0, row=0, direction=0, channel=0) res = ctx.module.operation.verify() if res == True: diff --git a/programming_guide/section-2/section-2e/03_external_mem_to_core_L2/ext_to_core_L2.py b/programming_guide/section-2/section-2e/03_external_mem_to_core_L2/ext_to_core_L2.py index 1dfec30f37..52c5923d2f 100644 --- a/programming_guide/section-2/section-2e/03_external_mem_to_core_L2/ext_to_core_L2.py +++ b/programming_guide/section-2/section-2e/03_external_mem_to_core_L2/ext_to_core_L2.py @@ -64,10 +64,6 @@ def sequence(inTensor, notUsed, outTensor): npu_dma_memcpy_nd( metadata="in0", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 48] ) -<<<<<<< HEAD:reference_designs/ipu-xrt/add_one_objFifo/aie2.py -======= - npu_sync(column=0, row=0, direction=0, channel=0) ->>>>>>> 
9be8d710b4b376c65d48e14fc7056c3c709afcd0:programming_guide/section-2/section-2e/03_external_mem_to_core_L2/ext_to_core_L2.py res = ctx.module.operation.verify() if res == True: diff --git a/programming_guide/section-2/section-2e/05_join_L2/distribute_and_join_L2.py b/programming_guide/section-2/section-2e/05_join_L2/distribute_and_join_L2.py index b8c264ea28..836c2fbba6 100644 --- a/programming_guide/section-2/section-2e/05_join_L2/distribute_and_join_L2.py +++ b/programming_guide/section-2/section-2e/05_join_L2/distribute_and_join_L2.py @@ -101,7 +101,6 @@ def sequence(inTensor, notUsed, outTensor): npu_dma_memcpy_nd( metadata="in", bd_id=1, mem=inTensor, sizes=[1, 1, 1, 48] ) - npu_sync(column=0, row=0, direction=0, channel=0) print(ctx.module) diff --git a/third_party/bootgen b/third_party/bootgen index a36cc52299..92e09bf37e 160000 --- a/third_party/bootgen +++ b/third_party/bootgen @@ -1 +1 @@ -Subproject commit a36cc5229976377db750110079a58bead7420f36 +Subproject commit 92e09bf37ea17d7b1f0e102a2548f27fb768651c From 56eddbb63ffe0ea5580544f1491ca3c5ccdd5db6 Mon Sep 17 00:00:00 2001 From: AndraBisca Date: Tue, 4 Jun 2024 16:01:05 -0600 Subject: [PATCH 13/13] Removed another sync --- programming_guide/section-4/section-4b/aie2.py | 1 - 1 file changed, 1 deletion(-) diff --git a/programming_guide/section-4/section-4b/aie2.py b/programming_guide/section-4/section-4b/aie2.py index 910d4b1a94..bb9a742dff 100644 --- a/programming_guide/section-4/section-4b/aie2.py +++ b/programming_guide/section-4/section-4b/aie2.py @@ -82,7 +82,6 @@ def sequence(A, F, C): npu_dma_memcpy_nd(metadata="out", bd_id=0, mem=C, sizes=[1, 1, 1, 4096]) npu_dma_memcpy_nd(metadata="in", bd_id=1, mem=A, sizes=[1, 1, 1, 4096]) npu_dma_memcpy_nd(metadata="infactor", bd_id=2, mem=F, sizes=[1, 1, 1, 1]) - npu_sync(column=0, row=0, direction=0, channel=0) with mlir_mod_ctx() as ctx: