diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b494b33ba..7a2c2b95d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -77,6 +77,9 @@ set(PEANO_INSTALL_DIR option(LLVM_INCLUDE_TOOLS "Generate build targets for the LLVM tools." ON) option(LLVM_BUILD_TOOLS "Build the LLVM tools. If OFF, just generate build targets." ON) +option(CLANGIR_MLIR_FRONTEND + "Use ClangIR version of Clang/LLVM to provide aie++ C++ support." OFF) + option(AIE_INCLUDE_INTEGRATION_TESTS "Generate build targets for the mlir-aie integration tests." OFF) @@ -85,8 +88,8 @@ execute_process(COMMAND git rev-parse HEAD OUTPUT_VARIABLE AIE_GIT_COMMIT ERROR_ find_package(MLIR REQUIRED CONFIG) -message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}") message(STATUS "Using LLVMConfig.cmake in: ${LLVM_DIR}") +message(STATUS "Using MLIRConfig.cmake in: ${MLIR_DIR}") # These are a bit of a hack, because we're hijacking alot of LLVM machinery set(LLVM_RUNTIME_OUTPUT_INTDIR ${CMAKE_BINARY_DIR}/bin) @@ -135,8 +138,8 @@ set(LLVM_LIT_ARGS "${LIT_ARGS_DEFAULT}" CACHE STRING "Default options for lit") -list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}") list(APPEND CMAKE_MODULE_PATH "${LLVM_CMAKE_DIR}") +list(APPEND CMAKE_MODULE_PATH "${MLIR_CMAKE_DIR}") include(TableGen) include(AddLLVM) @@ -145,6 +148,16 @@ include(HandleLLVMOptions) include(ExternalProject) include(CMakeDependentOption) +if (CLANGIR_MLIR_FRONTEND) + # To understand how to influence the find_package config, see + # https://cmake.org/cmake/help/latest/command/find_package.html#config-mode-search-procedure + find_package(Clang REQUIRED CONFIG) + message(STATUS "Using ClangConfig.cmake in: ${Clang_DIR}") + # Use Clang infrastructure like CIR in this project + list(APPEND CMAKE_MODULE_PATH "${CLANG_CMAKE_DIR}") + include(AddClang) +endif() + include_directories(${LLVM_INCLUDE_DIRS}) include_directories(${MLIR_INCLUDE_DIRS}) include_directories(${PROJECT_SOURCE_DIR}/include) diff --git a/include/aie/CIR/CIRToAIEPasses.h b/include/aie/CIR/CIRToAIEPasses.h new file mode 100644 index 0000000000..c279245c27 --- /dev/null +++ b/include/aie/CIR/CIRToAIEPasses.h @@ -0,0 +1,42 @@ +//===- CIRToAIEpasses.h -----------------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 Advanced Micro Devices, Inc. +//===----------------------------------------------------------------------===// + +#ifndef CIR_AIE_PASSES_H +#define CIR_AIE_PASSES_H + +#include "aie/Dialect/AIE/IR/AIEDialect.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" + +#include "mlir/Pass/Pass.h" + +namespace xilinx::AIE::CIR { + +#define GEN_PASS_CLASSES +#include "aie/CIR/CIRToAIEPasses.h.inc" + +std::unique_ptr> +createCIRToAIEPreparePass(); + +std::unique_ptr> createCIRToAIEPass(); + +std::unique_ptr> +createCIRToAIEInlineKernelLambdaPass(); + +std::unique_ptr> +createCIRToAIEDecaptureKernelPass(); + +std::unique_ptr> createCIRKeepAIEDevice(); + +/// Generate the code for registering passes. 
+#define GEN_PASS_REGISTRATION
+#include "aie/CIR/CIRToAIEPasses.h.inc"
+
+} // namespace xilinx::AIE::CIR
+
+#endif
diff --git a/include/aie/CIR/CIRToAIEPasses.td b/include/aie/CIR/CIRToAIEPasses.td
new file mode 100644
index 0000000000..0486308a26
--- /dev/null
+++ b/include/aie/CIR/CIRToAIEPasses.td
@@ -0,0 +1,108 @@
+//===- CIRToAIEPasses.td ----------------------------------*- tablegen -*-===//
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+// (c) Copyright 2024 AMD
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef AIE_CIR_PASSES
+#define AIE_CIR_PASSES
+
+include "mlir/Pass/PassBase.td"
+
+def CIRToAIEPrepare : Pass<"cir-to-aie-prepare", "mlir::ModuleOp"> {
+  let summary = "Prepare the transformation of AIE++ patterns in CIR into equivalent AIE operations";
+  let description = [{
+    Some AIE constructs are represented in AIE++ by C++ abstractions expressed
+    in normal C++. These are compiled to CIR as plain C++ constructs.
+    This pass recognizes these AIE++ C++ constructs and prepares their
+    replacement by equivalent AIE operations.
+  }];
+
+  let constructor = "xilinx::AIE::CIR::createCIRToAIEPreparePass()";
+  let dependentDialects = [
+    "cir::CIRDialect",
+    "xilinx::AIE::AIEDialect",
+  ];
+}
+
+def CIRToAIE : Pass<"cir-to-aie", "mlir::ModuleOp"> {
+  let summary = "Transform AIE++ patterns in CIR into equivalent AIE operations";
+  let description = [{
+    Some AIE constructs are represented in AIE++ by C++ abstractions expressed
+    in normal C++. These are compiled to CIR as plain C++ constructs.
+    This pass recognizes these AIE++ C++ constructs and replaces them with
+    equivalent AIE operations.
+  }];
+
+  let constructor = "xilinx::AIE::CIR::createCIRToAIEPass()";
+  let dependentDialects = [
+    "cir::CIRDialect",
+    "xilinx::AIE::AIEDialect",
+  ];
+}
+
+def CIRToAIEInlineKernelLambda : Pass<"cir-to-aie-inline-kernel-lambda", "mlir::ModuleOp"> {
+
+  let summary = "Inline the call to the kernel lambda inside the aie.core operation";
+
+  let description = [{ In aie++ the tile kernel is represented by a lambda
+    called from "aie::tile::program", which ends up lowered into an aie.core
+    operation.
+
+    This pass removes the call operations to the lambda by inlining the lambda
+    itself.
+
+    TODO: it might be possible to use the inliner in a restricted way for
+    this, together with some SROA, mem2reg...
+  }];
+
+  let constructor = "xilinx::AIE::CIR::createCIRToAIEInlineKernelLambdaPass()";
+  let dependentDialects = [
+    "cir::CIRDialect",
+    "xilinx::AIE::AIEDialect",
+  ];
+}
+
+def CIRToAIEDecaptureKernel : Pass<"cir-to-aie-decapture-kernel", "mlir::ModuleOp"> {
+
+  let summary = "Remove the useless capture storage of a kernel in an aie.core operation";
+
+  let description = [{ After inlining the kernel lambda call into an aie.core
+    operation, a left-over capture allocation with a write and a read remains
+    for each kernel argument.
+
+    This pass connects the kernel arguments directly to their producers without
+    going through the capture storage, and then removes the storage itself.
+
+    TODO: this should be handled by a clever mem2reg or after standard dialect
+    lowering with mem2reg.
+ }]; + + let constructor = "xilinx::AIE::CIR::createCIRToAIEDecaptureKernelPass()"; + let dependentDialects = [ + "cir::CIRDialect", + "xilinx::AIE::AIEDialect", + ]; +} + +def CIRKeepAIEDevice : Pass<"cir-keep-aie-device", "mlir::ModuleOp"> { + + let summary = "Remove everything but the first aie.device"; + + let description = [{ Extract the aie.device from a CIR module by removing everything but the first aie.device. + + TODO: handle multiple devices. + }]; + + let constructor = "xilinx::AIE::CIR::createCIRKeepAIEDevice()"; + let dependentDialects = [ + "cir::CIRDialect", + "xilinx::AIE::AIEDialect", + ]; +} + +#endif diff --git a/include/aie/CIR/CMakeLists.txt b/include/aie/CIR/CMakeLists.txt new file mode 100644 index 0000000000..01460b50ae --- /dev/null +++ b/include/aie/CIR/CMakeLists.txt @@ -0,0 +1,13 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. + + +set(LLVM_TARGET_DEFINITIONS CIRToAIEPasses.td) +mlir_tablegen(CIRToAIEPasses.h.inc -gen-pass-decls -name CIRToAIE) +add_public_tablegen_target(MLIRCIRToAIEPassesIncGen) + +add_mlir_doc(CIRToAIEPasses CIRToAIEPasses ./ -gen-pass-doc) diff --git a/include/aie/CIR/runtime/.clang-format b/include/aie/CIR/runtime/.clang-format new file mode 100644 index 0000000000..87b344537e --- /dev/null +++ b/include/aie/CIR/runtime/.clang-format @@ -0,0 +1,8 @@ +Language: Cpp +BasedOnStyle: LLVM +AccessModifierOffset: -1 +BreakInheritanceList: BeforeComma +BreakConstructorInitializers: BeforeComma +Cpp11BracedListStyle: false +PointerAlignment: Left +SpaceBeforeCpp11BracedList: true diff --git a/include/aie/CIR/runtime/aie++/aie++.hpp b/include/aie/CIR/runtime/aie++/aie++.hpp new file mode 100644 index 0000000000..6bd862f1b6 --- /dev/null +++ b/include/aie/CIR/runtime/aie++/aie++.hpp @@ -0,0 +1,219 @@ +#include +#include +#include +#include + +// #include + +namespace aie { + +// using namespace std::literals::string_view_literals; + +// template using buffer_t = std::array; +template struct buffer { + //using storage_t = std::array; + using storage_t = T[Size]; + storage_t storage; + operator storage_t&() { return storage; } + // The previous is not enough + decltype(auto) operator[](std::size_t index) { return storage[index]; } + + //auto begin() { return storage.begin(); } + auto begin() { return &storage[0]; } + + //auto end() { return storage.end(); } + auto end() { return &storage[Size]; } +}; + +template void aquire() {} + +template void release() {} + +template struct accessor { + // clang++: + // /home/rkeryell/Xilinx/Projects/LLVM/worktrees/clangir/clang/lib/CIR/CodeGen/CIRGenExpr.cpp:2548: + // LValue cir::CIRGenFunction::buildLValue(const Expr *): Assertion `0 && "not + // implemented"' failed. + // + // Channel& channel; + // + // accessor(Channel& c) : channel { c } {aquire(); } + using access_t = typename Channel::access_t; + access_t* storage; + + // clang++: + // /home/rkeryell/Xilinx/Projects/LLVM/worktrees/clangir/clang/lib/CIR/CodeGen/CIRGenExpr.cpp:2548: + // LValue cir::CIRGenFunction::buildLValue(const Expr *): Assertion `0 && "not + // implemented"' failed. 
+ // + // accessor(access_t& a) : storage {a} { aquire(); } + accessor(access_t& a) { + storage = &a; + aquire(); + } + auto& operator[](std::size_t index) { return (*storage)[index]; } + operator typename Channel::access_t &() { return *storage; } + ~accessor() { release(); } +}; + +// Channel abstraction used to send data between tiles. +// +// This is lowered to MLIR aie.objectfifo. +template +struct channel { + static_assert(std::is_same_v, + "Only tiles from the same device can be connected"); + using from_tile_type = FromTile; + using to_tile_type = ToTile; + using device_type = typename to_tile_type::device_type; + // clang++: + // /home/rkeryell/Xilinx/Projects/LLVM/worktrees/clangir/clang/lib/CIR/CodeGen/CIRGenExpr.cpp:552: + // CIRGenCallee cir::CIRGenFunction::buildCallee(const clang::Expr *): + // Assertion `!dyn_cast(E) && "NYI"' failed. + // std::vector storage; + ValueType storage[10 * Size]; + std::size_t capacity; + using access_t = ValueType[][Size]; + channel(FromTile& from_tile, ToTile& to_tile, std::size_t capacity) + : capacity { capacity } { + // storage.assign(0, capacity * Size); + } + + auto& get_storage() { + // \todo + return *reinterpret_cast(storage); + } + + auto in_acquire_release(std::size_t capacity) { + // \todo + return accessor(get_storage()); + } + + auto out_acquire_release(std::size_t capacity) { + // \todo + return accessor(get_storage()); + } +}; + +// Represent the tile in AIE Device. +// +// X is the column number starting at 0. +// +// Y is the row number starting at 0. +// +// Typically compiled as: +// !ty_22aie3A3Atile3C12C_43E22 = !cir.struct" +// {!cir.int}> +// +// The tile depends on a Device since we can have programs with different +// devices at the same time +template struct tile { + using device_type = Device; + static constexpr auto x() { return X; } + static constexpr auto y() { return Y; } + // Only tiles from a same device are comparable + template + friend constexpr auto operator==(const tile&, const tile&) { + return X == X2 && Y == Y2; + }; + /* template + friend auto operator<=>(const tile_t &, const tile_t &) { + return std::strong_ordering::equal; + };*/ + /* + friend constexpr auto operator<=>(const tile &a, const tile &b) { +// return operator<=>(std::array { a.x, a.y }, std::array { b.x, b.y }); + return operator<=>(a.x, a.y); + } +*/ + + template static inline Code tile_code; + + // Get a buffer with Size elements of type T. + template + buffer buffer() __attribute__((annotate("aie.tile.buffer"))) { + return {}; + } + + // Define the code to be run inside a tile. + // + // Typically compiled as: + // cir.call @_ZN3aie4tileILi1ELi4EE7programIZ4mainE3$_0EEvOT_(%2, %7) : + // (!cir.ptr, !cir.ptr) -> () + // loc(#loc63) + void program(auto&& code) __attribute__((annotate("aie.tile.program"))) { + // Use this since std::function crashes ClangIR 2024/09/12 + // tile_code = &code; + // Just to instantiate the lambda body while waiting for std::function + code(); + } + + // Create a channel to communicate between 2 tiles. 
+ template + channel channel_to(ToTile& to_tile, + std::size_t capacity) + __attribute__((annotate("aie.device.channel"))) { + return { *this, to_tile, capacity }; + } +}; + +// template inline constexpr tile tile; + +template struct tile_handle { + Storage tile_memory; + constexpr tile_handle(Storage tile_memory) + : tile_memory { tile_memory } {}; + constexpr auto& mem() { return tile_memory; } +}; + +/* +template +channel(FromTile& from_tile, ToTile& to_tile, + std::size_t capacity) + ->channel; +*/ +// Inject in aie:: to ease use +enum : std::int8_t { + npu1 = 42, //< AIE2 device found in RyzenAI 9 7940HS + npu2, + npu3, + npu4, + pacifica //< Fancy AIE device found on Californian shore +}; + +// Abstraction representing an AIE device in the system. +// +// DeviceModel specifies an architecture model, such as aie::npu1. +// +// Typically compiled as: +// !ty_aie3A3Adevice3Caie3A3Anpu12C_aie3A3A28lambda_at_2E2Faie2B2B2Ehpp3A763A54293E +// = !cir.struct" {!cir.int}> +// +// "Unique" with the lambda is used to generate a different type for multiple +// instantiation, so we can have a design with several accelerators of the same +// type +template +struct device { + // Get the tile of a device. X is the column of the tile, Y is the row of the + // tile. + template + tile tile() + __attribute__((annotate("aie.device.tile", X, Y, DeviceModel, + std::to_underlying(DeviceModel)))) { + return {}; + } + + void constexpr run() {} +}; + +template struct tile_storage_t { + using content = T; +}; + +template inline constexpr tile_storage_t tile_storage; + +} // namespace aie diff --git a/include/aie/CMakeLists.txt b/include/aie/CMakeLists.txt index 489b1ebd7a..a9d9bfc9e2 100644 --- a/include/aie/CMakeLists.txt +++ b/include/aie/CMakeLists.txt @@ -6,5 +6,8 @@ # (c) Copyright 2021 Xilinx Inc. configure_file(version.h.in version.h) -add_subdirectory(Dialect) +if (CLANGIR_MLIR_FRONTEND) + add_subdirectory(CIR) +endif() add_subdirectory(Conversion) +add_subdirectory(Dialect) diff --git a/include/aie/Dialect/AIE/IR/AIEOps.td b/include/aie/Dialect/AIE/IR/AIEOps.td index 7c487e3fc2..05ffb3e0b3 100644 --- a/include/aie/Dialect/AIE/IR/AIEOps.td +++ b/include/aie/Dialect/AIE/IR/AIEOps.td @@ -27,15 +27,15 @@ class AIE_Op traits = []> : def AIE_DeviceOp: AIE_Op<"device", [ - AIETarget, HasParent<"mlir::ModuleOp">, + AIETarget, SymbolTable, SingleBlockImplicitTerminator<"EndOp">, IsolatedFromAbove ]> { let summary = "Define an AIE design targetting a complete device"; let description = [{ This operation describes a design that executes on a particular AIEngine device. - It exists at the toplevel of a design; although currently it does not replace the - default toplevel module in MLIR, the intention is that this could be the case - in the future. + + It does not replace the default toplevel module in MLIR since it can be + possible to have several devices in the same module. When using this operation, all resources in a physical device are available and the design does not need to be concerned with other potential users of a physical @@ -642,13 +642,13 @@ def AIE_PacketFlowOp: AIE_Op<"packet_flow", [SingleBlockImplicitTerminator<"EndO let description = [{ A logical packet-switched flow between tiles. During place and route, this is replaced by MasterSets and PacketRules inside - switchboxes. - - The optional attribute keep_pkt_header indicates whether each - data packet's packet header gets preserved at the flow's + switchboxes. 
+ + The optional attribute keep_pkt_header indicates whether each + data packet's packet header gets preserved at the flow's destination. The optional attribute priority_route indicates - whether the packet flow is routed in priority over other flows, - so that they always get allocated with the same master, slave + whether the packet flow is routed in priority over other flows, + so that they always get allocated with the same master, slave ports, arbiters and master selects (msel). Example: @@ -864,9 +864,9 @@ def AIE_DMABDOp: AIE_Op<"dma_bd", []> { ## DMA constant padding on AIE-ML Devices AIE-ML devices can apply constant padding at the buffer descriptor level, described with pairs of padding - counts before and after a dimension, to all dimensions in the data layout transformations. The padding - counts can be supplied to the `dma_bd` through an optional argument, an array of "tuple-like" attributes - `bd_pad_layout`, followed by an optional argument `const_val` (default + counts before and after a dimension, to all dimensions in the data layout transformations. The padding + counts can be supplied to the `dma_bd` through an optional argument, an array of "tuple-like" attributes + `bd_pad_layout`, followed by an optional argument `const_val` (default is 0). All counts are expressed in multiples of the element width. }]; @@ -1467,8 +1467,8 @@ def AIE_CascadeFlowOp: AIE_Op<"cascade_flow", []> { ); let summary = "A cascade connection between tiles"; let description = [{ - The `aie.cascade_flow` operation represents a cascade connection between two `aie.tile` operations. - During lowering, this is replaced by `aie.configure_cascade` operations for each `aie.tile` based on + The `aie.cascade_flow` operation represents a cascade connection between two `aie.tile` operations. + During lowering, this is replaced by `aie.configure_cascade` operations for each `aie.tile` based on their relative placement to one another. Example: @@ -1491,7 +1491,7 @@ def AIE_CascadeFlowOp: AIE_Op<"cascade_flow", []> { def AIE_ConfigureCascadeOp: AIE_Op<"configure_cascade", [HasParent<"DeviceOp">]> { let summary = "An op to configure the input and output directions of the cascade for a single AIE tile"; let description = [{ - An operation to configure the cascade on a single tile in both the input and the output + An operation to configure the cascade on a single tile in both the input and the output directions. Example: @@ -1666,7 +1666,7 @@ def AIE_ObjectFifoCreateOp: AIE_Op<"objectfifo", [HasParent<"DeviceOp">, Symbol] BDDimLayoutArrayArrayAttr:$dimensionsFromStreamPerConsumer, DefaultValuedAttr:$via_DMA, DefaultValuedAttr:$plio, - // disable_synchronization==true will skip lock generation for + // disable_synchronization==true will skip lock generation for // objectfifo synchronous accesses DefaultValuedAttr:$disable_synchronization, // via_shared_mem==0 means use producer tile's memory module @@ -1915,7 +1915,7 @@ def AIE_ObjectFifoSubviewAccessOp : AIE_Op<"objectfifo.subview.access", []> { }]; let arguments = ( - ins AIE_ObjectFifoSubviewType:$subview, + ins AIE_ObjectFifoSubviewType:$subview, ConfinedAttr]>:$index ); @@ -2007,11 +2007,11 @@ def AIE_ObjectFifoRegisterProcessOp: AIE_Op<"objectfifo.register_process", []> { def AIE_BDChainOp: AIE_Op<"bd_chain", [Symbol, SkipAccessibilityCheckTrait]> { let summary = "Definition of a Parametrizable Chain of Buffer Descriptors"; let description = [{ - This operation allows you to define buffer descriptor chains with parametrizable inputs. 
+ This operation allows you to define buffer descriptor chains with parametrizable inputs. This is useful for common patterns such as double buffering (ping-pong) that may look identical but use different input/output buffers and locks. Currently, only buffers and locks are parametrizable. - Once defined, an abstract BD chain can be used elsewhere using AIEX ops in the runtime sequence. + Once defined, an abstract BD chain can be used elsewhere using AIEX ops in the runtime sequence. In the future, abstract BD chains will also be usable elsewhere, inside the static configuration. At its usage sites, the abstract BD chain will be concretized with the given input arguments. }]; diff --git a/include/aie/Dialect/AIE/IR/AIETargetModel.h b/include/aie/Dialect/AIE/IR/AIETargetModel.h index c20710c4ee..e2bdad6aaf 100644 --- a/include/aie/Dialect/AIE/IR/AIETargetModel.h +++ b/include/aie/Dialect/AIE/IR/AIETargetModel.h @@ -19,7 +19,7 @@ namespace xilinx::AIE { -using TileID = struct TileID { +struct TileID { // friend definition (will define the function as a non-member function in the // namespace surrounding the class). friend std::ostream &operator<<(std::ostream &os, const TileID &s) { diff --git a/include/aie/Dialect/AIE/Transforms/AIEPasses.td b/include/aie/Dialect/AIE/Transforms/AIEPasses.td index 82a01c163f..0905a5c3da 100644 --- a/include/aie/Dialect/AIE/Transforms/AIEPasses.td +++ b/include/aie/Dialect/AIE/Transforms/AIEPasses.td @@ -46,7 +46,7 @@ def AIECanonicalizeDevice : Pass<"aie-canonicalize-device", "mlir::ModuleOp"> { let description = [{ This pass inserts a toplevel device operation in designs that do not have one. This allows us to support backwards compatability for older models targetting the VC1902 - device without explicit device operations. + device without explicit device operations. }]; let constructor = "xilinx::AIE::createAIECanonicalizeDevicePass()"; @@ -129,10 +129,10 @@ def AIENormalizeAddressSpaces : Pass<"aie-normalize-address-spaces", "DeviceOp"> def AIERoutePathfinderFlows : Pass<"aie-create-pathfinder-flows", "DeviceOp"> { let summary = "Route aie.flow and aie.packetflow operations through switchboxes"; - let description = [{ - Uses Pathfinder congestion-aware algorithm. + let description = [{ + Uses Pathfinder congestion-aware algorithm. Each aie.flow is replaced with aie.connect operation. - Each aie.packetflow is replace with the set of aie.amsel, aie.masterset + Each aie.packetflow is replace with the set of aie.amsel, aie.masterset and aie.packet_rules operations. }]; @@ -143,9 +143,9 @@ def AIERoutePathfinderFlows : Pass<"aie-create-pathfinder-flows", "DeviceOp"> { let options = [ Option<"clRouteCircuit", "route-circuit", "bool", /*default=*/"true", - "Flag to enable aie.flow lowering.">, + "Flag to enable aie.flow lowering.">, Option<"clRoutePacket", "route-packet", "bool", /*default=*/"true", - "Flag to enable aie.packetflow lowering.">, + "Flag to enable aie.packetflow lowering.">, ]; } @@ -179,16 +179,16 @@ def AIEVectorOpt : Pass<"aie-vector-opt", "mlir::func::FuncOp"> { def AIEObjectFifoStatefulTransform : Pass<"aie-objectFifo-stateful-transform", "DeviceOp"> { let summary = "Instantiate the buffers and locks of aie.objectFifo.createObjectFifo operations"; let description = [{ - Replace each aie.objectFifo.createObjectFifo operation with aie.buffer and aie.lock operations in the + Replace each aie.objectFifo.createObjectFifo operation with aie.buffer and aie.lock operations in the producer tile. 
Convert aie.objectFifo.acquire, aie.objectFifo.release and aie.objectFifo.subviewAccess operations into useLock operations by keeping track of acquire/release operations on each objectFifo by each process. - If the producer and consumer tiles of an aie.objectFifo.createObjectFifo operation are not adjacent, the + If the producer and consumer tiles of an aie.objectFifo.createObjectFifo operation are not adjacent, the pass also establised aie.flow and aie.dma operations to enable communication between the tiles. Extend the body of each loop that contains operations on objectFifos such that it is unrolled - based on the number of elements in the objectFifos. If the number of iterations of the loop - cannot be divided pefectly by the unrolling factor, the pass duplicates the loop body after + based on the number of elements in the objectFifos. If the number of iterations of the loop + cannot be divided pefectly by the unrolling factor, the pass duplicates the loop body after the original loop. }]; @@ -210,7 +210,7 @@ def AIEObjectFifoStatefulTransform : Pass<"aie-objectFifo-stateful-transform", " def AIEObjectFifoRegisterProcess : Pass<"aie-register-objectFifos", "DeviceOp"> { let summary = "Generate acquire/release patterns for producer/consumer processes registered to an objectFifo"; let description = [{ - Generate acquire/release patterns in the CoreOps of associated cores for each + Generate acquire/release patterns in the CoreOps of associated cores for each aie.objectfifo.register_process operation. Patterns are generated as for loops of different sizes depending on input patterns. }]; @@ -227,8 +227,8 @@ def AIEObjectFifoRegisterProcess : Pass<"aie-register-objectFifos", "DeviceOp"> def AIELowerCascadeFlows : Pass<"aie-lower-cascade-flows", "DeviceOp"> { let summary = "Lower aie.cascade_flow operations through `aie.configure_cascade` operations"; let description = [{ - Replace each aie.cascade_flow operation with an equivalent set of `aie.configure_cascade` - operations. + Replace each aie.cascade_flow operation with an equivalent set of `aie.configure_cascade` + operations. }]; let constructor = "xilinx::AIE::createAIELowerCascadeFlowsPass()"; @@ -266,9 +266,9 @@ def AIEGenerateColumnControlOverlay : Pass<"aie-generate-column-control-overlay" ]; let options = [ Option<"clRouteShimCTRLToTCT", "route-shim-to-tct", "std::string", /*default=*/"\"shim-only\"", - "Flag to generate TCT routing between tile CTRL and shim SOUTH ports. Available options: ['shim-only', 'all-tiles', 'disable'].">, + "Flag to generate TCT routing between tile CTRL and shim SOUTH ports. Available options: ['shim-only', 'all-tiles', 'disable'].">, Option<"clRouteShimDmaToTileCTRL", "route-shim-to-tile-ctrl", "bool", /*default=*/"false", - "Flag to generate routing between shim dma DMA and tile CTRL ports, for configuration.">, + "Flag to generate routing between shim dma DMA and tile CTRL ports, for configuration.">, ]; } diff --git a/lib/CIR/CIRToAIEPasses.cpp b/lib/CIR/CIRToAIEPasses.cpp new file mode 100644 index 0000000000..37e91fa1ee --- /dev/null +++ b/lib/CIR/CIRToAIEPasses.cpp @@ -0,0 +1,1058 @@ +//===- CIRToAIEpasses.cpp ---------------------------------------*- C++ -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2024 Advanced Micro Devices, Inc. 
+//===----------------------------------------------------------------------===// + +#include +#include +#include +#include +#include + +#include "aie/CIR/CIRToAIEPasses.h" +#include "aie/Dialect/AIE/IR/AIEDialect.h" + +#include "mlir/Analysis/TopologicalSortUtils.h" +#include "mlir/IR/Attributes.h" +#include "mlir/IR/Builders.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/BuiltinTypes.h" +#include "mlir/IR/IRMapping.h" +#include "mlir/IR/Operation.h" +#include "mlir/IR/OperationSupport.h" +#include "mlir/IR/SymbolTable.h" +#include "mlir/IR/Types.h" +#include "mlir/IR/Value.h" +#include "mlir/IR/ValueRange.h" +#include "mlir/IR/Visitors.h" +#include "mlir/Pass/Pass.h" +#include "mlir/Support/LLVM.h" +#include "mlir/Support/LogicalResult.h" +#include "mlir/Transforms/DialectConversion.h" +#include "mlir/Transforms/InliningUtils.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/LowerToMLIR.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/TypeSwitch.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "cir-to-aie" + +using namespace std::string_literals; + +namespace { +// Erase a range of Operation* and its users recursively. +// Only consider the use-def chains and not the regions of blocks yet. +template +void eraseOpsAndUsers(OpRange &&opsToErase) { + llvm::SetVector allOpsAndUsers; + llvm::SmallVector newOps{ + std::forward(opsToErase)}; + // While there are some operations to process + while (!newOps.empty()) { + auto *op = newOps.pop_back_val(); + LLVM_DEBUG(op->emitRemark("eraseOpsAndUsers: newOps.pop_back_val()")); + // If the operation has not been visited yet, add it to the set and process + // its users + if (allOpsAndUsers.insert(op)) { + LLVM_DEBUG(op->emitRemark("eraseOpsAndUsers: inserted!")); + for (auto result : op->getResults()) + for (auto *user : result.getUsers()) { + // Add each user to the visit queue + newOps.push_back(user); + LLVM_DEBUG( + user->emitRemark("eraseOpsAndUsers: append to visit queue")); + } + } + } + // To avoid erasing operations with remaining users, topologically sort the + // operations according to their use-def chains and erase them in reverse + // order + for (auto *op : allOpsAndUsers) + LLVM_DEBUG(op->emitRemark("eraseOpsAndUsers: allOpsAndUsers")); + // Does not work here + // auto sorted = mlir::topologicalSort(allOpsAndUsers); + llvm::SmallVector sorted{allOpsAndUsers.begin(), + allOpsAndUsers.end()}; + mlir::computeTopologicalSorting(sorted); + LLVM_DEBUG(for (auto *op + : sorted) + op->emitRemark("eraseOpsAndUsers: topologicalSort")); + for (auto *op : llvm::reverse(sorted)) { + LLVM_DEBUG(op->emitRemark("eraseOpsAndUsers: reverse")); + op->erase(); + } +} + +// Find in program order the first useful non cir.scope operation inside the +// root operation +mlir::Operation *findFirstNonCIRScopeOpInside(mlir::Operation *root) { + mlir::Operation *firsttNonCIRScopeOp = nullptr; + root->walk([&](mlir::Operation *op) { + if (op == root || mlir::isa(op)) + return mlir::WalkResult::advance(); + firsttNonCIRScopeOp = op; + return mlir::WalkResult::interrupt(); + }); + return firsttNonCIRScopeOp; +} +} // namespace + 
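The helper above erases operations together with everything that transitively uses their results, in users-first order, so no dangling uses remain. A small usage sketch of that contract, mirroring how the passes below call it; the walk root and the attribute criterion are hypothetical:

```cpp
// Hedged sketch (not part of the patch): collect operations known to be dead,
// then erase them and all their transitive users, users before producers.
llvm::SmallVector<mlir::Operation *> dead;
rootOp->walk([&](mlir::UnrealizedConversionCastOp cast) {
  if (cast->hasAttr("aie::device")) // hypothetical "this is now dead" criterion
    dead.push_back(cast.getOperation());
});
eraseOpsAndUsers(dead);
```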
+namespace xilinx::AIE::CIR { + +// Analyze all the C++ types used in a module and for aie++ types deconstruct +// them and keep track of the AIE dialect operations used to produce a value of +// its type. If some aie++ type values are produced by some AIE operations, keep +// track of these operations. +class CIRToAIETypesAnalysis { +public: + // llvm::DenseMap> types; + struct AIELikeTypesDeconstruction { + // For example "aie::device" + std::string fullName; + // For example "aie::device" + std::string base; + // For example "npu1" + std::vector subMatches; + // To attach something, like the aie.tile operation for example + std::any data; + // The AIE operation which is generated + std::optional newAIEOperation; + // The new operation producing the result (if any) instead for replacement, + // typically an UnrealizedConversionCastOp fed by the newAIEOperation + std::optional newProducer; + + // Display the content of AIELikeTypesDeconstruction + void dump() { + llvm::outs() << "Fullname = " + fullName + ", base = " + base + + ", subMatches = " + llvm::join(subMatches, ", ") + << '\n'; + if (newAIEOperation) + (*newAIEOperation)->emitRemark("newAIEOperation = "); + if (newProducer) + (*newProducer)->emitRemark("newProducer = "); + } + }; + +private: + // A map from a type to its aie:: deconstruction in the case it is a pointer + // type to a well known aie:: struct + llvm::DenseMap> + moduleTypes; + + // Record whether an aie++ C++ type has been translated into some AIE + // operation producing a value related to that type + llvm::DenseSet isAIELoweredType; + +public: + void analyze() { + // A struct with a name like "aie::device" (and the "npu1" is + // used directly for the MLIR aie.device attribute) or aie::tile_t<8,50> for + // example + static const std::array typeNamePatterns{ + // A struct with a name like "aie::device.0". 
Drop the non-interesting unique-ing lambda + // part of the type + llvm::Regex{"^(aie::device)>" for + // example + llvm::Regex{"^(aie::tile)<([[:digit:]]+), ([[:digit:]]+), .*$"}, + llvm::Regex{"^(aie::buffer)<([^,]+), ([^>]+)>$"}}; + + for (auto &[type, value] : moduleTypes) { + if (auto maybePointerType = mlir::dyn_cast(type)) + if (auto maybeStructType = + mlir::dyn_cast(maybePointerType.getPointee())) + for (auto &tnp : typeNamePatterns) + if (llvm::SmallVector matches; + tnp.match(maybeStructType.getName(), &matches)) { + value = {.fullName = matches[0].str(), .base = matches[1].str()}; + for (auto &e : llvm::ArrayRef(matches.begin() + 2, matches.end())) + value->subMatches.emplace_back(e.str()); + // No need to look for a next match, go for the next type to + // categorize + break; + } + } + } + + // Analysis called from pass getAnalysis() + CIRToAIETypesAnalysis(mlir::ModuleOp module) { + // First register all the types used in the module + module->walk([this](mlir::Operation *op) { + for (auto result : op->getResults()) { + auto type = result.getType(); + moduleTypes.try_emplace(type, std::nullopt); + } + }); + // Deconstruct the aie++ C++ types found + analyze(); + // If some AIE lowering has already be done in a previous pass, map an aie++ + // C++ type to the AIE operation generating such a value + module->walk([this](mlir::UnrealizedConversionCastOp cast) { + LLVM_DEBUG(cast.emitRemark("CIRToAIETypesAnalysis cast")); + // Only a cast with 1 operand can have a potential AIE operation as + // operand + if (cast.getNumOperands() == 1) { + auto type = cast.getType(0); + // If this is an aie++ type + if (auto &detail = getOptionalTypeDetail(type)) { + auto *newOperation = cast.getOperand(0).getDefiningOp(); + auto dialectNamespace = newOperation->getDialect()->getNamespace(); + LLVM_DEBUG(cast.emitRemark("CIRToAIETypesAnalysis cast operand " + "with dialect namespace ") + << dialectNamespace); + // If the operation producing the value is in AIE dialect, the aie++ + // type has already been translated, so record the translation for + // this aie++ type + if (dialectNamespace == "aie") { + LLVM_DEBUG( + newOperation->emitRemark( + "CIRToAIETypesAnalysis adding newAIEOperation"); + cast->emitRemark("CIRToAIETypesAnalysis adding newProducer")); + detail.value().newAIEOperation = newOperation; + detail.value().newProducer = cast; + } + } + } + }); + } + + // Get the deconstructed AIE type details behind the aie++ C++ type + std::optional & + getOptionalTypeDetail(mlir::Type t) { + assert(moduleTypes.contains(t) && "This type should have been seen"); + return moduleTypes[t]; + } + + // Get the deconstructed AIE type details behind the aie++ C++ type + AIELikeTypesDeconstruction &getTypeDetail(mlir::Type t) { + auto &detail = getOptionalTypeDetail(t); + assert(detail && "This type should have an analysis"); + return detail.value(); + } + + // Associate to a given aie++ C++ type the lowered AIE operation operation and + // add the type to the operation as a "cir.type" type attribute for any later + // introspection + void setProducerOp(mlir::Type t, mlir::Operation *op, mlir::OpBuilder &b) { + // Keep the original aie++ type for this AIE operation with a "cir.type" + // attribute + op->setAttr("cir.type", mlir::TypeAttr::get(t)); + auto &detail = getTypeDetail(t); + detail.newAIEOperation = op; + isAIELoweredType.insert(t); + } + + // Associate to a given aie++ C++ type the operation producing the value for + // this type + void setProducerOpWithUCCast(mlir::Type t, mlir::Operation *op, + 
mlir::OpBuilder &b) { + setProducerOp(t, op, b); + auto &detail = getTypeDetail(t); + detail.newAIEOperation = op; + detail.newProducer = b.create( + op->getLoc(), t, mlir::ValueRange{op->getResult(0)}); + } + + // Get the optional operation producing the value for the given aie++ C++ type + auto &getProducerOp(mlir::Type t) { return getTypeDetail(t).newProducer; } + + // Get the set of aie++ C++ types which have been lowered to an AIE operation + // producing a value related to that type + auto &getAIELoweredTypes() { return isAIELoweredType; } + + // Return true if the given type has a matching AIE operation to produce a + // value related to that type + bool isAIELowered(mlir::Type t) { return isAIELoweredType.contains(t); } + + // Visit recursively from a given root operation any operand with an + // AIE-like C++ datatype + template + void visitAIEOperands(mlir::Operation *root, FunctionRef &&callBack) { + root->walk([&](mlir::Operation *op) { + for (auto &operand : op->getOpOperands()) { + auto type = operand.get().getType(); + if (this->isAIELowered(type)) { + LLVM_DEBUG(op->emitRemark("visitAIEOperands") << type); + callBack(operand); + } + } + }); + } + + // Display the analysis content + void dump() { + for (auto &[type, value] : moduleTypes) { + llvm::outs() << "Type: " << type << " value: "; + if (value) + value->dump(); + else + llvm::outs() << "None\n"; + } + } +}; + +namespace { + +// Return true if the call operation calls a function with any of the given +// string annotations +bool isCallingFunctionWithAnnotation( + cir::CallOp op, llvm::ArrayRef anyAnnotations) { + if (auto calledFunc = mlir::SymbolTable::lookupNearestSymbolFrom( + op, op.getCalleeAttr())) { + if (auto annnotations = calledFunc.getAnnotationsAttr()) + for (auto a : calledFunc.getAnnotationsAttr()) { + for (auto one : anyAnnotations) + if (mlir::cast(a).getName() == one) + return true; + } + } + return false; +} + +// Return true if the UnrealizedConversionCast operation has any of the given +// string annotations +bool isUnrealizedConversionCastWithAnnotation( + mlir::UnrealizedConversionCastOp op, + llvm::ArrayRef anyAnnotations) { + for (auto attr : op->getAttrDictionary()) + for (auto needle : anyAnnotations) + if (attr.getName() == needle) + return true; + return false; +} + +// Generate the equivalent memref type of an aie::buffer +mlir::MemRefType bufferMemrefType(mlir::Type buffer, + mlir::DataLayout &dataLayout) { + static mlir::TypeConverter typeConverter = + cir::prepareTypeConverter(dataLayout); + LLVM_DEBUG(buffer.dump()); + if (auto p = mlir::dyn_cast(buffer)) { + if (auto bufferType = mlir::dyn_cast(p.getPointee())) { + LLVM_DEBUG(bufferType.dump()); + auto members = bufferType.getMembers(); + if (auto stdArrayType = + mlir::dyn_cast(members.front())) { + // If the aie::buffer is implemented as a std::array in the buffer struct + LLVM_DEBUG(stdArrayType.dump()); + // Access the array inside the std::array struct + if (auto arrayType = mlir::dyn_cast( + stdArrayType.getMembers().front())) { + LLVM_DEBUG(arrayType.dump()); + auto memref = mlir::dyn_cast( + typeConverter.convertType(arrayType)); + LLVM_DEBUG(memref.dump()); + return memref; + } + } + if (auto arrayType = mlir::dyn_cast(members.front())) { + // If the aie::buffer is implemented as a C array in the buffer struct + LLVM_DEBUG(arrayType.dump()); + auto memref = mlir::dyn_cast( + typeConverter.convertType(arrayType)); + LLVM_DEBUG(memref.dump()); + return memref; + } + } + } + assert(false && "Cannot deconstruct the aie::buffer"); + 
return {}; +} + +// Since an aie.device has its own symbol table, copy recursively all the +// symbols defined at the module level which are referenced by operations inside +// an aie.device into the aie.device. +void cloneReferencedSymbolsIntoDevice(xilinx::AIE::DeviceOp device) { + // Speed-up symbol look-ups by defining some SymbolTable + mlir::SymbolTable deviceSymbolTable{device}; + auto module = device->getParentOfType(); + mlir::SymbolTable moduleSymbolTable{module}; + mlir::OpBuilder builder{device}; + // Look recursively starting from the aie.device itself + std::queue toVisit{{device}}; + while (!toVisit.empty()) { + auto *opToVisit = toVisit.front(); + toVisit.pop(); + opToVisit->walk([&](mlir::Operation *op) { + // Only look at the operations using some symbols + if (auto user = mlir::dyn_cast(op)) { + LLVM_DEBUG(op->emitRemark( + "importCalledFunctionsInSymbolTable: SymbolUserOpInterface!")); + // Look for all the symbol references used by this operation + op->getAttrDictionary().walk([&](mlir::SymbolRefAttr symbolRef) { + LLVM_DEBUG( + op->emitRemark("importCalledFunctionsInSymbolTable: symbolRef = ") + << symbolRef); + if (deviceSymbolTable.lookup(symbolRef.getRootReference())) { + LLVM_DEBUG(llvm::outs() << "In Device!\n"); + // No need to visit it again if it is already in the device + return; + } + // Get the referenced operation from the module symbol table + auto *moduleSymbol = + moduleSymbolTable.lookup(symbolRef.getRootReference()); + assert(moduleSymbol && "The symbol should be found in the module"); + LLVM_DEBUG(llvm::outs() << "In Module!\n"; moduleSymbol->emitRemark( + "importCalledFunctionsInSymbolTable: cloning...")); + // Newly discovered function not already in the device is used by + // existing code and do not refer // TODO: o internal code, so add + // it at the beginning inside the aie.device + builder.setInsertionPointToStart(device.getBody()); + auto *clone = builder.clone(*moduleSymbol); + deviceSymbolTable.insert(clone); + LLVM_DEBUG( + clone->emitRemark("importCalledFunctionsInSymbolTable: clone")); + // Need to handle any missing symbols from the newly created + // operation + toVisit.push(clone); + }); + } + }); + } +} + +// Lower C++ code like \code aie::device into an \code +// aie.device(npu1){} operation +struct PrepareDeviceLowering : public mlir::OpConversionPattern { + using mlir::OpConversionPattern::OpConversionPattern; + + // \todo Find a less ugly way to access the analysis. How is it possible for a + // pattern to access some contextual information? + // It should be OK since it is a module pass, so no parallelism here. 
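The \todo above (how should a conversion pattern reach pass-level context such as this analysis?) has a conventional answer: store a reference in the pattern instance and forward it from RewritePatternSet::add. A sketch of that alternative, not part of this patch, assuming nothing else depends on the static member declared just below:

```cpp
// Sketch only: carry the analysis in each pattern instance instead of a
// static member.
struct PrepareDeviceLoweringAlt
    : public mlir::OpConversionPattern<cir::AllocaOp> {
  PrepareDeviceLoweringAlt(mlir::MLIRContext *context,
                           CIRToAIETypesAnalysis &analysis)
      : mlir::OpConversionPattern<cir::AllocaOp>(context), cat(analysis) {}

  // Per-pattern reference, no global state.
  CIRToAIETypesAnalysis &cat;

  mlir::LogicalResult
  matchAndRewrite(cir::AllocaOp op, OpAdaptor adaptor,
                  mlir::ConversionPatternRewriter &rewriter) const final {
    // ... same body as PrepareDeviceLowering, reading this->cat instead of
    // the static member ...
    return mlir::failure();
  }
};

// In CIRToAIEPrepare::runOnOperation():
//   patterns.add<PrepareDeviceLoweringAlt>(&getContext(), cat);
```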
+ static inline CIRToAIETypesAnalysis *cat; + + mlir::LogicalResult + matchAndRewrite(cir::AllocaOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const final { + // The struct has a name like "aie::device" and the "npu1" + // is used directly for the MLIR aie.device attribute + if (auto aieLike = cat->getOptionalTypeDetail(op.getType()); + aieLike && aieLike->base == "aie::device") { + auto deviceName = aieLike->subMatches[0]; + auto deviceId = + xilinx::AIE::symbolizeEnum(deviceName); + if (!deviceId) + // Actually this test cannot happens since the API of + // xilinx::AIE::symbolizeEnum is strange: even if it returns a + // std::optional it errors without returning + op.emitError() << "aie::device incorrect for '" << deviceName << "'"; + // Replace the alloca of the aie::device by a temporary cast from + // thin air and add a named attribute to the device name to make things + // clearer + rewriter.replaceOpWithNewOp( + op, op.getResult().getType(), mlir::ValueRange{}, + std::array{rewriter.getNamedAttr( + aieLike->base, rewriter.getAttr(deviceName))}); + return mlir::success(); + } + return mlir::failure(); + } +}; + +// clang-format off +// Rewrite something like +// %2 = cir.alloca !ty_aie3A3Atile3C12C_43E, !cir.ptr, ["t", init] {alignment = 1 : i64} loc(#loc102) +// %4 = cir.call @_ZN3aie6deviceILNS_3$_0E42EE4tileILi1ELi4EEENS_4tileIXT_EXT0_EEEv(%1) : (!cir.ptr) -> !ty_aie3A3Atile3C12C_43E loc(#loc70) +// cir.store %4, %2 : !ty_aie3A3Atile3C12C_43E, !cir.ptr loc(#loc70) +// +// Into +// +// %2 = builtin.unrealized_conversion_cast %1 : !cir.ptr to !cir.ptr {"aie::tile" = ["1", "4"]} +// clang-format on +struct PrepareTileBufferLowering + : public mlir::OpConversionPattern { + using mlir::OpConversionPattern::OpConversionPattern; + + // \todo Find a less ugly way to access the analysis. How is it possible for a + // pattern to access some contextual information? + // It should be OK since it is a module pass, so no parallelism here. + static inline CIRToAIETypesAnalysis *cat; + + mlir::LogicalResult + matchAndRewrite(cir::CallOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const final { + if (isCallingFunctionWithAnnotation( + op, {"aie.device.tile", "aie.tile.buffer"})) { + auto device = op.getOperand(0); + auto user = op.getResult().getUsers().begin(); + // Track the alloca where the tiled is stored + auto store = mlir::dyn_cast(*user); + auto alloca = + mlir::dyn_cast(store.getOperand(1).getDefiningOp()); + auto aieLike = cat->getTypeDetail(alloca.getResult().getType()); + // Replace the alloca by a conversion to be replaced later in + // another pass. 
+ // Keep analyzed type information as named attribute to make things + // clearer + llvm::SmallVector attrs; + for (auto e : aieLike.subMatches) + attrs.emplace_back(rewriter.getAttr(e)); + rewriter.replaceOpWithNewOp( + alloca, alloca.getResult().getType(), device, + std::array{rewriter.getNamedAttr(aieLike.base, + rewriter.getArrayAttr(attrs))}); + // Remove the now useless original operations + rewriter.eraseOp(store); + rewriter.eraseOp(op); + return mlir::success(); + } + return mlir::failure(); + } +}; + +/* + Replace the call to + + cir.func internal private + @_ZN3aie6tile_tILi1ELi4EE7programIZ4mainE3$_0EEvOT_(%arg0: + !cir.ptr, %arg1: !cir.ptr) + [#cir.annotation] extra(#fn_attr) + + which ends up calling the lambda + + cir.call @_ZZ4mainENK3$_0clEv(%5) : (!cir.ptr) -> () + + by just inlining the lambda body into the aie.core operation and replacing the + capture by the direct def/use forwarding + +*/ +struct PrepareCoreLowering : public mlir::OpConversionPattern { + using mlir::OpConversionPattern::OpConversionPattern; + + mlir::LogicalResult + matchAndRewrite(cir::CallOp op, OpAdaptor adaptor, + mlir::ConversionPatternRewriter &rewriter) const final { + if (isCallingFunctionWithAnnotation(op, {"aie.tile.program"})) { + // Get tile::program() member function + if (auto calledFunc = + mlir::SymbolTable::lookupNearestSymbolFrom( + op, op.getCalleeAttr())) { + // The last function instruction is cir.return and the one before + // is the call to the lambda + // calledFunc.getBlocks().front().back().dump(); + auto lambdaCall = mlir::dyn_cast( + *std::next(calledFunc.getBlocks().front().rbegin())); + // lambdaCall.dump(); + if (auto lambdaFunc = + mlir::SymbolTable::lookupNearestSymbolFrom( + lambdaCall, lambdaCall.getCalleeAttr())) { + // lambdaFunc.dump(); + assert(lambdaFunc.getLambda()); + // auto scopeOp = op->getParentOfType(); + // scopeOp.dump(); + // The aie++ tile value + rewriter.setInsertionPoint(op); + rewriter.eraseOp(op); + // rewriter.insert(coreOp); + // coreOp.dump(); + + // auto bs = lambdaFunc.getBlocks().begin(); + // rewriter.inlineBlockBefore(Block *source, Block *dest, + // Block::iterator before) + return mlir::success(); + } + } + } + + return mlir::failure(); + } +}; + +struct CIRToAIEPrepare : CIRToAIEPrepareBase { + void runOnOperation() override { + // Compute the analysis for the module since it is a module pass. + // \todo Should this be a real pass? + auto &cat = getAnalysis(); + // \todo Clean up this mess + PrepareDeviceLowering::cat = &cat; + PrepareTileBufferLowering::cat = &cat; + // See mlir/examples/toy/Ch5/mlir/LowerToAffineLoops.cpp + mlir::ConversionTarget target{getContext()}; + target.addLegalDialect(); + target.addLegalOp(); + target.addDynamicallyLegalOp([&](cir::AllocaOp op) { + // If the struct has a name like "aie::device", mark + // the operation illegal so it has to be rewritten + auto aieLike = cat.getOptionalTypeDetail(op.getType()); + return !(aieLike && aieLike->base == "aie::device"); + }); + target.addDynamicallyLegalOp([](cir::CallOp op) { + return !isCallingFunctionWithAnnotation( + op, {"aie.device.tile", "aie.tile.buffer"}); + }); + mlir::RewritePatternSet patterns{&getContext()}; + patterns.add(&getContext()); + patterns.add(&getContext()); + // patterns.add(&getContext()); + if (failed(applyPartialConversion(getOperation(), target, + std::move(patterns)))) + signalPassFailure(); + } +}; + +struct CIRToAIE : CIRToAIEBase { + // \todo Find a less ugly way to access the analysis. 
How is it possible for a + // pattern to access some contextual information? + // It should be OK since it is a module pass, so no parallelism here. + static inline CIRToAIETypesAnalysis *cat; + // Used to lower some datatypes like unions. + mlir::DataLayout dataLayout; + + // Try to lower the operation as an aie.buffer and return true on success + // + // clang-format off + // %3 = builtin.unrealized_conversion_cast %2 : !cir.ptr to !cir.ptr {"aie::buffer" = ["int", "8192UL"]} + // is lowered to + // %buffer_1_4 = aie.buffer(%tile_1_4) : memref<8192xi32> + // %8 = builtin.unrealized_conversion_cast %buffer_1_4 : memref<8192xi32> to !cir.ptr + // clang-format on + bool tryBufferLowering(mlir::Operation *op, mlir::OpBuilder &b) { + if (auto bufCast = mlir::dyn_cast(op)) { + if (auto bufferDetail = cat->getTypeDetail(bufCast.getType(0)); + bufferDetail.base == "aie::buffer") { + LLVM_DEBUG(bufCast.emitRemark("Buffer cast from tile")); + auto mrt = bufferMemrefType(bufCast.getType(0), dataLayout); + // \todo outline + auto tileDetail = cat->getTypeDetail(bufCast.getOperand(0).getType()); + auto tileOp = + mlir::dyn_cast(*tileDetail.newAIEOperation); + if (!tileOp) + bufCast->emitError("No aie.device operation found for this tile"); + // Insert at the end of the aie.device to keep C++ program order + auto deviceOp = tileOp->getParentOfType(); + b.setInsertionPoint(deviceOp.getBody()->getTerminator()); + // The direct connection to tileOp is a peephole optimization but it + // could be connected to the new tileOp UnrealizedConversionCastOp which + // could be removed later by a cleaning phase + auto bufferOp = b.create(bufCast.getLoc(), mrt, + tileOp.getResult()); + // Keep track of the buffer op behind the C++ type + cat->setProducerOpWithUCCast(bufCast.getType(0), bufferOp, b); + // Do not remap the old buffer users to the new one for now because the + // new buffer is created inside the aie.device and the old users would + // not be able to access it. Rely on the tile::program lowering for this + // later. + + // The lowering is a success, no need to look further. + return true; + } + } + // Notify to try something else + return false; + } + + // During operation cloning, mlir::IRMapping is used to remap some leaf input + // operands but cannot remap some internal ones. In some case, ClangIR lower + // some lambda captures with aie::tile or with aie::device (). Having the + // aie::device is problematic since it is remapped to the aie.device output + // which leads to 2 issues for the verifyer: + // + // - this use is inside the device operation itself + // + // - the aie.device region is isolated from above. + // + // Since this aie::device is used only by an aie::tile, just remove the + // aie::device part. 
+ void resolveSomeDeviceToTileAfterCloning(mlir::Operation *clone) { + llvm::SmallVector oldCastsFromDevice; + cat->visitAIEOperands(clone, [&](mlir::OpOperand &operand) { + if (cat->getTypeDetail(operand.get().getType()).base == "aie::device") { + auto cast = mlir::dyn_cast( + operand.getOwner()); + assert(cast && "There should be only an UnrealizedConversionCastOp " + "using the aie::device"); + // Connect directly the aie::tile user to the one produced by the + // matching aie.tile + cast.replaceAllUsesWith(cat->getProducerOp(cast.getType(0)).value()); + oldCastsFromDevice.push_back(cast); + } + }); + // Remove the problematic operations + eraseOpsAndUsers(oldCastsFromDevice); + } + + // Lower aie::tile::program() to aie.core + bool tryTileProgramLowering(mlir::Operation *op, mlir::OpBuilder &b) { + if (auto callOp = mlir::dyn_cast(op)) { + LLVM_DEBUG( + callOp.emitRemark("tryTileProgramLowering: CallOp using a tile")); + if (isCallingFunctionWithAnnotation(callOp, {"aie.tile.program"})) { + LLVM_DEBUG( + callOp.emitRemark("tryTileProgramLowering: CallOp using a tile")); + if (auto calledFunc = + mlir::SymbolTable::lookupNearestSymbolFrom( + callOp, callOp.getCalleeAttr())) { + // The last function instruction is cir.return and the one before + // is the call to the lambda + if (auto lambdaCall = mlir::dyn_cast( + *std::next(calledFunc.getBlocks().front().rbegin()))) { + LLVM_DEBUG(lambdaCall.emitRemark("lambdaCall")); + if (auto lambdaFunc = + mlir::SymbolTable::lookupNearestSymbolFrom( + lambdaCall, lambdaCall.getCalleeAttr())) { + LLVM_DEBUG(lambdaFunc.emitRemark( + "tryTileProgramLowering: Tile core lambda")); + assert(lambdaFunc.getLambda()); + auto scopeOp = callOp->getParentOfType(); + LLVM_DEBUG(scopeOp.emitRemark("tryTileProgramLowering: Scope")); + // \todo outline + auto tileDetail = + cat->getTypeDetail(callOp.getOperand(0).getType()); + auto tileOp = mlir::dyn_cast( + *tileDetail.newAIEOperation); + if (!tileOp) + LLVM_DEBUG(callOp->emitError( + "No aie.device operation found for this tile")); + // Create the aie.core before the aie.end of the aie.device body + // to keep the C++ order + auto deviceOp = tileOp->getParentOfType(); + b.setInsertionPoint(deviceOp.getBody()->getTerminator()); + + auto coreOp = + b.create(callOp.getLoc(), tileOp); + // Create the empty block of the aie.core op region + coreOp.getRegion().emplaceBlock(); + // The aie.core requires a terminator + b.setInsertionPointToEnd(&coreOp.getRegion().front()); + // Add right away an aie.end to have the verifyers happy even if + // it makes the following more complicated + b.create(callOp.getLoc()); + LLVM_DEBUG( + coreOp.emitRemark("tryTileProgramLowering: Brand-new core")); + // Get the cast connecting the aie.tile to the lambda call + auto tileCastOp = + callOp.getArgOperand(0) + .getDefiningOp(); + LLVM_DEBUG( + tileCastOp.emitRemark("tryTileProgramLowering: tileCastOp as " + "callOp first argument")); + // Values can be replaced while cloning, not operations + mlir::IRMapping irm; + // Compute the remapping to be done while cloning from the old + // operands to the new one produced by the lowered AIE operations + cat->visitAIEOperands(scopeOp, [&](auto &operand) { + // Remap only if there is interesting result. 
Skip aie.device + // for example + if (auto producer = cat->getProducerOp(operand.get().getType())) + irm.map(operand.get(), producer.value()->getResult(0)); + }); + b.setInsertionPointToStart(&coreOp.getRegion().front()); + auto *clone = b.clone(*scopeOp.getOperation(), irm); + // Since aie.device has a SymbolTable, all the called functions + // need to be present in the aie.device + cloneReferencedSymbolsIntoDevice( + clone->getParentOfType()); + LLVM_DEBUG(clone->emitRemark("tryTileProgramLowering: Clone")); + resolveSomeDeviceToTileAfterCloning(clone); + LLVM_DEBUG( + coreOp.emitRemark("tryTileProgramLowering: Stuffed core"); + coreOp->getParentOfType().emitRemark( + "tryTileProgramLowering: Top function")); + } + } + // The bufCast should be removed as a dependent of tile cast + // later. The lowering is a success, no need to look further. + return true; + } + } + } + // Notify to try something else + return false; + } + + // Try to lower the operation as an aie.tile and return true on success + // + // clang-format off + // %2 = builtin.unrealized_conversion_cast %1 : !cir.ptr to !cir.ptr {"aie::tile" = ["1", "4"]} + // is lowered to + // %tile_1_4 = aie.tile(1, 4) + // %7 = builtin.unrealized_conversion_cast %tile_1_4 : index to !cir.ptr + // clang-format on + bool tryTileLowering(mlir::Operation *op, mlir::OpBuilder &b) { + if (auto tileCast = mlir::dyn_cast(op)) { + auto tileCastOutputType = tileCast.getType(0); + if (auto detail = cat->getTypeDetail(tileCastOutputType); + detail.base == "aie::tile") { + auto col = detail.subMatches[0]; + auto row = detail.subMatches[1]; + LLVM_DEBUG(tileCast.emitRemark("tryTileLowering: tileCast from device") + << ", col = " << col << ", row = " << row); + auto deviceDetail = + cat->getTypeDetail(tileCast.getOperand(0).getType()); + auto deviceOp = mlir::dyn_cast( + *deviceDetail.newAIEOperation); + if (!deviceOp) + tileCast->emitError("No aie.device operation found for this tile"); + // Create all the following code inside the device region. Add the tile + // to the end to keep C++ program order. + b.setInsertionPoint(deviceOp.getBody()->getTerminator()); + auto tileOp = b.create( + tileCast.getLoc(), std::stoi(col), std::stoi(row)); + cat->setProducerOpWithUCCast(tileCastOutputType, tileOp, b); + // Do not remap the old tile users to the new one for now because the + // new tile is created inside the aie.device and the old users would not + // be able to access it. Rely on the tile::program lowering for this + // later. 
+ + // Tile lowering is done + return true; + } + } + // Try some other lowering + return false; + } + + // Try to lower the operation as an aie.device and return true on success + // + // clang-format off + // %1 = builtin.unrealized_conversion_cast to !cir.ptr {"aie::device" = "npu1"} + // is lowered to + // %1 = aie.device(npu1) { + // } + // %2 = builtin.unrealized_conversion_cast %1 : index to !cir.ptr + // clang-format on + bool tryDeviceLowering(mlir::Operation *op, mlir::OpBuilder &b) { + if (auto u = mlir::dyn_cast(op)) { + if (!isUnrealizedConversionCastWithAnnotation(u, {"aie::device"})) + // Try some other lowering + return false; + LLVM_DEBUG(u.emitRemark( + "DeviceLowering found UnrealizedConversionCastOp inside the module")); + auto aieLike = cat->getTypeDetail(u.getType(0)); + auto deviceName = aieLike.subMatches[0]; + auto deviceId = + xilinx::AIE::symbolizeEnum(deviceName); + if (!deviceId) + // Actually this test cannot happens since the API of + // xilinx::AIE::symbolizeEnum is strange: even if it returns a + // std::optional it errors without returning + u.emitError("aie::device incorrect for '") << deviceName << "'"; + // Create an aie.device just before its equivalent + // UnrealizedConversionCast. Since we visit in pre-order mode, this + // should be fine. + b.setInsertionPoint(u); + auto deviceOp = b.create(u.getLoc(), *deviceId); + // The aie.device requires one block and a terminator + b.setInsertionPointToEnd(&deviceOp.getRegion().emplaceBlock()); + b.create(u.getLoc()); + // Keep for now the UnrealizedConversionCastOp for the aie.device since + // aie.device do not returns value + cat->setProducerOp(u.getType(0), deviceOp, b); + // Note: aie.device does not require a terminator + LLVM_DEBUG(deviceOp.emitRemark("DeviceLowering: end")); + return true; + } + return false; + } + + void runOnOperation() override { + // Compute the analysis for the module since it is a module pass. + cat = &getAnalysis(); + auto module = getOperation(); + mlir::DataLayout dataLayout{module}; + mlir::OpBuilder b{module}; + // Use pre-order walk to keep the C++ ordered semantics while lowering the + // AIE constructs + module->walk([&](mlir::Operation *op) { + tryBufferLowering(op, b) || tryDeviceLowering(op, b) || + tryTileLowering(op, b) || tryTileProgramLowering(op, b); + }); + } +}; + +// Inline the kernel lambda and its calling functions found in an aie.core +// operation +struct CIRToAIEInlineKernelLambda + : CIRToAIEInlineKernelLambdaBase { + // \todo Find a less ugly way to access the analysis. How is it possible for a + // pattern to access some contextual information? + // It should be OK since it is a module pass, so no parallelism here. 
+ static inline CIRToAIETypesAnalysis *cat; + + static void inlineAndEraseCall(cir::CallOp call, cir::FuncOp calledFunc) { + LLVM_DEBUG(auto *entryBlock = &calledFunc.getCallableRegion()->front(); + calledFunc.emitRemark("CIRToAIEInlineKernelLambda calledFunc") + << "call.getNumOperands()" << call.getNumOperands() + << "entryBlock->getNumArguments()" + << entryBlock->getNumArguments() << "call.getNumResults()" + << call.getNumResults() << "calledFunc.getResultTypes().size()" + << calledFunc.getResultTypes().size() + << "calledFunc.getNumResults()" << calledFunc.getNumResults() + << "calledFunc.getCallableResults().size()" + << calledFunc.getCallableResults().size()); + mlir::InlinerInterface interface{call.getContext()}; + if (mlir::inlineCall(interface, call, calledFunc, + calledFunc.getCallableRegion()) + .failed()) + call.emitError("CIRToAIEInlineKernelLambdaBase not able to " + "inline the lambda call"); + // Only erase the call since the callee could be used in another + // context. Rely later on --symbol-dce pass. + call.erase(); + } + + void runOnOperation() override { + // Compute the analysis for the module since it is a module pass. + cat = &getAnalysis(); + auto module = getOperation(); + mlir::OpBuilder b{module}; + // Use pre-order walk to keep the C++ ordered semantics while lowering the + // AIE constructs + module->walk([&](xilinx::AIE::CoreOp core) { + LLVM_DEBUG(core.emitRemark("CIRToAIEInlineKernelLambda aie.core")); + if (auto scope = + mlir::dyn_cast(core.getBody().front().front())) { + LLVM_DEBUG(scope.emitRemark("CIRToAIEInlineKernelLambda cir.scope")); + if (auto call = mlir::dyn_cast( + *std::next(scope.getScopeRegion().front().rbegin()))) { + LLVM_DEBUG(call.emitRemark("CIRToAIEInlineKernelLambda call")); + if (auto calledFunc = + mlir::SymbolTable::lookupNearestSymbolFrom( + call, call.getCalleeAttr())) { + // The last function instruction is cir.return and the one before + // is the call to the lambda + if (auto lambdaCall = mlir::dyn_cast( + *std::next(calledFunc.getBlocks().front().rbegin()))) { + LLVM_DEBUG(lambdaCall.emitRemark("lambdaCall")); + if (auto lambdaFunc = + mlir::SymbolTable::lookupNearestSymbolFrom( + lambdaCall, lambdaCall.getCalleeAttr())) { + LLVM_DEBUG(lambdaFunc.emitRemark( + "CIRToAIEInlineKernelLambda: Tile core lambda")); + if (lambdaFunc.getLambda()) { + inlineAndEraseCall(call, calledFunc); + LLVM_DEBUG(core.emitRemark( + "CIRToAIEInlineKernelLambda: core after first inlining")); + if (auto finalCall = mlir::dyn_cast(*std::next( + scope.getScopeRegion().front().rbegin()))) { + inlineAndEraseCall( + finalCall, + mlir::SymbolTable::lookupNearestSymbolFrom( + finalCall, finalCall.getCalleeAttr())); + } + } + } + } + } + } + } + // No need to dive further this aie.core operation since they cannot be + // nested + return mlir::WalkResult::skip(); + }); + } +}; + +struct CIRToAIEDecaptureKernel + : CIRToAIEDecaptureKernelBase { + void runOnOperation() override { + auto module = getOperation(); + mlir::OpBuilder b{module}; + // Use pre-order walk for early exit + module->walk([&](xilinx::AIE::CoreOp core) { + LLVM_DEBUG(core.emitRemark("CIRToAIEDecaptureKernel aie.core")); + if (auto alloca = mlir::dyn_cast_if_present( + findFirstNonCIRScopeOpInside(core))) { + LLVM_DEBUG(alloca.emitRemark("CIRToAIEInlineKernelLambda: alloca")); + // Track the value loaded from or stored into each capture member + llvm::DenseMap loads, stores; + for (auto *u : alloca.getResult().getUsers()) + if (auto gm = mlir::dyn_cast(u)) { + auto memberName = 
gm.getName(); + llvm::TypeSwitch( + *gm.getResult().getUsers().begin()) + .Case( + [&](cir::StoreOp s) { stores[memberName] = s.getValue(); }) + .Case([&](cir::LoadOp l) { loads[memberName] = l.getResult(); }) + .Default([&](auto op) { + op->emitError( + "CIRToAIEInlineKernelLambda unknown user for member ") + << memberName; + }); + } else + u->emitError("CIRToAIEInlineKernelLambda unknown use for alloca"); + // Connect directly all the capture read users to the capture stored + // values + for (auto &&[memberName, value] : loads) + value.replaceAllUsesWith(stores[memberName]); + // Remove all the lambda capture leftover + eraseOpsAndUsers(alloca); + } + // No need to dive further this aie.core operation since they cannot be + // nested + return mlir::WalkResult::skip(); + }); + } +}; + +struct CIRKeepAIEDevice : CIRKeepAIEDeviceBase { + void runOnOperation() override { + auto module = getOperation(); + mlir::OpBuilder b{module}; + llvm::SmallVector opToDelete; + xilinx::AIE::DeviceOp d; + // Use pre-order walk for early exit to pick the first aie.device for now + module->walk([&](mlir::Operation *op) { + if (!mlir::isa(op)) + return mlir::WalkResult::advance(); + d = mlir::cast(op); + return mlir::WalkResult::interrupt(); + }); + // Extract the aie.device from its function up to the top first + // operation of the module + if (d) + d->moveBefore(&module.front()); + // Erase all the top-module operations but the aie.device + for (auto &op : module) { + if (auto dev = mlir::dyn_cast(op)) + if (dev == d) + continue; + opToDelete.push_back(&op); + } + eraseOpsAndUsers(opToDelete); + } +}; + +} // namespace + +std::unique_ptr> +createCIRToAIEPreparePass() { + return std::make_unique(); +} + +std::unique_ptr> createCIRToAIEPass() { + return std::make_unique(); +} + +std::unique_ptr> +createCIRToAIEInlineKernelLambdaPass() { + return std::make_unique(); +} + +std::unique_ptr> +createCIRToAIEDecaptureKernelPass() { + return std::make_unique(); +} + +std::unique_ptr> createCIRKeepAIEDevice() { + return std::make_unique(); +} + +} // namespace xilinx::AIE::CIR diff --git a/lib/CIR/CMakeLists.txt b/lib/CIR/CMakeLists.txt new file mode 100644 index 0000000000..9a01c6f3a8 --- /dev/null +++ b/lib/CIR/CMakeLists.txt @@ -0,0 +1,23 @@ +# +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# +# (c) Copyright 2024 Advanced Micro Devices, Inc. + +add_mlir_dialect_library( + CIRToAIEPasses + CIRToAIEPasses.cpp + ADDITIONAL_HEADER_DIRS + ${AIE_BINARY_DIR}/include + + DEPENDS + MLIRCIRToAIEPassesIncGen + + LINK_LIBS PUBLIC + clangCIR + MLIRIR + MLIRPass + MLIRSupport + MLIRTransformUtils + MLIRFuncDialect) diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index de63a7bbc8..94b5c3339c 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -6,8 +6,11 @@ # (c) Copyright 2021 Xilinx Inc. 
-add_subdirectory(Targets) -add_subdirectory(Dialect) add_subdirectory(CAPI) +if (CLANGIR_MLIR_FRONTEND) + add_subdirectory(CIR) +endif() +add_subdirectory(Dialect) add_subdirectory(Conversion) add_subdirectory(Target) +add_subdirectory(Targets) diff --git a/python/compiler/aiecc/main.py b/python/compiler/aiecc/main.py index e27d6b9636..142313aab2 100644 --- a/python/compiler/aiecc/main.py +++ b/python/compiler/aiecc/main.py @@ -211,7 +211,8 @@ def emit_design_kernel_json( def emit_partition(mlir_module_str, kernel_id="0x901", start_columns=None): - with Context(), Location.unknown(): + with Context() as ctx, Location.unknown(): + ctx.allow_unregistered_dialects = True module = Module.parse(mlir_module_str) device = find_ops( module.operation, @@ -276,7 +277,8 @@ def emit_partition(mlir_module_str, kernel_id="0x901", start_columns=None): def generate_cores_list(mlir_module_str): - with Context(), Location.unknown(): + with Context() as ctx, Location.unknown(): + ctx.allow_unregistered_dialects = True module = Module.parse(mlir_module_str) return [ ( @@ -336,6 +338,7 @@ def run_passes(pass_pipeline, mlir_module_str, outputfile=None, verbose=False): if verbose: print("Running:", pass_pipeline) with Context() as ctx, Location.unknown(): + ctx.allow_unregistered_dialects = True module = Module.parse(mlir_module_str) pm = PassManager.parse(pass_pipeline) try: @@ -568,7 +571,7 @@ async def process_core( async def process_cdo(self): from aie.dialects.aie import generate_cdo - with Context(), Location.unknown(): + with Context() as ctx, Location.unknown(): for elf in glob.glob("*.elf"): try: shutil.copy(elf, self.tmpdirname) @@ -579,6 +582,7 @@ async def process_cdo(self): shutil.copy(elf_map, self.tmpdirname) except shutil.SameFileError: pass + ctx.allow_unregistered_dialects = True input_physical = Module.parse( await read_file_async(self.prepend_tmp("input_physical.mlir")) ) @@ -1276,6 +1280,10 @@ def main(): try: with Context() as ctx, Location.unknown(): + # To avoid + # Unable to parse module assembly: + # error: "-":1:101: #"cir"<"lang"> : 'none' attribute created with unregistered dialect. If this is intended, please call allowUnregisteredDialects() on the MLIRContext, or use -allow-unregistered-dialect with the MLIR opt tool used + ctx.allow_unregistered_dialects = True with open(opts.filename, "r") as f: module = Module.parse(f.read()) module_str = str(module) diff --git a/test/CIR/aie++/channel.cpp b/test/CIR/aie++/channel.cpp new file mode 100644 index 0000000000..bc647e054f --- /dev/null +++ b/test/CIR/aie++/channel.cpp @@ -0,0 +1,35 @@ +/* + Iron MICRO24 slide 34. + https://github.com/Xilinx/mlir-aie/blob/main/mlir_tutorials +*/ + +#include "aie++.hpp" +#include + +void consume(std::int32_t in[256]) { + // ... +} + +void produce(std::int32_t out[][256]) { + // ... 
+} + +int main() { + aie::device d; + auto a = d.tile<1, 3>(); + auto b = d.tile<2, 3>(); + auto of = a.channel_to(b, 3); + a.program([&] { + for (int i = 0; i < 3; ++i) { + auto acc = of.out_acquire_release(1); + produce(acc); + } + }); + b.program([&] { + auto acc = of.in_acquire_release(3); + consume(acc[0]); + consume(acc[1]); + consume(acc[2]); + }); + d.run(); +} diff --git a/test/CIR/aie++/empty.cpp b/test/CIR/aie++/empty.cpp new file mode 100644 index 0000000000..ba41dcb9f8 --- /dev/null +++ b/test/CIR/aie++/empty.cpp @@ -0,0 +1,22 @@ +/* A tile with an empty program + + Show that declaration of multiple devices of the same kind are actually + different types, allowing 2 devices in the same platform distinguished in a + type-safe way +*/ + +#include "aie++.hpp" + +int main() { + aie::device d; + d.tile<2, 3>().program([] {}); + d.run(); + // Check we can get another type for another accelerator of the same kind + aie::device other_device_unused; + auto unused_tile = other_device_unused.tile<2, 3>(); + + // Check the type safety + static_assert(!std::is_same_v); + static_assert( + !std::is_same_v()), decltype(unused_tile)>); +} diff --git a/test/CIR/aie++/example.cpp b/test/CIR/aie++/example.cpp new file mode 100644 index 0000000000..0002b2f7d2 --- /dev/null +++ b/test/CIR/aie++/example.cpp @@ -0,0 +1,33 @@ +/* +https://github.com/Xilinx/mlir-aie/blob/main/mlir_tutorials/tutorial-1/answers/aie_q5.mlir +module @tutorial_1 { + // Declare tile object of the AIE class located at position col 1, row 4 + %tile14 = aie.tile(1, 4) + + // Declare buffer for tile(1, 4) with symbolic name "a14" and + // size 256 deep x int32 wide. By default, the address of + // this buffer begins after the stack (1024 Bytes offset) and + // all subsequent buffers are allocated one after another in memory. 
+ %buf = aie.buffer(%tile14) { sym_name = "a14" } : memref<8192xi32> + + // Define the algorithm for the core of tile(1, 4) + // buf[3] = 14 + %core14 = aie.core(%tile14) { + %val = arith.constant 14 : i32 // declare a constant (int32) + %idx = arith.constant 3 : index // declare a constant (index) + memref.store %val, %buf[%idx] : memref<8192xi32> // store val in buf[3] + aie.end + } +} +*/ + +#include "aie++.hpp" + +int main() { + aie::device d; + auto t = d.tile<1, 4>(); + auto b = t.buffer(); + t.program([&] { b[3] = 14; }); + d.tile<2, 3>().program([] {}); + d.run(); +} diff --git a/test/CIR/lowering/example.cir b/test/CIR/lowering/example.cir new file mode 100644 index 0000000000..0df6b7991e --- /dev/null +++ b/test/CIR/lowering/example.cir @@ -0,0 +1,230 @@ +!s32i = !cir.int +!u64i = !cir.int +!u8i = !cir.int +#fn_attr = #cir, optnone = #cir.optnone})> +#fn_attr1 = #cir, nothrow = #cir.nothrow, optnone = #cir.optnone})> +#fn_attr2 = #cir +#loc3 = loc("./aie++.hpp":69:16) +#loc9 = loc("./aie++.hpp":37:61) +#loc19 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":206:7) +#loc20 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":206:18) +#loc21 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":206:28) +#loc29 = loc("example.cpp":30:13) +#loc36 = loc("./aie++.hpp":45:8) +#loc37 = loc("./aie++.hpp":45:16) +#loc38 = loc("./aie++.hpp":45:23) +#loc45 = loc("example.cpp":31:26) +#loc49 = loc("./aie++.hpp":74:18) +!ty_aie3A3Adevice3Caie3A3Anpu13E = !cir.struct" {!cir.int}> +!ty_aie3A3Atile_t3C12C_43E = !cir.struct" {!cir.int}> +!ty_aie3A3Atile_t3C22C_33E = !cir.struct" {!cir.int}> +!ty_anon2E1_ = !cir.struct}> +#loc77 = loc(fused[#loc20, #loc21]) +#loc82 = loc(fused[#loc37, #loc38]) +!ty_std3A3Aarray3Cint2C_8192UL3E = !cir.struct" {!cir.array x 8192>}> +!ty_anon2E0_ = !cir.struct" {!cir.array x 8192>}>>}> +module @"/home/rkeryell/Xilinx/Projects/AIE/ACDC/air++/example.cpp" attributes {cir.global_annotations = #cir], ["_ZN3aie6tile_tILi1ELi4EE6bufferIiLm8192EEESt5arrayIT_XT0_EEv", #cir.annotation], ["_ZN3aie6tile_tILi1ELi4EE7programIZ4mainE3$_0EEvOT_", #cir.annotation], ["_ZN3aie6deviceILNS_3$_0E42EE4tileILi2ELi3EEENS_6tile_tIXT_EXT0_EEEv", #cir.annotation], ["_ZN3aie6tile_tILi2ELi3EE7programIZ4mainE3$_1EEvOT_", #cir.annotation]]>, cir.lang = #cir.lang, cir.sob = #cir.signed_overflow_behavior, cir.triple = "x86_64-unknown-linux-gnu", dlti.dl_spec = #dlti.dl_spec<#dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry, dense<32> : vector<4xi64>>, #dlti.dl_entry, dense<64> : vector<4xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<2xi64>>, #dlti.dl_entry : vector<4xi64>>, #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.stack_alignment", 128 : i64>>, llvm.data_layout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"} { + cir.func linkonce_odr @_ZN3aie6deviceILNS_3$_0E42EE4tileILi1ELi4EEENS_6tile_tIXT_EXT0_EEEv(%arg0: !cir.ptr loc("./aie++.hpp":69:16)) -> !ty_aie3A3Atile_t3C12C_43E [#cir.annotation] extra(#fn_attr) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} loc(#loc3) + %1 = cir.alloca !ty_aie3A3Atile_t3C12C_43E, !cir.ptr, ["__retval"] {alignment = 1 : i64} 
loc(#loc2) + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> loc(#loc4) + %2 = cir.load %0 : !cir.ptr>, !cir.ptr loc(#loc3) + %3 = cir.load %1 : !cir.ptr, !ty_aie3A3Atile_t3C12C_43E loc(#loc71) + cir.return %3 : !ty_aie3A3Atile_t3C12C_43E loc(#loc71) + } loc(#loc70) + cir.func linkonce_odr @_ZN3aie6tile_tILi1ELi4EE6bufferIiLm8192EEESt5arrayIT_XT0_EEv(%arg0: !cir.ptr loc("./aie++.hpp":37:61)) -> !ty_std3A3Aarray3Cint2C_8192UL3E [#cir.annotation] extra(#fn_attr) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} loc(#loc9) + %1 = cir.alloca !ty_std3A3Aarray3Cint2C_8192UL3E, !cir.ptr, ["__retval"] {alignment = 4 : i64} loc(#loc8) + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> loc(#loc10) + %2 = cir.load %0 : !cir.ptr>, !cir.ptr loc(#loc9) + %3 = cir.const #cir.zero : !ty_std3A3Aarray3Cint2C_8192UL3E loc(#loc73) + cir.store %3, %1 : !ty_std3A3Aarray3Cint2C_8192UL3E, !cir.ptr loc(#loc73) + %4 = cir.get_member %1[0] {name = "_M_elems"} : !cir.ptr -> !cir.ptr> loc(#loc13) + %5 = cir.load %1 : !cir.ptr, !ty_std3A3Aarray3Cint2C_8192UL3E loc(#loc74) + cir.return %5 : !ty_std3A3Aarray3Cint2C_8192UL3E loc(#loc74) + } loc(#loc72) + cir.global "private" internal dsolocal @_ZN3aie6tile_tILi1ELi4EE9tile_codeIPZ4mainE3$_0EE = #cir.ptr : !cir.ptr {alignment = 8 : i64} loc(#loc75) + cir.func linkonce_odr @_ZNSt5arrayIiLm8192EEixEm(%arg0: !cir.ptr loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":206:7), %arg1: !u64i loc(fused[#loc20, #loc21])) -> !cir.ptr extra(#fn_attr1) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} loc(#loc19) + %1 = cir.alloca !u64i, !cir.ptr, ["__n", init] {alignment = 8 : i64} loc(#loc77) + %2 = cir.alloca !cir.ptr, !cir.ptr>, ["__retval"] {alignment = 8 : i64} loc(#loc18) + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> loc(#loc22) + cir.store %arg1, %1 : !u64i, !cir.ptr loc(#loc22) + %3 = cir.load %0 : !cir.ptr>, !cir.ptr loc(#loc19) + %4 = cir.get_member %3[0] {name = "_M_elems"} : !cir.ptr -> !cir.ptr> loc(#loc13) + %5 = cir.load %1 : !cir.ptr, !u64i loc(#loc23) + %6 = cir.cast(array_to_ptrdecay, %4 : !cir.ptr>), !cir.ptr loc(#loc24) + %7 = cir.ptr_stride(%6 : !cir.ptr, %5 : !u64i), !cir.ptr loc(#loc24) + cir.store %7, %2 : !cir.ptr, !cir.ptr> loc(#loc78) + %8 = cir.load %2 : !cir.ptr>, !cir.ptr loc(#loc78) + cir.return %8 : !cir.ptr loc(#loc78) + } loc(#loc76) + cir.func lambda internal private @_ZZ4mainENK3$_0clEv(%arg0: !cir.ptr loc("example.cpp":30:13)) extra(#fn_attr) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} loc(#loc29) + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> loc(#loc30) + %1 = cir.load %0 : !cir.ptr>, !cir.ptr loc(#loc29) + %2 = cir.const #cir.int<14> : !s32i loc(#loc31) + %3 = cir.get_member %1[0] {name = "b"} : !cir.ptr -> !cir.ptr> loc(#loc32) + %4 = cir.load %3 : !cir.ptr>, !cir.ptr loc(#loc32) + %5 = cir.const #cir.int<3> : !s32i loc(#loc33) + %6 = cir.cast(integral, %5 : !s32i), !u64i loc(#loc33) + %7 = cir.call @_ZNSt5arrayIiLm8192EEixEm(%4, %6) : (!cir.ptr, !u64i) -> !cir.ptr extra(#fn_attr2) loc(#loc32) + cir.store %2, %7 : !s32i, !cir.ptr loc(#loc80) + cir.return loc(#loc28) + } loc(#loc79) + cir.func internal private @_ZN3aie6tile_tILi1ELi4EE7programIZ4mainE3$_0EEvOT_(%arg0: !cir.ptr loc("./aie++.hpp":45:8), %arg1: !cir.ptr loc(fused[#loc37, #loc38])) [#cir.annotation] extra(#fn_attr) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} loc(#loc36) + %1 = cir.alloca !cir.ptr, !cir.ptr>, ["code", init] {alignment = 8 : i64} loc(#loc82) 
+ cir.store %arg0, %0 : !cir.ptr, !cir.ptr> loc(#loc39) + cir.store %arg1, %1 : !cir.ptr, !cir.ptr> loc(#loc39) + %2 = cir.load %0 : !cir.ptr>, !cir.ptr loc(#loc36) + %3 = cir.load %1 : !cir.ptr>, !cir.ptr loc(#loc40) + %4 = cir.get_global @_ZN3aie6tile_tILi1ELi4EE9tile_codeIPZ4mainE3$_0EE : !cir.ptr> loc(#loc75) + cir.store %3, %4 : !cir.ptr, !cir.ptr> loc(#loc83) + %5 = cir.load %1 : !cir.ptr>, !cir.ptr loc(#loc42) + cir.call @_ZZ4mainENK3$_0clEv(%5) : (!cir.ptr) -> () loc(#loc42) + cir.return loc(#loc35) + } loc(#loc81) + cir.func linkonce_odr @_ZN3aie6deviceILNS_3$_0E42EE4tileILi2ELi3EEENS_6tile_tIXT_EXT0_EEEv(%arg0: !cir.ptr loc("./aie++.hpp":69:16)) -> !ty_aie3A3Atile_t3C22C_33E [#cir.annotation] extra(#fn_attr) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} loc(#loc3) + %1 = cir.alloca !ty_aie3A3Atile_t3C22C_33E, !cir.ptr, ["__retval"] {alignment = 1 : i64} loc(#loc2) + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> loc(#loc4) + %2 = cir.load %0 : !cir.ptr>, !cir.ptr loc(#loc3) + %3 = cir.load %1 : !cir.ptr, !ty_aie3A3Atile_t3C22C_33E loc(#loc71) + cir.return %3 : !ty_aie3A3Atile_t3C22C_33E loc(#loc71) + } loc(#loc70) + cir.global "private" internal dsolocal @_ZN3aie6tile_tILi2ELi3EE9tile_codeIPZ4mainE3$_1EE = #cir.ptr : !cir.ptr {alignment = 8 : i64} loc(#loc75) + cir.func lambda internal private @_ZZ4mainENK3$_1clEv(%arg0: !cir.ptr loc("example.cpp":31:26)) extra(#fn_attr) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} loc(#loc45) + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> loc(#loc46) + %1 = cir.load %0 : !cir.ptr>, !cir.ptr loc(#loc45) + cir.return loc(#loc44) + } loc(#loc84) + cir.func internal private @_ZN3aie6tile_tILi2ELi3EE7programIZ4mainE3$_1EEvOT_(%arg0: !cir.ptr loc("./aie++.hpp":45:8), %arg1: !cir.ptr loc(fused[#loc37, #loc38])) [#cir.annotation] extra(#fn_attr) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} loc(#loc36) + %1 = cir.alloca !cir.ptr, !cir.ptr>, ["code", init] {alignment = 8 : i64} loc(#loc82) + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> loc(#loc39) + cir.store %arg1, %1 : !cir.ptr, !cir.ptr> loc(#loc39) + %2 = cir.load %0 : !cir.ptr>, !cir.ptr loc(#loc36) + %3 = cir.load %1 : !cir.ptr>, !cir.ptr loc(#loc40) + %4 = cir.get_global @_ZN3aie6tile_tILi2ELi3EE9tile_codeIPZ4mainE3$_1EE : !cir.ptr> loc(#loc75) + cir.store %3, %4 : !cir.ptr, !cir.ptr> loc(#loc83) + %5 = cir.load %1 : !cir.ptr>, !cir.ptr loc(#loc42) + cir.call @_ZZ4mainENK3$_1clEv(%5) : (!cir.ptr) -> () loc(#loc42) + cir.return loc(#loc35) + } loc(#loc81) + cir.func linkonce_odr @_ZN3aie6deviceILNS_3$_0E42EE3runEv(%arg0: !cir.ptr loc("./aie++.hpp":74:18)) extra(#fn_attr) { + %0 = cir.alloca !cir.ptr, !cir.ptr>, ["this", init] {alignment = 8 : i64} loc(#loc49) + cir.store %arg0, %0 : !cir.ptr, !cir.ptr> loc(#loc50) + %1 = cir.load %0 : !cir.ptr>, !cir.ptr loc(#loc49) + cir.return loc(#loc48) + } loc(#loc85) + cir.func @main() -> !s32i extra(#fn_attr) { + %0 = cir.alloca !s32i, !cir.ptr, ["__retval"] {alignment = 4 : i64} loc(#loc52) + %1 = cir.alloca !ty_aie3A3Adevice3Caie3A3Anpu13E, !cir.ptr, ["d"] {alignment = 1 : i64} loc(#loc87) + %2 = cir.alloca !ty_aie3A3Atile_t3C12C_43E, !cir.ptr, ["t", init] {alignment = 1 : i64} loc(#loc88) + %3 = cir.alloca !ty_std3A3Aarray3Cint2C_8192UL3E, !cir.ptr, ["b", init] {alignment = 4 : i64} loc(#loc89) + %4 = cir.call @_ZN3aie6deviceILNS_3$_0E42EE4tileILi1ELi4EEENS_6tile_tIXT_EXT0_EEEv(%1) : (!cir.ptr) -> !ty_aie3A3Atile_t3C12C_43E loc(#loc59) + cir.store %4, %2 : 
!ty_aie3A3Atile_t3C12C_43E, !cir.ptr loc(#loc59) + %5 = cir.call @_ZN3aie6tile_tILi1ELi4EE6bufferIiLm8192EEESt5arrayIT_XT0_EEv(%2) : (!cir.ptr) -> !ty_std3A3Aarray3Cint2C_8192UL3E loc(#loc60) + cir.store %5, %3 : !ty_std3A3Aarray3Cint2C_8192UL3E, !cir.ptr loc(#loc60) + cir.scope { + %7 = cir.alloca !ty_anon2E0_, !cir.ptr, ["ref.tmp0"] {alignment = 8 : i64} loc(#loc91) + %8 = cir.get_member %7[0] {name = "b"} : !cir.ptr -> !cir.ptr> loc(#loc32) + cir.store %3, %8 : !cir.ptr, !cir.ptr> loc(#loc91) + cir.call @_ZN3aie6tile_tILi1ELi4EE7programIZ4mainE3$_0EEvOT_(%2, %7) : (!cir.ptr, !cir.ptr) -> () loc(#loc63) + } loc(#loc90) + cir.scope { + %7 = cir.alloca !ty_aie3A3Atile_t3C22C_33E, !cir.ptr, ["ref.tmp1"] {alignment = 1 : i64} loc(#loc93) + %8 = cir.alloca !ty_anon2E1_, !cir.ptr, ["ref.tmp2"] {alignment = 1 : i64} loc(#loc94) + %9 = cir.call @_ZN3aie6deviceILNS_3$_0E42EE4tileILi2ELi3EEENS_6tile_tIXT_EXT0_EEEv(%1) : (!cir.ptr) -> !ty_aie3A3Atile_t3C22C_33E loc(#loc67) + cir.store %9, %7 : !ty_aie3A3Atile_t3C22C_33E, !cir.ptr loc(#loc67) + cir.call @_ZN3aie6tile_tILi2ELi3EE7programIZ4mainE3$_1EEvOT_(%7, %8) : (!cir.ptr, !cir.ptr) -> () loc(#loc68) + } loc(#loc92) + cir.call @_ZN3aie6deviceILNS_3$_0E42EE3runEv(%1) : (!cir.ptr) -> () loc(#loc69) + %6 = cir.load %0 : !cir.ptr, !s32i loc(#loc52) + cir.return %6 : !s32i loc(#loc52) + } loc(#loc86) +} loc(#loc) +#loc = loc("/home/rkeryell/Xilinx/Projects/AIE/ACDC/air++/example.cpp":0:0) +#loc1 = loc("./aie++.hpp":69:3) +#loc2 = loc("./aie++.hpp":72:3) +#loc4 = loc("./aie++.hpp":70:90) +#loc5 = loc("./aie++.hpp":71:5) +#loc6 = loc("./aie++.hpp":71:13) +#loc7 = loc("./aie++.hpp":37:43) +#loc8 = loc("./aie++.hpp":39:3) +#loc10 = loc("./aie++.hpp":37:115) +#loc11 = loc("./aie++.hpp":38:12) +#loc12 = loc("./aie++.hpp":38:13) +#loc13 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":115:55) +#loc14 = loc("./aie++.hpp":38:5) +#loc15 = loc("./aie++.hpp":35:3) +#loc16 = loc("./aie++.hpp":35:47) +#loc17 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":205:7) +#loc18 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":210:7) +#loc22 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":207:7) +#loc23 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":209:18) +#loc24 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":209:9) +#loc25 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":209:2) +#loc26 = loc("/usr/lib/gcc/x86_64-linux-gnu/14/../../../../include/c++/14/array":209:21) +#loc27 = loc("example.cpp":30:15) +#loc28 = loc("example.cpp":30:30) +#loc30 = loc("example.cpp":30:17) +#loc31 = loc("example.cpp":30:26) +#loc32 = loc("example.cpp":30:19) +#loc33 = loc("example.cpp":30:21) +#loc34 = loc("./aie++.hpp":45:3) +#loc35 = loc("./aie++.hpp":50:3) +#loc39 = loc("./aie++.hpp":45:75) +#loc40 = loc("./aie++.hpp":47:35) +#loc41 = loc("./aie++.hpp":47:5) +#loc42 = loc("./aie++.hpp":49:5) +#loc43 = loc("example.cpp":31:27) +#loc44 = loc("example.cpp":31:29) +#loc46 = loc("example.cpp":31:28) +#loc47 = loc("./aie++.hpp":74:3) +#loc48 = loc("./aie++.hpp":74:25) +#loc50 = loc("./aie++.hpp":74:24) +#loc51 = loc("example.cpp":26:1) +#loc52 = loc("example.cpp":33:1) +#loc53 = loc("example.cpp":27:3) +#loc54 = loc("example.cpp":27:26) +#loc55 = loc("example.cpp":28:3) +#loc56 = loc("example.cpp":28:25) +#loc57 = loc("example.cpp":29:3) +#loc58 = loc("example.cpp":29:32) +#loc59 = loc("example.cpp":28:14) +#loc60 = 
loc("example.cpp":29:14) +#loc61 = loc("example.cpp":30:3) +#loc62 = loc("example.cpp":30:31) +#loc63 = loc("example.cpp":30:5) +#loc64 = loc("example.cpp":31:3) +#loc65 = loc("example.cpp":31:30) +#loc66 = loc("example.cpp":31:16) +#loc67 = loc("example.cpp":31:5) +#loc68 = loc("example.cpp":31:18) +#loc69 = loc("example.cpp":32:5) +#loc70 = loc(fused[#loc1, #loc2]) +#loc71 = loc(fused[#loc5, #loc6]) +#loc72 = loc(fused[#loc7, #loc8]) +#loc73 = loc(fused[#loc11, #loc12]) +#loc74 = loc(fused[#loc14, #loc12]) +#loc75 = loc(fused[#loc15, #loc16]) +#loc76 = loc(fused[#loc17, #loc18]) +#loc78 = loc(fused[#loc25, #loc26]) +#loc79 = loc(fused[#loc27, #loc28]) +#loc80 = loc(fused[#loc32, #loc31]) +#loc81 = loc(fused[#loc34, #loc35]) +#loc83 = loc(fused[#loc41, #loc40]) +#loc84 = loc(fused[#loc43, #loc44]) +#loc85 = loc(fused[#loc47, #loc48]) +#loc86 = loc(fused[#loc51, #loc52]) +#loc87 = loc(fused[#loc53, #loc54]) +#loc88 = loc(fused[#loc55, #loc56]) +#loc89 = loc(fused[#loc57, #loc58]) +#loc90 = loc(fused[#loc61, #loc62]) +#loc91 = loc(fused[#loc29, #loc28]) +#loc92 = loc(fused[#loc64, #loc65]) +#loc93 = loc(fused[#loc64, #loc66]) +#loc94 = loc(fused[#loc45, #loc44]) diff --git a/tools/aie-lsp-server/CMakeLists.txt b/tools/aie-lsp-server/CMakeLists.txt index c1cb43a719..808441fa89 100644 --- a/tools/aie-lsp-server/CMakeLists.txt +++ b/tools/aie-lsp-server/CMakeLists.txt @@ -5,18 +5,25 @@ # (c) Copyright 2024 AMD Inc. get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) - +message(dialect_libs = ${dialect_libs}) set(AIE_LSP_LIBS ${dialect_libs} + $, + MLIRCIR + ,> MLIRLspServerLib ) add_llvm_tool(aie-lsp-server aie-lsp-server.cpp ) + install(TARGETS aie-lsp-server EXPORT AIETargets RUNTIME DESTINATION ${LLVM_TOOLS_INSTALL_DIR} COMPONENT aie-lsp-server) target_link_libraries(aie-lsp-server PRIVATE ${AIE_LSP_LIBS}) +if (CLANGIR_MLIR_FRONTEND) + target_compile_definitions(aie-lsp-server PRIVATE CLANGIR_MLIR_FRONTEND) +endif() diff --git a/tools/aie-lsp-server/aie-lsp-server.cpp b/tools/aie-lsp-server/aie-lsp-server.cpp index acff34ec69..bffc2c04fe 100644 --- a/tools/aie-lsp-server/aie-lsp-server.cpp +++ b/tools/aie-lsp-server/aie-lsp-server.cpp @@ -14,10 +14,16 @@ #include "mlir/Tools/mlir-lsp-server/MlirLspServerMain.h" #include "aie/InitialAllDialect.h" +#ifdef CLANGIR_MLIR_FRONTEND +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#endif int main(int argc, char **argv) { mlir::DialectRegistry registry; mlir::registerAllDialects(registry); xilinx::registerAllDialects(registry); +#ifdef CLANGIR_MLIR_FRONTEND + registry.insert(); +#endif return mlir::failed(mlir::MlirLspServerMain(argc, argv, registry)); } diff --git a/tools/aie-opt/CMakeLists.txt b/tools/aie-opt/CMakeLists.txt index 3a02f4a030..e548541759 100644 --- a/tools/aie-opt/CMakeLists.txt +++ b/tools/aie-opt/CMakeLists.txt @@ -23,7 +23,18 @@ set(LIBS ${dialect_libs} ${conversion_libs} ${extension_libs} - MLIROptLib + $, + clangCIR + clangCIRLoweringThroughMLIR + clangCIRLoweringDirectToLLVM + CIRToAIEPasses + MLIRAnalysis + MLIRCIR + MLIRCIRTransforms + MLIRDialect + MLIRTransforms + MLIRTransformUtils + ,> ADF AIE AIETransforms @@ -36,7 +47,12 @@ set(LIBS MLIRAIEVecTransformOps MLIRAIEVecTransforms MLIRAIEVecToLLVM + MLIROptLib MLIRTransformDialect MLIRXLLVMDialect ) + target_link_libraries(aie-opt PUBLIC ${LIBS}) +if (CLANGIR_MLIR_FRONTEND) + target_compile_definitions(aie-opt PRIVATE CLANGIR_MLIR_FRONTEND) +endif() diff --git a/tools/aie-opt/aie-opt.cpp b/tools/aie-opt/aie-opt.cpp index 80fba1ab28..4b275efae4 100644 --- 
a/tools/aie-opt/aie-opt.cpp +++ b/tools/aie-opt/aie-opt.cpp @@ -13,6 +13,9 @@ #include "mlir/InitAllPasses.h" #include "mlir/Tools/mlir-opt/MlirOptMain.h" +#ifdef CLANGIR_MLIR_FRONTEND +#include "aie/CIR/CIRToAIEPasses.h" +#endif #include "aie/Conversion/Passes.h" #include "aie/Dialect/AIE/Transforms/AIEPasses.h" #include "aie/Dialect/AIEVec/Analysis/Passes.h" @@ -22,25 +25,32 @@ #include "aie/Dialect/AIEX/Transforms/AIEXPasses.h" #include "aie/InitialAllDialect.h" #include "aie/version.h" +#ifdef CLANGIR_MLIR_FRONTEND +#include "clang/CIR/Dialect/IR/CIRDialect.h" +#include "clang/CIR/Dialect/Passes.h" +#include "clang/CIR/LowerToLLVM.h" +#include "clang/CIR/LowerToMLIR.h" +#include "clang/CIR/Passes.h" +#endif -using namespace llvm; -using namespace mlir; - -void version_printer(raw_ostream &os) { +static void versionPrinter(llvm::raw_ostream &os) { os << "aie-opt " << AIE_GIT_COMMIT << "\n"; } int main(int argc, char **argv) { - registerAllPasses(); + mlir::registerAllPasses(); xilinx::registerConversionPasses(); xilinx::AIE::registerAIEPasses(); xilinx::AIEX::registerAIEXPasses(); xilinx::aievec::registerAIEVecAnalysisPasses(); xilinx::aievec::registerAIEVecPasses(); xilinx::aievec::registerAIEVecPipelines(); +#ifdef CLANGIR_MLIR_FRONTEND + xilinx::AIE::CIR::registerCIRToAIEPasses(); +#endif - DialectRegistry registry; + mlir::DialectRegistry registry; registerAllDialects(registry); xilinx::registerAllDialects(registry); @@ -48,7 +58,88 @@ int main(int argc, char **argv) { xilinx::aievec::registerTransformDialectExtension(registry); - cl::AddExtraVersionPrinter(version_printer); + llvm::cl::AddExtraVersionPrinter(versionPrinter); + +#ifdef CLANGIR_MLIR_FRONTEND + // ClangIR dialect + registry.insert(); + + // ClangIR-specific passes + mlir::registerPass([]() -> std::unique_ptr { + return cir::createConvertMLIRToLLVMPass(); + }); + mlir::registerPass([]() -> std::unique_ptr<::mlir::Pass> { + return mlir::createCIRCanonicalizePass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return mlir::createCIRSimplifyPass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return mlir::createLifetimeCheckPass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return mlir::createDropASTPass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return mlir::createLoweringPreparePass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return mlir::createSCFPreparePass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return mlir::createHoistAllocasPass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return mlir::createGotoSolverPass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return mlir::createIdiomRecognizerPass(); + }); + mlir::registerPass( + []() -> std::unique_ptr { return mlir::createLibOptPass(); }); + mlir::registerPass([]() -> std::unique_ptr { + return mlir::createCallConvLoweringPass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return cir::createConvertCIRToMLIRPass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return cir::createConvertCIRToMLIRPass(); + }); + mlir::registerPass([]() -> std::unique_ptr { + return cir::direct::createConvertCIRToLLVMPass(); + }); + + mlir::PassPipelineRegistration pipeline( + "cir-to-llvm", "Full pass pipeline from CIR to LLVM MLIR dialect", + [](mlir::OpPassManager &pm) { + cir::direct::populateCIRToLLVMPasses(pm, /* useCCLowering */ true); + }); + + mlir::registerPass([]() -> std::unique_ptr { + return mlir::createFlattenCFGPass(); + }); + + mlir::registerPass([]() -> 
std::unique_ptr { + return mlir::createReconcileUnrealizedCastsPass(); + }); + + mlir::registerTransformsPasses(); + + cir::runAtStartOfConvertCIRToMLIRPass([](mlir::ConversionTarget ct) { + ct.addLegalDialect(); + ct.addLegalOp(); + }); + + cir::direct::runAtStartOfConvertCIRToLLVMPass([](mlir::ConversionTarget ct) { + ct.addLegalDialect(); + ct.addLegalOp(); + }); +#endif return failed( MlirOptMain(argc, argv, "MLIR modular optimizer driver\n", registry)); diff --git a/tools/aie-translate/CMakeLists.txt b/tools/aie-translate/CMakeLists.txt index 39c768d66d..9948d70513 100644 --- a/tools/aie-translate/CMakeLists.txt +++ b/tools/aie-translate/CMakeLists.txt @@ -17,6 +17,11 @@ get_property(translation_libs GLOBAL PROPERTY MLIR_TRANSLATION_LIBS) set(LIBS ${dialect_libs} ${translation_libs} + $, + clangCIR + clangCIRLoweringDirectToLLVM + clangCIRLoweringThroughMLIR + ,> AIE AIETransforms AIEX @@ -25,13 +30,17 @@ set(LIBS AIETargets MLIRAIEVecDialect MLIRAIEVecAIE1Dialect - MLIRXLLVMToLLVMIRTranslation MLIRIR MLIRParser MLIRPass MLIRTargetAIEVecCpp MLIRTargetLLVMIRExport MLIRTargetLLVMIRImport + MLIRXLLVMToLLVMIRTranslation ) +if (CLANGIR_MLIR_FRONTEND) + target_compile_definitions(aie-translate PRIVATE CLANGIR_MLIR_FRONTEND) +endif() + target_link_libraries(aie-translate PRIVATE ${LIBS}) diff --git a/tools/aie-translate/aie-translate.cpp b/tools/aie-translate/aie-translate.cpp index 9e71ba9a53..dbaa30c839 100644 --- a/tools/aie-translate/aie-translate.cpp +++ b/tools/aie-translate/aie-translate.cpp @@ -19,48 +19,112 @@ #include "mlir/Target/LLVMIR/Export.h" #include "mlir/Tools/mlir-translate/MlirTranslateMain.h" #include "mlir/Tools/mlir-translate/Translation.h" +#ifdef CLANGIR_MLIR_FRONTEND +#include "aie/Dialect/AIEVec/AIE1/IR/AIEVecAIE1Dialect.h" +#include "aie/Dialect/AIEX/IR/AIEXDialect.h" +#include "clang/CIR/LowerToLLVM.h" +#include "clang/CIR/LowerToMLIR.h" -using namespace mlir; +namespace cir::direct { +extern void registerCIRDialectTranslation(mlir::DialectRegistry ®istry); +} // namespace cir::direct +#endif -namespace aie { -// We redefine the MLIR -> LLVM IR translation to include our AIE intrinsics +namespace { +#ifdef CLANGIR_MLIR_FRONTEND +// TODO refactor clang/tools/cir-translate/cir-translate.cpp to avoid the +// following copy-paste +void registerToLLVMTranslation() { + mlir::TranslateFromMLIRRegistration registration( + "cir-to-llvmir", "Translate CIR to LLVMIR", + [](mlir::Operation *op, mlir::raw_ostream &output) { + llvm::LLVMContext llvmContext; + auto llvmModule = cir::direct::lowerDirectlyFromCIRToLLVMIR( + llvm::dyn_cast(op), llvmContext); + if (!llvmModule) + return mlir::failure(); + llvmModule->print(output, nullptr); + return mlir::success(); + }, + [](mlir::DialectRegistry ®istry) { + registry.insert(); + mlir::registerAllToLLVMIRTranslations(registry); + cir::direct::registerCIRDialectTranslation(registry); + }); +} +#endif + +// We redefine the MLIR -> LLVM IR translation to include CIR & AIE intrinsics // translations. 
+// The problem by picking the same "mlir-to-llvmir" name is that it is no longer +// possible to combine this with other standard MLIR translations which +// already define "mlir-to-llvmir" void registerToLLVMIRTranslation() { - TranslateFromMLIRRegistration registration( + mlir::TranslateFromMLIRRegistration registration( "mlir-to-llvmir", "Translate MLIR to LLVMIR", - [](Operation *op, raw_ostream &output) { + [](mlir::Operation *op, mlir::raw_ostream &output) { llvm::LLVMContext llvmContext; auto llvmModule = translateModuleToLLVMIR(op, llvmContext); if (!llvmModule) - return failure(); + return mlir::failure(); llvmModule->print(output, nullptr); - return success(); + return mlir::success(); }, - [](DialectRegistry ®istry) { - registry.insert(); + [](mlir::DialectRegistry ®istry) { + registry.insert(); +#ifdef CLANGIR_MLIR_FRONTEND + mlir::registerAllToLLVMIRTranslations(registry); + cir::direct::registerCIRDialectTranslation(registry); +#endif xilinx::registerAllAIEToLLVMIRTranslations(registry); registerAllToLLVMIRTranslations(registry); }); } -} // namespace aie -void version_printer(raw_ostream &os) { +// Mainly copy-paste of registerAllTranslations() to handle "mlir-to-llvmir" +// option duplicated by aie-translate +void registerAllTranslationsWithoutToLLVMIR() { + static bool initOnce = [] { + mlir::registerFromLLVMIRTranslation(); + mlir::registerFromSPIRVTranslation(); + mlir::registerToCppTranslation(); + // "mlir-to-llvmir" is now handled by aie::registerToLLVMIRTranslation(); + // registerToLLVMIRTranslation(); + mlir::registerToSPIRVTranslation(); + return true; + }(); + static_cast(initOnce); +} + +void versionPrinter(llvm::raw_ostream &os) { os << "aie-translate " << AIE_GIT_COMMIT << "\n"; } +} // namespace int main(int argc, char **argv) { - // NOTE: these are the contents of registerAllTranslations(); - registerFromLLVMIRTranslation(); - registerFromSPIRVTranslation(); - registerToCppTranslation(); - aie::registerToLLVMIRTranslation(); - registerToSPIRVTranslation(); + registerAllTranslationsWithoutToLLVMIR(); +#ifdef CLANGIR_MLIR_FRONTEND + registerToLLVMTranslation(); + cir::runAtStartOfConvertCIRToMLIRPass([](mlir::ConversionTarget ct) { + ct.addLegalDialect(); + ct.addLegalOp(); + }); + cir::direct::runAtStartOfConvertCIRToLLVMPass([](mlir::ConversionTarget ct) { + ct.addLegalDialect(); + ct.addLegalOp(); + }); +#endif + registerToLLVMIRTranslation(); xilinx::AIE::registerAIETranslations(); xilinx::aievec::registerAIEVecToCppTranslation(); - llvm::cl::AddExtraVersionPrinter(version_printer); + llvm::cl::AddExtraVersionPrinter(versionPrinter); - return failed(mlirTranslateMain(argc, argv, "AIE Translation Tool")); + return failed(mlir::mlirTranslateMain(argc, argv, "AIE Translation Tool")); }
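+
+// Example invocations (the input file names are placeholders, given only as
+// an illustration); --cir-to-llvmir is only registered when the tool is
+// built with CLANGIR_MLIR_FRONTEND:
+//   aie-translate --cir-to-llvmir kernel.cir -o kernel.ll
+//   aie-translate --mlir-to-llvmir lowered.mlir -o lowered.ll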