diff --git a/include/aie/Dialect/AIE/IR/AIEAttrs.td b/include/aie/Dialect/AIE/IR/AIEAttrs.td index bd2640cc659..b41f272a46f 100644 --- a/include/aie/Dialect/AIE/IR/AIEAttrs.td +++ b/include/aie/Dialect/AIE/IR/AIEAttrs.td @@ -97,6 +97,17 @@ def AIEArch: I32EnumAttr<"AIEArch", "AIE Architecture", let cppNamespace = "xilinx::AIE"; } +def AIETileType: I32EnumAttr<"AIETileType", "Type of AIE Tile", + [ + I32EnumAttrCase<"CoreTile", 0>, + I32EnumAttrCase<"MemTile", 1>, + I32EnumAttrCase<"ShimNOCTile", 2>, + I32EnumAttrCase<"ShimPLTile", 3>, + ]> { + + let cppNamespace = "xilinx::AIE"; +} + def AIEDevice: I32EnumAttr<"AIEDevice", "AIE Device", [ I32EnumAttrCase<"xcvc1902", 1>, diff --git a/include/aie/Dialect/AIE/IR/AIEInterfaces.td b/include/aie/Dialect/AIE/IR/AIEInterfaces.td index a346108e49c..e2ebbd00c18 100644 --- a/include/aie/Dialect/AIE/IR/AIEInterfaces.td +++ b/include/aie/Dialect/AIE/IR/AIEInterfaces.td @@ -28,14 +28,10 @@ def HasValidDMAChannels : NativeOpTrait<"HasValidDMAChannels"> { string cppNamespace = "::xilinx::AIE"; } -def PredIsCoreTile : CPred<"xilinx::AIE::getTargetModel(&$_op).isCoreTile(llvm::cast<xilinx::AIE::TileElement>($_op).getTileID().col," "llvm::cast<xilinx::AIE::TileElement>($_op).getTileID().row)">; -def PredIsMemTile : CPred<"xilinx::AIE::getTargetModel(&$_op).isMemTile(llvm::cast<xilinx::AIE::TileElement>($_op).getTileID().col," "llvm::cast<xilinx::AIE::TileElement>($_op).getTileID().row)">; -def PredIsShimNOCTile : CPred<"xilinx::AIE::getTargetModel(&$_op).isShimNOCTile(llvm::cast<xilinx::AIE::TileElement>($_op).getTileID().col," "llvm::cast<xilinx::AIE::TileElement>($_op).getTileID().row)">; -def PredIsShimPLTile : CPred<"xilinx::AIE::getTargetModel(&$_op).isShimPLTile(llvm::cast<xilinx::AIE::TileElement>($_op).getTileID().col," "llvm::cast<xilinx::AIE::TileElement>($_op).getTileID().row)">; +def PredIsCoreTile : CPred<"llvm::cast<xilinx::AIE::TileElement>($_op).getTileLike().isCoreTile()">; +def PredIsMemTile : CPred<"llvm::cast<xilinx::AIE::TileElement>($_op).getTileLike().isMemTile()">; +def PredIsShimNOCTile : CPred<"llvm::cast<xilinx::AIE::TileElement>($_op).getTileLike().isShimNOCTile()">; +def PredIsShimPLTile : CPred<"llvm::cast<xilinx::AIE::TileElement>($_op).getTileLike().isShimPLTile()">; def IsCoreTile : PredOpTrait<"op 
exists in a core tile", PredIsCoreTile>; def IsMemTile : PredOpTrait<"op exists in a MemTile", PredIsMemTile>; @@ -48,6 +44,48 @@ def IsFlowEndPoint : NativeOpTrait<"IsFlowEndPoint"> { string cppNamespace = "::xilinx::AIE"; } +def TileLike : OpInterface<"TileLike"> { + let description = [{ + Interface for operations that represent an AIE tile (logical or physical). + }]; + let cppNamespace = "::xilinx::AIE"; + let methods = [ + InterfaceMethod<[{Return the true hardware tile type.}], + "xilinx::AIE::AIETileType", "getTileType", (ins) + >, + InterfaceMethod<[{Return the optional column index.}], + "std::optional<int>", "tryGetCol", (ins) + >, + InterfaceMethod<[{Return the optional row index.}], + "std::optional<int>", "tryGetRow", (ins) + >, + InterfaceMethod<[{}], "bool", "isCoreTile", (ins), [{}], [{ + return $_op.getTileType() == xilinx::AIE::AIETileType::CoreTile; + }]>, + InterfaceMethod<[{}], "bool", "isMemTile", (ins), [{}], [{ + return $_op.getTileType() == xilinx::AIE::AIETileType::MemTile; + }]>, + InterfaceMethod<[{}], "bool", "isShimNOCTile", (ins), [{}], [{ + return $_op.getTileType() == xilinx::AIE::AIETileType::ShimNOCTile; + }]>, + InterfaceMethod<[{}], "bool", "isShimPLTile", (ins), [{}], [{ + return $_op.getTileType() == xilinx::AIE::AIETileType::ShimPLTile; + }]>, + InterfaceMethod<[{}], "bool", "isShimNOCorPLTile", (ins), [{}], [{ + return $_op.isShimNOCTile() || $_op.isShimPLTile(); + }]>, + InterfaceMethod<[{}], "bool", "isShimTile", (ins), [{}], [{ + return $_op.isShimNOCTile() || $_op.isShimPLTile(); + }]>, + InterfaceMethod<[{Return the number of source connections for a wire bundle.}], + "size_t", "getNumSourceConnections", (ins "xilinx::AIE::WireBundle":$bundle) + >, + InterfaceMethod<[{Return the number of dest connections for a wire bundle.}], + "size_t", "getNumDestConnections", (ins "xilinx::AIE::WireBundle":$bundle) + >, + ]; +} + def TileElement : OpInterface<"TileElement", [ DeclareOpInterfaceMethods<OpAsmOpInterface, ["getAsmResultNames"]>, ]> { @@ -56,49 +94,48 @@ def TileElement 
: OpInterface<"TileElement", [ }]; let cppNamespace = "::xilinx::AIE"; let methods = [ - InterfaceMethod<[{ - Return the tile operand value. - }], - "mlir::Value", "getTile", (ins ) + InterfaceMethod<[{Return the tile operand value.}], + "mlir::Value", "getTile", (ins) >, - InterfaceMethod<[{ - Return the location of the Tile where the element is located. - }], - "xilinx::AIE::TileID", "getTileID", (ins ), - /*methodBody=*/[{}], - /*defaultImpl=*/[{ - ConcreteOp op = llvm::cast(this->getOperation()); - return op.getTileOp().getTileID(); + InterfaceMethod<[{Return the TileLike interface for this element's tile.}], + "TileLike", "getTileLike", (ins), [{}], [{ + return llvm::dyn_cast($_op.getTile().getDefiningOp()); }] >, - InterfaceMethod<[{ - Return column. - }], - "int", "colIndex", (ins), - /*methodBody=*/[{}], - /*defaultImpl=*/[{ - return $_op.getTileID().col; + InterfaceMethod<[{Return the column index of this element's tile. Asserts if unplaced.}], + "int", "colIndex", (ins), [{}], [{ + std::optional col = $_op.getTileLike().tryGetCol(); + assert(col.has_value() && "TileElement called colIndex() on an unplaced tile."); + return *col; }] >, - InterfaceMethod<[{ - Return row. - }], - "int", "rowIndex", (ins), - /*methodBody=*/[{}], - /*defaultImpl=*/[{ - return $_op.getTileID().row; + InterfaceMethod<[{Return the row index of this element's tile. 
Asserts if unplaced.}], + "int", "rowIndex", (ins), [{}], [{ + std::optional row = $_op.getTileLike().tryGetRow(); + assert(row.has_value() && "TileElement called rowIndex() on an unplaced tile."); + return *row; }] >, + InterfaceMethod<[{Return the TileID of this element's tile.}], + "TileID", "getTileID", (ins), [{}], [{ + return TileID{$_op.colIndex(), $_op.rowIndex()}; + }] + > ]; + + let extraClassDeclaration = [{ + TileOp getTileOp(); + }]; + let extraTraitClassDeclaration = [{ - void getAsmResultNames( - llvm::function_ref setNameFn) { - ConcreteOp op = llvm::cast(this->getOperation()); - std::string nameWithoutDialect = - op.getOperationName().str().substr(op.getOperationName().find('.') + 1); - setNameFn(op.getResult(), nameWithoutDialect + "_" + - std::to_string(getTileID().col) + "_" + - std::to_string(getTileID().row)); + void getAsmResultNames(::mlir::OpAsmSetValueNameFn setNameFn) { + std::optional col = $_op.getTileLike().tryGetCol(); + std::optional row = $_op.getTileLike().tryGetRow(); + if (col && row) { + llvm::StringRef opName = $_op.getOperation()->getName().stripDialect(); + std::string name = (opName + "_" + llvm::Twine(*col) + "_" + llvm::Twine(*row)).str(); + setNameFn($_op.getResult(), name); + } } }]; } @@ -109,7 +146,6 @@ def Interconnect : OpInterface<"Interconnect", [TileElement]> { enabling them to host flows for routing. 
}]; let cppNamespace = "::xilinx::AIE"; - let methods = [ InterfaceMethod<[{}], "mlir::Region &", "getConnections", (ins ) diff --git a/include/aie/Dialect/AIE/IR/AIEOps.td b/include/aie/Dialect/AIE/IR/AIEOps.td index afa7769f8f6..912075ec512 100644 --- a/include/aie/Dialect/AIE/IR/AIEOps.td +++ b/include/aie/Dialect/AIE/IR/AIEOps.td @@ -79,8 +79,85 @@ def AIE_DeviceOp: AIE_Op<"device", [ }]; } +def AIE_LogicalTileOp: AIE_Op<"logical_tile", [ + Pure, + SkipAccessibilityCheckTrait, + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods, + DeclareOpInterfaceMethods + ]>, Results<(outs Index:$result)> { + let arguments = ( + ins AIETileType:$tile_type, + OptionalAttr]>>:$col, + OptionalAttr]>>:$row, + OptionalAttr:$allocation_scheme + ); + + let summary = "Declare a logical AIE tile to be placed"; + let description = [{ + This operation creates a logical AIE tile that represents a tile to be + mapped to a `aie.tile` during placement. Unlike `aie.tile`, + this operation does not require absolute coordinates and instead uses + a tile type to specify the kind of tile needed. + + The tile types are: + - `Core`: Maps to core tiles (tiles with a core, TileDMA, memory, + and stream connections) + - `ShimNOC`: Maps to shim NOC tiles + - `ShimPL`: Maps to shim PL tiles + - `Mem`: Maps to memory tiles (AIE2+ tiles with TileDMA and memory, + but no core) + + Optional `col` and `row` can be provided as placement hints or constraints. + Use `?` for unspecified coordinates. + + Examples: + ``` + // Logical core tile without placement constraints + %core_tile = aie.logical_tile(?, ?) + + // Logical shim tile with column hint but unspecified row + %shim_tile = aie.logical_tile(0, ?) 
+ + // Logical tile with full placement constraint (effectively fixed) + %fixed = aie.logical_tile(2, 3) + ``` + }]; + + let hasCustomAssemblyFormat = 1; + + let hasVerifier = 1; + + let extraClassDeclaration = [{ + TileID getCanonicalTileID(); + }]; + + let extraClassDefinition = [{ + void $cppClass::getAsmResultNames( + function_ref setNameFn) { + std::string tileName = "logical_"; + switch (getTileType()) { + case AIETileType::CoreTile: + tileName += "core"; + break; + case AIETileType::ShimNOCTile: + tileName += "shim_noc"; + break; + case AIETileType::ShimPLTile: + tileName += "shim_pl"; + break; + case AIETileType::MemTile: + tileName += "mem"; + break; + } + setNameFn(getResult(), tileName); + } + }]; +} + def AIE_TileOp: AIE_Op<"tile", [ Pure, + DeclareOpInterfaceMethods, IsFlowEndPoint, DeclareOpInterfaceMethods, DeclareOpInterfaceMethods @@ -106,16 +183,9 @@ def AIE_TileOp: AIE_Op<"tile", [ }]; let extraClassDeclaration = [{ - size_t getNumSourceConnections(WireBundle bundle); - size_t getNumDestConnections(WireBundle bundle); int colIndex() { return getCol(); } int rowIndex() { return getRow(); } TileID getTileID() { return {getCol(), getRow()}; } - bool isShimTile() { return getRow() == 0; } - bool isMemTile(); - bool isShimNOCTile(); - bool isShimPLTile(); - bool isShimNOCorPLTile(); bool isInternalMemWest() { return ((rowIndex() % 2) == 0); }; MemOp getMemOp() { @@ -146,18 +216,18 @@ def AIE_TileOp: AIE_Op<"tile", [ let extraClassDefinition = [{ void $cppClass::getAsmResultNames( function_ref setNameFn) { - std::string tileName = - getOperationName().str().substr(getOperationName().find('.') + 1); - // Specialize the SSA value name according to the tile kind - if (isMemTile()) - tileName = "mem_" + tileName; - else if (isShimNOCTile()) - tileName = "shim_noc_" + tileName; - else if (isShimPLTile()) - tileName = "shim_pl_" + tileName; - else if (isShimTile()) - tileName = "shim_" + tileName; - setNameFn(getResult(), tileName + "_" + + std::string 
tileName = + getOperationName().str().substr(getOperationName().find('.') + 1); + // Specialize the SSA value name according to the tile kind + if (isMemTile()) + tileName = "mem_" + tileName; + else if (isShimNOCTile()) + tileName = "shim_noc_" + tileName; + else if (isShimPLTile()) + tileName = "shim_pl_" + tileName; + else if (isShimTile()) + tileName = "shim_" + tileName; + setNameFn(getResult(), tileName + "_" + std::to_string(getCol()) + "_" + std::to_string(getRow())); } @@ -210,9 +280,8 @@ def AIE_SwitchboxOp: AIE_Op<"switchbox", [ TileOp getTileOp(); size_t getNumSourceConnections(WireBundle bundle); size_t getNumDestConnections(WireBundle bundle); - void getAsmResultNames( - llvm::function_ref setNameFn) { - ::xilinx::AIE::TileElement::Trait::getAsmResultNames(setNameFn); + void getAsmResultNames(::mlir::OpAsmSetValueNameFn setNameFn) { + TileElement::Trait::getAsmResultNames(setNameFn); } }]; } @@ -272,9 +341,8 @@ def AIE_ShimMuxOp: AIE_Op<"shim_mux", [ TileOp getTileOp(); size_t getNumSourceConnections(WireBundle bundle); size_t getNumDestConnections(WireBundle bundle); - void getAsmResultNames( - llvm::function_ref setNameFn) { - ::xilinx::AIE::TileElement::Trait::getAsmResultNames(setNameFn); + void getAsmResultNames(::mlir::OpAsmSetValueNameFn setNameFn) { + TileElement::Trait::getAsmResultNames(setNameFn); } }]; } @@ -313,12 +381,10 @@ def AIE_ShimDMAOp: AIE_Op<"shim_dma", [ let regions = (region AnyRegion:$body); let assemblyFormat = [{ `(` $tile `)` regions attr-dict }]; let hasVerifier = 1; - let extraClassDeclaration = [{ TileOp getTileOp(); - void getAsmResultNames( - llvm::function_ref setNameFn) { - ::xilinx::AIE::TileElement::Trait::getAsmResultNames(setNameFn); + void getAsmResultNames(::mlir::OpAsmSetValueNameFn setNameFn) { + TileElement::Trait::getAsmResultNames(setNameFn); } }]; } @@ -378,12 +444,11 @@ def AIE_CoreOp: AIE_Op<"core", [ let hasVerifier = 1; let extraClassDeclaration = [{ + TileOp getTileOp(); bool isMemWest() { return 
((rowIndex() % 2) == 0); }; bool isEmpty(); - TileOp getTileOp(); - void getAsmResultNames( - llvm::function_ref setNameFn) { - ::xilinx::AIE::TileElement::Trait::getAsmResultNames(setNameFn); + void getAsmResultNames(::mlir::OpAsmSetValueNameFn setNameFn) { + TileElement::Trait::getAsmResultNames(setNameFn); } }]; @@ -1085,8 +1150,7 @@ def AIE_DMAOp: AIE_Op<"dma", [ let hasVerifier = 1; let extraClassDefinition = [{ - void $cppClass::getAsmResultNames( - llvm::function_ref setNameFn) { + void $cppClass::getAsmResultNames(::mlir::OpAsmSetValueNameFn setNameFn) { if (auto name = getOperation()->getAttrOfType( mlir::SymbolTable::getSymbolAttrName())) setNameFn(getResult(), name.str()); @@ -1125,12 +1189,10 @@ def AIE_MemOp: AIE_Op<"mem", [ let regions = (region AnyRegion:$body); let assemblyFormat = [{ `(` $tile `)` regions attr-dict }]; let hasVerifier = 1; - let extraClassDeclaration = [{ TileOp getTileOp(); - void getAsmResultNames( - llvm::function_ref setNameFn) { - ::xilinx::AIE::TileElement::Trait::getAsmResultNames(setNameFn); + void getAsmResultNames(::mlir::OpAsmSetValueNameFn setNameFn) { + TileElement::Trait::getAsmResultNames(setNameFn); } }]; } @@ -1170,12 +1232,10 @@ def AIE_MemTileDMAOp: AIE_Op<"memtile_dma", [ let regions = (region AnyRegion:$body); let assemblyFormat = [{ `(` $tile `)` regions attr-dict }]; let hasVerifier = 1; - let extraClassDeclaration = [{ TileOp getTileOp(); - void getAsmResultNames( - llvm::function_ref setNameFn) { - ::xilinx::AIE::TileElement::Trait::getAsmResultNames(setNameFn); + void getAsmResultNames(::mlir::OpAsmSetValueNameFn setNameFn) { + TileElement::Trait::getAsmResultNames(setNameFn); } }]; } @@ -1261,16 +1321,20 @@ def AIE_LockOp: AIE_Op<"lock", [ } TileOp getTileOp(); - void getAsmResultNames( - llvm::function_ref setNameFn) { + + void getAsmResultNames(mlir::OpAsmSetValueNameFn setNameFn) { if (hasName()) setNameFn(getResult(), name().str()); else { - std::string nameWithoutDialect = - 
getOperationName().str().substr(getOperationName().find('.') + 1); - setNameFn(getResult(), nameWithoutDialect + "_" + - std::to_string(getTileID().col) + "_" + - std::to_string(getTileID().row)); + std::optional col = getTileLike().tryGetCol(); + std::optional row = getTileLike().tryGetRow(); + if (col && row) { + std::string nameWithoutDialect = + getOperation()->getName().stripDialect().str(); + setNameFn(getResult(), nameWithoutDialect + "_" + + std::to_string(*col) + "_" + + std::to_string(*row)); + } } } }]; @@ -1389,19 +1453,22 @@ def AIE_BufferOp: AIE_Op<"buffer", [ llvm::report_fatal_error("couldn't get name"); } + TileOp getTileOp(); // Return the number of bytes that need to be allocated for this buffer. int64_t getAllocationSize(); - TileOp getTileOp(); - void getAsmResultNames( - llvm::function_ref setNameFn) { + void getAsmResultNames(mlir::OpAsmSetValueNameFn setNameFn) { if (hasName()) setNameFn(getResult(), name().str()); else { - std::string nameWithoutDialect = - getOperationName().str().substr(getOperationName().find('.') + 1); - setNameFn(getResult(), nameWithoutDialect + "_" + - std::to_string(getTileID().col) + "_" + - std::to_string(getTileID().row)); + std::optional col = getTileLike().tryGetCol(); + std::optional row = getTileLike().tryGetRow(); + if (col && row) { + std::string nameWithoutDialect = + getOperation()->getName().stripDialect().str(); + setNameFn(getResult(), nameWithoutDialect + "_" + + std::to_string(*col) + "_" + + std::to_string(*row)); + } } } }]; @@ -1453,8 +1520,7 @@ def AIE_ExternalBufferOp: AIE_Op<"external_buffer", [ }]; let extraClassDefinition = [{ - void $cppClass::getAsmResultNames( - llvm::function_ref setNameFn) { + void $cppClass::getAsmResultNames(::mlir::OpAsmSetValueNameFn setNameFn) { if (hasName()) setNameFn(getResult(), name().str()); } diff --git a/include/aie/Dialect/AIE/IR/AIETargetModel.h b/include/aie/Dialect/AIE/IR/AIETargetModel.h index d44a2a4e222..0e4a8bf2c21 100644 --- 
a/include/aie/Dialect/AIE/IR/AIETargetModel.h +++ b/include/aie/Dialect/AIE/IR/AIETargetModel.h @@ -134,27 +134,40 @@ class AIETargetModel { /// Return the number of rows in the device. virtual int rows() const = 0; - /// Return true if the given tile is a 'Core' tile. These tiles - /// include a Core, TileDMA, tile memory, and stream connections. - virtual bool isCoreTile(int col, int row) const = 0; + /// Return the tile type for the given tile coordinates. + /// - CoreTile: tiles with a Core, TileDMA, tile memory, and stream + /// connections. + /// - MemTile: tiles with TileDMA, tile memory, and stream connections, + /// but no core. + /// - ShimNOCTile: tiles with ShimDMA and connection to the memory-mapped NOC. + /// - ShimPLTile: tiles with connections to the PL, no ShimDMA. + virtual AIETileType getTileType(int col, int row) const = 0; - /// Return true if the given tile is an AIE2 'Memory' tile. These tiles - /// include a TileDMA, tile memory, and stream connections, but no core. - virtual bool isMemTile(int col, int row) const = 0; + /// Return true if the given tile is a Core tile. + bool isCoreTile(int col, int row) const { + return getTileType(col, row) == AIETileType::CoreTile; + } - /// Return true if the given tile is a Shim NOC tile. These tiles include a - /// ShimDMA and a connection to the memory-mapped NOC. They do not contain - /// any memory. - virtual bool isShimNOCTile(int col, int row) const = 0; + /// Return true if the given tile is a Mem tile. + bool isMemTile(int col, int row) const { + return getTileType(col, row) == AIETileType::MemTile; + } - /// Return true if the given tile is a Shim PL interface tile. These - /// tiles do not include a ShimDMA and instead include connections to the PL. - /// They do not contain any memory. - virtual bool isShimPLTile(int col, int row) const = 0; + /// Return true if the given tile is a ShimNOC tile. 
+ bool isShimNOCTile(int col, int row) const { + return getTileType(col, row) == AIETileType::ShimNOCTile; + } - /// Return true if the given tile is either a Shim NOC or a Shim PL interface - /// tile. - virtual bool isShimNOCorPLTile(int col, int row) const = 0; + /// Return true if the given tile is a ShimPL tile. + bool isShimPLTile(int col, int row) const { + return getTileType(col, row) == AIETileType::ShimPLTile; + } + + /// Return true if the given tile is either a ShimNOC or ShimPL tile. + bool isShimNOCorPLTile(int col, int row) const { + AIETileType t = getTileType(col, row); + return t == AIETileType::ShimNOCTile || t == AIETileType::ShimPLTile; + } /// Return true if the given tile ID is valid. virtual bool isValidTile(TileID src) const { @@ -247,8 +260,13 @@ class AIETargetModel { /// Return the size (in bits) of the accumulator/cascade. virtual uint32_t getAccumulatorCascadeSize() const = 0; - /// Return the number of lock objects - virtual uint32_t getNumLocks(int col, int row) const = 0; + /// Return the number of lock objects for a given tile type. + virtual uint32_t getNumLocks(AIETileType tileType) const = 0; + + /// Return the number of lock objects for a tile at the given coordinates. + uint32_t getNumLocks(int col, int row) const { + return getNumLocks(getTileType(col, row)); + } /// Return the maximum value that can be stored in a lock register virtual uint32_t getMaxLockValue() const = 0; @@ -258,9 +276,14 @@ class AIETargetModel { virtual std::optional getLocalLockAddress(uint32_t lockId, TileID tile) const = 0; + /// Return the number of buffer descriptors for a given tile type. + virtual uint32_t getNumBDs(AIETileType tileType) const = 0; + /// Return the number of buffer descriptors supported by the DMA in the given /// tile. 
- virtual uint32_t getNumBDs(int col, int row) const = 0; + uint32_t getNumBDs(int col, int row) const { + return getNumBDs(getTileType(col, row)); + } /// Return true iff buffer descriptor `bd_id` on tile (`col`, `row`) can be /// submitted on channel `channel`. @@ -366,9 +389,6 @@ class AIE1TargetModel : public AIETargetModel { public: AIE1TargetModel(TargetModelKind k) : AIETargetModel(k) {} - bool isCoreTile(int col, int row) const override { return row > 0; } - bool isMemTile(int col, int row) const override { return false; } - AIEArch getTargetArch() const override; std::optional getMemWest(TileID src) const override; @@ -400,11 +420,16 @@ class AIE1TargetModel : public AIETargetModel { uint32_t getMemEastBaseAddress() const override { return 0x00038000; } uint32_t getLocalMemorySize() const override { return 0x00008000; } uint32_t getAccumulatorCascadeSize() const override { return 384; } - uint32_t getNumLocks(int col, int row) const override { return 16; } + using AIETargetModel::getNumLocks; + uint32_t getNumLocks(AIETileType tileType) const override { + return 16; // AIE1 has no MemTiles, always 16 + } uint32_t getMaxLockValue() const override { return 1; } std::optional getLocalLockAddress(uint32_t lockId, TileID tile) const override; - uint32_t getNumBDs(int col, int row) const override { return 16; } + uint32_t getNumBDs(AIETileType tileType) const override { + return 16; // AIE1 has no MemTiles, always 16 + } bool isBdChannelAccessible(int col, int row, uint32_t bd_id, int channel) const override { return true; @@ -491,8 +516,9 @@ class AIE2TargetModel : public AIETargetModel { uint32_t getLocalMemorySize() const override { return 0x00010000; } uint32_t getAccumulatorCascadeSize() const override { return 512; } - uint32_t getNumLocks(int col, int row) const override { - return isMemTile(col, row) ? 64 : 16; + using AIETargetModel::getNumLocks; + uint32_t getNumLocks(AIETileType tileType) const override { + return tileType == AIETileType::MemTile ? 
64 : 16; } uint32_t getMaxLockValue() const override { return 0x3F; } @@ -500,13 +526,13 @@ class AIE2TargetModel : public AIETargetModel { std::optional getLocalLockAddress(uint32_t lockId, TileID tile) const override; - uint32_t getNumBDs(int col, int row) const override { - return isMemTile(col, row) ? 48 : 16; + uint32_t getNumBDs(AIETileType tileType) const override { + return tileType == AIETileType::MemTile ? 48 : 16; } bool isBdChannelAccessible(int col, int row, uint32_t bd_id, int channel) const override { - if (!isMemTile(col, row)) { + if (getTileType(col, row) != AIETileType::MemTile) { return true; } else { if ((channel & 1) == 0) { // even channel number @@ -528,7 +554,7 @@ class AIE2TargetModel : public AIETargetModel { uint32_t getMemTileSize() const override { return 0x00080000; } uint32_t getNumBanks(int col, int row) const override { - return isMemTile(col, row) ? 8 : 4; + return getTileType(col, row) == AIETileType::MemTile ? 8 : 4; } uint32_t getMaxChannelNumForAdjacentMemTile(int col, int row) const override { @@ -572,16 +598,12 @@ class VC1902TargetModel : public AIE1TargetModel { int rows() const override { return 9; /* One Shim row and 8 Core rows. */ } - bool isShimNOCTile(int col, int row) const override { - return row == 0 && nocColumns.contains(col); - } - - bool isShimPLTile(int col, int row) const override { - return row == 0 && !nocColumns.contains(col); - } - - bool isShimNOCorPLTile(int col, int row) const override { - return isShimNOCTile(col, row) || isShimPLTile(col, row); + AIETileType getTileType(int col, int row) const override { + if (row == 0) { + return nocColumns.contains(col) ? AIETileType::ShimNOCTile + : AIETileType::ShimPLTile; + } + return AIETileType::CoreTile; // AIE1 has no MemTiles } static bool classof(const AIETargetModel *model) { @@ -601,19 +623,14 @@ class VE2302TargetModel : public AIE2TargetModel { return 4; /* One Shim row, 1 memtile rows, and 2 Core rows. 
*/ } - bool isCoreTile(int col, int row) const override { return row > 1; } - bool isMemTile(int col, int row) const override { return row == 1; } - - bool isShimNOCTile(int col, int row) const override { - return row == 0 && nocColumns.contains(col); - } - - bool isShimPLTile(int col, int row) const override { - return row == 0 && !nocColumns.contains(col); - } - - bool isShimNOCorPLTile(int col, int row) const override { - return isShimNOCTile(col, row) || isShimPLTile(col, row); + AIETileType getTileType(int col, int row) const override { + if (row == 0) { + return nocColumns.contains(col) ? AIETileType::ShimNOCTile + : AIETileType::ShimPLTile; + } + if (row == 1) + return AIETileType::MemTile; + return AIETileType::CoreTile; } uint32_t getNumMemTileRows() const override { return 1; } @@ -636,22 +653,14 @@ class VE2802TargetModel : public AIE2TargetModel { return 11; /* One Shim row, 2 memtile rows, and 8 Core rows. */ } - bool isCoreTile(int col, int row) const override { return row > 2; } - - bool isMemTile(int col, int row) const override { - return row == 1 || row == 2; - } - - bool isShimNOCTile(int col, int row) const override { - return row == 0 && nocColumns.contains(col); - } - - bool isShimPLTile(int col, int row) const override { - return row == 0 && !nocColumns.contains(col); - } - - bool isShimNOCorPLTile(int col, int row) const override { - return isShimNOCTile(col, row) || isShimPLTile(col, row); + AIETileType getTileType(int col, int row) const override { + if (row == 0) { + return nocColumns.contains(col) ? AIETileType::ShimNOCTile + : AIETileType::ShimPLTile; + } + if (row == 1 || row == 2) + return AIETileType::MemTile; + return AIETileType::CoreTile; } uint32_t getNumMemTileRows() const override { return 2; } @@ -672,17 +681,6 @@ class BaseNPU1TargetModel : public AIE2TargetModel { return 6; /* 1 Shim row, 1 memtile row, and 4 Core rows. 
*/ } - bool isCoreTile(int col, int row) const override { return row > 1; } - bool isMemTile(int col, int row) const override { return row == 1; } - - bool isShimPLTile(int col, int row) const override { - return false; // No PL - } - - bool isShimNOCorPLTile(int col, int row) const override { - return isShimNOCTile(col, row) || isShimPLTile(col, row); - } - uint32_t getNumMemTileRows() const override { return 1; } static bool classof(const AIETargetModel *model) { @@ -708,7 +706,13 @@ class VirtualizedNPU1TargetModel : public BaseNPU1TargetModel { int columns() const override { return cols; } - bool isShimNOCTile(int col, int row) const override { return row == 0; } + AIETileType getTileType(int col, int row) const override { + if (row == 0) + return AIETileType::ShimNOCTile; // NPU1 has no ShimPL tiles + if (row == 1) + return AIETileType::MemTile; + return AIETileType::CoreTile; + } static bool classof(const AIETargetModel *model) { return model->getKind() >= TK_AIE2_NPU1_1Col && @@ -729,17 +733,12 @@ class BaseNPU2TargetModel : public AIE2TargetModel { return 6; /* 1 Shim row, 1 memtile row, and 4 Core rows. 
*/ } - bool isCoreTile(int col, int row) const override { return row > 1; } - bool isMemTile(int col, int row) const override { return row == 1; } - - bool isShimPLTile(int col, int row) const override { - return false; // No PL tiles - } - - bool isShimNOCTile(int col, int row) const override { return row == 0; } - - bool isShimNOCorPLTile(int col, int row) const override { - return isShimNOCTile(col, row); + AIETileType getTileType(int col, int row) const override { + if (row == 0) + return AIETileType::ShimNOCTile; + if (row == 1) + return AIETileType::MemTile; + return AIETileType::CoreTile; } uint32_t getNumMemTileRows() const override { return 1; } diff --git a/include/aie/Dialect/AIEX/IR/AIEX.td b/include/aie/Dialect/AIEX/IR/AIEX.td index d4b316f655a..e09d67dd85c 100644 --- a/include/aie/Dialect/AIEX/IR/AIEX.td +++ b/include/aie/Dialect/AIEX/IR/AIEX.td @@ -1050,7 +1050,7 @@ def AIE_DMAConfigureTaskOp : AIEX_Op<"dma_configure_task", [HasParent<"AIE::Runt }]; let extraClassDefinition = [{ - AIE::TileOp DMAConfigureTaskOp::getTileOp() { return cast(getTile().getDefiningOp()); } + AIE::TileOp DMAConfigureTaskOp::getTileOp() { return cast(this->getOperation()).getTileOp(); } }]; let hasVerifier = 1; @@ -1189,7 +1189,7 @@ def AIE_DMAStartBdChainOp: AIEX_Op<"dma_start_bd_chain", [HasParent<"AIE::Runtim }]; let extraClassDefinition = [{ - AIE::TileOp DMAStartBdChainOp::getTileOp() { return cast(getTile().getDefiningOp()); } + AIE::TileOp DMAStartBdChainOp::getTileOp() { return cast(this->getOperation()).getTileOp(); } }]; } diff --git a/lib/Dialect/AIE/IR/AIEDialect.cpp b/lib/Dialect/AIE/IR/AIEDialect.cpp index f9662ed8e21..9c6470b735c 100644 --- a/lib/Dialect/AIE/IR/AIEDialect.cpp +++ b/lib/Dialect/AIE/IR/AIEDialect.cpp @@ -246,6 +246,12 @@ namespace { struct UsesAreAccessible { static LogicalResult verifyTrait(Operation *op) { auto thisElement = cast(op); + + // Skip accessibility checks for logical tiles as we cannot tell until tile + // is placed + if 
(!isa<TileOp>(thisElement.getTile().getDefiningOp())) + return success(); + auto thisID = thisElement.getTileID(); auto users = op->getResult(0).getUsers(); const auto &targetModel = getTargetModel(op); @@ -314,8 +320,7 @@ template <typename ConcreteType> LogicalResult HasValidBDs<ConcreteType>::verifyTrait(Operation *op) { auto element = cast<ConcreteType>(op); const auto &targetModel = getTargetModel(op); - int bdMax = - targetModel.getNumBDs(element.getTileID().col, element.getTileID().row); + int bdMax = targetModel.getNumBDs(element.getTileLike().getTileType()); int bdNum = 0; for (auto &block : element.getBody()) { @@ -395,13 +400,15 @@ LogicalResult HasValidDMAChannels<ConcreteType>::verifyTrait(Operation *op) { } } - if (inputChannels.size() > - element.getTileOp().getNumSourceConnections(WireBundle::DMA)) + TileLike tile = element.getTileLike(); + if (!tile) + return op->emitOpError("tile must implement TileLike interface"); + + if (inputChannels.size() > tile.getNumSourceConnections(WireBundle::DMA)) return op->emitOpError( "uses more input channels than available on this tile"); - if (outputChannels.size() > - element.getTileOp().getNumDestConnections(WireBundle::DMA)) + if (outputChannels.size() > tile.getNumDestConnections(WireBundle::DMA)) return op->emitOpError( "uses more output channels than available on this tile"); return success(); @@ -419,15 +426,27 @@ LogicalResult ObjectFifoCreateOp::verify() { "and for each consumer."); } + // Helper to get tile interface from Value + auto getTileLikeFromValue = [](Value v) -> TileLike { + return llvm::dyn_cast<TileLike>(v.getDefiningOp()); + }; + + TileLike producerTile = getTileLikeFromValue(getProducerTile()); + if (!producerTile) + return emitError("producer tile must implement TileLike interface"); + // data layout transformations on shim tiles are handled by runtime operations - if (getProducerTileOp().isShimTile() && !getDimensionsToStream().empty()) { + if (producerTile.isShimTile() && !getDimensionsToStream().empty()) { return emitError( "`dimensionsToStream` data layout 
transformations are not supported " "on shim tile producers"); } - for (auto consTile : getConsumerTiles()) { - if (cast(consTile.getDefiningOp()).isShimTile() && - !getDimensionsFromStream(consTile).empty()) { + for (auto consTileVal : getConsumerTiles()) { + TileLike consTile = getTileLikeFromValue(consTileVal); + if (!consTile) + return emitError("consumer tile must implement TileLike interface"); + if (consTile.isShimTile() && + !getDimensionsFromStream(consTileVal).empty()) { return emitError( "`dimensionsFromStreamPerConsumer` data layout transformations are " "not supported on shim tile consumers"); @@ -435,7 +454,7 @@ LogicalResult ObjectFifoCreateOp::verify() { } if (getRepeatCount().has_value()) { - if (getProducerTileOp().isShimTile()) + if (producerTile.isShimTile()) return emitError("`repeat_count` unavailable for shim tiles"); } @@ -452,7 +471,7 @@ LogicalResult ObjectFifoCreateOp::verify() { return emitError("`aie_stream_port` must be defined"); if (getAieStream().value() == 0 || getAieStream().value() == 2) { - if (getProducerTileOp().isShimTile() || getProducerTileOp().isMemTile()) + if (producerTile.isShimTile() || producerTile.isMemTile()) return emitError( "`aie_stream` is not available for shim and mem tiles"); @@ -470,11 +489,12 @@ LogicalResult ObjectFifoCreateOp::verify() { "unavailable on stream end"); } - if (getAieStream().value() == 1 || getAieStream().value() == 2) - if (getConsumerTiles()[0].getDefiningOp().isShimTile() || - getConsumerTiles()[0].getDefiningOp().isMemTile()) + if (getAieStream().value() == 1 || getAieStream().value() == 2) { + TileLike consTile = getTileLikeFromValue(getConsumerTiles()[0]); + if (consTile && (consTile.isShimTile() || consTile.isMemTile())) return emitError( "`aie_stream` is not available for shim and mem tiles"); + } if (!getDimensionsFromStreamPerConsumer()[0].empty()) return emitError("`dimensionsFromStreamPerConsumer` data layout " @@ -482,7 +502,7 @@ LogicalResult ObjectFifoCreateOp::verify() { } 
if (getInitValues().has_value()) { - if (getProducerTileOp().isShimTile()) + if (producerTile.isShimTile()) return emitError("`init_values` unavailable for shim tiles"); } @@ -497,10 +517,11 @@ LogicalResult ObjectFifoCreateOp::verify() { return emitError("`iter_count` must be between 1 and 256"); // Check that either producer or at least one consumer is a MemTile - bool hasMemTile = getProducerTileOp().isMemTile(); + bool hasMemTile = producerTile.isMemTile(); if (!hasMemTile) { - for (auto consumerTile : getConsumerTiles()) { - if (cast(consumerTile.getDefiningOp()).isMemTile()) { + for (auto consTileVal : getConsumerTiles()) { + TileLike consTile = getTileLikeFromValue(consTileVal); + if (consTile && consTile.isMemTile()) { hasMemTile = true; break; } @@ -719,7 +740,9 @@ LogicalResult ObjectFifoLinkOp::verify() { return emitError("ObjectFifoLinkOp must have a link point, i.e., a " "shared tile between objectFifos"); - TileOp tile = cast(sharedTile.value().getDefiningOp()); + TileLike tile = llvm::dyn_cast(sharedTile.value().getDefiningOp()); + if (!tile) + return emitError("shared tile must implement TileLike interface"); if (!tile.isMemTile()) { if (isJoin() || isDistribute()) return emitError("ObjectFifoLinkOp join and distribute are " @@ -1089,8 +1112,7 @@ LogicalResult CascadeFlowOp::verify() { if (src.isShimTile() || dst.isShimTile()) return emitOpError("shimTile row has no cascade stream interface"); - if (t.isMemTile(src.colIndex(), src.rowIndex()) || - t.isMemTile(dst.colIndex(), dst.rowIndex())) + if (src.isMemTile() || dst.isMemTile()) return emitOpError("memTile row has no cascade stream interface"); if (!t.isSouth(src.getCol(), src.getRow(), dst.getCol(), dst.getRow()) && @@ -1122,7 +1144,7 @@ LogicalResult ConfigureCascadeOp::verify() { if (tile.isShimTile()) return emitOpError("shimTile row has no cascade stream interface"); - if (t.isMemTile(tile.colIndex(), tile.rowIndex())) + if (tile.isMemTile()) return emitOpError("memTile row has no cascade 
stream interface"); if (isa(t)) { @@ -1217,6 +1239,212 @@ DeviceOp::getForSymbolInModuleOrError(mlir::ModuleOp module, return deviceOp; } +//===----------------------------------------------------------------------===// +// TileElement +//===----------------------------------------------------------------------===// + +TileOp TileElement::getTileOp() { + auto element = cast(this->getOperation()); + return dyn_cast_or_null(element.getTile().getDefiningOp()); +} + +//===----------------------------------------------------------------------===// +// LogicalTileOp +//===----------------------------------------------------------------------===// + +LogicalResult LogicalTileOp::verify() { + const auto &targetModel = getTargetModel(*this); + int columns = targetModel.columns(); + int rows = targetModel.rows(); + + // Only verify col/row bounds if they are specified + if (auto col = getCol()) { + if (*col >= columns) + return emitOpError("column index (") + << *col + << ") must be less than the number of columns in the device (" + << columns << ")"; + } + if (auto row = getRow()) { + if (*row >= rows) + return emitOpError("row index (") + << *row << ") must be less than the number of rows in the device (" + << rows << ")"; + } + + // Check logical tile type matches coordinates on device + // Only validate when both col and row are specified + if (auto col = tryGetCol()) { + if (auto row = tryGetRow()) { + AIETileType tileType = getTileType(); + + if (targetModel.getTileType(*col, *row) != tileType) { + return emitOpError("declared logical tile type does not match " + "the tile type at coordinates (") + << *col << ", " << *row << ")"; + } + } + } + + if (isShimNOCorPLTile() && getAllocationScheme()) + return emitOpError("Shim tiles cannot have an allocation scheme"); + + return success(); +} + +TileID LogicalTileOp::getCanonicalTileID() { + const auto &targetModel = getTargetModel(*this); + + // If col and row are both specified, use them directly + if (getCol().has_value() 
&& getRow().has_value()) { + return {getCol().value(), getRow().value()}; + } + + // Otherwise, find a representative tile of the given type + AIETileType tileType = getTileType(); + for (int col = 0; col < targetModel.columns(); col++) { + for (int row = 0; row < targetModel.rows(); row++) { + if (targetModel.getTileType(col, row) == tileType) { + return {col, row}; + } + } + } + llvm_unreachable("No tile of matching tile type found in AIE device"); +} + +size_t LogicalTileOp::getNumSourceConnections(WireBundle bundle) { + const auto &targetModel = getTargetModel(*this); + TileID tile = getCanonicalTileID(); + + if (bundle == WireBundle::Core || bundle == WireBundle::DMA) { + // Note dest is correct here, since direction is reversed. + if (isShimNOCorPLTile()) + return targetModel.getNumDestShimMuxConnections(tile.col, tile.row, + bundle); + return targetModel.getNumDestSwitchboxConnections(tile.col, tile.row, + bundle); + } + return 0; +} + +size_t LogicalTileOp::getNumDestConnections(WireBundle bundle) { + const auto &targetModel = getTargetModel(*this); + TileID tile = getCanonicalTileID(); + + if (bundle == WireBundle::Core || bundle == WireBundle::DMA) { + // Note source is correct here, since direction is reversed. 
+ if (isShimNOCorPLTile()) + return targetModel.getNumDestShimMuxConnections(tile.col, tile.row, + bundle); + return targetModel.getNumSourceSwitchboxConnections(tile.col, tile.row, + bundle); + } + return 0; +} + +std::optional LogicalTileOp::tryGetCol() { + if (auto col = getCol()) + return col; + return std::nullopt; +} + +std::optional LogicalTileOp::tryGetRow() { + if (auto row = getRow()) + return row; + return std::nullopt; +} + +//===----------------------------------------------------------------------===// +// Custom Printer and Parser for LogicalTileOp +//===----------------------------------------------------------------------===// + +ParseResult LogicalTileOp::parse(OpAsmParser &parser, OperationState &result) { + AIETileType tileType; + if (parser.parseLess()) + return failure(); + + StringRef tileTypeStr; + if (parser.parseKeyword(&tileTypeStr)) + return failure(); + + auto tileTypeOpt = symbolizeAIETileType(tileTypeStr); + if (!tileTypeOpt) + return parser.emitError(parser.getCurrentLocation(), + "unknown logical tile type: ") + << tileTypeStr; + tileType = *tileTypeOpt; + + if (parser.parseGreater()) + return failure(); + + if (parser.parseLParen()) + return failure(); + + std::optional col; + if (succeeded(parser.parseOptionalQuestion())) { + // col is unspecified + } else { + int32_t colVal; + if (parser.parseInteger(colVal)) + return failure(); + col = colVal; + } + + if (parser.parseComma()) + return failure(); + + std::optional row; + if (succeeded(parser.parseOptionalQuestion())) { + // row is unspecified + } else { + int32_t rowVal; + if (parser.parseInteger(rowVal)) + return failure(); + row = rowVal; + } + + if (parser.parseRParen()) + return failure(); + + // Parse optional attributes + if (parser.parseOptionalAttrDict(result.attributes)) + return failure(); + + // Add the parsed attributes to the result + result.getOrAddProperties().tile_type = + AIETileTypeAttr::get(parser.getContext(), tileType); + if (col) + 
result.getOrAddProperties().col = + parser.getBuilder().getI32IntegerAttr(*col); + if (row) + result.getOrAddProperties().row = + parser.getBuilder().getI32IntegerAttr(*row); + + // Add result type (index) + result.addTypes(parser.getBuilder().getIndexType()); + + return success(); +} + +void LogicalTileOp::print(OpAsmPrinter &printer) { + printer << "<" << stringifyAIETileType(getTileType()) << ">"; + + printer << "("; + if (auto col = getCol()) + printer << *col; + else + printer << "?"; + printer << ", "; + if (auto row = getRow()) + printer << *row; + else + printer << "?"; + printer << ")"; + + SmallVector elidedAttrs = {"tile_type", "col", "row"}; + printer.printOptionalAttrDict((*this)->getAttrs(), elidedAttrs); +} + //===----------------------------------------------------------------------===// // TileOp //===----------------------------------------------------------------------===// @@ -1246,7 +1474,7 @@ LogicalResult TileOp::verify() { } } - if (isShimTile() && getAllocationScheme()) + if (isShimNOCorPLTile() && getAllocationScheme()) return emitOpError("Shim tiles cannot have an allocation scheme"); return success(); @@ -1258,8 +1486,7 @@ size_t TileOp::getNumSourceConnections(WireBundle bundle) { // Note dest is correct here, since direction is reversed. { // Note dest is correct here, since direction is reversed. - if (targetModel.isShimNOCTile(getCol(), getRow()) || - targetModel.isShimPLTile(getCol(), getRow())) + if (isShimNOCorPLTile()) return targetModel.getNumDestShimMuxConnections(getCol(), getRow(), bundle); return targetModel.getNumDestSwitchboxConnections(getCol(), getRow(), @@ -1274,8 +1501,7 @@ size_t TileOp::getNumDestConnections(WireBundle bundle) { // Note source is correct here, since direction is reversed. { // Note source is correct here, since direction is reversed. 
- if (targetModel.isShimNOCTile(getCol(), getRow()) || - targetModel.isShimPLTile(getCol(), getRow())) + if (isShimNOCorPLTile()) return targetModel.getNumDestShimMuxConnections(getCol(), getRow(), bundle); return targetModel.getNumSourceSwitchboxConnections(getCol(), getRow(), @@ -1284,24 +1510,12 @@ size_t TileOp::getNumDestConnections(WireBundle bundle) { return 0; } -bool TileOp::isMemTile() { - const auto &targetModel = getTargetModel(*this); - return targetModel.isMemTile(getCol(), getRow()); -} - -bool TileOp::isShimNOCTile() { - const auto &targetModel = getTargetModel(*this); - return targetModel.isShimNOCTile(getCol(), getRow()); -} +std::optional TileOp::tryGetCol() { return getCol(); } +std::optional TileOp::tryGetRow() { return getRow(); } -bool TileOp::isShimPLTile() { +AIETileType TileOp::getTileType() { const auto &targetModel = getTargetModel(*this); - return targetModel.isShimPLTile(getCol(), getRow()); -} - -bool TileOp::isShimNOCorPLTile() { - const auto &targetModel = getTargetModel(*this); - return targetModel.isShimNOCorPLTile(getCol(), getRow()); + return targetModel.getTileType(getCol(), getRow()); } bool isLegalTileConnection(TileOp tile, const AIETargetModel &targetModel, @@ -1378,6 +1592,10 @@ LogicalResult ShimSwitchboxOp::verify() { //===----------------------------------------------------------------------===// LogicalResult ShimMuxOp::verify() { + // ShimMux requires a placed tile (TileOp), not a logical tile + if (!isa(getTile().getDefiningOp())) + return emitOpError("requires a placed tile (aie.tile), not a logical tile"); + Region &body = getConnections(); DenseSet destset; if (body.empty()) @@ -1414,10 +1632,6 @@ size_t ShimMuxOp::getNumDestConnections(WireBundle bundle) { bundle); } -TileOp ShimMuxOp::getTileOp() { - return cast(getTile().getDefiningOp()); -} - //===----------------------------------------------------------------------===// // ShimDMAOp 
//===----------------------------------------------------------------------===// @@ -1430,7 +1644,7 @@ LogicalResult ShimDMAOp::verify() { } TileOp ShimDMAOp::getTileOp() { - return cast(getTile().getDefiningOp()); + return cast(this->getOperation()).getTileOp(); } LogicalResult PacketRulesOp::verify() { @@ -1472,8 +1686,6 @@ LogicalResult CoreOp::verify() { return success(); } -TileOp CoreOp::getTileOp() { return cast(getTile().getDefiningOp()); } - bool CoreOp::isEmpty() { Region &body = getBody(); // Return iff. core body contains exactly one block with exactly one AIE.EndOp @@ -1481,6 +1693,10 @@ bool CoreOp::isEmpty() { llvm::isa(body.front().front())); } +TileOp CoreOp::getTileOp() { + return cast(this->getOperation()).getTileOp(); +} + //===----------------------------------------------------------------------===// // BufferOp //===----------------------------------------------------------------------===// @@ -1491,8 +1707,6 @@ int64_t BufferOp::getAllocationSize() { return type.getNumElements() * dataLayout.getTypeSize(type.getElementType()); } -TileOp BufferOp::getTileOp() { return cast(getTile().getDefiningOp()); } - LogicalResult BufferOp::verify() { if (UsesAreAccessible::verifyTrait(*this).failed()) return failure(); @@ -1555,6 +1769,10 @@ static ParseResult parseBufferInitialValue(OpAsmParser &parser, Type &type, return success(); } +TileOp BufferOp::getTileOp() { + return cast(this->getOperation()).getTileOp(); +} + //===----------------------------------------------------------------------===// // MemOp //===----------------------------------------------------------------------===// @@ -1574,7 +1792,9 @@ LogicalResult MemOp::verify() { return success(); } -TileOp MemOp::getTileOp() { return cast(getTile().getDefiningOp()); } +TileOp MemOp::getTileOp() { + return cast(this->getOperation()).getTileOp(); +} //===----------------------------------------------------------------------===// // MemTileDMAOp @@ -1621,7 +1841,7 @@ LogicalResult 
MemTileDMAOp::verify() { for (Block *b : reachable) { for (DMABDOp bd : b->getOps()) { if (auto bufferOp = bd.getBufferOp(); - bufferOp.getTileID() != getTileID()) { + bufferOp.getTile() != getTile()) { InFlightDiagnostic err = bd.emitOpError() << "is reachable from DMA channel " @@ -1634,7 +1854,7 @@ LogicalResult MemTileDMAOp::verify() { } for (auto useLock : b->getOps()) { if (auto lockOp = useLock.getLockOp(); - lockOp.getTileID() != getTileID()) { + lockOp.getTile() != getTile()) { InFlightDiagnostic err = useLock.emitOpError() << "is reachable from DMA channel " @@ -1653,6 +1873,10 @@ LogicalResult MemTileDMAOp::verify() { return success(); } +TileOp MemTileDMAOp::getTileOp() { + return cast(this->getOperation()).getTileOp(); +} + //===----------------------------------------------------------------------===// // DMAOp //===----------------------------------------------------------------------===// @@ -1875,16 +2099,19 @@ LogicalResult DMABDOp::verify() { return emitOpError("transfer length must be multiple of 4 (i.e., represent " "4 byte aligned address)"); - TileID parentTileId = getParentTileElement(getOperation()).getTileID(); + TileElement parentTileElement = getParentTileElement(getOperation()); + TileLike parentTile = parentTileElement.getTileLike(); + if (!parentTile) + return emitOpError("parent tile must implement TileLike interface"); if (!isUnrankedMemRef && getOperation()->getParentOfType() && - getBufferOp().getTileID() != parentTileId) + getBufferOp().getTile() != parentTileElement.getTile()) return emitOpError( "Core tile DMAs can only access a buffer in the same tile."); const AIETargetModel &targetModel = getTargetModel(getOperation()); - uint32_t maxBds = targetModel.getNumBDs(parentTileId.col, parentTileId.row); + uint32_t maxBds = targetModel.getNumBDs(parentTile.getTileType()); if (std::optional bdId = getBdId(); bdId.has_value() && static_cast(*bdId) >= maxBds) return emitOpError("bdId attribute exceeds max: ") << maxBds - 1; @@ -1943,7 
+2170,7 @@ LogicalResult DMABDOp::verify() { if (!dims.has_value()) return emitOpError() << "Padding requires n-d data layouts expressed as" << " wrap(s) and stride(s)."; - if (!targetModel.isMemTile(parentTileId.col, parentTileId.row)) + if (!parentTile.isMemTile()) return emitOpError() << "Padding is only supported by memtile dma bds."; if (dims->size() != paddims->size()) return emitOpError() << "Mismatch number of dimensions between padding(s)" @@ -1969,8 +2196,7 @@ LogicalResult DMABDOp::verify() { << " padding in 32-bit words."; } if (!isUnrankedMemRef && - (targetModel.isMemTile(parentTileId.col, parentTileId.row) || - targetModel.isCoreTile(parentTileId.col, parentTileId.row))) { + (parentTile.isMemTile() || parentTile.isCoreTile())) { if (auto baseAddr = getBufferOp().getAddress(); baseAddr.has_value()) { int offsetInBytes = *baseAddr + getOffsetInBytes(); if (offsetInBytes % 4) @@ -1989,18 +2215,13 @@ LogicalResult DMABDOp::verify() { if (!getLen() && !getBuffer().getType().hasStaticShape()) return emitOpError() << "buffer with dynamic shape requires static length."; - if (getBurstLength() != 0 && - !targetModel.isShimNOCTile(parentTileId.col, parentTileId.row)) + if (getBurstLength() != 0 && !parentTile.isShimNOCTile()) return emitOpError("Burst length is only supported in Shim NOC tiles that " "are connected to the memory-mapped NOC."); return success(); } -TileOp MemTileDMAOp::getTileOp() { - return cast(getTile().getDefiningOp()); -} - //===----------------------------------------------------------------------===// // DMAStartOp //===----------------------------------------------------------------------===// @@ -2085,6 +2306,10 @@ void DMAStartOp::getCanonicalizationPatterns(RewritePatternSet &patterns, //===----------------------------------------------------------------------===// LogicalResult SwitchboxOp::verify() { + // Switchbox requires a placed tile (TileOp), not a logical tile + if (!isa(getTile().getDefiningOp())) + return 
emitOpError("requires a placed tile (aie.tile), not a logical tile"); + Region &body = getConnections(); DenseSet sourceset; DenseSet destset; @@ -2208,10 +2433,6 @@ LogicalResult SwitchboxOp::verify() { return success(); } -TileOp SwitchboxOp::getTileOp() { - return cast(getTile().getDefiningOp()); -} - template struct HasSomeParent { static LogicalResult verifyTrait(Operation *op) { @@ -2225,17 +2446,21 @@ struct HasSomeParent { } }; -TileOp LockOp::getTileOp() { return cast(getTile().getDefiningOp()); } +TileOp LockOp::getTileOp() { + return cast(this->getOperation()).getTileOp(); +} LogicalResult LockOp::verify() { if (auto result = UsesAreAccessible::verifyTrait(*this); result.failed()) return result; if (getLockID().has_value()) { - const auto &targetModel = getTargetModel(getTileOp()); - auto tileOp = getTileOp(); - if (int numLocks = - targetModel.getNumLocks(tileOp.getCol(), tileOp.getRow()); + TileLike tileLike = getTileLike(); + if (!tileLike) + return emitOpError("tile operand must implement TileLike interface"); + const auto &targetModel = getTargetModel(*this); + auto tileType = tileLike.getTileType(); + if (int numLocks = targetModel.getNumLocks(tileType); getLockID().value() >= numLocks) return emitOpError("lock assigned invalid id (maximum is ") << numLocks - 1 << ")"; @@ -2285,8 +2510,7 @@ struct AccessesLocalLocks { static LogicalResult verifyTrait(Operation *op) { if (auto memOp = op->getParentOfType()) { auto useLock = dyn_cast(op); - if (auto lock = useLock.getLockOp(); - lock.getTileID() != memOp.getTileID()) + if (auto lock = useLock.getLockOp(); lock.getTile() != memOp.getTile()) return failure(); } return success(); @@ -2351,6 +2575,10 @@ LogicalResult UseLockOp::verify() { #define GET_OP_CLASSES #include "aie/Dialect/AIE/IR/AIEOps.cpp.inc" +TileOp SwitchboxOp::getTileOp() { + return cast(this->getOperation()).getTileOp(); +} + size_t SwitchboxOp::getNumSourceConnections(WireBundle bundle) { auto tile = getTileOp(); const auto &targetModel 
= getTargetModel(*this); @@ -2365,6 +2593,10 @@ size_t SwitchboxOp::getNumDestConnections(WireBundle bundle) { tile.getRow(), bundle); } +TileOp ShimMuxOp::getTileOp() { + return cast(this->getOperation()).getTileOp(); +} + WireBundle xilinx::AIE::getConnectingBundle(WireBundle dir) { switch (dir) { case WireBundle::North: @@ -2446,18 +2678,20 @@ void BDChainOp::print(OpAsmPrinter &printer) { //===----------------------------------------------------------------------===// LogicalResult ShimDMAAllocationOp::verify() { - TileOp tileOp = getTileOp(); - if (!tileOp) { - return emitOpError("tile operand must be a TileOp"); - } - - const auto &targetModel = getTargetModel(*this); - int col = tileOp.getCol(); - int row = tileOp.getRow(); - - if (!targetModel.isShimNOCorPLTile(col, row)) { - return emitOpError("tile must be a shim tile, but got tile(") - << col << ", " << row << ")"; + TileLike tileLike = llvm::dyn_cast(getTile().getDefiningOp()); + if (!tileLike) { + return emitOpError("tile operand must implement TileLike interface"); + } + + if (!tileLike.isShimNOCorPLTile()) { + // if placed, provide detailed error message + auto col = tileLike.tryGetCol(); + auto row = tileLike.tryGetRow(); + if (col && row) { + return emitOpError("tile must be a shim tile, but got tile(") + << *col << ", " << *row << ")"; + } + return emitOpError("tile must be a shim tile"); } return success(); diff --git a/programming_examples/basic/combined_transpose/combined_transpose.py b/programming_examples/basic/combined_transpose/combined_transpose.py index f17604d5afb..f944835a936 100644 --- a/programming_examples/basic/combined_transpose/combined_transpose.py +++ b/programming_examples/basic/combined_transpose/combined_transpose.py @@ -8,7 +8,6 @@ import argparse from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker, str_to_dtype -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ from 
aie.helpers.taplib import TensorAccessPattern, TensorTiler2D @@ -95,7 +94,7 @@ def core_fn(in_fifo, out_fifo, kernel_func): rt.drain(out_fifo.cons(), out, tap_out_L1L3, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() if __name__ == "__main__": diff --git a/programming_examples/basic/dma_transpose/dma_transpose_iron.py b/programming_examples/basic/dma_transpose/dma_transpose_iron.py index a28fe9bdd0e..cbd01418f05 100644 --- a/programming_examples/basic/dma_transpose/dma_transpose_iron.py +++ b/programming_examples/basic/dma_transpose/dma_transpose_iron.py @@ -11,7 +11,6 @@ from aie.iron import ObjectFifo, Program, Runtime from aie.iron.device import NPU1Col1, NPU2Col1, AnyComputeTile -from aie.iron.placers import SequentialPlacer from aie.helpers.taplib import TensorTiler2D if len(sys.argv) > 3: @@ -50,7 +49,7 @@ def my_passthrough(M, K, generate_acccess_map=False): my_program = Program(dev, rt) # Place program components (assign the resources on the device) and generate an MLIR module - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector_iron.py b/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector_iron.py index 888505a8d07..9665fd4a2b7 100644 --- a/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector_iron.py +++ b/programming_examples/basic/matrix_multiplication/matrix_vector/matrix_vector_iron.py @@ -8,7 +8,6 @@ import argparse from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorTiler2D @@ -109,7 +108,7 @@ def 
core_fn(of_a, of_b, of_c, zero, matvec): my_program = Program(dev_ty, rt) # Place components (assign them resources on the device) and generate an MLIR module - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_examples/basic/matrix_multiplication/single_core/single_core_iron.py b/programming_examples/basic/matrix_multiplication/single_core/single_core_iron.py index 77bdf322187..aaa6574fa7e 100644 --- a/programming_examples/basic/matrix_multiplication/single_core/single_core_iron.py +++ b/programming_examples/basic/matrix_multiplication/single_core/single_core_iron.py @@ -8,7 +8,6 @@ import numpy as np from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker, str_to_dtype -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorAccessSequence, TensorTiler2D @@ -300,7 +299,7 @@ def core_fn(of_a, of_b, of_c, zero, matmul): my_program = Program(dev_ty, rt) # Place components (assign them resources on the device) and generate an MLIR module - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module diff --git a/programming_examples/basic/matrix_multiplication/whole_array/whole_array_iron.py b/programming_examples/basic/matrix_multiplication/whole_array/whole_array_iron.py index cec91633c9b..8e59fcf9740 100644 --- a/programming_examples/basic/matrix_multiplication/whole_array/whole_array_iron.py +++ b/programming_examples/basic/matrix_multiplication/whole_array/whole_array_iron.py @@ -8,7 +8,6 @@ import numpy as np from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker, str_to_dtype -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU1Col2, NPU1, NPU2, Tile from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorAccessSequence, 
TensorTiler2D @@ -530,7 +529,7 @@ def core_fn(in_a, in_b, out_c, zero, matmul): my_program = Program(dev_ty, rt) # Place components (assign them resources on the device) and generate an MLIR module - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module diff --git a/programming_examples/basic/matrix_scalar_add/matrix_scalar_add.py b/programming_examples/basic/matrix_scalar_add/matrix_scalar_add.py index e316e8b3ece..11ca16246fa 100644 --- a/programming_examples/basic/matrix_scalar_add/matrix_scalar_add.py +++ b/programming_examples/basic/matrix_scalar_add/matrix_scalar_add.py @@ -9,7 +9,6 @@ import sys from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2, XCVC1902 from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorTiler2D @@ -70,7 +69,7 @@ def core_fn(of_in1, of_out1): rt.drain(of_out.cons(), out_tensor, tap, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() print(my_matrix_add_one()) diff --git a/programming_examples/basic/memcpy/memcpy.py b/programming_examples/basic/memcpy/memcpy.py index 1766d7ff151..df32b2bcc44 100644 --- a/programming_examples/basic/memcpy/memcpy.py +++ b/programming_examples/basic/memcpy/memcpy.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import Tile, NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern @@ -140,7 +139,7 @@ def core_fn(of_in, of_out, passThroughLine): # END EXERCISE # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = 
argparse.ArgumentParser() diff --git a/programming_examples/basic/passthrough_dmas/passthrough_dmas.py b/programming_examples/basic/passthrough_dmas/passthrough_dmas.py index 62555a12dcb..ab585b626b0 100644 --- a/programming_examples/basic/passthrough_dmas/passthrough_dmas.py +++ b/programming_examples/basic/passthrough_dmas/passthrough_dmas.py @@ -9,7 +9,6 @@ import sys from aie.iron import ObjectFifo, Program, Runtime -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1, XCVC1902 N = 4096 @@ -47,7 +46,7 @@ my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_examples/basic/passthrough_kernel/passthrough_kernel.py b/programming_examples/basic/passthrough_kernel/passthrough_kernel.py index 0c45db34869..9fef8261973 100644 --- a/programming_examples/basic/passthrough_kernel/passthrough_kernel.py +++ b/programming_examples/basic/passthrough_kernel/passthrough_kernel.py @@ -10,7 +10,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2 @@ -60,7 +59,7 @@ def core_fn(of_in, of_out, passThroughLine): rt.drain(of_out.cons(), b_out, wait=True) # Place components (assign the resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/basic/passthrough_pykernel/passthrough_pykernel.py b/programming_examples/basic/passthrough_pykernel/passthrough_pykernel.py index 3365332e2e5..9902780ab3b 100644 --- a/programming_examples/basic/passthrough_pykernel/passthrough_pykernel.py +++ 
b/programming_examples/basic/passthrough_pykernel/passthrough_pykernel.py @@ -9,7 +9,6 @@ import sys from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2 from aie.iron.controlflow import range_ from aie.helpers.dialects.func import func @@ -73,7 +72,7 @@ def core_fn(of_in, of_out, passthrough_fn): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_examples/basic/row_wise_bias_add/row_wise_bias_add.py b/programming_examples/basic/row_wise_bias_add/row_wise_bias_add.py index 3b9293431f8..d4bfd809497 100644 --- a/programming_examples/basic/row_wise_bias_add/row_wise_bias_add.py +++ b/programming_examples/basic/row_wise_bias_add/row_wise_bias_add.py @@ -8,7 +8,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2 from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorTiler2D @@ -66,7 +65,7 @@ def core_fn(in_fifo, bias_fifo, out_fifo, kernel_func): rt.drain(out_fifo.cons(), out, tap, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git a/programming_examples/basic/shuffle_transpose/shuffle_transpose.py b/programming_examples/basic/shuffle_transpose/shuffle_transpose.py index 18e555be6ae..a79dc27d35f 100644 --- a/programming_examples/basic/shuffle_transpose/shuffle_transpose.py +++ b/programming_examples/basic/shuffle_transpose/shuffle_transpose.py @@ -9,7 +9,6 @@ import argparse from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from 
aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorTiler2D @@ -59,7 +58,7 @@ def core_fn(in_fifo, out_fifo, kernel_func): rt.drain(out_fifo.cons(), out, tap, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() if __name__ == "__main__": diff --git a/programming_examples/basic/tiling_exploration/per_tile/per_tile.py b/programming_examples/basic/tiling_exploration/per_tile/per_tile.py index 5179c3b9c17..22334b4d789 100644 --- a/programming_examples/basic/tiling_exploration/per_tile/per_tile.py +++ b/programming_examples/basic/tiling_exploration/per_tile/per_tile.py @@ -9,7 +9,6 @@ import numpy as np from aie.iron import Buffer, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1 from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorTiler2D @@ -62,7 +61,7 @@ def access_order(of_out, counter_buf): my_program = Program(NPU1Col1(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(opts): diff --git a/programming_examples/basic/tiling_exploration/tile_group/tile_group.py b/programming_examples/basic/tiling_exploration/tile_group/tile_group.py index 457e61bb6d5..74d17ff3f05 100644 --- a/programming_examples/basic/tiling_exploration/tile_group/tile_group.py +++ b/programming_examples/basic/tiling_exploration/tile_group/tile_group.py @@ -9,7 +9,6 @@ import numpy as np from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1 from aie.helpers.taplib import TensorTiler2D from aie.iron.controlflow import 
range_ @@ -61,7 +60,7 @@ def access_order(of_out): my_program = Program(NPU1Col1(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(opts): diff --git a/programming_examples/basic/vector_exp/vector_exp.py b/programming_examples/basic/vector_exp/vector_exp.py index 76a367ebf0e..850af0dffa7 100644 --- a/programming_examples/basic/vector_exp/vector_exp.py +++ b/programming_examples/basic/vector_exp/vector_exp.py @@ -10,7 +10,6 @@ from ml_dtypes import bfloat16 from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2 from aie.iron.controlflow import range_ @@ -78,7 +77,7 @@ def core_fn(a_in, c_out, exp_bf16_1024): rt.drain(C_fifo.cons(), c_out, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() print(my_eltwise_exp()) diff --git a/programming_examples/basic/vector_reduce_add/vector_reduce_add.py b/programming_examples/basic/vector_reduce_add/vector_reduce_add.py index b245d4314ac..1d096a1f14b 100644 --- a/programming_examples/basic/vector_reduce_add/vector_reduce_add.py +++ b/programming_examples/basic/vector_reduce_add/vector_reduce_add.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 @@ -56,7 +55,7 @@ def core_body(of_in, of_out, reduce_add_vector): rt.drain(of_out.cons(), c_out, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() print(my_reduce_add()) diff --git 
a/programming_examples/basic/vector_reduce_max/multi_column_designs/col_wise_vector_reduce_max.py b/programming_examples/basic/vector_reduce_max/multi_column_designs/col_wise_vector_reduce_max.py index f7fd7745ebf..7c78fa8473c 100644 --- a/programming_examples/basic/vector_reduce_max/multi_column_designs/col_wise_vector_reduce_max.py +++ b/programming_examples/basic/vector_reduce_max/multi_column_designs/col_wise_vector_reduce_max.py @@ -19,7 +19,6 @@ Buffer, str_to_dtype, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 from ml_dtypes import bfloat16 from aie.iron.controlflow import range_ @@ -182,7 +181,7 @@ def core_body(*args): ) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer(cores_per_col)) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/basic/vector_reduce_max/multi_column_designs/row_wise_vector_reduce_max.py b/programming_examples/basic/vector_reduce_max/multi_column_designs/row_wise_vector_reduce_max.py index 0d94e7ec906..3efe26eb9ac 100644 --- a/programming_examples/basic/vector_reduce_max/multi_column_designs/row_wise_vector_reduce_max.py +++ b/programming_examples/basic/vector_reduce_max/multi_column_designs/row_wise_vector_reduce_max.py @@ -19,7 +19,6 @@ Buffer, str_to_dtype, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 from ml_dtypes import bfloat16 from aie.iron.controlflow import range_ @@ -191,7 +190,7 @@ def core_body(*args): ) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_chained.py 
b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_chained.py index 16d241914dd..10c9f3b64b0 100644 --- a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_chained.py +++ b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_chained.py @@ -18,7 +18,6 @@ Buffer, str_to_dtype, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.helpers.util import np_ndarray_type_get_shape from ml_dtypes import bfloat16 @@ -172,7 +171,7 @@ def core_body( rt.drain(out_fifos[0].cons(), c_out, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_memtile.py b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_memtile.py index de71a68a51b..d78afe400be 100644 --- a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_memtile.py +++ b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_memtile.py @@ -18,7 +18,6 @@ Buffer, str_to_dtype, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col2, NPU2Col2 from aie.helpers.util import np_ndarray_type_get_shape from ml_dtypes import bfloat16 @@ -189,7 +188,7 @@ def core_body( rt.drain(of_out.cons(), c_out, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_shared.py 
b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_shared.py index 295b4108047..01683e54af3 100644 --- a/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_shared.py +++ b/programming_examples/basic/vector_reduce_max/single_column_designs/vector_reduce_max_shared.py @@ -18,7 +18,6 @@ Buffer, str_to_dtype, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.helpers.util import np_ndarray_type_get_shape from ml_dtypes import bfloat16 @@ -192,7 +191,7 @@ def core_body(*args): rt.drain(out_fifos[0 if n_cores == 1 else 1].cons(), c_out, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/basic/vector_reduce_max/single_core_designs/vector_reduce_max.py b/programming_examples/basic/vector_reduce_max/single_core_designs/vector_reduce_max.py index 7aff8b2d2ac..5e74d5d27bb 100644 --- a/programming_examples/basic/vector_reduce_max/single_core_designs/vector_reduce_max.py +++ b/programming_examples/basic/vector_reduce_max/single_core_designs/vector_reduce_max.py @@ -10,7 +10,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker, str_to_dtype -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 @@ -68,7 +67,7 @@ def core_body(of_in, of_out, reduce_max_vector): rt.drain(of_out.cons(), c_out, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/basic/vector_reduce_min/vector_reduce_min.py b/programming_examples/basic/vector_reduce_min/vector_reduce_min.py 
index 3eaf60c67a8..f7066b6270f 100644 --- a/programming_examples/basic/vector_reduce_min/vector_reduce_min.py +++ b/programming_examples/basic/vector_reduce_min/vector_reduce_min.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 @@ -58,7 +57,7 @@ def core_body(of_in, of_out, reduce_min_vector): rt.drain(of_out.cons(), c_out, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() print(my_reduce_min()) diff --git a/programming_examples/basic/vector_reduce_min/vector_reduce_min_jit.py b/programming_examples/basic/vector_reduce_min/vector_reduce_min_jit.py index 2c73d70612f..6b611dece68 100644 --- a/programming_examples/basic/vector_reduce_min/vector_reduce_min_jit.py +++ b/programming_examples/basic/vector_reduce_min/vector_reduce_min_jit.py @@ -13,7 +13,6 @@ import aie.iron as iron from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron import ExternalFunction @@ -61,7 +60,7 @@ def core_body(of_in, of_out, reduce_min_vector): rt.drain(of_out.cons(), c_out, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() def main(): diff --git a/programming_examples/basic/vector_scalar_add/vector_scalar_add.py b/programming_examples/basic/vector_scalar_add/vector_scalar_add.py index 7e2b9874b2a..5080f92b7f4 100644 --- a/programming_examples/basic/vector_scalar_add/vector_scalar_add.py +++ b/programming_examples/basic/vector_scalar_add/vector_scalar_add.py @@ -9,7 +9,6 @@ import sys from aie.iron import ObjectFifo, Program, 
Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -59,7 +58,7 @@ def core_body(of_in1, of_out0): rt.drain(of_out1.cons(), outTensor, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() module = my_vector_bias_add() diff --git a/programming_examples/basic/vector_scalar_add_runlist/vector_scalar_add.py b/programming_examples/basic/vector_scalar_add_runlist/vector_scalar_add.py index 3db032c0e26..1e8caff727a 100644 --- a/programming_examples/basic/vector_scalar_add_runlist/vector_scalar_add.py +++ b/programming_examples/basic/vector_scalar_add_runlist/vector_scalar_add.py @@ -9,7 +9,6 @@ import sys from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -59,7 +58,7 @@ def core_body(of_in1, of_out0): rt.drain(of_out1.cons(), outTensor, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() module = my_vector_bias_add() diff --git a/programming_examples/basic/vector_scalar_mul/vector_scalar_mul.py b/programming_examples/basic/vector_scalar_mul/vector_scalar_mul.py index 25bd867bbb5..289c8858508 100644 --- a/programming_examples/basic/vector_scalar_mul/vector_scalar_mul.py +++ b/programming_examples/basic/vector_scalar_mul/vector_scalar_mul.py @@ -10,7 +10,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2 from aie.iron.controlflow import range_ @@ -87,7 +86,7 @@ def core_body(of_in, of_factor, of_out, 
scale_fn): rt.drain(of_out.cons(), C, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/basic/vector_vector_add/vector_vector_add.py b/programming_examples/basic/vector_vector_add/vector_vector_add.py index 72a482036ce..fd3fd667e77 100644 --- a/programming_examples/basic/vector_vector_add/vector_vector_add.py +++ b/programming_examples/basic/vector_vector_add/vector_vector_add.py @@ -12,7 +12,6 @@ import aie.iron as iron from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -81,7 +80,7 @@ def core_body(of_in1, of_in2, of_out): rt.drain(of_out.cons(), C, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() def main(): diff --git a/programming_examples/basic/vector_vector_modulo/vector_vector_modulo.py b/programming_examples/basic/vector_vector_modulo/vector_vector_modulo.py index ce8f364afe3..80c9348ccb4 100644 --- a/programming_examples/basic/vector_vector_modulo/vector_vector_modulo.py +++ b/programming_examples/basic/vector_vector_modulo/vector_vector_modulo.py @@ -9,7 +9,6 @@ import sys from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1, XCVC1902 from aie.iron.controlflow import range_ @@ -65,7 +64,7 @@ def core_body(of_in1, of_in2, of_out): rt.drain(of_out.cons(), C, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, 
rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() module = my_vector_mod() diff --git a/programming_examples/basic/vector_vector_mul/vector_vector_mul.py b/programming_examples/basic/vector_vector_mul/vector_vector_mul.py index 993deb7cbec..21035456ca2 100644 --- a/programming_examples/basic/vector_vector_mul/vector_vector_mul.py +++ b/programming_examples/basic/vector_vector_mul/vector_vector_mul.py @@ -9,7 +9,6 @@ import sys from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1, XCVC1902 from aie.iron.controlflow import range_ @@ -65,7 +64,7 @@ def core_body(of_in1, of_in2, of_out): rt.drain(of_out.cons(), C, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() module = my_vector_mul() diff --git a/programming_examples/getting_started/00_memcpy/memcpy.py b/programming_examples/getting_started/00_memcpy/memcpy.py index 756ae5900c7..e8fc4c64d67 100644 --- a/programming_examples/getting_started/00_memcpy/memcpy.py +++ b/programming_examples/getting_started/00_memcpy/memcpy.py @@ -13,7 +13,6 @@ import aie.iron as iron from aie.iron import ExternalFunction, jit from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.helpers.taplib.tap import TensorAccessPattern from aie.utils.config import cxx_header_path @@ -168,7 +167,7 @@ def core_fn(of_in, of_out, passThroughLine): # -------------------------------------------------------------------------- my_program = Program(device, rt) - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_examples/getting_started/01_SAXPY/saxpy.py b/programming_examples/getting_started/01_SAXPY/saxpy.py index 
23ef83a3e8e..f4aa1c9a25a 100644 --- a/programming_examples/getting_started/01_SAXPY/saxpy.py +++ b/programming_examples/getting_started/01_SAXPY/saxpy.py @@ -12,7 +12,6 @@ import aie.iron as iron from aie.iron import ExternalFunction, jit from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorAccessPattern, TensorTiler2D from aie.utils.config import cxx_header_path @@ -82,7 +81,7 @@ def core_body(of_x, of_y, of_z, saxpy_kernel): # -------------------------------------------------------------------------- my_program = Program(iron.get_current_device(), rt) - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_examples/getting_started/02_vector_reduce_max/vector_reduce_max_1col.py b/programming_examples/getting_started/02_vector_reduce_max/vector_reduce_max_1col.py index ee3e330e962..20b4e73b56c 100644 --- a/programming_examples/getting_started/02_vector_reduce_max/vector_reduce_max_1col.py +++ b/programming_examples/getting_started/02_vector_reduce_max/vector_reduce_max_1col.py @@ -12,7 +12,6 @@ import aie.iron as iron from aie.iron import ExternalFunction from aie.iron import ObjectFifo, Program, Runtime, Worker, Buffer -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ from aie.helpers.util import np_ndarray_type_get_shape from aie.helpers.dialects.scf import if_, else_ @@ -169,7 +168,7 @@ def core_body(of_in, of_out, in0, reduce_fn, nextC_buffer, tmp_buffer): # -------------------------------------------------------------------------- my_program = Program(iron.get_current_device(), rt) - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_examples/getting_started/03_matrix_multiplication_single_core/matrix_multiplication_single_core.py 
b/programming_examples/getting_started/03_matrix_multiplication_single_core/matrix_multiplication_single_core.py index c8b212688b7..f3049907628 100644 --- a/programming_examples/getting_started/03_matrix_multiplication_single_core/matrix_multiplication_single_core.py +++ b/programming_examples/getting_started/03_matrix_multiplication_single_core/matrix_multiplication_single_core.py @@ -11,7 +11,6 @@ import aie.iron as iron from aie.iron import ExternalFunction, jit from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorAccessPattern, TensorTiler2D from aie.utils.config import cxx_header_path @@ -144,7 +143,7 @@ def core_fn(of_a, of_b, of_c, matmul): # -------------------------------------------------------------------------- my_program = Program(iron.get_current_device(), rt) - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_examples/ml/block_datatypes/bfp_conversion/bfp_conversion.py b/programming_examples/ml/block_datatypes/bfp_conversion/bfp_conversion.py index 49febe84680..24fa77db37e 100644 --- a/programming_examples/ml/block_datatypes/bfp_conversion/bfp_conversion.py +++ b/programming_examples/ml/block_datatypes/bfp_conversion/bfp_conversion.py @@ -13,7 +13,6 @@ from aie.iron import ObjectFifo, Program, Runtime, Worker from aie.iron.kernel import Kernel -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU2 from aie.iron.controlflow import range_ @@ -121,7 +120,7 @@ def multiplication_core( rt.fill(of_in2.prod(), B) rt.drain(of_out.cons(), C, wait=True) - return Program(NPU2(), rt).resolve_program(SequentialPlacer()) + return Program(NPU2(), rt).resolve_program() module = bfp_conversion() diff --git a/programming_examples/ml/block_datatypes/matrix_multiplication/in_core_shuffle/single_core.py 
b/programming_examples/ml/block_datatypes/matrix_multiplication/in_core_shuffle/single_core.py index 8fcb7be28d7..48ec7f58203 100644 --- a/programming_examples/ml/block_datatypes/matrix_multiplication/in_core_shuffle/single_core.py +++ b/programming_examples/ml/block_datatypes/matrix_multiplication/in_core_shuffle/single_core.py @@ -8,7 +8,6 @@ import numpy as np from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU2 from aie.dialects.aiex import v8bfp16ebs8 @@ -75,7 +74,7 @@ def core_fn(of_a, of_c, scalar_shuffle_kernel): dev_ty = NPU2() my_program = Program(dev_ty, rt) - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module diff --git a/programming_examples/ml/block_datatypes/matrix_multiplication/single_core/single_core.py b/programming_examples/ml/block_datatypes/matrix_multiplication/single_core/single_core.py index 3af0f3ed8b7..80b451fe1a0 100644 --- a/programming_examples/ml/block_datatypes/matrix_multiplication/single_core/single_core.py +++ b/programming_examples/ml/block_datatypes/matrix_multiplication/single_core/single_core.py @@ -12,7 +12,6 @@ from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker from aie.iron.controlflow import range_ from aie.iron.device import NPU2 -from aie.iron.placers import SequentialPlacer def ceildiv(a, b): @@ -134,7 +133,7 @@ def core_fn(of_a, of_b, of_c, zero, matmul): dev_ty = NPU2() my_program = Program(dev_ty, rt) - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module diff --git a/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_mixed/single_core.py b/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_mixed/single_core.py index 76832d826e8..20b63d70249 100644 --- a/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_mixed/single_core.py +++ 
b/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_mixed/single_core.py @@ -13,7 +13,6 @@ from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker from aie.iron.controlflow import range_ from aie.iron.device import NPU2 -from aie.iron.placers import SequentialPlacer def ceildiv(a, b): @@ -136,7 +135,7 @@ def core_fn(of_a, of_b, of_c, zero, matmul): dev_ty = NPU2() my_program = Program(dev_ty, rt) - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module diff --git a/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_no_tiling/single_core.py b/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_no_tiling/single_core.py index 1f847f8ebce..9b97afab313 100644 --- a/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_no_tiling/single_core.py +++ b/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_no_tiling/single_core.py @@ -7,7 +7,6 @@ import numpy as np from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU2 from aie.dialects.aiex import v8bfp16ebs8 @@ -76,7 +75,7 @@ def core_fn(of_a, of_b, of_c, zero, matmul): dev_ty = NPU2() my_program = Program(dev_ty, rt) - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module diff --git a/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_no_tiling_mixed/single_core.py b/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_no_tiling_mixed/single_core.py index a936e595876..d48679d45fd 100644 --- a/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_no_tiling_mixed/single_core.py +++ b/programming_examples/ml/block_datatypes/matrix_multiplication/single_core_no_tiling_mixed/single_core.py @@ -8,7 +8,6 @@ import numpy as np from aie.iron import 
Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU2 from aie.dialects.aiex import v8bfp16ebs8 @@ -83,7 +82,7 @@ def core_fn(of_a, of_b, of_c, zero, matmul): dev_ty = NPU2() my_program = Program(dev_ty, rt) - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module diff --git a/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array/whole_array.py b/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array/whole_array.py index d82d2ab0f11..eb1b64743b2 100644 --- a/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array/whole_array.py +++ b/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array/whole_array.py @@ -8,7 +8,6 @@ import numpy as np from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU2, Tile from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorTiler2D @@ -262,7 +261,7 @@ def core_fn(in_a, in_b, out_c, zero, matmul): my_program = Program(dev_ty, rt) - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module diff --git a/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array_mixed/whole_array.py b/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array_mixed/whole_array.py index e1a104d432e..b9f1dd820bf 100644 --- a/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array_mixed/whole_array.py +++ b/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array_mixed/whole_array.py @@ -14,7 +14,6 @@ from aie.iron.controlflow import range_ from aie.iron.device import NPU2 from aie.iron.device.tile import Tile -from aie.iron.placers import SequentialPlacer def ceildiv(a, b): @@ -278,7 +277,7 @@ def core_fn(in_a, in_b, out_c, zero, 
matmul): dev_ty = NPU2() my_program = Program(dev_ty, rt) - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module diff --git a/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array_shuffle/whole_array.py b/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array_shuffle/whole_array.py index 1d8c0e87df9..05a5b3ba49b 100644 --- a/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array_shuffle/whole_array.py +++ b/programming_examples/ml/block_datatypes/matrix_multiplication/whole_array_shuffle/whole_array.py @@ -8,7 +8,6 @@ import numpy as np from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker, Buffer -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU2, Tile from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorTiler2D @@ -276,7 +275,7 @@ def core_fn(in_a, in_b, out_c, zero, matmul, shuffle, buffer_a): my_program = Program(dev_ty, rt) - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module diff --git a/programming_examples/ml/block_datatypes/vector_passthrough/vector_passthrough.py b/programming_examples/ml/block_datatypes/vector_passthrough/vector_passthrough.py index bf577d054b4..ed38d47b4e9 100644 --- a/programming_examples/ml/block_datatypes/vector_passthrough/vector_passthrough.py +++ b/programming_examples/ml/block_datatypes/vector_passthrough/vector_passthrough.py @@ -12,7 +12,6 @@ from aie.iron import ObjectFifo, Program, Runtime, Worker from aie.iron.kernel import Kernel -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU2 from aie.iron.controlflow import range_ @@ -64,7 +63,7 @@ def core(of_in, of_out, passthrough_kernel): rt.drain(of_out.cons(), B, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(NPU2(), 
rt).resolve_program(SequentialPlacer()) + return Program(NPU2(), rt).resolve_program() module = bfp_passthrough() diff --git a/programming_examples/ml/bottleneck/bottleneck.py b/programming_examples/ml/bottleneck/bottleneck.py index 9642b196799..14cc335c3fe 100644 --- a/programming_examples/ml/bottleneck/bottleneck.py +++ b/programming_examples/ml/bottleneck/bottleneck.py @@ -16,7 +16,6 @@ Worker, WorkerRuntimeBarrier, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import AnyMemTile, NPU1Col1, NPU2, Tile from aie.iron.controlflow import range_ @@ -380,7 +379,7 @@ def runtime_ops(p2, p4): rt.drain(outOFL2L3.cons(), outputToL3, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() module = bottleneck4AIEs() diff --git a/programming_examples/ml/conv2d/conv2d.py b/programming_examples/ml/conv2d/conv2d.py index 47bb39c08db..7a024a6579a 100644 --- a/programming_examples/ml/conv2d/conv2d.py +++ b/programming_examples/ml/conv2d/conv2d.py @@ -16,7 +16,6 @@ Worker, WorkerRuntimeBarrier, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -140,7 +139,7 @@ def set_rtps(my_rtp): rt.drain(of_outOFL2L3.cons(), O, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git a/programming_examples/ml/conv2d_14x14/conv2dk14.py b/programming_examples/ml/conv2d_14x14/conv2dk14.py index 585c51ed687..42a564e9312 100644 --- a/programming_examples/ml/conv2d_14x14/conv2dk14.py +++ b/programming_examples/ml/conv2d_14x14/conv2dk14.py @@ -16,7 +16,6 @@ Worker, WorkerRuntimeBarrier, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 
from aie.iron.controlflow import range_ @@ -190,7 +189,7 @@ def set_rtps(my_rtp): rt.drain(of_outOFL2L3.cons(), O, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git a/programming_examples/ml/conv2d_fused_relu/conv2d_fused_relu.py b/programming_examples/ml/conv2d_fused_relu/conv2d_fused_relu.py index 77e935ba114..0f0021df0e7 100644 --- a/programming_examples/ml/conv2d_fused_relu/conv2d_fused_relu.py +++ b/programming_examples/ml/conv2d_fused_relu/conv2d_fused_relu.py @@ -16,7 +16,6 @@ Worker, WorkerRuntimeBarrier, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -139,7 +138,7 @@ def set_rtps(my_rtp): rt.drain(of_outOFL2L3.cons(), O, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git a/programming_examples/ml/eltwise_add/eltwise_add.py b/programming_examples/ml/eltwise_add/eltwise_add.py index 0fd4b0963a8..c859cb86abf 100644 --- a/programming_examples/ml/eltwise_add/eltwise_add.py +++ b/programming_examples/ml/eltwise_add/eltwise_add.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern from aie.iron.controlflow import range_ @@ -115,7 +114,7 @@ def core_body(of_in1, of_in2, of_out, eltwise_add): rt.finish_task_group(tg) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git 
a/programming_examples/ml/eltwise_mul/eltwise_mul.py b/programming_examples/ml/eltwise_mul/eltwise_mul.py index 127d0068666..a6fda7470d2 100644 --- a/programming_examples/ml/eltwise_mul/eltwise_mul.py +++ b/programming_examples/ml/eltwise_mul/eltwise_mul.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern from aie.iron.controlflow import range_ @@ -120,7 +119,7 @@ def core_body(of_in1, of_in2, of_out, eltwise_mul): rt.finish_task_group(tg) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git a/programming_examples/ml/gelu/gelu.py b/programming_examples/ml/gelu/gelu.py index 9d8059a9817..9d22d458f27 100644 --- a/programming_examples/ml/gelu/gelu.py +++ b/programming_examples/ml/gelu/gelu.py @@ -13,7 +13,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import Tile, NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern @@ -116,7 +115,7 @@ def core_fn(of_in, of_out, geluLine): rt.finish_task_group(tg) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/ml/layernorm/layernorm.py b/programming_examples/ml/layernorm/layernorm.py index bd1d9e956cd..3b58b0d9a43 100644 --- a/programming_examples/ml/layernorm/layernorm.py +++ b/programming_examples/ml/layernorm/layernorm.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from 
aie.iron.device import NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern from aie.iron.controlflow import range_ @@ -92,7 +91,7 @@ def core_body(of_in, of_out, layer_norm_kernel): rt.fill(of_in[i].prod(), a_in, taps_in[i]) for i in range(n_cores): rt.drain(of_out[i].cons(), c_out, taps_out[i], wait=True) - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/ml/relu/relu.py b/programming_examples/ml/relu/relu.py index 9926e859205..8b869235988 100644 --- a/programming_examples/ml/relu/relu.py +++ b/programming_examples/ml/relu/relu.py @@ -13,7 +13,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import Tile, NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern @@ -116,7 +115,7 @@ def core_fn(of_in, of_out, reluLine): rt.finish_task_group(tg) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/ml/resnet/layers_conv2_x/resnet.py b/programming_examples/ml/resnet/layers_conv2_x/resnet.py index a2cfe658bc2..5ebbbaaa130 100644 --- a/programming_examples/ml/resnet/layers_conv2_x/resnet.py +++ b/programming_examples/ml/resnet/layers_conv2_x/resnet.py @@ -8,7 +8,6 @@ import sys from aie.iron import Buffer, Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col3, NPU2, Tile from aie.iron.controlflow import range_ from aie.helpers.util import np_ndarray_type_get_shape @@ -591,7 +590,7 @@ def set_rtps(rtp): rt.drain(outOFL2L3.cons(), outputToL3, placement=Tile(1, 0), wait=True) # Place components (assign them resources on the device) and generate an MLIR module 
-module = Program(dev, rt).resolve_program(SequentialPlacer()) +module = Program(dev, rt).resolve_program() # Print the generated MLIR print(module) diff --git a/programming_examples/ml/rmsnorm/rmsnorm.py b/programming_examples/ml/rmsnorm/rmsnorm.py index 42772bfa769..4629ec74595 100644 --- a/programming_examples/ml/rmsnorm/rmsnorm.py +++ b/programming_examples/ml/rmsnorm/rmsnorm.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern from aie.iron.controlflow import range_ @@ -92,7 +91,7 @@ def core_body(of_in, of_out, rms_norm_kernel): rt.fill(of_in[i].prod(), a_in, taps_in[i]) for i in range(n_cores): rt.drain(of_out[i].cons(), c_out, taps_out[i], wait=True) - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/ml/rope/rope.py b/programming_examples/ml/rope/rope.py index ed14aec788e..51ef4a17905 100644 --- a/programming_examples/ml/rope/rope.py +++ b/programming_examples/ml/rope/rope.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern from aie.iron.controlflow import range_ @@ -96,7 +95,7 @@ def core_body(of_in, of_lut, of_out, rope_kernel): rt.fill(of_lut[i].prod(), b_in, taps_in[i]) for i in range(n_cores): rt.drain(of_out[i].cons(), c_out, taps_out[i], wait=True) - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/ml/scale_shift/scale_shift.py b/programming_examples/ml/scale_shift/scale_shift.py index 37adad8a4f2..05ee61d2441 100644 --- 
a/programming_examples/ml/scale_shift/scale_shift.py +++ b/programming_examples/ml/scale_shift/scale_shift.py @@ -21,7 +21,6 @@ Worker, WorkerRuntimeBarrier, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ from aie.helpers.util import np_ndarray_type_get_shape @@ -225,7 +224,7 @@ def set_rtps(*args): rt.finish_task_group(tg2) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/ml/silu/silu.py b/programming_examples/ml/silu/silu.py index add5520d5fd..8ac8de55db4 100644 --- a/programming_examples/ml/silu/silu.py +++ b/programming_examples/ml/silu/silu.py @@ -13,7 +13,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import Tile, NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern @@ -116,7 +115,7 @@ def core_fn(of_in, of_out, siluLine): rt.finish_task_group(tg) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/ml/softmax/softmax.py b/programming_examples/ml/softmax/softmax.py index 61ccd37e5ab..24db7c96fe0 100644 --- a/programming_examples/ml/softmax/softmax.py +++ b/programming_examples/ml/softmax/softmax.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -88,7 +87,7 @@ def core_fn(of_in, of_out, softmax_kernel): rt.drain(outC.cons(), C, wait=True) # Place components (assign them resources 
on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git a/programming_examples/ml/swiglu/swiglu.py b/programming_examples/ml/swiglu/swiglu.py index fe45f10b314..acc3d64be15 100644 --- a/programming_examples/ml/swiglu/swiglu.py +++ b/programming_examples/ml/swiglu/swiglu.py @@ -13,7 +13,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import Tile, NPU1, NPU2 from aie.helpers.taplib.tap import TensorAccessPattern @@ -128,7 +127,7 @@ def core_fn(of_in, of_wts, of_out, swigluLine): rt.finish_task_group(tg) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() p = argparse.ArgumentParser() diff --git a/programming_examples/vision/color_detect/color_detect.py b/programming_examples/vision/color_detect/color_detect.py index f98cd8b59c6..75389037671 100644 --- a/programming_examples/vision/color_detect/color_detect.py +++ b/programming_examples/vision/color_detect/color_detect.py @@ -8,7 +8,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2 @@ -205,7 +204,7 @@ def or_gray2rgba_and_fn( rt.drain(outOF_L2L3.cons(), O, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git a/programming_examples/vision/color_threshold/color_threshold.py b/programming_examples/vision/color_threshold/color_threshold.py index 3624b6263db..d1fa133a14b 100644 --- a/programming_examples/vision/color_threshold/color_threshold.py +++ 
b/programming_examples/vision/color_threshold/color_threshold.py @@ -16,7 +16,6 @@ Worker, WorkerRuntimeBarrier, ) -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2 from aie.extras.dialects import arith @@ -138,7 +137,7 @@ def set_rtps(*args): rt.drain(outOOB_L2L3.cons(), outTensor, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git a/programming_examples/vision/edge_detect/edge_detect.py b/programming_examples/vision/edge_detect/edge_detect.py index d6e2154c4a8..cbe50703a6f 100644 --- a/programming_examples/vision/edge_detect/edge_detect.py +++ b/programming_examples/vision/edge_detect/edge_detect.py @@ -8,7 +8,6 @@ import sys from aie.iron import Buffer, Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2 from aie.iron.controlflow import range_ @@ -258,7 +257,7 @@ def gray2rgba_addWeight_fn( rt.drain(outOF_L2L3.cons(), O, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git a/programming_examples/vision/vision_passthrough/vision_passthrough.py b/programming_examples/vision/vision_passthrough/vision_passthrough.py index 6e559e55c9b..7d5d028570c 100644 --- a/programming_examples/vision/vision_passthrough/vision_passthrough.py +++ b/programming_examples/vision/vision_passthrough/vision_passthrough.py @@ -8,7 +8,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 @@ -50,7 +49,7 @@ def passthrough_fn(of_in, of_out, passThroughLine): rt.drain(of_out.cons(), outTensor, wait=True) # Place components 
(assign them resources on the device) and generate an MLIR module - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() try: diff --git a/programming_guide/mini_tutorial/aie2p.py b/programming_guide/mini_tutorial/aie2p.py index d038edbbe17..7ba29ff68d3 100644 --- a/programming_guide/mini_tutorial/aie2p.py +++ b/programming_guide/mini_tutorial/aie2p.py @@ -10,7 +10,6 @@ import sys from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -80,7 +79,7 @@ def core_fn(of_in, of_out): # information to be lowered to its MLIR equivalent. # At this point, the program is also verified and will report underlying MLIR errors, if any. # You can see a list of available placers in python/iron/placers.py - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_1/answer_2.py b/programming_guide/mini_tutorial/exercise_1/answer_2.py index 39168057b5e..b02db2095e6 100644 --- a/programming_guide/mini_tutorial/exercise_1/answer_2.py +++ b/programming_guide/mini_tutorial/exercise_1/answer_2.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -49,7 +48,7 @@ def core_fn(of_in, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_1/answer_3.py b/programming_guide/mini_tutorial/exercise_1/answer_3.py index 6057552c927..082c6f5a4a5 100644 --- a/programming_guide/mini_tutorial/exercise_1/answer_3.py +++ 
b/programming_guide/mini_tutorial/exercise_1/answer_3.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, ObjectFifo -from aie.iron.placers import SequentialPlacer import aie.iron as iron @@ -35,7 +34,7 @@ def exercise_1(input0, output): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_1/answer_4.py b/programming_guide/mini_tutorial/exercise_1/answer_4.py index f850002e2b6..5443eff2b08 100644 --- a/programming_guide/mini_tutorial/exercise_1/answer_4.py +++ b/programming_guide/mini_tutorial/exercise_1/answer_4.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -50,7 +49,7 @@ def core_fn(of_in, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_1/answer_5.py b/programming_guide/mini_tutorial/exercise_1/answer_5.py index e9e52438f96..ea337ff39d9 100644 --- a/programming_guide/mini_tutorial/exercise_1/answer_5.py +++ b/programming_guide/mini_tutorial/exercise_1/answer_5.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -51,7 +50,7 @@ def core_fn(of_in, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return 
my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_1/exercise_1.py b/programming_guide/mini_tutorial/exercise_1/exercise_1.py index 9300c60d5ae..766dec47733 100644 --- a/programming_guide/mini_tutorial/exercise_1/exercise_1.py +++ b/programming_guide/mini_tutorial/exercise_1/exercise_1.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo, Buffer -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -51,7 +50,7 @@ def core_fn(of_out, buff): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_2/answer_1.py b/programming_guide/mini_tutorial/exercise_2/answer_1.py index 32e4bbc0c98..8139ee97456 100644 --- a/programming_guide/mini_tutorial/exercise_2/answer_1.py +++ b/programming_guide/mini_tutorial/exercise_2/answer_1.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -77,7 +76,7 @@ def core_fn(of_in, of_out, num_elem): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_2/answer_2.py b/programming_guide/mini_tutorial/exercise_2/answer_2.py index 9d6edda88cd..a4d086ea9d2 100644 --- a/programming_guide/mini_tutorial/exercise_2/answer_2.py +++ b/programming_guide/mini_tutorial/exercise_2/answer_2.py @@ -10,7 +10,6 @@ import numpy 
as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -79,7 +78,7 @@ def core_fn(of_in, of_out, num_elem): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_2/exercise_2.py b/programming_guide/mini_tutorial/exercise_2/exercise_2.py index 982cc75cc74..3ad06aec962 100644 --- a/programming_guide/mini_tutorial/exercise_2/exercise_2.py +++ b/programming_guide/mini_tutorial/exercise_2/exercise_2.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -49,7 +48,7 @@ def core_fn(of_in, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_3/answer_2.py b/programming_guide/mini_tutorial/exercise_3/answer_2.py index 9d349e72ed2..05aa08bc8f1 100644 --- a/programming_guide/mini_tutorial/exercise_3/answer_2.py +++ b/programming_guide/mini_tutorial/exercise_3/answer_2.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -53,7 +52,7 @@ def core_fn(of_in_A, of_in_B, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return 
my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_3/exercise_3.py b/programming_guide/mini_tutorial/exercise_3/exercise_3.py index b49364ddb24..f541cb4215a 100644 --- a/programming_guide/mini_tutorial/exercise_3/exercise_3.py +++ b/programming_guide/mini_tutorial/exercise_3/exercise_3.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -49,7 +48,7 @@ def core_fn(of_in, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_4/answer_2.py b/programming_guide/mini_tutorial/exercise_4/answer_2.py index 67a58bec7cd..08e71509339 100644 --- a/programming_guide/mini_tutorial/exercise_4/answer_2.py +++ b/programming_guide/mini_tutorial/exercise_4/answer_2.py @@ -17,7 +17,6 @@ Buffer, WorkerRuntimeBarrier, ) -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -73,7 +72,7 @@ def set_rtps(*args): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_4/exercise_4.py b/programming_guide/mini_tutorial/exercise_4/exercise_4.py index ea8432e50e4..30dca994a04 100644 --- a/programming_guide/mini_tutorial/exercise_4/exercise_4.py +++ b/programming_guide/mini_tutorial/exercise_4/exercise_4.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, 
ObjectFifo, Buffer -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -62,7 +61,7 @@ def set_rtps(*args): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_1.py b/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_1.py index 96f2d9b52c3..0982b9c159a 100644 --- a/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_1.py +++ b/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_1.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorAccessPattern @@ -63,7 +62,7 @@ def core_fn(of_in, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_2.py b/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_2.py index ea0f7094e4e..cffc82ddbd8 100644 --- a/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_2.py +++ b/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_2.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorTiler2D @@ -62,7 +61,7 @@ def core_fn(of_in, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an 
MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_3.py b/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_3.py index 09bfdb9fc2c..99298303a07 100644 --- a/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_3.py +++ b/programming_guide/mini_tutorial/exercise_5/exercise_5a/answer_3.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.iron as iron @@ -51,7 +50,7 @@ def core_fn(of_in, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_5/exercise_5a/exercise_5a.py b/programming_guide/mini_tutorial/exercise_5/exercise_5a/exercise_5a.py index bbe8d92d539..fbdccde30db 100644 --- a/programming_guide/mini_tutorial/exercise_5/exercise_5a/exercise_5a.py +++ b/programming_guide/mini_tutorial/exercise_5/exercise_5a/exercise_5a.py @@ -10,7 +10,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorAccessPattern @@ -57,7 +56,7 @@ def core_fn(of_in, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/mini_tutorial/exercise_5/exercise_5b/exercise_5b.py b/programming_guide/mini_tutorial/exercise_5/exercise_5b/exercise_5b.py index df38d4967fb..0d0ce14afe4 
100644 --- a/programming_guide/mini_tutorial/exercise_5/exercise_5b/exercise_5b.py +++ b/programming_guide/mini_tutorial/exercise_5/exercise_5b/exercise_5b.py @@ -12,7 +12,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, ObjectFifo -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ from aie.helpers.taplib import TensorAccessPattern, TensorAccessSequence @@ -80,7 +79,7 @@ def core_fn(of_in, of_out): my_program = Program(iron.get_current_device(), rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() def main(): diff --git a/programming_guide/section-1/aie2.py b/programming_guide/section-1/aie2.py index bd3cc35dd69..ee4e8972c02 100644 --- a/programming_guide/section-1/aie2.py +++ b/programming_guide/section-1/aie2.py @@ -9,7 +9,6 @@ import numpy as np from aie.iron import Program, Runtime, Worker, Buffer -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, Tile from aie.iron.controlflow import range_ @@ -40,7 +39,7 @@ def core_fn(buff): my_program = Program(NPU1Col1(), rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_guide/section-2/section-2c/from_stream_transformations/from_stream.py b/programming_guide/section-2/section-2c/from_stream_transformations/from_stream.py index 441f515c72d..fe66783dea0 100644 --- a/programming_guide/section-2/section-2c/from_stream_transformations/from_stream.py +++ b/programming_guide/section-2/section-2c/from_stream_transformations/from_stream.py @@ -9,7 +9,6 @@ import sys from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 
from aie.iron.controlflow import range_ @@ -60,7 +59,7 @@ def core_fn(of_in, of_out): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_guide/section-2/section-2c/to_stream_transformations/to_stream.py b/programming_guide/section-2/section-2c/to_stream_transformations/to_stream.py index 78c946ef049..0aef456beea 100644 --- a/programming_guide/section-2/section-2c/to_stream_transformations/to_stream.py +++ b/programming_guide/section-2/section-2c/to_stream_transformations/to_stream.py @@ -9,7 +9,6 @@ import sys from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -58,7 +57,7 @@ def core_fn(of_in, of_out): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_guide/section-2/section-2e/aie2.py b/programming_guide/section-2/section-2e/aie2.py index cc0c99050c0..455a3cca3ca 100644 --- a/programming_guide/section-2/section-2e/aie2.py +++ b/programming_guide/section-2/section-2e/aie2.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -59,7 +58,7 @@ def core_fn(of_in, of_out): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR 
print(module) diff --git a/programming_guide/section-2/section-2e/aie2_multi.py b/programming_guide/section-2/section-2e/aie2_multi.py index 58c0b302506..3ad7eee016d 100644 --- a/programming_guide/section-2/section-2e/aie2_multi.py +++ b/programming_guide/section-2/section-2e/aie2_multi.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -82,7 +81,7 @@ def core_fn(of_in, of_out): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_guide/section-2/section-2f/01_single_double_buffer/single_buffer.py b/programming_guide/section-2/section-2f/01_single_double_buffer/single_buffer.py index 448af3e44c8..3da2634c783 100644 --- a/programming_guide/section-2/section-2f/01_single_double_buffer/single_buffer.py +++ b/programming_guide/section-2/section-2f/01_single_double_buffer/single_buffer.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -61,7 +60,7 @@ def core_fn2(of_in, of_out): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_guide/section-2/section-2f/02_external_mem_to_core/ext_to_core.py b/programming_guide/section-2/section-2f/02_external_mem_to_core/ext_to_core.py index 1f60e247dee..dad7bf65a20 100644 --- 
a/programming_guide/section-2/section-2f/02_external_mem_to_core/ext_to_core.py +++ b/programming_guide/section-2/section-2f/02_external_mem_to_core/ext_to_core.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -54,7 +53,7 @@ def core_fn(of_in, of_out): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_guide/section-2/section-2f/03_external_mem_to_core_L2/ext_to_core_L2.py b/programming_guide/section-2/section-2f/03_external_mem_to_core_L2/ext_to_core_L2.py index 2d7f91a4765..72895c92e2b 100644 --- a/programming_guide/section-2/section-2f/03_external_mem_to_core_L2/ext_to_core_L2.py +++ b/programming_guide/section-2/section-2f/03_external_mem_to_core_L2/ext_to_core_L2.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -60,7 +59,7 @@ def core_fn(of_in, of_out): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_guide/section-2/section-2f/04_distribute_L2/distribute_L2.py b/programming_guide/section-2/section-2f/04_distribute_L2/distribute_L2.py index 6d0d88d5908..eb302997afe 100644 --- a/programming_guide/section-2/section-2f/04_distribute_L2/distribute_L2.py +++ b/programming_guide/section-2/section-2f/04_distribute_L2/distribute_L2.py @@ -9,7 +9,6 @@ import sys 
from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 if len(sys.argv) > 1: @@ -63,7 +62,7 @@ def core_fn(of_in): rt.fill(of_in.prod(), a_in) # Create the program from the device type and runtime - return Program(dev, rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() module = distribute_L2() diff --git a/programming_guide/section-2/section-2f/05_join_L2/distribute_and_join_L2.py b/programming_guide/section-2/section-2f/05_join_L2/distribute_and_join_L2.py index a493cbc478a..828cafceec4 100644 --- a/programming_guide/section-2/section-2f/05_join_L2/distribute_and_join_L2.py +++ b/programming_guide/section-2/section-2f/05_join_L2/distribute_and_join_L2.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -81,7 +80,7 @@ def core_fn(of_in, of_out): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_guide/section-2/section-2f/05_join_L2/join_L2.py b/programming_guide/section-2/section-2f/05_join_L2/join_L2.py index 30453e1aa0a..b5f972cbed3 100644 --- a/programming_guide/section-2/section-2f/05_join_L2/join_L2.py +++ b/programming_guide/section-2/section-2f/05_join_L2/join_L2.py @@ -9,7 +9,6 @@ import sys from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 if len(sys.argv) > 1: @@ -63,7 +62,7 @@ def core_fn(of_out): rt.drain(of_out.cons(), c_out, wait=True) # Create the program from the device type and runtime - return Program(dev, 
rt).resolve_program(SequentialPlacer()) + return Program(dev, rt).resolve_program() module = join_L2() diff --git a/programming_guide/section-3/aie2.py b/programming_guide/section-3/aie2.py index 51e796d01b5..ba517e80c0c 100644 --- a/programming_guide/section-3/aie2.py +++ b/programming_guide/section-3/aie2.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -71,7 +70,7 @@ def core_fn(of_in, of_factor, of_out, scale_scalar): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_guide/section-4/section-4a/aie2.py b/programming_guide/section-4/section-4a/aie2.py index 51e796d01b5..ba517e80c0c 100644 --- a/programming_guide/section-4/section-4a/aie2.py +++ b/programming_guide/section-4/section-4a/aie2.py @@ -9,7 +9,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2Col1 from aie.iron.controlflow import range_ @@ -71,7 +70,7 @@ def core_fn(of_in, of_factor, of_out, scale_scalar): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() # Print the generated MLIR print(module) diff --git a/programming_guide/section-4/section-4b/aie2.py b/programming_guide/section-4/section-4b/aie2.py index b5f5ff82718..5a53909424d 100644 --- a/programming_guide/section-4/section-4b/aie2.py +++ b/programming_guide/section-4/section-4b/aie2.py @@ -10,7 +10,6 @@ import sys from aie.iron import Kernel, ObjectFifo, Program, 
Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1Col1, NPU2 from aie.iron.controlflow import range_ @@ -75,7 +74,7 @@ def core_fn(of_in, of_factor, of_out, scale_scalar): my_program = Program(dev, rt) # Place components (assign them resources on the device) and generate an MLIR module - return my_program.resolve_program(SequentialPlacer()) + return my_program.resolve_program() # Parse module arguments diff --git a/python/dialects/aie.py b/python/dialects/aie.py index cf854771ca2..df4989dbdd5 100644 --- a/python/dialects/aie.py +++ b/python/dialects/aie.py @@ -1,4 +1,4 @@ -# Copyright (C) 2022, Advanced Micro Devices, Inc. +# Copyright (C) 2022-2026, Advanced Micro Devices, Inc. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from dataclasses import dataclass import inspect @@ -948,6 +948,32 @@ def tile(col, row, *, loc=None, ip=None, allocation_scheme=None): return TileOp(col=col, row=row, loc=loc, ip=ip, allocation_scheme=allocation_scheme) +def logical_tile( + tile_type, col=None, row=None, *, loc=None, ip=None, allocation_scheme=None +): + """Create a logical tile operation. + + Args: + tile_type: AIETileType enum value (CoreTile, MemTile, ShimNOCTile, ShimPLTile) + col: Optional column coordinate (None for unconstrained) + row: Optional row coordinate (None for unconstrained) + loc: Optional location + ip: Optional insertion point + allocation_scheme: Optional allocation scheme string + + Returns: + LogicalTileOp instance + """ + return LogicalTileOp( + tile_type=tile_type, + col=col, + row=row, + loc=loc, + ip=ip, + allocation_scheme=allocation_scheme, + ) + + # BDChainOp diff --git a/python/iron/buffer.py b/python/iron/buffer.py index 0eb74ef4918..32791d8fe9b 100644 --- a/python/iron/buffer.py +++ b/python/iron/buffer.py @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. 
+# (c) Copyright 2024-2026 Advanced Micro Devices, Inc. import numpy as np from typing import Sequence @@ -14,12 +14,11 @@ np_ndarray_type_get_dtype, np_ndarray_type_get_shape, ) -from .device import PlacementTile +from .device import Tile from .resolvable import Resolvable, NotResolvedError -from .placeable import Placeable -class Buffer(Resolvable, Placeable): +class Buffer(Resolvable): """A buffer that is available both to Workers and to the Runtime for operations. This is often used for Runtime Parameters. """ @@ -32,7 +31,7 @@ def __init__( type: type[np.ndarray] | None = None, initial_value: np.ndarray | None = None, name: str | None = None, - placement: PlacementTile | None = None, + placement: Tile | None = None, use_write_rtp: bool = False, ): """A Buffer is a memory region declared at the top-level of the design, allowing it to @@ -42,7 +41,7 @@ def __init__( type (type[np.ndarray] | None, optional): The type of the buffer. Defaults to None. initial_value (np.ndarray | None, optional): An initial value to set the buffer to. Should be of same datatype and shape as the buffer. Defaults to None. name (str | None, optional): The name of the buffer. If none is given, a unique name will be generated. Defaults to None. - placement (PlacementTile | None, optional): A placement location for the buffer. Defaults to None. + placement (Tile | None, optional): A placement location for the buffer. Defaults to None. use_write_rtp (bool, optional): If use_write_rtp, write_rtp/read_rtp operations will be generated. Otherwise, traditional write/read operations will be used. Defaults to False. 
Raises: @@ -59,7 +58,7 @@ def __init__( if not self._name: self._name = f"buf_{self.__get_index()}" self._use_write_rtp = use_write_rtp - Placeable.__init__(self, placement) + self._tile = placement @classmethod def __get_index(cls) -> int: @@ -77,6 +76,11 @@ def dtype(self) -> np.dtype: """The per-element datatype of the buffer.""" return np_ndarray_type_get_dtype(self._obj_type) + @property + def tile(self) -> Tile | None: + """Return the tile of the buffer.""" + return self._tile + @property def op(self): if self._op is None: diff --git a/python/iron/dataflow/endpoint.py b/python/iron/dataflow/endpoint.py index 513b93de701..92d4ca45c5c 100644 --- a/python/iron/dataflow/endpoint.py +++ b/python/iron/dataflow/endpoint.py @@ -4,11 +4,22 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. -from ..placeable import Placeable +# (c) Copyright 2024-2026 Advanced Micro Devices, Inc. +from ..device import Tile -class ObjectFifoEndpoint(Placeable): +class ObjectFifoEndpoint: """The endpoint of an ObjectFifo. Each ObjectFifoHandle has one ObjectFifoEndpoint""" - pass + def __init__(self, tile: Tile | None): + """Initialize an ObjectFifoEndpoint. + + Args: + tile: Tile placement for this endpoint + """ + self._tile = tile + + @property + def tile(self) -> Tile | None: + """Return the tile of the endpoint.""" + return self._tile diff --git a/python/iron/dataflow/objectfifo.py b/python/iron/dataflow/objectfifo.py index 660b9a38710..2229f85864a 100644 --- a/python/iron/dataflow/objectfifo.py +++ b/python/iron/dataflow/objectfifo.py @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. +# (c) Copyright 2024-2026 Advanced Micro Devices, Inc. 
from __future__ import annotations import numpy as np from typing import Sequence @@ -22,7 +22,7 @@ from ..resolvable import Resolvable, NotResolvedError from .endpoint import ObjectFifoEndpoint -from ..device import Device, PlacementTile, AnyMemTile, Tile +from ..device import Tile class ObjectFifo(Resolvable): @@ -204,7 +204,7 @@ def cons( ) return self._cons[-1] - def tiles(self, cons_only: bool = False) -> list[PlacementTile]: + def tiles(self, cons_only: bool = False) -> list[Tile]: """The list of placement tiles corresponding to the endpoints of all handles of this ObjectFifo Raises: @@ -212,7 +212,7 @@ def tiles(self, cons_only: bool = False) -> list[PlacementTile]: ValueError: At least one consumer handle must be constructed. Returns: - list[PlacementTile]: A list of tiles of the endpoints of this ObjectFifo. + list[Tile]: A list of tiles of the endpoints of this ObjectFifo. """ tiles = [] if not cons_only: @@ -226,14 +226,6 @@ def tiles(self, cons_only: bool = False) -> list[PlacementTile]: tiles += [cons.endpoint.tile for cons in self._cons] return tiles - def can_used_shared_mem(self, device: Device, cons_only: bool = False) -> bool: - """Checks if all endpoints of the object fifo have a legal memory affinity.""" - tiles = self.tiles(cons_only=cons_only) - for t in tiles: - if device.is_mem_accessible(t, tiles): - return True - return False - def _prod_tile_op(self) -> Tile: if self._prod == None: raise ValueError( @@ -480,7 +472,7 @@ def all_of_endpoints(self) -> list[ObjectFifoEndpoint]: def join( self, offsets: list[int], - placement: PlacementTile = AnyMemTile, + placement: Tile | None = None, depths: list[int] | None = None, obj_types: list[type[np.ndarray]] = None, names: list[str] | None = None, @@ -493,7 +485,7 @@ def join( Args: offsets (list[int]): Offsets into the current producer, each corresponding to a new consumer. - placement (PlacementTile, optional): The placement where the Join operation occurs. Defaults to AnyMemTile. 
+ placement (Tile, optional): The placement where the Join operation occurs. Defaults to None (unconstrained MemTile). depths (list[int] | None, optional): The depth of each new ObjectFifo. Defaults to None. obj_types (list[type[np.ndarray]], optional): The type of the buffers corresponding to each new ObjectFifo. Defaults to None. names (list[str] | None, optional): The name of each new ObjectFifo. If not given, unique names will be generated. Defaults to None. @@ -562,7 +554,7 @@ def join( def split( self, offsets: list[int], - placement: PlacementTile = AnyMemTile, + placement: Tile | None = None, depths: list[int] | None = None, obj_types: list[type[np.ndarray]] = None, names: list[str] | None = None, @@ -575,7 +567,7 @@ def split( Args: offsets (list[int]): The offset into the current consumer for each new ObjectFifo producer. - placement (PlacementTile, optional): The placement tile where the Split operation takes place. Defaults to AnyMemTile. + placement (Tile, optional): The placement tile where the Split operation takes place. Defaults to None (unconstrained MemTile). depths (list[int] | None, optional): The depth of each new ObjectFifo. Defaults to None. obj_types (list[type[np.ndarray]], optional): The buffer type of each new ObjectFifo. Defaults to None. names (list[str] | None, optional): The name of each new ObjectFifo. If not given, a unique name will be generated. Defaults to None. @@ -642,7 +634,7 @@ def split( def forward( self, - placement: PlacementTile = AnyMemTile, + placement: Tile | None = None, obj_type: type[np.ndarray] | None = None, depth: int | None = None, name: str | None = None, @@ -654,7 +646,7 @@ def forward( is forwarded to the producer of a newly-constructed ObjectFifo. Args: - placement (PlacementTile, optional): The placement of the Forward operation. Defaults to AnyMemTile. + placement (Tile, optional): The placement of the Forward operation. Defaults to None (unconstrained MemTile). 
obj_type (type[np.ndarray] | None, optional): The object type of the new ObjectFifo. Defaults to None. depth (int | None, optional): The depth of the new ObjectFifo. Defaults to None. name (str | None, optional): The name of the new ObjectFifo. If None is given, a unique name will be generated. Defaults to None. @@ -710,7 +702,7 @@ def __init__( self, srcs: list[ObjectFifoHandle] | ObjectFifoHandle, dsts: list[ObjectFifoHandle] | ObjectFifoHandle, - placement: PlacementTile = AnyMemTile, + placement: Tile | None = None, src_offsets: list[int] = [], dst_offsets: list[int] = [], ): @@ -719,7 +711,7 @@ def __init__( Args: srcs (list[ObjectFifoHandle] | ObjectFifoHandle): A list of consumer ObjectFifoHandles to link. dsts (list[ObjectFifoHandle] | ObjectFifoHandle): A list of producer ObjectFifoHandles to link. - placement (PlacementTile, optional): The place the link occurs. Defaults to AnyMemTile. + placement (Tile, optional): The place the link occurs. Defaults to None (unconstrained MemTile). src_offsets (list[int], optional): If many sources, one offset per source is required to split the destination. Defaults to []. dst_offsets (list[int], optional): If many destinations, one offset per destination is required to split the source. Defaults to []. 
@@ -753,6 +745,23 @@ def __init__( s.endpoint = self for d in self._dsts: d.endpoint = self + + # ObjectFifoLink allows Memory or Compute Tiles + if placement is None: + placement = Tile(tile_type=Tile.MEMORY) + else: + # Validate tile_type if user specified it + if placement.tile_type is not None: + if placement.tile_type not in [Tile.MEMORY, Tile.COMPUTE]: + raise ValueError( + f"ObjectFifoLink requires Tile.MEMORY or Tile.COMPUTE, " + f"got tile_type='{placement.tile_type}'" + ) + elif placement.col is None and placement.row is None: + # Unconstrained tile, default to MEMORY + placement.tile_type = Tile.MEMORY + # else: has coordinates, type will be inferred and validated in device resolve_tile() + ObjectFifoEndpoint.__init__(self, placement) def resolve( diff --git a/python/iron/device/__init__.py b/python/iron/device/__init__.py index 9d4efe00600..76e40aa9f59 100644 --- a/python/iron/device/__init__.py +++ b/python/iron/device/__init__.py @@ -14,4 +14,4 @@ NPU2Col7, XCVC1902, ) -from .tile import AnyShimTile, AnyMemTile, AnyComputeTile, PlacementTile, Tile +from .tile import Tile diff --git a/python/iron/device/device.py b/python/iron/device/device.py index 1ccacfb3094..2052dc7bc32 100644 --- a/python/iron/device/device.py +++ b/python/iron/device/device.py @@ -4,64 +4,25 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. +# (c) Copyright 2024-2026 Advanced Micro Devices, Inc. +import re from typing import Generator from ... 
import ir # type: ignore from ...dialects._aie_enum_gen import WireBundle # type: ignore -from ...dialects.aie import AIEDevice, tile, TileOp, get_target_model # type: ignore +from ...dialects.aie import ( # type: ignore + AIEDevice, + AIETileType, + logical_tile, + LogicalTileOp, + get_target_model, +) from ..resolvable import Resolvable from .tile import Tile -import re - class Device(Resolvable): - """ - A base class for representations of a device of a specific type. - - Note: this class is abstract because it does not implement Resolve - """ - - class __DeviceTile(Resolvable): - """ - Interior class for tiles objects owned by a particular device. - This is needed to ensure we don't generate more than one MLIR operation corresponding - to the same logical tile within a device. - """ - - def __init__(self, col: int, row: int) -> None: - self._col: int = col - self._row: int = row - self._op: TileOp | None = None - super().__init__() - - def resolve( - self, - loc: ir.Location | None = None, - ip: ir.InsertionPoint | None = None, - allocation_scheme: str | None = None, - ) -> None: - if self._op == None: - self._op = tile( - self._col, - self._row, - loc=loc, - ip=ip, - allocation_scheme=allocation_scheme, - ) - - @property - def op(self) -> TileOp: - if not self._op: - raise ValueError("Cannot get operation before it is set.") - return self._op - - @op.setter - def op(self, op: TileOp): - if self._op: - raise ValueError("Cannot set operation more than once.") - self._op = op + """A base class for representations of a device of a specific type.""" def __init__(self, device: AIEDevice) -> None: """Initialize a representation of a device. 
@@ -70,20 +31,31 @@ def __init__(self, device: AIEDevice) -> None: device (AIEDevice): aie device """ self._device = device - self._tiles: list[list[Device.__DeviceTile]] = [] self._tm = get_target_model(device) - for c in range(self._tm.columns()): - self._tiles.append([]) - for r in range(self._tm.rows()): - self._tiles[c].append(Device.__DeviceTile(c, r)) + self._allocated_compute_tiles: set[tuple[int, int]] = set() - def tile_iterator(self) -> Generator[Tile, None, None]: - """ - Iterates over the device tiles deterministically + def _get_tile_type_from_coords(self, col: int, row: int) -> str: + """Query device target model for IRON tile type at coordinates. + + Returns: + Tile type string (Tile.COMPUTE, Tile.MEMORY, or Tile.SHIM) """ + if self._tm.is_core_tile(col, row): + return Tile.COMPUTE + elif self._tm.is_mem_tile(col, row): + return Tile.MEMORY + elif self._tm.is_shim_noc_tile(col, row): + return Tile.SHIM + elif self._tm.is_shim_pl_tile(col, row): + return Tile.SHIM + else: + raise ValueError(f"Unknown tile type for coordinates ({col}, {row})") + + def tile_iterator(self) -> Generator[Tile, None, None]: + """Iterates over the device tiles deterministically.""" for c in range(self._tm.columns()): for r in range(self._tm.rows()): - yield self._tiles[c][r] + yield Tile(c, r) return None @property @@ -101,9 +73,10 @@ def get_shim_tiles(self) -> list[Tile]: list[Tile]: A list of shim tiles. """ return [ - Tile(t._col, t._row) - for t in self.tile_iterator() - if self._tm.is_shim_noc_or_pl_tile(t._col, t._row) + Tile(c, r) + for c in range(self._tm.columns()) + for r in range(self._tm.rows()) + if self._tm.is_shim_noc_or_pl_tile(c, r) ] def get_mem_tiles(self) -> list[Tile]: @@ -113,9 +86,10 @@ def get_mem_tiles(self) -> list[Tile]: list[Tile]: A list of mem tiles. 
""" return [ - Tile(t._col, t._row) - for t in self.tile_iterator() - if self._tm.is_mem_tile(t._col, t._row) + Tile(c, r) + for c in range(self._tm.columns()) + for r in range(self._tm.rows()) + if self._tm.is_mem_tile(c, r) ] def get_compute_tiles(self) -> list[Tile]: @@ -125,9 +99,10 @@ def get_compute_tiles(self) -> list[Tile]: list[Tile]: A list of compute tiles. """ return [ - Tile(t._col, t._row) - for t in self.tile_iterator() - if self._tm.is_core_tile(t._col, t._row) + Tile(c, r) + for c in range(self._tm.columns()) + for r in range(self._tm.rows()) + if self._tm.is_core_tile(c, r) ] def get_num_source_switchbox_connections(self, t: Tile) -> int: @@ -136,10 +111,13 @@ def get_num_source_switchbox_connections(self, t: Tile) -> int: Returns: int: Number of DMA source ports. """ - col = t.col - row = t.row + if t.col is None or t.row is None: + raise ValueError( + f"get_num_source_switchbox_connections requires Tile with concrete coordinates, " + f"got Tile(col={t.col}, row={t.row})" + ) bundle = WireBundle.DMA - return self._tm.get_num_source_switchbox_connections(col, row, bundle) + return self._tm.get_num_source_switchbox_connections(t.col, t.row, bundle) def get_num_dest_switchbox_connections(self, t: Tile) -> int: """Returns number of DMA dest ports in the switchbox for the given tile on the device. @@ -147,10 +125,13 @@ def get_num_dest_switchbox_connections(self, t: Tile) -> int: Returns: int: Number of DMA dest ports. """ - col = t.col - row = t.row + if t.col is None or t.row is None: + raise ValueError( + f"get_num_dest_switchbox_connections requires Tile with concrete coordinates, " + f"got Tile(col={t.col}, row={t.row})" + ) bundle = WireBundle.DMA - return self._tm.get_num_dest_switchbox_connections(col, row, bundle) + return self._tm.get_num_dest_switchbox_connections(t.col, t.row, bundle) def get_num_source_shim_mux_connections(self, t: Tile) -> int: """Returns number of DMA source ports in the shim mux for the given tile on the device. 
@@ -158,10 +139,13 @@ def get_num_source_shim_mux_connections(self, t: Tile) -> int: Returns: int: Number of DMA source ports. """ - col = t.col - row = t.row + if t.col is None or t.row is None: + raise ValueError( + f"get_num_source_shim_mux_connections requires Tile with concrete coordinates, " + f"got Tile(col={t.col}, row={t.row})" + ) bundle = WireBundle.DMA - return self._tm.get_num_source_shim_mux_connections(col, row, bundle) + return self._tm.get_num_source_shim_mux_connections(t.col, t.row, bundle) def get_num_dest_shim_mux_connections(self, t: Tile) -> int: """Returns number of DMA dest ports in the shim mux for the given tile on the device. @@ -169,16 +153,25 @@ def get_num_dest_shim_mux_connections(self, t: Tile) -> int: Returns: int: Number of DMA dest ports. """ - col = t.col - row = t.row + if t.col is None or t.row is None: + raise ValueError( + f"get_num_dest_shim_mux_connections requires Tile with concrete coordinates, " + f"got Tile(col={t.col}, row={t.row})" + ) bundle = WireBundle.DMA - return self._tm.get_num_dest_shim_mux_connections(col, row, bundle) + return self._tm.get_num_dest_shim_mux_connections(t.col, t.row, bundle) def get_num_connections(self, tile: Tile, output: bool) -> int: """Returns number of DMA input or output "channels" available on the tile. + Returns: int: Number of connections (channels) available on the tile """ + if tile.col is None or tile.row is None: + raise ValueError( + f"get_num_connections requires Tile with concrete coordinates, " + f"got Tile(col={tile.col}, row={tile.row})" + ) if tile.row == 0: if output: return self.get_num_source_shim_mux_connections(tile) @@ -189,57 +182,102 @@ def get_num_connections(self, tile: Tile, output: bool) -> int: else: return self.get_num_dest_switchbox_connections(tile) - def is_mem_accessible(self, source_tile: Tile, tiles: list[Tile]) -> bool: - """Returns whether there exists a memory region on source_tile which all destination tiles can access. 
+ def resolve_tile( + self, + placement_tile: Tile, + loc: ir.Location | None = None, + ip: ir.InsertionPoint | None = None, + ) -> LogicalTileOp: + """Resolve a Tile to a LogicalTileOp. + + Tile can be fully constrained, partially constrained, or unconstrained. + Tile type is inferred from coordinates if not explicitly specified. + + If the tile already has a LogicalTileOp stored, it is returned directly + without creating a new one. This ensures tile reuse. + + Args: + placement_tile: A Tile object (possibly with partial/no coordinates) + loc: MLIR location + ip: Insertion point + Returns: - int: Number of connections (channels) available on the tile + LogicalTileOp: The created (or existing) logical tile operation """ - if not isinstance(source_tile, Tile): - raise ValueError(f"Expected a source Tile, but got {source_tile}") - for t in tiles: - if not isinstance(t, Tile): - raise ValueError(f"Expected a Tile, but got {t}") - if not tiles: - return True - - source_is_compute = self._tm.is_core_tile(source_tile.col, source_tile.row) - source_is_mem = self._tm.is_mem_tile(source_tile.col, source_tile.row) - source_is_shim = self._tm.is_shim_noc_or_pl_tile( - source_tile.col, source_tile.row - ) + if not isinstance(placement_tile, Tile): + raise ValueError( + f"resolve_tile expects Tile object, got {type(placement_tile)}" + ) + + # If tile already has an op, return it (tile reuse) + if placement_tile._op is not None: + return placement_tile._op + + iron_to_mlir = { + Tile.COMPUTE: AIETileType.CoreTile, + Tile.MEMORY: AIETileType.MemTile, + Tile.SHIM: AIETileType.ShimNOCTile, + } + + # Determine IRON tile type + iron_tile_type = placement_tile.tile_type + + # Infer tile type if not set + if iron_tile_type is None: + if placement_tile.col is not None and placement_tile.row is not None: + iron_tile_type = self._get_tile_type_from_coords( + placement_tile.col, placement_tile.row + ) + else: + # tile_type should have been set by Worker/RuntimeEndpoint/ObjectFifoLink + 
raise ValueError( + f"Tile type not set by context and cannot be inferred from coordinates. " + f"Tile(col={placement_tile.col}, row={placement_tile.row}) must have " + f"tile_type set by the context (Worker, RuntimeEndpoint, ObjectFifoLink)." + ) - if source_is_compute and not all( - [self._tm.is_core_tile(dst_tile.col, dst_tile.row) for dst_tile in tiles] - ): - return False - if source_is_mem and not all( - [self._tm.is_mem_tile(dst_tile.col, dst_tile.row) for dst_tile in tiles] + # If user specified both tile_type AND coordinates, check they match + if ( + placement_tile.tile_type is not None + and placement_tile.col is not None + and placement_tile.row is not None ): - return False - if source_is_shim or any( - [ - self._tm.is_shim_noc_or_pl_tile(dst_tile.col, dst_tile.row) - for dst_tile in tiles - ] - ): - # No neighbor sharing from shim tiles. - return False + actual_iron_type = self._get_tile_type_from_coords( + placement_tile.col, placement_tile.row + ) + + if placement_tile.tile_type != actual_iron_type: + raise ValueError( + f"Tile type mismatch: specified tile_type='{placement_tile.tile_type}' " + f"but coordinates ({placement_tile.col}, {placement_tile.row}) " + f"correspond to '{actual_iron_type}' on this device" + ) - for t in tiles: - if not self._tm.is_legal_mem_affinity( - source_tile.col, source_tile.row, t.col, t.row - ): - return False - return True + # Compute tiles cannot be shared, check for duplicates + if iron_tile_type == Tile.COMPUTE: + if placement_tile.col is not None and placement_tile.row is not None: + coord_tuple = (placement_tile.col, placement_tile.row) + if coord_tuple in self._allocated_compute_tiles: + raise ValueError( + f"Compute tile ({placement_tile.col}, {placement_tile.row}) " + f"already allocated. Each Worker requires a unique compute tile. " + f"Compute tiles cannot be shared between Workers." 
+ ) + self._allocated_compute_tiles.add(coord_tuple) + + mlir_tile_type = iron_to_mlir[iron_tile_type] + + logical_tile_op = logical_tile( + mlir_tile_type, + col=placement_tile.col, + row=placement_tile.row, + loc=loc, + ip=ip, + allocation_scheme=placement_tile.allocation_scheme, + ) - def resolve_tile( - self, - tile: Tile, - loc: ir.Location | None = None, - ip: ir.InsertionPoint | None = None, - ) -> None: - self._tiles[tile.col][tile.row].resolve(loc, ip, tile.allocation_scheme) - tile.op = self._tiles[tile.col][tile.row].op + placement_tile._op = logical_tile_op + return logical_tile_op def create_class(class_name, device): diff --git a/python/iron/device/tile.py b/python/iron/device/tile.py index f4e93ab38ac..d785c47cf4f 100644 --- a/python/iron/device/tile.py +++ b/python/iron/device/tile.py @@ -4,29 +4,65 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. +# (c) Copyright 2024-2026 Advanced Micro Devices, Inc. -from ...dialects.aie import TileOp +from ...dialects.aie import LogicalTileOp class Tile: - """An object representing a single component denoted by coordinates on a device.""" - - def __init__(self, col: int, row: int, allocation_scheme: str = None) -> None: - self.col: int = col - self.row: int = row + """An object representing a component to be placed on a device, optionally denoted by coordinates.""" + + # Tile type constants + COMPUTE = "compute" # Compute tiles + MEMORY = "memory" # Memory tiles + SHIM = "shim" # Shim tiles + + def __init__( + self, + col: int | None = None, + row: int | None = None, + *, + tile_type: str | None = None, + allocation_scheme: str | None = None, + ) -> None: + """Initialize a Tile with full, partial, or no coordinates. + + Args: + col: Column coordinate (None for unconstrained) + row: Row coordinate (None for unconstrained) + tile_type: Tile type - use Tile.COMPUTE, Tile.MEMORY, or Tile.SHIM. 
+ Can be inferred from context or coordinates. + allocation_scheme: Optional allocation scheme string + + Examples: + Tile(2, 3) # Full coords, type inferred + Tile(col=2) # Partial, type from context + Tile() # Unconstrained, type from context + Tile(2, 3, tile_type=Tile.COMPUTE) # Explicit validation + """ + self.col: int | None = col + self.row: int | None = row + self.tile_type: str | None = tile_type self.allocation_scheme: str | None = allocation_scheme - self._op: TileOp | None = None - # TODO: each tile should probably have a type, e.g., Shim or Mem or Compute + self._op: LogicalTileOp | None = None + + # Validate tile_type if specified + if tile_type is not None: + valid_types = [Tile.COMPUTE, Tile.MEMORY, Tile.SHIM] + if tile_type not in valid_types: + raise ValueError( + f"Invalid tile_type '{tile_type}'. Must be one of: " + f"Tile.COMPUTE, Tile.MEMORY, Tile.SHIM" + ) @property - def op(self) -> TileOp: + def op(self) -> LogicalTileOp: if not self._op: raise ValueError("Cannot get op before it is set.") return self._op @op.setter - def op(self, op: TileOp): + def op(self, op: LogicalTileOp): if self._op and self._op != op: raise ValueError("Cannot change operation once it is set.") self._op = op @@ -41,24 +77,3 @@ def __str__(self) -> str: def __hash__(self): return hash(str(self)) - - -class AnyShimTile: - """A placeholder that should be replaced with a concrete Tile() representing a Shim tile on a device.""" - - pass - - -class AnyMemTile: - """A placeholder that should be replaced with a concrete Tile() representing a Mem tile on a device.""" - - pass - - -class AnyComputeTile: - """A placeholder that should be replaced with a concrete Tile() representing a Compute tile on a device.""" - - pass - - -PlacementTile = Tile | AnyShimTile | AnyMemTile | AnyComputeTile diff --git a/python/iron/placeable.py b/python/iron/placeable.py deleted file mode 100644 index 655eaa342f1..00000000000 --- a/python/iron/placeable.py +++ /dev/null @@ -1,58 +0,0 @@ -# 
placeable.py -*- Python -*- -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2024 Advanced Micro Devices, Inc. -from .device import Tile, PlacementTile - - -class Placeable: - """Placeable is a base class of an object that might be Placed by a Placer.""" - - def __init__(self, tile: PlacementTile | None): - """Initialize a Placeable object. - - Args: - tile (PlacementTile): A placeable object has a tile. This may be None during construction. - """ - self._tile = tile - - def place(self, tile: Tile) -> None: - """Place the object by assigning the object to a Tile. - - Args: - tile (Tile): The placement tile. - - Raises: - AlreadyPlacedError: If the object's tile is already set to a Tile object. - """ - if isinstance(self._tile, Tile): - raise AlreadyPlacedError(self.__class__, self._tile, tile) - self._tile = tile - - @property - def tile(self) -> PlacementTile | None: - """Return the tile of the placeable object. - - Returns: - PlacementTile: The current placement of the object. - """ - return self._tile - - -class AlreadyPlacedError(Exception): - """Placeable objects may raise this error if one attempts to assign them to a Tile more than once.""" - - def __init__(self, cls, current_tile: Tile, new_tile: Tile): - """Create an AlreadyPlacedError - - Args: - current_tile (Tile): The current placement tile - new_tile (Tile): The placement tile given for the second attempt to place the object. 
- """ - self.message = ( - f"{cls} already placed at {current_tile}; cannot place at {new_tile}" - ) - super().__init__(self.message) diff --git a/python/iron/placers.py b/python/iron/placers.py deleted file mode 100644 index b4555431ac9..00000000000 --- a/python/iron/placers.py +++ /dev/null @@ -1,374 +0,0 @@ -# placers.py -*- Python -*- -# -# This file is licensed under the Apache License v2.0 with LLVM Exceptions. -# See https://llvm.org/LICENSE.txt for license information. -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -# -# (c) Copyright 2024 Advanced Micro Devices, Inc. - -from abc import ABCMeta, abstractmethod -from typing import Optional -import statistics - -from .device import Device -from .runtime import Runtime -from .worker import Worker -from .device import AnyComputeTile, AnyMemTile, AnyShimTile, Tile -from .dataflow import ObjectFifoHandle, ObjectFifoLink, ObjectFifoEndpoint - - -class Placer(metaclass=ABCMeta): - """Placer is an abstract class to define the interface between the Program - and the Placer. - """ - - @abstractmethod - def make_placement( - self, - device: Device, - rt: Runtime, - workers: list[Worker], - object_fifos: list[ObjectFifoHandle], - ): - """Assign placement informatio to a program. - - Args: - device (Device): The device to use for placement. - rt (Runtime): The runtime information for the program. - workers (list[Worker]): The workers included in the program. - object_fifos (list[ObjectFifoHandle]): The object fifos used by the program. - """ - ... - - -class SequentialPlacer(Placer): - """SequentialPlacer is a simple implementation of a placer. The SequentialPlacer is so named - because it will sequentially place workers to Compute Tiles. After workers are placed, Memory Tiles and - Shim Tiles are placed as close to the column of the given compute tile as possible. - - The SequentialPlacer only does validation of placement with respect to available DMA channels on the tiles. 
- However, it can yield invalid placements that exceed other resource limits, such as memory, For complex or - resource sensitive designs, a more complex placer or manual placement is required. - - The user may define a limited number of cores per column, which could help with issues in using packet- - switched tracing. By limiting the number of cores per column, the placer will assign workers to compute - tiles in a row-wise direction up to the defined limit then move to the next column for subsequent placement. - """ - - def __init__(self, cores_per_col: Optional[int] = None): - super().__init__() - self.cores_per_col = cores_per_col - - def make_placement( - self, - device: Device, - rt: Runtime, - workers: list[Worker], - object_fifos: list[ObjectFifoHandle], - ): - # Keep track of tiles available for placement based - # on number of available input / output DMA channels - shims_in = device.get_shim_tiles() - shims_out = device.get_shim_tiles() - - mems_in = device.get_mem_tiles() - mems_out = device.get_mem_tiles() - - computes = device.get_compute_tiles() - computes_in = device.get_compute_tiles() - computes_out = device.get_compute_tiles() - compute_idx = 0 - - # For each tile keep track of how many input and output endpoints there are - # Note: defaultdict(list) automatically assigns an empty list as the default value for - # keys that don’t exist - channels_in: dict[Tile, tuple[ObjectFifoEndpoint, int]] = {} - channels_out: dict[Tile, tuple[ObjectFifoEndpoint, int]] = {} - - # If some workers are already taken, remove them from the available set - for worker in workers: - # This worker has already been placed - if isinstance(worker.tile, Tile): - if not worker.tile in computes: - raise ValueError( - f"Partial Placement Error: " - f"Tile {worker.tile} not available on " - f"device {device} or has already been used." 
- ) - computes.remove(worker.tile) - - # Shorten the list of compute tiles available if the cores per column value is set - if self.cores_per_col is not None: - unused_computes_at_col = { - column: [tile for tile in computes if tile.col == column] - for column in range(device.cols) - } - computes = [] - for col, tiles in unused_computes_at_col.items(): - if len(tiles) < self.cores_per_col: - raise ValueError(f"Not enough compute tiles at column {col}!") - else: - computes.extend(tiles[: self.cores_per_col]) - - for worker in workers: - if worker.tile == AnyComputeTile: - if compute_idx >= len(computes): - raise ValueError("Ran out of compute tiles for placement!") - worker.place(computes[compute_idx]) - compute_idx += 1 - - for buffer in worker.buffers: - buffer.place(worker.tile) - - # Account for channels used by Workers, which are already placed - prod_fifos = [of for of in worker.fifos if of._is_prod] - cons_fifos = [of for of in worker.fifos if not of._is_prod] - self._update_channels( - worker, - worker.tile, - True, - len(prod_fifos), - channels_out, - computes_out, - device, - ) - self._update_channels( - worker, - worker.tile, - False, - len(cons_fifos), - channels_in, - computes_in, - device, - ) - - # Prepare to loop - if len(computes) > 0: - compute_idx = compute_idx % len(computes) - - for ofh in object_fifos: - of_endpoints = ofh.all_of_endpoints() - of_handle_endpoints = ofh._object_fifo._get_endpoint(is_prod=ofh._is_prod) - of_compute_endpoints_tiles = [ - ofe.tile for ofe in of_endpoints if ofe.tile in computes - ] - common_col = self._get_common_col(of_compute_endpoints_tiles) - of_link_endpoints = [ - ofe for ofe in of_endpoints if isinstance(ofe, ObjectFifoLink) - ] - # Place "closest" to the compute endpoints - for ofe in of_handle_endpoints: - if isinstance(ofe, Worker): - continue - - if ofe.tile == AnyMemTile: - if ofh._is_prod: - self._place_endpoint( - ofe, - mems_out, - common_col, - channels_out, - device, - output=True, - ) - else: - 
self._place_endpoint( - ofe, - mems_in, - common_col, - channels_in, - device, - ) - - elif ofe.tile == AnyShimTile: - if ofh._is_prod: - self._place_endpoint( - ofe, - shims_out, - common_col, - channels_out, - device, - output=True, - ) - else: - self._place_endpoint( - ofe, shims_in, common_col, channels_in, device - ) - - for ofe in of_link_endpoints: - # When placing ObjectFifoLink endpoints account for both - # input and output channel requirements - if ofe.tile == AnyMemTile: - if ofh._is_prod: - self._place_endpoint( - ofe, - mems_out, - common_col, - channels_out, - device, - output=True, - link_tiles=mems_in, - link_channels=channels_in, - ) - else: - self._place_endpoint( - ofe, - mems_in, - common_col, - channels_in, - device, - link_tiles=mems_out, - link_channels=channels_out, - ) - - elif ofe.tile == AnyComputeTile: - if ofh._is_prod: - self._place_endpoint( - ofe, - computes_out, - common_col, - channels_out, - device, - output=True, - link_tiles=computes_in, - link_channels=channels_in, - ) - else: - self._place_endpoint( - ofe, - computes_in, - common_col, - channels_in, - device, - link_tiles=computes_out, - link_channels=channels_out, - ) - - def _get_common_col(self, tiles: list[Tile]) -> int: - """ - A utility function that calculates a column that is "close" or "common" - to a set of tiles. It is a simple heuristic using the average to represent "distance". - """ - cols = [t.col for t in tiles if isinstance(t, Tile)] - if len(cols) == 0: - return 0 - avg_col = round(statistics.mean(cols)) - return avg_col - - def _find_col_match(self, col: int, tiles: list[Tile], device: Device) -> Tile: - """ - A utility function that sequentially searches a list of tiles to find one with a matching column. - The column is increased until a tile is found in the device, or an error is signaled. 
- """ - new_col = col - while new_col < device.cols: - for t in tiles: - if t.col == new_col: - return t - new_col += 1 - raise ValueError( - f"Failed to find a tile matching column {col}: tried until column {new_col}. Try using a device with more columns." - ) - - def _update_channels( - self, - ofe: ObjectFifoEndpoint, - tile: Tile, - output: bool, - num_required_channels: int, - channels: dict[Tile, tuple[ObjectFifoEndpoint, int]], - tiles: list[Tile], - device: Device, - ): - """ - A utility function that updates given channel and tile lists. It appends a new - (endpoint, num_required_channels) entry to the channels dict for the given tile key, then - verifies whether the total entries for that tile surpass the maximum number of available - channels. If so, it removes the tile from the list of available tiles. - """ - if num_required_channels == 0: - return - if tile not in channels: - channels[tile] = [] - channels[tile].append((ofe, num_required_channels)) - used_channels = 0 - for _, c in channels[tile]: - used_channels += c - max_tile_channels = device.get_num_connections(tile, output) - if used_channels >= max_tile_channels: - tiles.remove(tile) - - def _place_endpoint( - self, - ofe: ObjectFifoEndpoint, - tiles: list[Tile], - common_col: int, - channels: dict[Tile, tuple[ObjectFifoEndpoint, int]], - device: Device, - output=False, - link_tiles=[], - link_channels={}, - ): - """ - A utility function that places a given endpoint based on available DMA channels. If the endpoint is a - link, both input and output channels should be accounted for. Calls _update_channels() to update channel - dictionaries and tile lists. 
- """ - is_shim = False - num_required_channels = 1 - if isinstance(ofe, ObjectFifoLink): - # If endpoint is a link, account for both input and output DMA channels - if output: - num_required_channels = len(ofe._srcs) - link_required_channels = len(ofe._dsts) - else: - num_required_channels = len(ofe._dsts) - link_required_channels = len(ofe._srcs) - - # Check if placing is possible - test_tiles = tiles.copy() - while True: - tile = self._find_col_match(common_col, test_tiles, device) - total_channels = num_required_channels - if tile in channels: - for _, c in channels[tile]: - total_channels += c - max_tile_channels = device.get_num_connections(tile, output) - if total_channels <= max_tile_channels: - if isinstance(ofe, ObjectFifoLink): - # Also check for channels in the other link direction - total_link_channels = link_required_channels - if tile in link_channels: - for _, c in link_channels[tile]: - total_link_channels += c - max_link_channels = device.get_num_connections(tile, not output) - if total_link_channels <= max_link_channels: - break - else: - break - test_tiles.remove(tile) - - # If no error was signaled by _find_col_match(), placement is possible - ofe.place(tile) - - # Account for channels that were used by this placement - self._update_channels( - ofe, - tile, - output, - num_required_channels, - channels, - tiles, - device, - ) - - if isinstance(ofe, ObjectFifoLink): - self._update_channels( - ofe, - tile, - not output, - link_required_channels, - link_channels, - link_tiles, - device, - ) diff --git a/python/iron/program.py b/python/iron/program.py index e5fb7b57648..527b7a1f69f 100644 --- a/python/iron/program.py +++ b/python/iron/program.py @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. +# (c) Copyright 2024-2026 Advanced Micro Devices, Inc. 
from ..extras.context import mlir_mod_ctx # type: ignore from ..helpers.dialects.func import FuncBase @@ -12,7 +12,6 @@ from .device import Device from .runtime import Runtime -from .placers import Placer from .resolvable import Resolvable from ..utils import trace as trace_utils @@ -35,12 +34,11 @@ def __init__( self._device = device self._rt = rt - def resolve_program(self, placer: Placer | None = None, device_name="main"): + def resolve_program(self, device_name="main"): """This method resolves the program components in order to generate MLIR. Args: - placer (Placer | None, optional): The placer that will assign placement to unplaced components. - If a placer is not given, all components must be fully placed. Defaults to None. + device_name (str, optional): Symbol name for the device operation. Defaults to "main". Returns: module (Module): The module containing the MLIR context information. @@ -63,12 +61,6 @@ def device_body(): # Sort fifos for deterministic resolve all_fifos = sorted(all_fifos, key=lambda obj: obj.name) - if placer: - # TODO: should maybe just take runtime? - placer.make_placement( - self._device, self._rt, self._rt.workers, all_fifos - ) - # Collect all tiles all_tiles = [] for w in self._rt.workers: @@ -76,8 +68,17 @@ def device_body(): for f in all_fifos: all_tiles.extend([e.tile for e in f.all_of_endpoints()]) - # Resolve tiles + # Deduplicate tiles by object ID to avoid creating LogicalTileOp multiple times + # for the same Tile object. Use dict to preserve order (first occurrence). 
+ seen_tile_ids = {} for t in all_tiles: + tile_id = id(t) + if tile_id not in seen_tile_ids: + seen_tile_ids[tile_id] = t + unique_tiles = list(seen_tile_ids.values()) + + # Resolve tiles + for t in unique_tiles: self._device.resolve_tile(t) # Generate fifos diff --git a/python/iron/runtime/endpoint.py b/python/iron/runtime/endpoint.py index b6daccc0932..aad447bebec 100644 --- a/python/iron/runtime/endpoint.py +++ b/python/iron/runtime/endpoint.py @@ -4,12 +4,12 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. +# (c) Copyright 2024-2026 Advanced Micro Devices, Inc. from __future__ import annotations from ..dataflow.endpoint import ObjectFifoEndpoint -from ..device import PlacementTile +from ..device import Tile class RuntimeEndpoint(ObjectFifoEndpoint): @@ -17,7 +17,7 @@ class RuntimeEndpoint(ObjectFifoEndpoint): The placement of this Endpoint should be a Shim Tile. 
""" - def __init__(self, placement: PlacementTile) -> RuntimeEndpoint: + def __init__(self, placement: Tile) -> RuntimeEndpoint: super().__init__(placement) def __eq__(self, other: object) -> bool: diff --git a/python/iron/runtime/runtime.py b/python/iron/runtime/runtime.py index 15239d15f42..3a56adc5fcf 100644 --- a/python/iron/runtime/runtime.py +++ b/python/iron/runtime/runtime.py @@ -21,7 +21,7 @@ from ...dialects._aiex_ops_gen import dma_await_task, dma_free_task # type: ignore from ...helpers.taplib import TensorAccessPattern from ..dataflow import ObjectFifoHandle -from ..device import PlacementTile, AnyShimTile +from ..device import Tile from ..resolvable import Resolvable from ..worker import Worker, WorkerRuntimeBarrier, _BarrierSetOp from .dmatask import DMATask @@ -142,7 +142,7 @@ def fill( tap: TensorAccessPattern | None = None, task_group: RuntimeTaskGroup | None = None, wait: bool = False, - placement: PlacementTile = AnyShimTile, + placement: Tile | None = None, ) -> None: """Conceptually fill an ObjectFifoHandle (of type producer) with data from a runtime buffer. This should be called within a Runtime.sequence() context. @@ -154,7 +154,7 @@ def fill( If None is given, this will default to a linear transfer containing all data in the source buffer. Defaults to None. task_group (RuntimeTaskGroup | None, optional): A TaskGroup to associate this task with. Defaults to None. wait (bool, optional): Whether this Task should be awaited on or not. If not, it will be freed when the task group is finished. Defaults to False. - placement (PlacementTile, optional): The Shim tile to associate the data transfer with. Defaults to AnyShimTile. + placement (Tile, optional): The Shim tile to associate the data transfer with. Raises: ValueError: Arguments are validated. 
@@ -163,6 +163,14 @@ def fill( raise ValueError( f"Source {source} is not a RuntimeData object generated by sequence()" ) + # RuntimeEndpoint must be placed on Shim tile + placement = placement if placement else Tile() + if placement.tile_type and placement.tile_type != Tile.SHIM: + raise ValueError( + f"RuntimeEndpoint requires Tile.SHIM, got tile_type='{placement.tile_type}'" + ) + placement.tile_type = Tile.SHIM + rt_endpoint = RuntimeEndpoint(placement) if tap is None: @@ -179,7 +187,7 @@ def drain( tap: TensorAccessPattern | None = None, task_group: RuntimeTaskGroup | None = None, wait: bool = False, - placement: PlacementTile = AnyShimTile, + placement: Tile | None = None, ) -> None: """Conceptually fill an ObjectFifoHandle (of type consumer) of data and write that data to a runtime buffer. This should be called within a Runtime.sequence() context. @@ -191,7 +199,7 @@ def drain( If None is given, this will default to a linear transfer containing all data in the source buffer. Defaults to None. task_group (RuntimeTaskGroup | None, optional): A TaskGroup to associate this task with. Defaults to None. Defaults to None. wait (bool, optional): Whether this Task should be awaited on or not. If not, it will be freed when the task group is finished. Defaults to False. - placement (PlacementTile, optional): The Shim tile to associate the data transfer with. Defaults to AnyShimTile. + placement (Tile, optional): The Shim tile to associate the data transfer with. Raises: ValueError: Arguments are validated. 
@@ -200,6 +208,14 @@ def drain( raise ValueError( f"Destination {dest} is not a RuntimeData object generated by sequence()" ) + # RuntimeEndpoint must be placed on Shim tile + placement = placement if placement else Tile() + if placement.tile_type and placement.tile_type != Tile.SHIM: + raise ValueError( + f"RuntimeEndpoint requires Tile.SHIM, got tile_type='{placement.tile_type}'" + ) + placement.tile_type = Tile.SHIM + rt_endpoint = RuntimeEndpoint(placement) if tap is None: tap = dest.default_tap() diff --git a/python/iron/worker.py b/python/iron/worker.py index ac75242a8c5..b9073912d86 100644 --- a/python/iron/worker.py +++ b/python/iron/worker.py @@ -4,7 +4,7 @@ # See https://llvm.org/LICENSE.txt for license information. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # -# (c) Copyright 2024 Advanced Micro Devices, Inc. +# (c) Copyright 2024-2026 Advanced Micro Devices, Inc. import sys from typing import Callable @@ -12,7 +12,7 @@ from ..dialects.aie import core, lock, use_lock from ..dialects.aiex import set_lock_value, LockAction from ..helpers.dialects.scf import _for as range_ -from .device import PlacementTile, AnyComputeTile, Tile +from .device import Tile from .dataflow.objectfifo import ObjectFifoHandle, ObjectFifo from .dataflow.endpoint import ObjectFifoEndpoint from .kernel import Kernel, ExternalFunction @@ -32,7 +32,7 @@ def __init__( self, core_fn: Callable | None, fn_args: list = [], - placement: PlacementTile | None = AnyComputeTile, + placement: Tile | None = None, while_true: bool = True, stack_size: int = None, allocation_scheme: str = None, @@ -44,7 +44,7 @@ def __init__( Args: core_fn (Callable | None): The task to run on a core. If None, a busy-loop (`while(true): pass`) core will be generated. fn_args (list, optional): Pointers to arguments, which should include all context the core_fn needs to run. Defaults to []. - placement (PlacementTile | None, optional): The placement for the Worker. Defaults to AnyComputeTile. 
+ placement (Tile | None, optional): The placement for the Worker. Defaults to compute tile. while_true (bool, optional): If true, will wrap the core_fn in a while(true) loop to ensure it runs until reconfiguration. Defaults to True. stack_size (int, optional): The stack_size in bytes to be allocated for the worker. Defaults to 1024 bytes. allocation_scheme (str, optional): The memory allocation scheme to use for the Worker, either 'basic-sequential' or 'bank-aware'. If None, defaults to bank-aware. @@ -54,11 +54,21 @@ def __init__( Raises: ValueError: Parameters are validated. """ - self._tile = placement + # Setup and validate placement for Worker (must be COMPUTE tile) + self._tile = placement if placement else Tile() + if not isinstance(self._tile, Tile): + raise ValueError(f"Worker requires Tile, got {type(self._tile)}") + if self._tile.tile_type and self._tile.tile_type != Tile.COMPUTE: + raise ValueError( + f"Worker requires Tile.COMPUTE, got tile_type='{self._tile.tile_type}'" + ) + self._tile.tile_type = Tile.COMPUTE # Always set to COMPUTE + self._while_true = while_true self.stack_size = stack_size self.allocation_scheme = allocation_scheme - if allocation_scheme: + # Set allocation_scheme on the tile if specified + if allocation_scheme and self._tile is not None: self._tile.allocation_scheme = allocation_scheme self.trace = trace self.trace_events = trace_events @@ -89,6 +99,8 @@ def do_nothing_core_fun(*args) -> None: self._fifos.append(arg) elif isinstance(arg, Buffer): self._buffers.append(arg) + # Buffers are placed on the same tile as the Worker + arg._tile = self._tile elif isinstance(arg, ObjectFifo): # This is an easy error to make, so we catch it early raise ValueError( diff --git a/test/dialect/AIE/logical_tile_op_bad.mlir b/test/dialect/AIE/logical_tile_op_bad.mlir new file mode 100644 index 00000000000..c66848bb7ae --- /dev/null +++ b/test/dialect/AIE/logical_tile_op_bad.mlir @@ -0,0 +1,49 @@ +//===- logical_tile_op_bad.mlir 
--------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2026 Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// RUN: not aie-opt --split-input-file %s 2>&1 | FileCheck %s + +// CHECK: error{{.*}}'aie.logical_tile' op column index (100) must be less than the number of columns in the device +module @test_logical_tile_bad_col { + aie.device(npu2) { + %tile = aie.logical_tile(100, ?) + aie.end + } +} + +// ----- + +// CHECK: error{{.*}}'aie.logical_tile' op row index (100) must be less than the number of rows in the device +module @test_logical_tile_bad_row { + aie.device(npu2) { + %tile = aie.logical_tile(?, 100) + aie.end + } +} + +// ----- + +// CHECK: error{{.*}}'aie.logical_tile' op declared logical tile type does not match the tile type at coordinates (0, 0) +module @test_logical_tile_type_mismatch_fixed { + aie.device(npu2) { + %tile = aie.logical_tile(0, 0) + aie.end + } +} + +// ----- + +// CHECK: error{{.*}}'aie.logical_tile' op Shim tiles cannot have an allocation scheme +module @test_logical_tile_shim_allocation_scheme { + aie.device(npu2) { + %tile = aie.logical_tile(0, 0) {allocation_scheme = "basic-sequential"} + aie.end + } +} diff --git a/test/dialect/AIE/logical_tile_op_basic.mlir b/test/dialect/AIE/logical_tile_op_basic.mlir new file mode 100644 index 00000000000..fe2ce3bd610 --- /dev/null +++ b/test/dialect/AIE/logical_tile_op_basic.mlir @@ -0,0 +1,74 @@ +//===- logical_tile_op_basic.mlir ------------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2026 Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt --split-input-file %s | FileCheck %s + +// Test LogicalTileOp parsing with all tile types and placement variations + +// CHECK-LABEL: @test_logical_tile +// CHECK: %[[CORE_UNPLACED:.*]] = aie.logical_tile(?, ?) +// CHECK: %[[CORE_COL:.*]] = aie.logical_tile(0, ?) +// CHECK: %[[CORE_ROW:.*]] = aie.logical_tile(?, 2) +// CHECK: %[[CORE_FIXED:.*]] = aie.logical_tile(2, 3) +// CHECK: %[[MEM_UNPLACED:.*]] = aie.logical_tile(?, ?) +// CHECK: %[[MEM_COL:.*]] = aie.logical_tile(1, ?) +// CHECK: %[[MEM_ROW:.*]] = aie.logical_tile(?, 1) +// CHECK: %[[MEM_FIXED:.*]] = aie.logical_tile(1, 1) +// CHECK: %[[SHIM_NOC_UNPLACED:.*]] = aie.logical_tile(?, ?) +// CHECK: %[[SHIM_NOC_COL:.*]] = aie.logical_tile(3, ?) +// CHECK: %[[SHIM_NOC_ROW:.*]] = aie.logical_tile(?, 0) +// CHECK: %[[SHIM_NOC_FIXED:.*]] = aie.logical_tile(2, 0) +// CHECK: %[[SHIM_PL_UNPLACED:.*]] = aie.logical_tile(?, ?) +// CHECK: %[[SHIM_PL_COL:.*]] = aie.logical_tile(0, ?) +// CHECK: %[[SHIM_PL_ROW:.*]] = aie.logical_tile(?, 0) +// CHECK: %[[SHIM_PL_FIXED:.*]] = aie.logical_tile(0, 0) +module @test_logical_tile { + aie.device(xcve2802) { + %core_unplaced = aie.logical_tile(?, ?) + %core_col = aie.logical_tile(0, ?) + %core_row = aie.logical_tile(?, 2) + %core_fixed = aie.logical_tile(2, 3) + + %mem_unplaced = aie.logical_tile(?, ?) + %mem_col = aie.logical_tile(1, ?) + %mem_row = aie.logical_tile(?, 1) + %mem_fixed = aie.logical_tile(1, 1) + + %shim_noc_unplaced = aie.logical_tile(?, ?) + %shim_noc_col = aie.logical_tile(3, ?) + %shim_noc_row = aie.logical_tile(?, 0) + %shim_noc_fixed = aie.logical_tile(2, 0) + + %shim_pl_unplaced = aie.logical_tile(?, ?) + %shim_pl_col = aie.logical_tile(0, ?) 
+ %shim_pl_row = aie.logical_tile(?, 0) + %shim_pl_fixed = aie.logical_tile(0, 0) + aie.end + } +} + +// ----- + +// CHECK-LABEL: @test_ssa_names +// CHECK: %logical_core = aie.logical_tile(?, ?) +// CHECK: %logical_core_0 = aie.logical_tile(?, ?) +// CHECK: %logical_shim_noc = aie.logical_tile(?, ?) +// CHECK: %logical_shim_pl = aie.logical_tile(?, ?) +// CHECK: %logical_mem = aie.logical_tile(?, ?) +module @test_ssa_names { + aie.device(xcve2802) { + %t0 = aie.logical_tile(?, ?) + %t1 = aie.logical_tile(?, ?) + %t2 = aie.logical_tile(?, ?) + %t3 = aie.logical_tile(?, ?) + %t4 = aie.logical_tile(?, ?) + aie.end + } +} diff --git a/test/dialect/AIE/logical_tile_op_withops.mlir b/test/dialect/AIE/logical_tile_op_withops.mlir new file mode 100644 index 00000000000..6af7dbec862 --- /dev/null +++ b/test/dialect/AIE/logical_tile_op_withops.mlir @@ -0,0 +1,183 @@ +//===- logical_tile_op_withops.mlir ----------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2026 Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// RUN: aie-opt --split-input-file %s | FileCheck %s + +// Test LogicalTileOp with expected ops + +// Test CoreOp, MemOp, BufferOp, LockOp with LogicalTileOp +// CHECK-LABEL: @test_core_tile_elements +// CHECK: %[[TILE:.*]] = aie.logical_tile(?, ?) +// CHECK: aie.core(%[[TILE]]) +// CHECK: aie.mem(%[[TILE]]) +// CHECK: aie.buffer(%[[TILE]]) +// CHECK: aie.lock(%[[TILE]]) +module @test_core_tile_elements { + aie.device(npu2) { + %core_tile = aie.logical_tile(?, ?) 
+ %core = aie.core(%core_tile) { + aie.end + } + %mem = aie.mem(%core_tile) { + aie.end + } + %buf = aie.buffer(%core_tile) : memref<256xi32> + %lock = aie.lock(%core_tile) + aie.end + } +} + +// ----- + +// Test MemTileDMAOp, BufferOp, LockOp with LogicalTileOp +// CHECK-LABEL: @test_mem_tile_elements +// CHECK: %[[TILE:.*]] = aie.logical_tile(?, ?) +// CHECK: aie.memtile_dma(%[[TILE]]) +// CHECK: aie.buffer(%[[TILE]]) +// CHECK: aie.lock(%[[TILE]]) +module @test_mem_tile_elements { + aie.device(npu2) { + %mem_tile = aie.logical_tile(?, ?) + %memtile_dma = aie.memtile_dma(%mem_tile) { + aie.end + } + %buf = aie.buffer(%mem_tile) : memref<256xi32> + %lock = aie.lock(%mem_tile) + aie.end + } +} + +// ----- + +// Test ShimDMAOp, LockOp with LogicalTileOp +// CHECK-LABEL: @test_shim_noc_tile_elements +// CHECK: %[[TILE:.*]] = aie.logical_tile(?, ?) +// CHECK: aie.shim_dma(%[[TILE]]) +// CHECK: aie.lock(%[[TILE]]) +module @test_shim_noc_tile_elements { + aie.device(npu2) { + %shim_tile = aie.logical_tile(?, ?) + %shim_dma = aie.shim_dma(%shim_tile) { + aie.end + } + %lock = aie.lock(%shim_tile) + aie.end + } +} + +// ----- + +// Test ObjectFifoCreateOp with LogicalTileOp +// CHECK-LABEL: @test_objectfifo_shim_to_core +// CHECK: %[[SHIM:.*]] = aie.logical_tile(?, ?) +// CHECK: %[[CORE:.*]] = aie.logical_tile(?, ?) +// CHECK: aie.objectfifo @of2(%[[SHIM]], {%[[CORE]]}, 2 : i32) +module @test_objectfifo_shim_to_core { + aie.device(npu2) { + %shim = aie.logical_tile(?, ?) + %core = aie.logical_tile(?, ?) + aie.objectfifo @of2(%shim, {%core}, 2 : i32) : !aie.objectfifo> + aie.end + } +} + +// ----- + +// Test ObjectFifoLinkOp with LogicalTileOp +// CHECK-LABEL: @test_objectfifo_link +// CHECK: %[[CORE:.*]] = aie.logical_tile(?, ?) +// CHECK: %[[MEM:.*]] = aie.logical_tile(?, ?) +// CHECK: %[[CORE2:.*]] = aie.logical_tile(?, ?) 
+// CHECK: aie.objectfifo @of_in(%[[CORE]], {%[[MEM]]}, 2 : i32) +// CHECK: aie.objectfifo @of_out(%[[MEM]], {%[[CORE2]]}, 2 : i32) +// CHECK: aie.objectfifo.link [@of_in] -> [@of_out]([] []) +module @test_objectfifo_link { + aie.device(npu2) { + %core1 = aie.logical_tile(?, ?) + %mem = aie.logical_tile(?, ?) + %core2 = aie.logical_tile(?, ?) + aie.objectfifo @of_in(%core1, {%mem}, 2 : i32) : !aie.objectfifo> + aie.objectfifo @of_out(%mem, {%core2}, 2 : i32) : !aie.objectfifo> + aie.objectfifo.link [@of_in] -> [@of_out]([] []) + aie.end + } +} + +// ----- + +// Test ShimDMAAllocationOp with LogicalTileOp +// CHECK-LABEL: @test_shim_dma_allocation +// CHECK: %[[SHIM:.*]] = aie.logical_tile(?, ?) +// CHECK: %[[CORE:.*]] = aie.logical_tile(?, ?) +// CHECK: aie.objectfifo @of_alloc(%[[SHIM]], {%[[CORE]]}, 2 : i32) +// CHECK: aie.shim_dma_allocation @of_alloc_dma(%[[SHIM]], MM2S, 0) +module @test_shim_dma_allocation { + aie.device(npu2) { + %shim = aie.logical_tile(?, ?) + %core = aie.logical_tile(?, ?) + aie.objectfifo @of_alloc(%shim, {%core}, 2 : i32) : !aie.objectfifo> + aie.shim_dma_allocation @of_alloc_dma(%shim, MM2S, 0) + aie.end + } +} + +// ----- + +// Mixed LogicalTileOp and TileOp usage +// CHECK-LABEL: @test_mixed_tile_types +// CHECK: %[[TILE:.*]] = aie.tile(0, 2) +// CHECK: %[[LOGICAL:.*]] = aie.logical_tile(?, ?) +// CHECK: aie.objectfifo @of_mixed(%[[TILE]], {%[[LOGICAL]]}, 2 : i32) +module @test_mixed_tile_types { + aie.device(npu2) { + %tile = aie.tile(0, 2) + %logical = aie.logical_tile(?, ?) + aie.objectfifo @of_mixed(%tile, {%logical}, 2 : i32) : !aie.objectfifo> + aie.end + } +} + +// ----- + +// Test DMAConfigureTaskOp with LogicalTileOp +module @test_dma_configure_task { + aie.device(npu2) { + %shim_tile = aie.logical_tile(?, ?) 
+ %buffer = aie.external_buffer {sym_name = "ext_buffer"} : memref<1024xi32> + + aie.runtime_sequence(%arg0: memref<1024xi32>) { + %task = aiex.dma_configure_task(%shim_tile, MM2S, 0) { + aie.dma_bd(%buffer : memref<1024xi32>, 0, 1024) {bd_id = 0 : i32} + aie.end + } + aiex.dma_start_task(%task) + } + aie.end + } +} + +// ----- + +// Test TileElement ops with LogicalTileOp (DMAConfigureTaskOp on MemTile) +module @test_dma_configure_task_memtile { + aie.device(npu2) { + %mem_tile = aie.logical_tile(?, ?) + %buffer_in = aie.buffer(%mem_tile) {sym_name = "buf_in"} : memref<256xi32> + + aie.runtime_sequence(%arg0: memref<256xi32>) { + %task = aiex.dma_configure_task(%mem_tile, S2MM, 0) { + aie.dma_bd(%buffer_in : memref<256xi32>, 0, 256) {bd_id = 0 : i32} + aie.end + } + aiex.dma_start_task(%task) + } + aie.end + } +} diff --git a/test/dialect/AIE/logical_tile_op_withops_bad.mlir b/test/dialect/AIE/logical_tile_op_withops_bad.mlir new file mode 100644 index 00000000000..0cb65f3ab91 --- /dev/null +++ b/test/dialect/AIE/logical_tile_op_withops_bad.mlir @@ -0,0 +1,205 @@ +//===- logical_tile_op_withops_bad.mlir ------------------------*- MLIR -*-===// +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// (c) Copyright 2026 Advanced Micro Devices, Inc. +// +//===----------------------------------------------------------------------===// + +// RUN: not aie-opt --split-input-file %s 2>&1 | FileCheck %s + +// Test verification errors when using LogicalTileOp with TileElement ops + +// Interconnect ops explicitly require placed TileOp +// CHECK: error{{.*}}'aie.switchbox' op requires a placed tile (aie.tile), not a logical tile +module @test_switchbox_with_logical_tile { + aie.device(npu2) { + %tile = aie.logical_tile(?, ?) 
+ // Switchbox requires aie.tile, not aie.logical_tile + aie.switchbox(%tile) { + aie.end + } + aie.end + } +} + +// ----- + +// CHECK: error{{.*}}'aie.shim_mux' op requires a placed tile (aie.tile), not a logical tile +module @test_shim_mux_with_logical_tile { + aie.device(npu2) { + %tile = aie.logical_tile(?, ?) + // ShimMux requires aie.tile, not aie.logical_tile + aie.shim_mux(%tile) { + aie.connect + } + aie.end + } +} + +// ----- + +// MemOp with wrong tile type +// CHECK: error{{.*}}'aie.mem' op failed to verify that op exists in a core tile +module @test_mem_wrong_tile_type { + aie.device(npu2) { + // MemTile cannot have MemOp (use MemTileDMAOp instead) + %mem_tile = aie.logical_tile(?, ?) + %mem = aie.mem(%mem_tile) { + aie.end + } + aie.end + } +} + +// ----- + +// MemTileDMAOp with wrong tile type +// CHECK: error{{.*}}'aie.memtile_dma' op failed to verify that op exists in a MemTile +module @test_memtile_dma_wrong_tile_type { + aie.device(npu2) { + // CoreTile cannot have MemTileDMAOp + %core_tile = aie.logical_tile(?, ?) + %memtile_dma = aie.memtile_dma(%core_tile) { + aie.end + } + aie.end + } +} + +// ----- + +// ShimDMAOp with wrong tile type +// CHECK: error{{.*}}'aie.shim_dma' op failed to verify that op exists in a shim tile with NOC connection +module @test_shim_dma_wrong_tile_type { + aie.device(npu2) { + // CoreTile cannot have ShimDMAOp + %core_tile = aie.logical_tile(?, ?) + %shim_dma = aie.shim_dma(%core_tile) { + aie.end + } + aie.end + } +} + +// ----- + +// BufferOp on tile without memory +// CHECK: error{{.*}}'aie.buffer' op failed to verify that op exists in a tile with local memory +module @test_buffer_on_shim_tile { + aie.device(npu2) { + // ShimNOCTile has no memory for buffers + %shim_tile = aie.logical_tile(?, ?) 
+ %buf = aie.buffer(%shim_tile) : memref<256xi32> + aie.end + } +} + +// ----- + +// ShimDMAAllocationOp with wrong tile type +// CHECK: error{{.*}}'aie.shim_dma_allocation' op tile must be a shim tile +module @test_shim_dma_allocation_wrong_tile_type { + aie.device(npu2) { + %core = aie.logical_tile(?, ?) + %shim = aie.logical_tile(?, ?) + aie.objectfifo @of_bad(%shim, {%core}, 2 : i32) : !aie.objectfifo> + // CoreTile cannot be used for ShimDMAAllocationOp + aie.shim_dma_allocation @of_bad(%core, MM2S, 0) + aie.end + } +} + +// ----- + +// HasValidDMAChannels trait verification with LogicalTileOp +// CHECK: error{{.*}}'aie.mem' op uses more output channels than available on this tile +module @test_has_valid_dma_channels { + aie.device(npu2) { + %tile = aie.logical_tile(?, ?) + // CoreTile only has 2 MM2S channels, try to use 3 + aie.mem(%tile) { + %dma0 = aie.dma_start(MM2S, 0, ^bd0, ^dma1) + ^dma1: + %dma1_token = aie.dma_start(MM2S, 1, ^bd0, ^dma2) + ^dma2: + %dma2_token = aie.dma_start(MM2S, 2, ^bd0, ^end) + ^bd0: + aie.end + ^end: + aie.end + } + aie.end + } +} + +// ----- + +// HasValidBDs trait verification with LogicalTileOp +// CHECK: error{{.*}}'aie.mem' op has more than 16 blocks +module @test_has_valid_bds { + aie.device(npu2) { + %tile = aie.logical_tile(?, ?) 
+ %buf = aie.buffer(%tile) : memref<256xi32> + aie.mem(%tile) { + %dma = aie.dma_start(MM2S, 0, ^bd0, ^end) + ^bd0: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd1 + ^bd1: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd2 + ^bd2: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd3 + ^bd3: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd4 + ^bd4: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd5 + ^bd5: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd6 + ^bd6: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd7 + ^bd7: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd8 + ^bd8: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd9 + ^bd9: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd10 + ^bd10: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd11 + ^bd11: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd12 + ^bd12: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd13 + ^bd13: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd14 + ^bd14: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd15 + ^bd15: + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd16 + ^bd16: + // This is the 17th BD, should fail + aie.dma_bd(%buf : memref<256xi32>, 0, 256) + aie.next_bd ^bd0 + ^end: + aie.end + } + aie.end + } +} diff --git a/test/python/aie_ops.py b/test/python/aie_ops.py index 1ec19ad6f00..b6eefc26e53 100644 --- a/test/python/aie_ops.py +++ b/test/python/aie_ops.py @@ -7,6 +7,7 @@ from aie.dialects.aie import ( AIEDevice, + AIETileType, Core, Device, MemOp, @@ -18,6 +19,7 @@ object_fifo, object_fifo_link, tile, + logical_tile, cascade_flow, WireBundle, packetflow, @@ -44,6 +46,20 @@ def tileOpAllocationScheme(): t = tile(col=2, row=2, allocation_scheme="basic-sequential") +# CHECK-LABEL: logicalTileOpUnconstrained +# CHECK: %[[CORE:.*]] = aie.logical_tile(?, ?) 
+@construct_and_print_module +def logicalTileOpUnconstrained(): + t = logical_tile(AIETileType.CoreTile) + + +# CHECK-LABEL: logicalTileOpPartialPlaced +# CHECK: %[[MEM:.*]] = aie.logical_tile(1, ?) +@construct_and_print_module +def logicalTileOpPartialPlaced(): + t = logical_tile(AIETileType.MemTile, col=1) + + # CHECK-LABEL: coreOp # CHECK: %[[VAL1:.*]] = aie.tile(1, 1) # CHECK: %[[VAL2:.*]] = aie.core(%[[VAL1]]) { diff --git a/test/python/barrier.py b/test/python/barrier.py index fe8c2f59d2d..d786b1abe62 100644 --- a/test/python/barrier.py +++ b/test/python/barrier.py @@ -1,4 +1,4 @@ -# Copyright (C) 2025, Advanced Micro Devices, Inc. +# Copyright (C) 2025-2026, Advanced Micro Devices, Inc. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # RUN: %python %s | FileCheck %s @@ -14,19 +14,18 @@ Tile, NPU2Col1, ) -from aie.iron.placers import SequentialPlacer # CHECK: module { # CHECK: aie.device(npu2_1col) { -# CHECK: %tile_0_2 = aie.tile(0, 2) -# CHECK: %lock_0_2 = aie.lock(%tile_0_2) -# CHECK: %core_0_2 = aie.core(%tile_0_2) { -# CHECK: aie.use_lock(%lock_0_2, Acquire, 1) -# CHECK: aie.use_lock(%lock_0_2, Release, 1) +# CHECK: %[[WORKER:.*]] = aie.logical_tile +# CHECK: %[[LOCK:.*]] = aie.lock(%[[WORKER]]) +# CHECK: %{{.*}} = aie.core(%[[WORKER]]) { +# CHECK: aie.use_lock(%[[LOCK]], Acquire, 1) +# CHECK: aie.use_lock(%[[LOCK]], Release, 1) # CHECK: } # CHECK: aie.runtime_sequence(%arg0: memref<16xi32>) { -# CHECK: aiex.set_lock(%lock_0_2, 1) +# CHECK: aiex.set_lock(%[[LOCK]], 1) # CHECK: } # CHECK: } # CHECK: } @@ -54,7 +53,7 @@ def task(barrier): rt.set_barrier(workerBarrier, 1) # Place components (assign them resources on the device) and generate an MLIR module - return print(Program(NPU2Col1(), rt).resolve_program(SequentialPlacer())) + return print(Program(NPU2Col1(), rt).resolve_program()) my_barrier() diff --git a/test/python/control_flow.py b/test/python/control_flow.py index c20806bf078..c87ea352b78 100644 --- a/test/python/control_flow.py +++ 
b/test/python/control_flow.py @@ -1,7 +1,6 @@ import numpy as np from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU2 from aie.iron.controlflow import range_ from aie.helpers.dialects.func import func @@ -53,12 +52,12 @@ def core_fn(of_in, of_out, passthrough_fn): my_program = Program(NPU2(), rt) # Place components (assign them resources on the device) and generate an MLIR module - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() return module # CHECK-LABEL: range_with_int32 -# CHECK: scf.for %arg1 = %c0_0 to %c4 step %c1_1 { +# CHECK: scf.for %arg1 = %c0_{{.*}} to %c4 step %c1_{{.*}} { def range_with_int32(): print("range_with_int32") print(custom_loop_type(np.int32)) @@ -68,7 +67,7 @@ def range_with_int32(): # CHECK-LABEL: range_with_int64 -# CHECK: scf.for %arg1 = %c0_0 to %c4 step %c1_1 { +# CHECK: scf.for %arg1 = %c0_{{.*}} to %c4 step %c1_{{.*}} { def range_with_int64(): print("range_with_int64") print(custom_loop_type(np.int64)) diff --git a/test/python/iron_objectfifo_workers.py b/test/python/iron_objectfifo_workers.py new file mode 100644 index 00000000000..e944008fd73 --- /dev/null +++ b/test/python/iron_objectfifo_workers.py @@ -0,0 +1,90 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# RUN: %python %s | FileCheck %s + +""" +Test IRON patterns with Workers and ObjectFifos generate +separate shim mem tiles but reuse compute tiles. 
+""" + +import numpy as np +from aie.iron import ObjectFifo, Program, Runtime, Worker +from aie.iron.device import NPU2 +from aie.iron.controlflow import range_ +from util import construct_and_print_module + + +# CHECK-LABEL: TEST: passthrough_dma_forward +# CHECK: aie.device(npu2) { +# CHECK: %[[SHIM_IN:.*]] = aie.logical_tile +# CHECK: %[[MEM:.*]] = aie.logical_tile +# CHECK: %[[SHIM_OUT:.*]] = aie.logical_tile +# CHECK: aie.objectfifo @in(%[[SHIM_IN]], {%[[MEM]]}, {{.*}}) +# CHECK: aie.objectfifo @in_fwd(%[[MEM]], {%[[SHIM_OUT]]}, {{.*}}) +# CHECK: aie.objectfifo.link [@in] -> [@in_fwd] +@construct_and_print_module +def passthrough_dma_forward(module): + """Test ObjectFifo.forward() creates link with LogicalTileOps.""" + N = 4096 + line_size = 1024 + vector_ty = np.ndarray[(N,), np.dtype[np.int32]] + line_ty = np.ndarray[(line_size,), np.dtype[np.int32]] + + of_in = ObjectFifo(line_ty, name="in") + of_out = of_in.cons().forward() + + rt = Runtime() + with rt.sequence(vector_ty, vector_ty, vector_ty) as (a_in, _, c_out): + rt.fill(of_in.prod(), a_in) + rt.drain(of_out.cons(), c_out, wait=True) + + module = Program(NPU2(), rt).resolve_program() + return module + + +# CHECK-LABEL: TEST: worker_multiple_objectfifos +# CHECK: aie.device(npu2) { +# CHECK: %[[WORKER:.*]] = aie.logical_tile(?, ?) 
+# CHECK: %[[SHIM1:.*]] = aie.logical_tile +# CHECK: %[[SHIM2:.*]] = aie.logical_tile +# CHECK: %[[SHIM3:.*]] = aie.logical_tile +# CHECK: aie.objectfifo @in1(%[[SHIM1]], {%[[WORKER]]}, {{.*}}) +# CHECK: aie.objectfifo @in2(%[[SHIM2]], {%[[WORKER]]}, {{.*}}) +# CHECK: aie.objectfifo @out(%[[WORKER]], {%[[SHIM3]]}, {{.*}}) +# CHECK: aie.core(%[[WORKER]]) +# CHECK: aie.runtime_sequence +@construct_and_print_module +def worker_multiple_objectfifos(module): + """Test Worker with multiple ObjectFifos""" + N = 256 + n = 16 + tensor_ty = np.ndarray[(N,), np.dtype[np.int32]] + tile_ty = np.ndarray[(n,), np.dtype[np.int32]] + + of_in1 = ObjectFifo(tile_ty, name="in1") + of_in2 = ObjectFifo(tile_ty, name="in2") + of_out = ObjectFifo(tile_ty, name="out") + + def core_body(of_in1, of_in2, of_out): + for _ in range_(4): + elem_in1 = of_in1.acquire(1) + elem_in2 = of_in2.acquire(1) + elem_out = of_out.acquire(1) + for i in range_(n): + elem_out[i] = elem_in1[i] * elem_in2[i] + of_in1.release(1) + of_in2.release(1) + of_out.release(1) + + worker = Worker(core_body, [of_in1.cons(), of_in2.cons(), of_out.prod()]) + + rt = Runtime() + with rt.sequence(tensor_ty, tensor_ty, tensor_ty) as (A, B, C): + rt.start(worker) + rt.fill(of_in1.prod(), A) + rt.fill(of_in2.prod(), B) + rt.drain(of_out.cons(), C, wait=True) + + module = Program(NPU2(), rt).resolve_program() + return module diff --git a/test/python/localbuffer.py b/test/python/localbuffer.py index 5a8c802b579..43592d1206e 100644 --- a/test/python/localbuffer.py +++ b/test/python/localbuffer.py @@ -1,18 +1,20 @@ -# Copyright (C) 2025, Advanced Micro Devices, Inc. +# Copyright (C) 2025-2026, Advanced Micro Devices, Inc. 
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception # RUN: %python %s | FileCheck %s import numpy as np from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker, Buffer -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU2Col1 # CHECK: module { # CHECK: aie.device(npu2_1col) { -# CHECK: %tile_0_2 = aie.tile(0, 2) -# CHECK: %uninit_local_buf = aie.buffer(%tile_0_2) {sym_name = "uninit_local_buf"} : memref<4096xui8> -# CHECK: %init_local_buf = aie.buffer(%tile_0_2) {sym_name = "init_local_buf"} : memref<4096xui8> = dense<0> +# CHECK: %[[WORKER:.*]] = aie.logical_tile +# CHECK: %{{.*}} = aie.logical_tile +# CHECK: %{{.*}} = aie.logical_tile +# CHECK: %uninit_local_buf = aie.buffer(%[[WORKER]]) {sym_name = "uninit_local_buf"} : memref<4096xui8> +# CHECK: %init_local_buf = aie.buffer(%[[WORKER]]) {sym_name = "init_local_buf"} : memref<4096xui8> = dense<0> +# CHECK: aie.core(%[[WORKER]]) def passthrough_local_buff(): in1_size = 4096 in1_dtype = np.uint8 @@ -66,7 +68,7 @@ def core_fn(of_in, of_out, buf1, buf2, passThroughLine): rt.drain(of_out.cons(), b_out, wait=True) # Place components (assign them resources on the device) and generate an MLIR module - return Program(NPU2Col1(), rt).resolve_program(SequentialPlacer()) + return Program(NPU2Col1(), rt).resolve_program() print(passthrough_local_buff()) diff --git a/test/python/logical_tile.py b/test/python/logical_tile.py new file mode 100644 index 00000000000..04ae9fce92d --- /dev/null +++ b/test/python/logical_tile.py @@ -0,0 +1,75 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# RUN: %python %s | FileCheck %s + +import numpy as np +from aie.iron import ObjectFifo, Program, Runtime, Worker +from aie.iron.device import NPU2, Tile +from util import construct_and_print_module + + +# CHECK-LABEL: TEST: logical_tile_worker_unconstrained +# CHECK: aie.device(npu2) { +# CHECK: %[[WORKER:.*]] = aie.logical_tile(?, ?) 
+# CHECK: %[[SHIM_IN:.*]] = aie.logical_tile +# CHECK: %[[SHIM_OUT:.*]] = aie.logical_tile +# CHECK: aie.objectfifo @in(%[[SHIM_IN]], {%[[WORKER]]}, {{.*}}) +# CHECK: aie.objectfifo @out(%[[WORKER]], {%[[SHIM_OUT]]}, {{.*}}) +# CHECK: aie.core(%[[WORKER]]) +@construct_and_print_module +def logical_tile_worker_unconstrained(module): + """Test unconstrained Worker, ObjectFifos and core consume LogicalTileOp.""" + n = 1024 + n_ty = np.ndarray[(n,), np.dtype[np.int32]] + + of_in = ObjectFifo(n_ty, name="in") + of_out = ObjectFifo(n_ty, name="out") + + def core_fn(of_in, of_out): + pass + + # Worker with default placement (AnyComputeTile) + worker = Worker(core_fn, [of_in.cons(), of_out.prod()]) + + rt = Runtime() + with rt.sequence(n_ty, n_ty, n_ty) as (A, B, C): + rt.start(worker) + rt.fill(of_in.prod(), A) + rt.drain(of_out.cons(), C, wait=True) + + module = Program(NPU2(), rt).resolve_program() + return module + + +# CHECK-LABEL: TEST: logical_tile_worker_constrained +# CHECK: aie.device(npu2) { +# CHECK: %[[WORKER:.*]] = aie.logical_tile(0, 2) +# CHECK: %[[SHIM_IN:.*]] = aie.logical_tile +# CHECK: %[[SHIM_OUT:.*]] = aie.logical_tile +# CHECK: aie.objectfifo @in(%[[SHIM_IN]], {%[[WORKER]]}, {{.*}}) +# CHECK: aie.objectfifo @out(%[[WORKER]], {%[[SHIM_OUT]]}, {{.*}}) +# CHECK: aie.core(%[[WORKER]]) +@construct_and_print_module +def logical_tile_worker_constrained(module): + """Test constrained Worker, ObjectFifos and core consume same LogicalTileOp.""" + n = 1024 + n_ty = np.ndarray[(n,), np.dtype[np.int32]] + + of_in = ObjectFifo(n_ty, name="in") + of_out = ObjectFifo(n_ty, name="out") + + def core_fn(of_in, of_out): + pass + + # Worker with explicit tile placement + worker = Worker(core_fn, [of_in.cons(), of_out.prod()], placement=Tile(0, 2)) + + rt = Runtime() + with rt.sequence(n_ty, n_ty, n_ty) as (A, B, C): + rt.start(worker) + rt.fill(of_in.prod(), A) + rt.drain(of_out.cons(), C, wait=True) + + module = Program(NPU2(), rt).resolve_program() + return module diff 
--git a/test/python/npu-xrt/test_cached_xrt_runtime.py b/test/python/npu-xrt/test_cached_xrt_runtime.py index 082fc25538d..45adc475d67 100644 --- a/test/python/npu-xrt/test_cached_xrt_runtime.py +++ b/test/python/npu-xrt/test_cached_xrt_runtime.py @@ -14,7 +14,6 @@ import os import aie.iron as iron from aie.iron import ObjectFifo, Worker, Runtime, Program -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.utils import aie.utils.jit @@ -101,7 +100,7 @@ def core_body(of_in, of_out, func_to_apply): rt.drain(of_out.cons(), B, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() def test_runtime_caching_reuse(runtime): diff --git a/test/python/npu-xrt/test_cached_xrt_runtime_insts.py b/test/python/npu-xrt/test_cached_xrt_runtime_insts.py index e402f97b1d3..976fd2f30da 100644 --- a/test/python/npu-xrt/test_cached_xrt_runtime_insts.py +++ b/test/python/npu-xrt/test_cached_xrt_runtime_insts.py @@ -14,7 +14,6 @@ import os import aie.iron as iron from aie.iron import ObjectFifo, Worker, Runtime, Program -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ import aie.utils import aie.utils.jit @@ -101,7 +100,7 @@ def core_body(of_in, of_out, func_to_apply): rt.drain(of_out.cons(), B, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() def test_insts_caching(runtime): diff --git a/test/python/npu-xrt/test_compile_cache_functionality.py b/test/python/npu-xrt/test_compile_cache_functionality.py index cd3f44fcea5..854c32d7e66 100644 --- a/test/python/npu-xrt/test_compile_cache_functionality.py +++ 
b/test/python/npu-xrt/test_compile_cache_functionality.py @@ -16,7 +16,6 @@ import aie.iron as iron from aie.iron import ExternalFunction from aie.iron import ObjectFifo, Worker, Runtime, Program -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ @@ -79,7 +78,7 @@ def core_body(of_in, of_out, func_to_apply): rt.drain(of_out.cons(), B, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() def test_cache_lambda_functions(): diff --git a/test/python/npu-xrt/test_jit_compilation.py b/test/python/npu-xrt/test_jit_compilation.py index d73fcbf0c03..418a273ed2f 100644 --- a/test/python/npu-xrt/test_jit_compilation.py +++ b/test/python/npu-xrt/test_jit_compilation.py @@ -13,7 +13,6 @@ import aie.iron as iron from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ @@ -81,7 +80,7 @@ def core_body(of_in1, of_in2, of_out): rt.drain(of_out.cons(), C, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() @pytest.mark.parametrize("num_elements", [16, 64]) diff --git a/test/python/npu-xrt/test_jit_extern_functions.py b/test/python/npu-xrt/test_jit_extern_functions.py index e3db9127ef2..515574fa9d3 100644 --- a/test/python/npu-xrt/test_jit_extern_functions.py +++ b/test/python/npu-xrt/test_jit_extern_functions.py @@ -16,7 +16,6 @@ import aie.iron as iron from aie.iron import ExternalFunction, jit from aie.iron import ObjectFifo, Worker, Runtime, Program -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ @@ -82,7 +81,7 @@ def 
core_body(of_in, of_out, func_to_apply): rt.drain(of_out.cons(), B, wait=True) # Place program components (assign them resources on the device) and generate an MLIR module - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() def test_simple_add_one(): diff --git a/test/python/npu-xrt/test_jit_extern_functions_inside_jit.py b/test/python/npu-xrt/test_jit_extern_functions_inside_jit.py index 12f3c3e7476..0f41ff42fc7 100644 --- a/test/python/npu-xrt/test_jit_extern_functions_inside_jit.py +++ b/test/python/npu-xrt/test_jit_extern_functions_inside_jit.py @@ -14,7 +14,6 @@ import aie.iron as iron from aie.iron import ExternalFunction, jit from aie.iron import ObjectFifo, Worker, Runtime, Program -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ @@ -93,7 +92,7 @@ def core_body(of_in, of_out, func_to_apply): rt.drain(of_out.cons(), B, wait=True) # Place program components and generate an MLIR module - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() @jit(is_placed=False) @@ -177,7 +176,7 @@ def core_body(of_in, of_out, func_to_apply): rt.drain(of_out.cons(), B, wait=True) # Place program components and generate an MLIR module - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() @jit(is_placed=False) @@ -254,7 +253,7 @@ def core_body(of_in, of_out, func_to_apply): rt.drain(of_out.cons(), B, wait=True) # Place program components and generate an MLIR module - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() def test_transform_with_internal_func_with_options_inside(): diff --git a/test/python/npu-xrt/test_jit_trace.py 
b/test/python/npu-xrt/test_jit_trace.py index 3a8e4298d74..963a9423556 100644 --- a/test/python/npu-xrt/test_jit_trace.py +++ b/test/python/npu-xrt/test_jit_trace.py @@ -18,7 +18,6 @@ from aie.utils import tensor from aie.utils.trace import TraceConfig, parse_trace from aie.iron import Kernel, ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.controlflow import range_ @@ -55,7 +54,7 @@ def design(a_in, c_out, trace_config=None): rt.fill(of_in.prod(), a) rt.start(worker) rt.drain(of_out.cons(), c, wait=True) - return Program(iron.get_current_device(), rt).resolve_program(SequentialPlacer()) + return Program(iron.get_current_device(), rt).resolve_program() @pytest.mark.parametrize("trace_size", [8192]) diff --git a/test/python/npu-xrt/test_objectfifo.py b/test/python/npu-xrt/test_objectfifo.py index d582b1c3a1d..2dcb7128a5c 100644 --- a/test/python/npu-xrt/test_objectfifo.py +++ b/test/python/npu-xrt/test_objectfifo.py @@ -16,9 +16,6 @@ NPU1, NPU2, Tile, - AnyMemTile, - AnyComputeTile, - AnyShimTile, ) from aie.iron.dataflow.objectfifo import ObjectFifo from aie.iron.dataflow.endpoint import ObjectFifoEndpoint @@ -30,14 +27,22 @@ def device(request): def test_can_used_shared_mem(device): + """NOTE: can_used_shared_mem() has been removed. + + Memory affinity validation now happens in MLIR placement pass after + tiles have concrete coordinates. 
This test is deprecated.""" + pytest.skip( + "can_used_shared_mem() removed - validation moved to MLIR placement pass" + ) + n_ty = np.ndarray[(1024,), np.dtype[np.int32]] # Legal affinity of_legal = ObjectFifo(n_ty) of_legal.prod().endpoint = ObjectFifoEndpoint(Tile(1, 2)) of_legal.cons().endpoint = ObjectFifoEndpoint(Tile(1, 3)) - assert of_legal.can_used_shared_mem(device) - assert of_legal.can_used_shared_mem(device, cons_only=True) + # assert of_legal.can_used_shared_mem(device) + # assert of_legal.can_used_shared_mem(device, cons_only=True) # Illegal affinity of_illegal = ObjectFifo(n_ty) @@ -72,16 +77,18 @@ def test_can_used_shared_mem(device): # Forwarded ObjectFifo of_forward = ObjectFifo(n_ty) of_forward.prod().endpoint = ObjectFifoEndpoint(Tile(1, 2)) - forwarded = of_forward.cons().forward(placement=AnyMemTile) + forwarded = of_forward.cons().forward() # Default MemTile placement forwarded.cons().endpoint = ObjectFifoEndpoint(Tile(1, 3)) with pytest.raises(ValueError): of_forward.can_used_shared_mem(device) with pytest.raises(ValueError): forwarded.can_used_shared_mem(device) - # AnyComputeTile + # Unconstrained Tile (would need type set by context) + # This test is deprecated - can't validate without placement + pytest.skip("Any*Tile removed - validation moved to MLIR placement pass") of_any_compute = ObjectFifo(n_ty) - of_any_compute.prod().endpoint = ObjectFifoEndpoint(AnyComputeTile) + of_any_compute.prod().endpoint = ObjectFifoEndpoint(Tile()) of_any_compute.cons().endpoint = ObjectFifoEndpoint(Tile(1, 3)) with pytest.raises(ValueError): of_any_compute.can_used_shared_mem(device) diff --git a/test/python/objFifo_bd_chain_repeat.py b/test/python/objFifo_bd_chain_repeat.py index 6e176574292..3df4b53118b 100644 --- a/test/python/objFifo_bd_chain_repeat.py +++ b/test/python/objFifo_bd_chain_repeat.py @@ -8,7 +8,6 @@ import sys import numpy as np from aie.iron import ObjectFifo, Program, Runtime -from aie.iron.placers import SequentialPlacer from 
aie.iron.device import NPU1Col1 @@ -34,7 +33,7 @@ def test_objectfifo_bd_chain_scenarios(): rt.drain(of_mem_to_compute.cons(), c_out, wait=True) my_program = Program(dev, rt) - module = my_program.resolve_program(SequentialPlacer()) + module = my_program.resolve_program() print(module) diff --git a/test/python/placer_tests.py b/test/python/placer_tests.py index 0c2fc166862..1dce9f08a18 100644 --- a/test/python/placer_tests.py +++ b/test/python/placer_tests.py @@ -1,20 +1,26 @@ -# Copyright (C) 2025, Advanced Micro Devices, Inc. +# Copyright (C) 2025-2026, Advanced Micro Devices, Inc. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import numpy as np from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer -from aie.iron.device import NPU2, AnyComputeTile, Tile +from aie.iron.device import NPU2, Tile from aie.helpers.util import np_ndarray_type_get_shape from util import construct_and_print_module # RUN: %python %s | FileCheck %s +# This test checks that IRON emits aie.logical_tile operations correctly. +# Tile placement is handled by the MLIR -aie-place-tiles pass.
+ # CHECK-LABEL: TEST: objectfifo_order -# CHECK: aie.objectfifo @in_A -# CHECK: aie.objectfifo @in_B -# CHECK: aie.objectfifo @out_C +# CHECK: %[[WORKER:.*]] = aie.logical_tile +# CHECK: %[[SHIM_A:.*]] = aie.logical_tile +# CHECK: %[[SHIM_B:.*]] = aie.logical_tile +# CHECK: %[[SHIM_C:.*]] = aie.logical_tile +# CHECK: aie.objectfifo @in_A(%[[SHIM_A]], {%[[WORKER]]}, {{.*}}) +# CHECK: aie.objectfifo @in_B(%[[SHIM_B]], {%[[WORKER]]}, {{.*}}) +# CHECK: aie.objectfifo @out_C(%[[WORKER]], {%[[SHIM_C]]}, {{.*}}) @construct_and_print_module def objectfifo_order(module): N = 4096 @@ -38,14 +44,20 @@ def core_fn(in_A, in_B, out_C): rt.fill(of_in_B.prod(), B) rt.drain(of_out_C.cons(), C, wait=True) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: shim_three_in -# CHECK: %[[shim_noc_tile_0_0:.+]] = aie.tile -# CHECK: %[[shim_noc_tile_1_0:.+]] = aie.tile -# CHECK-NOT: %[[shim_noc_tile_2_0:.+]] = aie.tile(2, 0) +# CHECK: %[[WORKER1:.*]] = aie.logical_tile +# CHECK: %[[WORKER2:.*]] = aie.logical_tile +# CHECK: %[[WORKER3:.*]] = aie.logical_tile +# CHECK: %[[SHIM1:.*]] = aie.logical_tile +# CHECK: %[[SHIM2:.*]] = aie.logical_tile +# CHECK: %[[SHIM3:.*]] = aie.logical_tile +# CHECK: aie.objectfifo @in_0(%[[SHIM1]], {%[[WORKER1]]}, {{.*}}) +# CHECK: aie.objectfifo @in_1(%[[SHIM2]], {%[[WORKER2]]}, {{.*}}) +# CHECK: aie.objectfifo @in_2(%[[SHIM3]], {%[[WORKER3]]}, {{.*}}) @construct_and_print_module def shim_three_in(module): N = 4096 @@ -72,13 +84,18 @@ def core_fn(of_in): rt.fill(of_ins[1].prod(), B) rt.fill(of_ins[2].prod(), C) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: shim_two_in_one_out -# CHECK: %[[shim_noc_tile_0_0:.+]] = aie.tile(0, 0) -# CHECK-NOT: %[[shim_noc_tile_1_0:.+]] = aie.tile(1, 0) +# CHECK: %[[WORKER:.*]] = aie.logical_tile +# CHECK: %[[SHIM_A:.*]] = 
aie.logical_tile +# CHECK: %[[SHIM_B:.*]] = aie.logical_tile +# CHECK: %[[SHIM_C:.*]] = aie.logical_tile +# CHECK: aie.objectfifo @in_A(%[[SHIM_A]], {%[[WORKER]]}, {{.*}}) +# CHECK: aie.objectfifo @in_B(%[[SHIM_B]], {%[[WORKER]]}, {{.*}}) +# CHECK: aie.objectfifo @out_C(%[[WORKER]], {%[[SHIM_C]]}, {{.*}}) @construct_and_print_module def shim_two_in_one_out(module): N = 4096 @@ -102,13 +119,16 @@ def core_fn(in_A, in_B, out_C): rt.fill(of_in_B.prod(), B) rt.drain(of_out_C.cons(), C, wait=True) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: compute_three_in -# CHECK: %[[tile_0_2:.+]] = aie.tile(0, 2) -# CHECK-NOT: %[[tile_0_3:.+]] = aie.tile(0, 3) +# CHECK: %[[WORKER:.*]] = aie.logical_tile +# CHECK: %{{.*}} = aie.logical_tile +# CHECK: aie.objectfifo @iof2 +# CHECK: aie.objectfifo @of0 +# CHECK: aie.objectfifo @of1 @construct_and_print_module def compute_three_in(module): n = 1024 @@ -131,13 +151,24 @@ def core_fn(of_0, of_1, of_2): rt.fill(of_1.prod(), B) rt.fill(of_2.prod(), C) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: compute_one_in_two_links -# CHECK: %[[tile_0_2:.+]] = aie.tile -# CHECK: %[[tile_0_3:.+]] = aie.tile +# CHECK: %[[WORKER:.*]] = aie.logical_tile +# CHECK: %[[SHIM_IN1:.*]] = aie.logical_tile +# CHECK: %[[MEM1:.*]] = aie.logical_tile +# CHECK: %[[SHIM_IN2:.*]] = aie.logical_tile +# CHECK: %[[MEM2:.*]] = aie.logical_tile +# CHECK: %[[SHIM_OF0:.*]] = aie.logical_tile +# CHECK: %[[SHIM_OUT1:.*]] = aie.logical_tile +# CHECK: %[[SHIM_OUT2:.*]] = aie.logical_tile +# CHECK: aie.objectfifo @in1(%[[SHIM_IN1]], {%[[MEM1]]}, {{.*}}) +# CHECK: aie.objectfifo @out1(%[[MEM1]], {%[[SHIM_OUT1]]}, {{.*}}) +# CHECK: aie.objectfifo @in2(%[[SHIM_IN2]], {%[[MEM2]]}, {{.*}}) +# CHECK: aie.objectfifo @out_2(%[[MEM2]], {%[[SHIM_OUT2]]}, {{.*}}) +# CHECK: 
aie.objectfifo @of0(%[[SHIM_OF0]], {%[[WORKER]]}, {{.*}}) @construct_and_print_module def compute_one_in_two_links(module): n = 1024 @@ -147,12 +178,9 @@ def compute_one_in_two_links(module): of_0 = ObjectFifo(n_ty, name="of0") of_in1 = ObjectFifo(n_ty, name="in1") of_in2 = ObjectFifo(n_ty, name="in2") - of_out1 = of_in1.cons().forward( - obj_type=n_ty, name="out1", placement=AnyComputeTile - ) - of_out2 = of_in2.cons().forward( - obj_type=n_ty, name="out_2", placement=AnyComputeTile - ) + # Use default MemTile placement for forward() + of_out1 = of_in1.cons().forward(obj_type=n_ty, name="out1") + of_out2 = of_in2.cons().forward(obj_type=n_ty, name="out_2") def core_fn(of_in0): pass @@ -168,13 +196,16 @@ def core_fn(of_in0): rt.drain(of_out1.cons(), D, wait=True) rt.drain(of_out2.cons(), E, wait=True) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: compute_partial_placement -# CHECK: %[[tile_0_2:.+]] = aie.tile -# CHECK: %[[tile_0_4:.+]] = aie.tile +# CHECK: %[[WORKER:.*]] = aie.logical_tile(0, 4) +# CHECK: %[[SHIM_IN1:.*]] = aie.logical_tile +# CHECK: %[[MEM1:.*]] = aie.logical_tile +# CHECK: %[[SHIM_IN2:.*]] = aie.logical_tile +# CHECK: %[[MEM2:.*]] = aie.logical_tile @construct_and_print_module def compute_partial_placement(module): n = 1024 @@ -184,12 +215,9 @@ def compute_partial_placement(module): of_0 = ObjectFifo(n_ty, name="of0") of_in1 = ObjectFifo(n_ty, name="in1") of_in2 = ObjectFifo(n_ty, name="in2") - of_out1 = of_in1.cons().forward( - obj_type=n_ty, name="out1", placement=AnyComputeTile - ) - of_out2 = of_in2.cons().forward( - obj_type=n_ty, name="out_2", placement=AnyComputeTile - ) + # Use default MemTile placement for forward() + of_out1 = of_in1.cons().forward(obj_type=n_ty, name="out1") + of_out2 = of_in2.cons().forward(obj_type=n_ty, name="out_2") def core_fn(of_in0): pass @@ -205,15 +233,20 @@ def core_fn(of_in0): rt.drain(of_out1.cons(), D, 
wait=True) rt.drain(of_out2.cons(), E, wait=True) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: mem_eight_in_three_out -# CHECK: %[[mem_tile_0_1:.+]] = aie.tile -# CHECK: %[[shim_noc_tile_0_0:.+]] = aie.tile -# CHECK: %[[mem_tile_1_1:.+]] = aie.tile -# CHECK: %[[shim_noc_tile_1_0:.+]] = aie.tile +# CHECK-DAG: aie.logical_tile +# CHECK-DAG: aie.logical_tile +# CHECK-DAG: aie.logical_tile +# CHECK-DAG: aie.objectfifo @of_mem_in_0 +# CHECK-DAG: aie.objectfifo @of_mem_in_6 +# CHECK-DAG: aie.objectfifo @out_A +# CHECK-DAG: aie.objectfifo @out_B +# CHECK-DAG: aie.objectfifo @out_C +# CHECK: aie.core @construct_and_print_module def mem_eight_in_three_out(module): N = 6000 @@ -254,14 +287,17 @@ def core_fn(of_out): rt.drain(of_out_B.cons(), B, wait=True) rt.drain(of_out_C.cons(), C, wait=True) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: compute_three_in_col_lim -# CHECK: %[[tile_0_2:.+]] = aie.tile -# CHECK: %[[tile_0_3:.+]] = aie.tile -# CHECK: %[[tile_1_2:.+]] = aie.tile +# CHECK: %[[WORKER1:.*]] = aie.logical_tile +# CHECK: %[[WORKER2:.*]] = aie.logical_tile +# CHECK: %[[WORKER3:.*]] = aie.logical_tile +# CHECK: %[[SHIM1:.*]] = aie.logical_tile +# CHECK: %[[SHIM2:.*]] = aie.logical_tile +# CHECK: %[[SHIM3:.*]] = aie.logical_tile @construct_and_print_module def compute_three_in_col_lim(module): n = 1024 @@ -289,5 +325,6 @@ def core_fn(of): rt.fill(of_1.prod(), B) rt.fill(of_2.prod(), C) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer(cores_per_col)) + # NOTE: cores_per_col parameter will be supported by MLIR placement pass + module = Program(NPU2(), rt).resolve_program() return module diff --git a/test/python/runtimesequence.py b/test/python/runtimesequence.py index 0062a5cdc9f..a6a3ee99b06 100644 --- a/test/python/runtimesequence.py +++ 
b/test/python/runtimesequence.py @@ -1,8 +1,7 @@ -# Copyright (C) 2025, Advanced Micro Devices, Inc. +# Copyright (C) 2025-2026, Advanced Micro Devices, Inc. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception import numpy as np from aie.iron import ObjectFifo, Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU2 from util import construct_and_print_module @@ -10,12 +9,12 @@ # CHECK-LABEL: TEST: task_group_drain_sequence -# CHECK: aiex.dma_start_task(%0) # CHECK: aiex.dma_start_task(%1) # CHECK: aiex.dma_start_task(%2) -# CHECK: aiex.dma_await_task(%1) +# CHECK: aiex.dma_start_task(%3) # CHECK: aiex.dma_await_task(%2) -# CHECK: aiex.dma_free_task(%0) +# CHECK: aiex.dma_await_task(%3) +# CHECK: aiex.dma_free_task(%1) @construct_and_print_module def task_group_drain_sequence(module): n = 1024 @@ -41,17 +40,17 @@ def core_fn(of_0, of_1, of_2): rt.drain(of_2.cons(), C, task_group=tg, wait=True) rt.finish_task_group(tg) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: default_rt_drain_sequence -# CHECK: aiex.dma_start_task(%0) # CHECK: aiex.dma_start_task(%1) # CHECK: aiex.dma_start_task(%2) -# CHECK: aiex.dma_await_task(%1) +# CHECK: aiex.dma_start_task(%3) # CHECK: aiex.dma_await_task(%2) -# CHECK: aiex.dma_free_task(%0) +# CHECK: aiex.dma_await_task(%3) +# CHECK: aiex.dma_free_task(%1) @construct_and_print_module def default_rt_drain_sequence(module): n = 1024 @@ -75,15 +74,15 @@ def core_fn(of_0, of_1, of_2): rt.drain(of_1.cons(), B, wait=True) rt.drain(of_2.cons(), C, wait=True) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: default_rt_basic_sequence -# CHECK: aiex.dma_start_task(%0) # CHECK: aiex.dma_start_task(%1) -# CHECK: aiex.dma_await_task(%1) -# CHECK: aiex.dma_free_task(%0) +# CHECK: 
aiex.dma_start_task(%2) +# CHECK: aiex.dma_await_task(%2) +# CHECK: aiex.dma_free_task(%1) @construct_and_print_module def default_rt_basic_sequence(module): n = 1024 @@ -105,17 +104,17 @@ def core_fn(of_0, of_1): rt.fill(of_0.prod(), A) rt.drain(of_1.cons(), B, wait=True) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: default_rt_fill_sequence -# CHECK: aiex.dma_start_task(%0) # CHECK: aiex.dma_start_task(%1) # CHECK: aiex.dma_start_task(%2) -# CHECK: aiex.dma_await_task(%2) -# CHECK: aiex.dma_free_task(%0) +# CHECK: aiex.dma_start_task(%3) +# CHECK: aiex.dma_await_task(%3) # CHECK: aiex.dma_free_task(%1) +# CHECK: aiex.dma_free_task(%2) @construct_and_print_module def default_rt_fill_sequence(module): n = 1024 @@ -139,17 +138,17 @@ def core_fn(of_0, of_1, of_2): rt.fill(of_1.prod(), B) rt.drain(of_2.cons(), C, wait=True) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: rt_drain_then_fill_sequence -# CHECK: aiex.dma_start_task(%0) # CHECK: aiex.dma_start_task(%1) # CHECK: aiex.dma_start_task(%2) -# CHECK: aiex.dma_await_task(%0) -# CHECK: aiex.dma_free_task(%1) +# CHECK: aiex.dma_start_task(%3) +# CHECK: aiex.dma_await_task(%1) # CHECK: aiex.dma_free_task(%2) +# CHECK: aiex.dma_free_task(%3) @construct_and_print_module def rt_drain_then_fill_sequence(module): n = 1024 @@ -173,7 +172,7 @@ def core_fn(of_0, of_1, of_2): rt.fill(of_0.prod(), A) rt.fill(of_1.prod(), B) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module @@ -205,19 +204,19 @@ def core_fn(of_0, of_1, of_2): rt.finish_task_group(tg) try: - Program(NPU2(), rt).resolve_program(SequentialPlacer()) + Program(NPU2(), rt).resolve_program() except Exception as e: print("success!") return module # CHECK-LABEL: TEST: 
rt_not_strict_mixed_sequence -# CHECK: aiex.dma_start_task(%0) # CHECK: aiex.dma_start_task(%1) # CHECK: aiex.dma_start_task(%2) -# CHECK: aiex.dma_await_task(%0) -# CHECK: aiex.dma_free_task(%1) +# CHECK: aiex.dma_start_task(%3) +# CHECK: aiex.dma_await_task(%1) # CHECK: aiex.dma_free_task(%2) +# CHECK: aiex.dma_free_task(%3) @construct_and_print_module def rt_not_strict_mixed_sequence(module): n = 1024 @@ -243,17 +242,17 @@ def core_fn(of_0, of_1, of_2): rt.fill(of_1.prod(), B) rt.finish_task_group(tg) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module # CHECK-LABEL: TEST: rt_two_task_group_sequence -# CHECK: aiex.dma_start_task(%0) # CHECK: aiex.dma_start_task(%1) # CHECK: aiex.dma_start_task(%2) -# CHECK: aiex.dma_await_task(%0) -# CHECK: aiex.dma_free_task(%1) +# CHECK: aiex.dma_start_task(%3) +# CHECK: aiex.dma_await_task(%1) # CHECK: aiex.dma_free_task(%2) +# CHECK: aiex.dma_free_task(%3) @construct_and_print_module def rt_two_task_group_sequence(module): n = 1024 @@ -281,5 +280,5 @@ def core_fn(of_0, of_1, of_2): rt.finish_task_group(tg) rt.finish_task_group(tg2) - module = Program(NPU2(), rt).resolve_program(SequentialPlacer()) + module = Program(NPU2(), rt).resolve_program() return module diff --git a/test/python/stack_size_definition.py b/test/python/stack_size_definition.py index 3a82c87d5ce..58444d4103d 100644 --- a/test/python/stack_size_definition.py +++ b/test/python/stack_size_definition.py @@ -4,7 +4,6 @@ # RUN: %python %s | FileCheck %s from aie.iron import Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1 # CHECK: {stack_size = 2048 : i32} @@ -17,7 +16,7 @@ my_program = Program(NPU1(), rt) -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() print(module) diff --git a/test/python/tile_allocation_scheme.py b/test/python/tile_allocation_scheme.py index 
dc8d4b192e5..54cfeb5e6fe 100644 --- a/test/python/tile_allocation_scheme.py +++ b/test/python/tile_allocation_scheme.py @@ -4,7 +4,6 @@ # RUN: %python %s | FileCheck %s from aie.iron import Program, Runtime, Worker -from aie.iron.placers import SequentialPlacer from aie.iron.device import NPU1, Tile # ============================================================================= @@ -18,7 +17,7 @@ my_program = Program(NPU1(), rt) -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() print(module) @@ -33,7 +32,7 @@ my_program = Program(NPU1(), rt) -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() print(module) @@ -52,7 +51,7 @@ my_program = Program(NPU1(), rt) -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() print(module) @@ -67,7 +66,7 @@ my_program = Program(NPU1(), rt) -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() print(module) @@ -82,6 +81,6 @@ my_program = Program(NPU1(), rt) -module = my_program.resolve_program(SequentialPlacer()) +module = my_program.resolve_program() print(module) diff --git a/test/python/tile_placement_patterns.py b/test/python/tile_placement_patterns.py new file mode 100644 index 00000000000..33cde515ce1 --- /dev/null +++ b/test/python/tile_placement_patterns.py @@ -0,0 +1,86 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# RUN: %python %s | FileCheck %s + +""" +Test tile placement patterns: partial coordinates, full coordinates, and mixed placement. +""" + +import numpy as np +from aie.iron import ObjectFifo, Program, Runtime, Worker +from aie.iron.device import NPU2, Tile +from util import construct_and_print_module + + +# CHECK-LABEL: TEST: partial_coordinates_col_only +# CHECK: aie.device(npu2) { +# CHECK: %[[WORKER:.*]] = aie.logical_tile(2, ?) 
+# CHECK: %[[SHIM_IN:.*]] = aie.logical_tile +# CHECK: %[[SHIM_OUT:.*]] = aie.logical_tile +# CHECK: aie.objectfifo @in(%[[SHIM_IN]], {%[[WORKER]]}, {{.*}}) +# CHECK: aie.objectfifo @out(%[[WORKER]], {%[[SHIM_OUT]]}, {{.*}}) +# CHECK: aie.core(%[[WORKER]]) +@construct_and_print_module +def partial_coordinates_col_only(module): + """Test partial coordinates with ObjectFifos referencing the LogicalTileOp.""" + n = 1024 + n_ty = np.ndarray[(n,), np.dtype[np.int32]] + + of_in = ObjectFifo(n_ty, name="in") + of_out = ObjectFifo(n_ty, name="out") + + def core_fn(of_in, of_out): + pass + + # Partial placement - column constrained, row unconstrained + worker = Worker(core_fn, [of_in.cons(), of_out.prod()], placement=Tile(col=2)) + + rt = Runtime() + with rt.sequence(n_ty, n_ty, n_ty) as (A, B, C): + rt.start(worker) + rt.fill(of_in.prod(), A) + rt.drain(of_out.cons(), C, wait=True) + + module = Program(NPU2(), rt).resolve_program() + return module + + +# CHECK-LABEL: TEST: multiple_workers_mixed_placement +# CHECK: aie.device(npu2) { +# CHECK-DAG: %[[W0:.*]] = aie.logical_tile(?, ?) +# CHECK-DAG: %[[W1:.*]] = aie.logical_tile(1, ?) 
+# CHECK-DAG: %[[W2:.*]] = aie.logical_tile(0, 2) +# CHECK-DAG: aie.objectfifo @of0({{.*}}, {%[[W0]]}, {{.*}}) +# CHECK-DAG: aie.objectfifo @of1({{.*}}, {%[[W1]]}, {{.*}}) +# CHECK-DAG: aie.objectfifo @of2({{.*}}, {%[[W2]]}, {{.*}}) +# CHECK-DAG: aie.core(%[[W0]]) +# CHECK-DAG: aie.core(%[[W1]]) +# CHECK-DAG: aie.core(%[[W2]]) +@construct_and_print_module +def multiple_workers_mixed_placement(module): + """Test mixed placement types - ObjectFifos reference correct LogicalTileOps.""" + n = 1024 + n_ty = np.ndarray[(n,), np.dtype[np.int32]] + + of_0 = ObjectFifo(n_ty, name="of0") + of_1 = ObjectFifo(n_ty, name="of1") + of_2 = ObjectFifo(n_ty, name="of2") + + def core_fn(of_in): + pass + + # Mix of placement strategies + worker0 = Worker(core_fn, [of_0.cons()]) # Unconstrained + worker1 = Worker(core_fn, [of_1.cons()], placement=Tile(col=1)) # Partial + worker2 = Worker(core_fn, [of_2.cons()], placement=Tile(0, 2)) # Full + + rt = Runtime() + with rt.sequence(n_ty, n_ty, n_ty) as (A, B, C): + rt.start(worker0, worker1, worker2) + rt.fill(of_0.prod(), A) + rt.fill(of_1.prod(), B) + rt.fill(of_2.prod(), C) + + module = Program(NPU2(), rt).resolve_program() + return module diff --git a/test/python/tile_validation.py b/test/python/tile_validation.py new file mode 100644 index 00000000000..c9be834d137 --- /dev/null +++ b/test/python/tile_validation.py @@ -0,0 +1,121 @@ +# Copyright (C) 2026, Advanced Micro Devices, Inc. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# RUN: %python %s + +""" +Test Tile validation: error checking for type mismatches and duplicate compute tiles. 
+""" + +import pytest +import numpy as np +from aie.iron import Worker, ObjectFifo +from aie.iron.device import Tile, NPU2 +from aie.extras.context import mlir_mod_ctx +from aie.dialects.aie import device as aie_device, AIEDevice + + +def test_worker_rejects_wrong_tile_type(): + """Worker should reject non-compute tile types.""" + + def kernel(): + pass + + # Should reject mem tile + with pytest.raises(ValueError, match="Worker requires Tile.COMPUTE"): + Worker(kernel, placement=Tile(tile_type=Tile.MEMORY)) + + # Should reject shim tile + with pytest.raises(ValueError, match="Worker requires Tile.COMPUTE"): + Worker(kernel, placement=Tile(tile_type=Tile.SHIM)) + + # Should accept compute tile + worker = Worker(kernel, placement=Tile(tile_type=Tile.COMPUTE)) + assert worker.tile.tile_type == Tile.COMPUTE + + # Should accept None and set to compute + worker = Worker(kernel, placement=Tile()) + assert worker.tile.tile_type == Tile.COMPUTE + + +def test_tile_type_coordinate_mismatch(): + """Tile type should match device coordinates.""" + + def kernel(): + pass + + dev = NPU2() + + with mlir_mod_ctx() as ctx: + + @aie_device(AIEDevice.npu2) + def device_body(): + # User tries to set worker on shim tile, should error + worker = Worker(kernel, placement=Tile(0, 0, tile_type=Tile.COMPUTE)) + + with pytest.raises(ValueError, match="Tile type mismatch"): + dev.resolve_tile(worker.tile) + + +def test_duplicate_compute_tile_error(): + """Same compute tile cannot be allocated twice.""" + + def kernel1(): + pass + + def kernel2(): + pass + + dev = NPU2() + + with mlir_mod_ctx() as ctx: + + @aie_device(AIEDevice.npu2) + def device_body(): + # Two workers with same coordinates + worker1 = Worker(kernel1, placement=Tile(0, 2)) + worker2 = Worker(kernel2, placement=Tile(0, 2)) + + # First worker succeeds + dev.resolve_tile(worker1.tile) + + # Second worker should fail - duplicate compute tile + with pytest.raises(ValueError, match="already allocated"): + 
dev.resolve_tile(worker2.tile) + + +def test_invalid_tile_type_string(): + """Invalid tile_type string should be rejected.""" + + with pytest.raises(ValueError, match="Invalid tile_type"): + Tile(tile_type="invalid") + + with pytest.raises(ValueError, match="Invalid tile_type"): + Tile(tile_type="core") # Should be "compute" not "core" + + +def test_objectfifo_link_rejects_shim(): + """ObjectFifoLink should reject shim tile type (only memory or compute allowed).""" + + n_ty = np.ndarray[(1024,), np.dtype[np.int32]] + of_in = ObjectFifo(n_ty, name="in") + + # Should reject shim type for forward() + with pytest.raises( + ValueError, match="ObjectFifoLink requires Tile.MEMORY or Tile.COMPUTE" + ): + of_out = of_in.cons().forward(placement=Tile(tile_type=Tile.SHIM)) + + # Should accept memory (default) + of_out = of_in.cons().forward() # OK - defaults to MEMORY + + # Should accept compute (special case) + of_out2 = of_in.cons().forward(placement=Tile(0, 2)) # OK if (0,2) is compute + + +if __name__ == "__main__": + test_worker_rejects_wrong_tile_type() + test_tile_type_coordinate_mismatch() + test_duplicate_compute_tile_error() + test_invalid_tile_type_string() + test_objectfifo_link_rejects_shim()